mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
New GetText() option: NegativeGapAsWhitespace
When parsing PDF files with tables containing multiple lines in a cell or "merged" cells, the separate words can appear out of horizontal order. This option can better predict the spaces between the words.
This commit is contained in:
parent
97ae62ce6f
commit
a2ae1f16d6
@ -110,6 +110,11 @@
|
||||
{
|
||||
var gap = letter.StartBaseLine.X - previous.EndBaseLine.X;
|
||||
|
||||
if (options.NegativeGapAsWhitespace)
|
||||
{
|
||||
gap = Math.Abs(gap);
|
||||
}
|
||||
|
||||
if (WhitespaceSizeStatistics.IsProbablyWhitespace(gap, previous))
|
||||
{
|
||||
sb.Append(" ");
|
||||
@ -178,6 +183,13 @@
|
||||
/// character. Default <see langword="false"/>.
|
||||
/// </summary>
|
||||
public bool ReplaceWhitespaceWithSpace { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// When parsing PDF files with tables containing multiple lines in a cell or "merged" cells,
|
||||
/// the separate words can appear out of horizontal order. This option can better predict the
|
||||
/// spaces between the words. Default <see langword="false"/>.
|
||||
/// </summary>
|
||||
public bool NegativeGapAsWhitespace { get; set; }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user