diff --git a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs
index 80f5f58a..49d8d912 100644
--- a/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs
+++ b/src/UglyToad.PdfPig.DocumentLayoutAnalysis/TextExtractor/ContentOrderTextExtractor.cs
@@ -110,6 +110,11 @@
{
var gap = letter.StartBaseLine.X - previous.EndBaseLine.X;
+ if (options.NegativeGapAsWhitespace)
+ {
+ gap = Math.Abs(gap);
+ }
+
if (WhitespaceSizeStatistics.IsProbablyWhitespace(gap, previous))
{
sb.Append(" ");
@@ -178,6 +183,13 @@
/// character. Default <see langword="false"/>.
/// </summary>
public bool ReplaceWhitespaceWithSpace { get; set; }
+
+ /// <summary>
+ /// When parsing PDF files with tables containing multiple lines in a cell or "merged" cells,
+ /// the separate words can appear out of horizontal order. When enabled, the absolute value of the
+ /// gap between letters is used to better predict the spaces between the words. Default <see langword="false"/>.
+ /// </summary>
+ public bool NegativeGapAsWhitespace { get; set; }
}
}
}