mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
56 lines
1.8 KiB
C#
56 lines
1.8 KiB
C#
namespace UglyToad.Examples
|
|
{
|
|
using System;
|
|
using System.Text;
|
|
using PdfPig;
|
|
using PdfPig.Content;
|
|
|
|
/// <summary>
/// Example: opens a PDF document, walks every page word by word and prints the
/// reconstructed text to the console, inserting line breaks where the text
/// appears to move to a new line.
/// </summary>
public static class OpenDocumentAndExtractWords
{
    /// <summary>
    /// Two consecutive words whose first-letter baselines differ vertically by more
    /// than this amount are treated as being on different lines.
    /// Declared as <c>int</c> so the comparison compiles regardless of the numeric
    /// type used for the baseline coordinate.
    /// </summary>
    private const int LineBreakBaselineGap = 3;

    /// <summary>
    /// Reads the PDF at <paramref name="filePath"/> and writes its words to standard output.
    /// Words on the same line are separated by a single space; a line break is written
    /// when the baseline moves by more than <see cref="LineBreakBaselineGap"/> or when
    /// a new page starts.
    /// </summary>
    /// <param name="filePath">Path of the PDF file passed to <c>PdfDocument.Open</c>.</param>
    public static void Run(string filePath)
    {
        var sb = new StringBuilder();

        using (var document = PdfDocument.Open(filePath))
        {
            // Tracks whether a separator is needed at all (nothing precedes the very first word).
            var hasWrittenWord = false;

            foreach (var page in document.GetPages())
            {
                // Reset per page: a baseline taken from the previous page says nothing about
                // whether the first word of this page continues the same line, so the
                // comparison must never span a page boundary.
                Word previous = null;

                foreach (var word in page.GetWords())
                {
                    if (hasWrittenWord)
                    {
                        // previous == null here means this is the first word of a later page:
                        // always start it on a fresh line.
                        if (previous == null || IsOnDifferentLine(previous, word))
                        {
                            sb.AppendLine();
                        }
                        else
                        {
                            sb.Append(" ");
                        }
                    }

                    sb.Append(word.Text);

                    previous = word;
                    hasWrittenWord = true;
                }
            }
        }

        Console.WriteLine(sb.ToString());
    }

    /// <summary>
    /// Decides whether <paramref name="current"/> should start a new output line
    /// relative to <paramref name="previous"/>, based on the vertical distance between
    /// the start baselines of each word's first letter.
    /// </summary>
    /// <param name="previous">The word written immediately before, on the same page.</param>
    /// <param name="current">The word about to be written.</param>
    /// <returns>
    /// <c>true</c> when both words have letters and their baselines differ by more than
    /// <see cref="LineBreakBaselineGap"/>; otherwise <c>false</c>.
    /// </returns>
    private static bool IsOnDifferentLine(Word previous, Word current)
    {
        // Without at least one letter in each word there is no baseline to compare,
        // so fall back to treating the words as belonging to the same line.
        if (previous.Letters.Count == 0 || current.Letters.Count == 0)
        {
            return false;
        }

        var baselineGap = Math.Abs(previous.Letters[0].StartBaseLine.Y - current.Letters[0].StartBaseLine.Y);

        return baselineGap > LineBreakBaselineGap;
    }
}
|
|
}
|