mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
add examples directory and first example
This commit is contained in:
parent
25314cc79d
commit
563985e2b4
55
examples/OpenDocumentAndExtractWords.cs
Normal file
55
examples/OpenDocumentAndExtractWords.cs
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
namespace UglyToad.Examples
{
    using System;
    using System.Text;
    using PdfPig;
    using PdfPig.Content;

    /// <summary>
    /// Example showing how to open a PDF document, read the words of every page
    /// and print them to the console with line breaks re-inserted.
    /// </summary>
    public static class OpenDocumentAndExtractWords
    {
        /// <summary>
        /// Two consecutive words whose first letters' baseline Y values differ by more
        /// than this amount are considered to be on different lines.
        /// The value is expressed in whatever unit <c>StartBaseLine.Y</c> uses.
        /// </summary>
        private const int NewLineBaselineThreshold = 3;

        /// <summary>
        /// Opens the PDF at <paramref name="filePath"/>, concatenates the text of all
        /// words on all pages and writes the result to the console.
        /// Words on the same line are separated by a single space; a line break is
        /// emitted when the baseline changes or when a new page starts.
        /// </summary>
        /// <param name="filePath">Path of the PDF file to open.</param>
        public static void Run(string filePath)
        {
            var sb = new StringBuilder();

            using (var document = PdfDocument.Open(filePath))
            {
                Word previous = null;

                foreach (var page in document.GetPages())
                {
                    // Baselines of different pages are unrelated, so comparing the last
                    // word of one page with the first word of the next could glue them
                    // together with a space. A page change therefore always breaks the line.
                    var isFirstWordOnPage = true;

                    foreach (var word in page.GetWords())
                    {
                        if (previous != null)
                        {
                            if (isFirstWordOnPage || StartsNewLine(previous, word))
                            {
                                sb.AppendLine();
                            }
                            else
                            {
                                sb.Append(' ');
                            }
                        }

                        sb.Append(word.Text);

                        previous = word;
                        isFirstWordOnPage = false;
                    }
                }
            }

            Console.WriteLine(sb.ToString());
        }

        /// <summary>
        /// Decides whether <paramref name="current"/> sits on a different line than
        /// <paramref name="previous"/> by comparing the baseline of their first letters.
        /// Returns <c>false</c> when either word has no letters to compare.
        /// </summary>
        private static bool StartsNewLine(Word previous, Word current)
        {
            if (previous.Letters.Count == 0 || current.Letters.Count == 0)
            {
                return false;
            }

            var baselineGap = Math.Abs(previous.Letters[0].StartBaseLine.Y - current.Letters[0].StartBaseLine.Y);

            return baselineGap > NewLineBaselineThreshold;
        }
    }
}
|
51
examples/Program.cs
Normal file
51
examples/Program.cs
Normal file
@ -0,0 +1,51 @@
|
|||||||
|
namespace UglyToad.Examples
{
    using System;
    using System.Collections.Generic;
    using System.IO;
    using System.Linq;

    /// <summary>
    /// Console entry point which lists the available examples and runs the one
    /// the user selects by number.
    /// </summary>
    public static class Program
    {
        /// <summary>
        /// Prints the example menu, then repeatedly prompts for a choice until the
        /// user enters 'q' or the input stream ends.
        /// </summary>
        public static void Main()
        {
            Console.WriteLine("Welcome to the PdfPig examples gallery!");

            // The sample documents are taken from the integration test folder, addressed
            // relative to the directory the application is running from.
            var baseDirectory = AppDomain.CurrentDomain.BaseDirectory;
            var filesDirectory = Path.Combine(baseDirectory, "..", "..", "..", "..", "src", "UglyToad.PdfPig.Tests", "Integration", "Documents");

            // Menu number -> (display name, action to run).
            var examples = new Dictionary<int, (string name, Action action)>
            {
                {1,
                    ("Extract Words with newline detection",
                        () => OpenDocumentAndExtractWords.Run(Path.Combine(filesDirectory, "Two Page Text Only - from libre office.pdf")))
                }
            };

            var choices = string.Join(Environment.NewLine, examples.Select(x => $"{x.Key}: {x.Value.name}"));

            Console.WriteLine(choices);
            Console.WriteLine();

            while (true)
            {
                Console.Write("Enter a number to pick an example (enter 'q' to exit):");

                var val = Console.ReadLine();

                // ReadLine returns null once the input stream is exhausted (closed or
                // redirected stdin). Without this check the loop would spin forever
                // reporting an invalid option.
                if (val == null)
                {
                    return;
                }

                if (!int.TryParse(val, out var opt) || !examples.TryGetValue(opt, out var act))
                {
                    if (string.Equals(val, "q", StringComparison.OrdinalIgnoreCase))
                    {
                        return;
                    }

                    Console.WriteLine($"No option with value: {val}.");
                    continue;
                }

                act.action.Invoke();
            }
        }
    }
}
|
11
examples/UglyToad.Examples.csproj
Normal file
11
examples/UglyToad.Examples.csproj
Normal file
@ -0,0 +1,11 @@
|
|||||||
|
<Project Sdk="Microsoft.NET.Sdk">
<!--
  Project file for the examples application.
  It is built as an executable (OutputType Exe) targeting netcoreapp2.1 and
  references the PdfPig library project by relative path rather than as a package.
-->
|
||||||
|
|
||||||
|
<PropertyGroup>
|
||||||
|
<OutputType>Exe</OutputType>
|
||||||
|
<TargetFramework>netcoreapp2.1</TargetFramework>
|
||||||
|
</PropertyGroup>
|
||||||
|
|
||||||
|
<ItemGroup>
|
||||||
|
<ProjectReference Include="..\src\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
|
||||||
|
</ItemGroup>
|
||||||
|
</Project>
|
55
examples/UglyToad.Examples.sln
Normal file
55
examples/UglyToad.Examples.sln
Normal file
@ -0,0 +1,55 @@
|
|||||||
|
|
||||||
|
Microsoft Visual Studio Solution File, Format Version 12.00
|
||||||
|
# Visual Studio 15
|
||||||
|
VisualStudioVersion = 15.0.28307.1022
|
||||||
|
MinimumVisualStudioVersion = 10.0.40219.1
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.Examples", "UglyToad.Examples.csproj", "{F72DA3EE-FBED-4271-88CC-05D883FD4DAD}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.Core", "..\src\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj", "{B12C9CFF-879B-4C70-8C7B-7DBF953608C5}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.Fonts", "..\src\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj", "{371FB56D-E9BE-40A2-8419-5F5F6F8FE0C0}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.Tokenization", "..\src\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj", "{E5BD532A-B3D0-4975-80CD-8B0B6D70FD17}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.Tokens", "..\src\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj", "{50308C34-4074-4D36-AA93-57CFC812D831}"
|
||||||
|
EndProject
|
||||||
|
Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig", "..\src\UglyToad.PdfPig\UglyToad.PdfPig.csproj", "{75ED54D6-308F-44AD-B85E-C027F3AA80AE}"
|
||||||
|
EndProject
|
||||||
|
Global
|
||||||
|
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||||
|
Debug|Any CPU = Debug|Any CPU
|
||||||
|
Release|Any CPU = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||||
|
{F72DA3EE-FBED-4271-88CC-05D883FD4DAD}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{F72DA3EE-FBED-4271-88CC-05D883FD4DAD}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{F72DA3EE-FBED-4271-88CC-05D883FD4DAD}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{F72DA3EE-FBED-4271-88CC-05D883FD4DAD}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{B12C9CFF-879B-4C70-8C7B-7DBF953608C5}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{B12C9CFF-879B-4C70-8C7B-7DBF953608C5}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{B12C9CFF-879B-4C70-8C7B-7DBF953608C5}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{B12C9CFF-879B-4C70-8C7B-7DBF953608C5}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{371FB56D-E9BE-40A2-8419-5F5F6F8FE0C0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{371FB56D-E9BE-40A2-8419-5F5F6F8FE0C0}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{371FB56D-E9BE-40A2-8419-5F5F6F8FE0C0}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{371FB56D-E9BE-40A2-8419-5F5F6F8FE0C0}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{E5BD532A-B3D0-4975-80CD-8B0B6D70FD17}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{E5BD532A-B3D0-4975-80CD-8B0B6D70FD17}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{E5BD532A-B3D0-4975-80CD-8B0B6D70FD17}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{E5BD532A-B3D0-4975-80CD-8B0B6D70FD17}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{50308C34-4074-4D36-AA93-57CFC812D831}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{50308C34-4074-4D36-AA93-57CFC812D831}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{50308C34-4074-4D36-AA93-57CFC812D831}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{50308C34-4074-4D36-AA93-57CFC812D831}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
|
||||||
|
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Debug|Any CPU.Build.0 = Debug|Any CPU
|
||||||
|
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Release|Any CPU.ActiveCfg = Release|Any CPU
|
||||||
|
{75ED54D6-308F-44AD-B85E-C027F3AA80AE}.Release|Any CPU.Build.0 = Release|Any CPU
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(SolutionProperties) = preSolution
|
||||||
|
HideSolutionNode = FALSE
|
||||||
|
EndGlobalSection
|
||||||
|
GlobalSection(ExtensibilityGlobals) = postSolution
|
||||||
|
SolutionGuid = {03F738BD-74E0-4DE7-8063-0ACE16E931F1}
|
||||||
|
EndGlobalSection
|
||||||
|
EndGlobal
|
Loading…
Reference in New Issue
Block a user