From 557d8bc9486e374641887b61f4275756311c71a7 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sun, 7 Jul 2019 13:53:25 +0100 Subject: [PATCH] map missing character codes directly #44 Previously, if no matching Unicode value was found for a character code, we would return a null letter. Instead, we now map from the character code directly to a character. This seems to work for most documents, except where there are ligatures, e.g. fi or ff, but is still better than not returning anything. --- src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs | 2 +- src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs index cba50576..c1f028be 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs @@ -24,7 +24,7 @@ var page2 = document.GetPage(2); - Assert.Contains("isθc={θc1,θc2,...,θcn},", page2.Text); + Assert.Contains("is~θc={θc1,θc2,...,θcn},", page2.Text); } } diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index 915f8130..7568dc54 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -137,6 +137,8 @@ if (!foundUnicode || unicode == null) { log.Warn($"We could not find the corresponding character with code {code} in font {font.Name}."); + // Try casting directly to string as in PDFBox 1.8. + unicode = new string((char)code, 1); } var wordSpacing = 0m;