diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/MOZILLA-2775-1.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/MOZILLA-2775-1.pdf
new file mode 100644
index 00000000..69b23864
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/MOZILLA-2775-1.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/MOZILLA-LINK-5251-1.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/MOZILLA-LINK-5251-1.pdf
new file mode 100644
index 00000000..144e9a76
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/MOZILLA-LINK-5251-1.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/PDFBOX-492-4.jar-8.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/PDFBOX-492-4.jar-8.pdf
new file mode 100644
index 00000000..69ca9cd8
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/PDFBOX-492-4.jar-8.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/Documents/TIKA-469-0.pdf b/src/UglyToad.PdfPig.Tests/Integration/Documents/TIKA-469-0.pdf
new file mode 100644
index 00000000..46ae1456
Binary files /dev/null and b/src/UglyToad.PdfPig.Tests/Integration/Documents/TIKA-469-0.pdf differ
diff --git a/src/UglyToad.PdfPig.Tests/Integration/ZapfDingbatsTests.cs b/src/UglyToad.PdfPig.Tests/Integration/ZapfDingbatsTests.cs
new file mode 100644
index 00000000..86a7c9b1
--- /dev/null
+++ b/src/UglyToad.PdfPig.Tests/Integration/ZapfDingbatsTests.cs
@@ -0,0 +1,63 @@
+namespace UglyToad.PdfPig.Tests.Integration
+{
+    using System.Linq;
+
+    /// <summary>
+    /// Integration tests asserting that pages of the sample documents expose the expected dingbat symbols as letter values.
+    /// </summary>
+    public class ZapfDingbatsTests
+    {
+        [Fact]
+        public void Type1Standard14Font1()
+        {
+            // Page 2 is expected to expose a black circle (U+25CF) among its letters.
+            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("TIKA-469-0")))
+            {
+                var page = document.GetPage(2);
+                Assert.Contains("●", page.Letters.Select(l => l.Value));
+            }
+        }
+
+        [Fact]
+        public void Type1Standard14Font2()
+        {
+            // Page 1 is expected to expose several dingbat symbols (scissors and telephone variants).
+            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-LINK-5251-1")))
+            {
+                var page = document.GetPage(1);
+
+                // Project the letter values once instead of re-enumerating the page for every assertion.
+                var letters = page.Letters.Select(l => l.Value).ToList();
+
+                Assert.Contains("✁", letters);
+                Assert.Contains("✂", letters);
+                Assert.Contains("✄", letters);
+                Assert.Contains("☎", letters);
+                Assert.Contains("✆", letters);
+                Assert.Contains("✇", letters);
+            }
+        }
+
+        [Fact]
+        public void Type1FontSimple1()
+        {
+            // Page 11 is expected to expose a black circle (U+25CF) among its letters.
+            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-2775-1")))
+            {
+                var page = document.GetPage(11);
+                Assert.Contains("●", page.Letters.Select(l => l.Value));
+            }
+        }
+
+        [Fact]
+        public void Type1FontSimple2()
+        {
+            // Page 1 is expected to expose a black square (U+25A0) among its letters.
+            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("PDFBOX-492-4.jar-8")))
+            {
+                var page = document.GetPage(1);
+                Assert.Contains("\u25a0", page.Letters.Select(l => l.Value));
+            }
+        }
+    }
+}
diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs
index 9418d8a9..d23beba6 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs
@@ -62,6 +62,12 @@
 
             Details = descriptor?.ToDetails(Name?.Data)
                       ?? FontDetails.GetDefault(Name?.Data);
+
+            // Assumption: a ZapfDingbats font never reaches this class. If this assertion fires,
+            // the glyph name to Unicode lookup of this class needs to be changed to handle ZapfDingbats.
+            System.Diagnostics.Debug.Assert(
+                !(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")),
+                "Unexpected ZapfDingbats font in TrueTypeSimpleFont: ZapfDingbats handling is assumed to be unnecessary here.");
         }
 
         public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeStandard14FallbackSimpleFont.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeStandard14FallbackSimpleFont.cs
index 0b3ad147..c4aa78a9 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeStandard14FallbackSimpleFont.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeStandard14FallbackSimpleFont.cs
@@ -42,6 +42,12 @@
                 fontMetrics.Weight == "Bold",
                 fontMetrics.Weight == "Bold" ? 700 : FontDetails.DefaultWeight,
                 fontMetrics.ItalicAngle != 0);
+
+            // Assumption: a ZapfDingbats font never reaches this class. If this assertion fires,
+            // the glyph name to Unicode lookup of this class needs to be changed to handle ZapfDingbats.
+            System.Diagnostics.Debug.Assert(
+                !(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")),
+                "Unexpected ZapfDingbats font in TrueTypeStandard14FallbackSimpleFont: ZapfDingbats handling is assumed to be unnecessary here.");
         }
 
         public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1FontSimple.cs
index 04f4e604..ca007695 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1FontSimple.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1FontSimple.cs
@@ -37,6 +37,8 @@
 
         private readonly TransformationMatrix fontMatrix;
 
+        private readonly bool isZapfDingbats;
+
         public NameToken Name { get; }
 
         public bool IsVertical { get; } = false;
@@ -80,6 +82,7 @@
             Name = name;
             Details = fontDescriptor?.ToDetails(name?.Data)
                       ?? FontDetails.GetDefault(name?.Data);
+            isZapfDingbats = encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats");
         }
 
         public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
@@ -124,6 +127,16 @@
 
             try
             {
+                if (isZapfDingbats)
+                {
+                    // Prefer the ZapfDingbats glyph list; fall back to the Adobe glyph list when it has no entry.
+                    value = GlyphList.ZapfDingbats.NameToUnicode(name);
+                    if (value is not null)
+                    {
+                        return true;
+                    }
+                }
+
                 value = GlyphList.AdobeGlyphList.NameToUnicode(name);
             }
             catch
diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs
index 51316f81..f8442613 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Simple/Type1Standard14Font.cs
@@ -18,6 +18,7 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
     {
         private readonly AdobeFontMetrics standardFontMetrics;
         private readonly Encoding encoding;
+        private readonly bool isZapfDingbats;
 
         public NameToken Name { get; }
 
@@ -39,6 +40,7 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
                 standardFontMetrics.Weight == "Bold",
                 standardFontMetrics.Weight == "Bold" ? 700 : FontDetails.DefaultWeight,
                 standardFontMetrics.ItalicAngle != 0);
+            isZapfDingbats = encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats");
         }
 
         public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
@@ -49,39 +51,37 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
 
         public bool TryGetUnicode(int characterCode, [NotNullWhen(true)] out string? value)
         {
+            value = null;
+
             var name = encoding.GetName(characterCode);
+
             if (string.Equals(name, GlyphList.NotDefined, StringComparison.OrdinalIgnoreCase))
             {
-                value = null;
                 return false;
             }
 
-            if (encoding is ZapfDingbatsEncoding)
+            try
             {
-                var listed = GlyphList.ZapfDingbats.NameToUnicode(name);
+                if (isZapfDingbats)
+                {
+                    // Prefer the ZapfDingbats glyph list; fall back to the Adobe glyph list when it has no entry.
+                    value = GlyphList.ZapfDingbats.NameToUnicode(name);
 
-                value = listed;
+                    if (value is not null)
+                    {
+                        return true;
+                    }
+                }
 
-                return true;
+                value = GlyphList.AdobeGlyphList.NameToUnicode(name);
+            }
+            catch
+            {
+                // A failed glyph list lookup is reported as "no Unicode value" rather than propagated.
+                return false;
             }
 
-            if (encoding is StandardEncoding || encoding is SymbolEncoding)
-            {
-                var listed = GlyphList.AdobeGlyphList.NameToUnicode(name);
-
-                value = listed;
-
-                return true;
-            }
-            else
-            {
-                Debug.WriteLine($"Warning: Type1Standard14Font with unexpected encoding: '{encoding.EncodingName}' Expected: 'ZapfDingbatsEncoding','SymbolEncoding' or 'StandardEncoding' . Font: '{standardFontMetrics.FontName}'");
-                var listed = GlyphList.AdobeGlyphList.NameToUnicode(name);
-
-                value = listed;
-
-                return true;
-            }
+            return value is not null;
         }
 
         public CharacterBoundingBox GetBoundingBox(int characterCode)
diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/Type3Font.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/Type3Font.cs
index f3097b54..5fc4f7a0 100644
--- a/src/UglyToad.PdfPig/PdfFonts/Simple/Type3Font.cs
+++ b/src/UglyToad.PdfPig/PdfFonts/Simple/Type3Font.cs
@@ -42,6 +42,12 @@
             this.widths = widths;
             this.toUnicodeCMap = new ToUnicodeCMap(toUnicodeCMap);
             Details = FontDetails.GetDefault(name?.Data);
+
+            // Assumption: a ZapfDingbats font never reaches this class. If this assertion fires,
+            // the glyph name to Unicode lookup of this class needs to be changed to handle ZapfDingbats.
+            System.Diagnostics.Debug.Assert(
+                !(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")),
+                "Unexpected ZapfDingbats font in Type3Font: ZapfDingbats handling is assumed to be unnecessary here.");
         }
 
         public int ReadCharacterCode(IInputBytes bytes, out int codeLength)