diff --git a/src/UglyToad.PdfPig.Fonts/GlyphList.cs b/src/UglyToad.PdfPig.Fonts/GlyphList.cs index aa264b05..b9ce8c53 100644 --- a/src/UglyToad.PdfPig.Fonts/GlyphList.cs +++ b/src/UglyToad.PdfPig.Fonts/GlyphList.cs @@ -22,20 +22,13 @@ private readonly Dictionary oddNameToUnicodeCache = new Dictionary(); - private static readonly Lazy LazyAdobeGlyphList = new Lazy(() => GlyphListFactory.Get("glyphlist")); + private static readonly Lazy LazyAdobeGlyphList = new Lazy(() => GlyphListFactory.Get("glyphlist", "additional")); /// - /// The Adobe Glyph List. + /// The Adobe Glyph List (includes an extension to the Adobe Glyph List.). /// public static GlyphList AdobeGlyphList => LazyAdobeGlyphList.Value; - private static readonly Lazy LazyAdditionalGlyphList = new Lazy(() => GlyphListFactory.Get("additional")); - - /// - /// An extension to the Adobe Glyph List. - /// - public static GlyphList AdditionalGlyphList => LazyAdditionalGlyphList.Value; - private static readonly Lazy LazyZapfDingbatsGlyphList = new Lazy(() => GlyphListFactory.Get("zapfdingbats")); /// @@ -103,7 +96,7 @@ return result; } - string unicode; + string? unicode; // 1. Drop all the characters from the glyph name starting with the first occurrence of a period (U+002E FULL STOP), if any. if (name.IndexOf('.') > 0) { diff --git a/src/UglyToad.PdfPig.Fonts/GlyphListFactory.cs b/src/UglyToad.PdfPig.Fonts/GlyphListFactory.cs index 39a6cedf..667f5de9 100644 --- a/src/UglyToad.PdfPig.Fonts/GlyphListFactory.cs +++ b/src/UglyToad.PdfPig.Fonts/GlyphListFactory.cs @@ -4,49 +4,56 @@ using System.Collections.Generic; using System.Globalization; using System.IO; + using System.Linq; using Util; - internal class GlyphListFactory + internal static class GlyphListFactory { - public static GlyphList Get(string listName) +#if NET + private const char Semicolon = ';'; +#else + private static readonly char[] Semicolon = [';']; +#endif + + public static GlyphList Get(params string[] listNames) { - using (var resource = - typeof(GlyphListFactory).Assembly.GetManifestResourceStream( - $"UglyToad.PdfPig.Fonts.Resources.GlyphList.{listName}")) + var result = new Dictionary(listNames.Any(n => string.Equals("glyphlist", n, StringComparison.OrdinalIgnoreCase)) ? 4300 : 0); + + foreach (var listName in listNames) { - if (resource == null) + using (var resource = + typeof(GlyphListFactory).Assembly.GetManifestResourceStream( + $"UglyToad.PdfPig.Fonts.Resources.GlyphList.{listName}")) { - throw new ArgumentException($"No embedded glyph list resource was found with the name {listName}."); - } + if (resource == null) + { + throw new ArgumentException($"No embedded glyph list resource was found with the name {listName}."); + } - int? capacity = null; - // Prevent too much wasted memory capacity for Adobe GlyphList - if (string.Equals("glyphlist", listName, StringComparison.OrdinalIgnoreCase)) - { - capacity = 4300; + ReadInternal(resource, result); } - - return ReadInternal(resource, capacity); } + +#if NET + result.TrimExcess(); +#endif + return new GlyphList(result); } public static GlyphList Read(Stream stream) { - return ReadInternal(stream); + var result = new Dictionary(); + ReadInternal(stream, result); + return new GlyphList(result); } - private static readonly char[] Semicolon = [';']; - - private static GlyphList ReadInternal(Stream stream, int? defaultDictionaryCapacity = 0) + private static void ReadInternal(Stream stream, Dictionary result) { if (stream == null) { throw new ArgumentNullException(nameof(stream)); } - var result = defaultDictionaryCapacity.HasValue ? new Dictionary(defaultDictionaryCapacity.Value) : []; - - using (var reader = new StreamReader(stream)) { while (!reader.EndOfStream) @@ -62,7 +69,7 @@ { continue; } - + var parts = line.Split(Semicolon, StringSplitOptions.RemoveEmptyEntries); if (parts.Length != 2) @@ -86,11 +93,10 @@ value += char.ConvertFromUtf32(code); } + System.Diagnostics.Debug.Assert(!result.ContainsKey(key)); result[key] = value; } } - - return new GlyphList(result); } } } diff --git a/src/UglyToad.PdfPig.Tests/Integration/AdditionalGlyphListTests.cs b/src/UglyToad.PdfPig.Tests/Integration/AdditionalGlyphListTests.cs new file mode 100644 index 00000000..002b2167 --- /dev/null +++ b/src/UglyToad.PdfPig.Tests/Integration/AdditionalGlyphListTests.cs @@ -0,0 +1,42 @@ +namespace UglyToad.PdfPig.Tests.Integration +{ + using System.Linq; + + public class AdditionalGlyphListTests + { + [Fact] + public void Type1FontSimple1() + { + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("2108.11480"))) + { + var page = document.GetPage(2); + Assert.Contains("\u22c3", page.Letters.Select(l => l.Value)); + } + } + + [Fact] + public void Type1FontSimple2() + { + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("ICML03-081"))) + { + var page = document.GetPage(2); + Assert.Contains("\u2211", page.Letters.Select(l => l.Value)); + Assert.Contains("\u220f", page.Letters.Select(l => l.Value)); + Assert.Contains("[", page.Letters.Select(l => l.Value)); + Assert.Contains("]", page.Letters.Select(l => l.Value)); + } + } + + [Fact] + public void Type1FontSimple3() + { + using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("Math119FakingData"))) + { + var page = document.GetPage(4); + Assert.Contains("(", page.Letters.Select(l => l.Value)); + Assert.Contains(")", page.Letters.Select(l => l.Value)); + Assert.Contains("\u2211", page.Letters.Select(l => l.Value)); + } + } + } +} diff --git a/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs index 13e4162d..9418d8a9 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Simple/TrueTypeSimpleFont.cs @@ -102,8 +102,7 @@ // Look up the character name in the Adobe Glyph List or additional Glyph List. try { - value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName) - ?? GlyphList.AdditionalGlyphList.NameToUnicode(encodedCharacterName); + value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName); } catch {