Properly handle ZapfDingbats font for Type1FontSimple and Type1Standard14Font and add tests

This commit is contained in:
BobLd 2025-01-05 14:18:00 +00:00
parent 585e940acf
commit 4430a01e43
10 changed files with 95 additions and 22 deletions

View File

@ -0,0 +1,55 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using System.Linq;

    /// <summary>
    /// Integration tests asserting that specific dingbat symbols appear among the letters
    /// extracted from known sample documents.
    /// Test names follow the font class each document is presumed to exercise
    /// (Type1Standard14Font / Type1FontSimple) - confirm against the sample documents.
    /// </summary>
    public class ZapfDingbatsTests
    {
        [Fact]
        public void Type1Standard14Font1()
        {
            var letters = ReadLetterValues("TIKA-469-0", 2);

            Assert.Contains("●", letters);
        }

        [Fact]
        public void Type1Standard14Font2()
        {
            // The letter values are materialised once and reused for every assertion.
            var letters = ReadLetterValues("MOZILLA-LINK-5251-1", 1);

            Assert.Contains("✁", letters);
            Assert.Contains("✂", letters);
            Assert.Contains("✄", letters);
            Assert.Contains("☎", letters);
            Assert.Contains("✆", letters);
            Assert.Contains("✇", letters);
        }

        [Fact]
        public void Type1FontSimple1()
        {
            var letters = ReadLetterValues("MOZILLA-2775-1", 11);

            Assert.Contains("●", letters);
        }

        [Fact]
        public void Type1FontSimple2()
        {
            var letters = ReadLetterValues("PDFBOX-492-4.jar-8", 1);

            Assert.Contains("\u25a0", letters);
        }

        /// <summary>
        /// Opens the named integration document, reads the requested page and returns the
        /// <c>Value</c> of every letter on it. The document is disposed before returning.
        /// </summary>
        /// <param name="documentName">Name passed to <c>IntegrationHelpers.GetDocumentPath</c>.</param>
        /// <param name="pageNumber">Page number passed to <c>PdfDocument.GetPage</c>.</param>
        /// <returns>The letter values of the page, in the order the page reports them.</returns>
        private static string[] ReadLetterValues(string documentName, int pageNumber)
        {
            using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath(documentName)))
            {
                var page = document.GetPage(pageNumber);

                // ToArray() forces evaluation while the document is still open.
                return page.Letters.Select(l => l.Value).ToArray();
            }
        }
    }
}

View File

@ -62,6 +62,9 @@
Details = descriptor?.ToDetails(Name?.Data)
?? FontDetails.GetDefault(Name?.Data);
// Debug-only guard: this font type is assumed never to be ZapfDingbats. If the assert fires, glyph names must be resolved through GlyphList.ZapfDingbats before falling back to the Adobe glyph list.
System.Diagnostics.Debug.Assert(!(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")));
}
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)

View File

@ -42,6 +42,9 @@
fontMetrics.Weight == "Bold",
fontMetrics.Weight == "Bold" ? 700 : FontDetails.DefaultWeight,
fontMetrics.ItalicAngle != 0);
// Debug-only guard: this font type is assumed never to be ZapfDingbats. If the assert fires, glyph names must be resolved through GlyphList.ZapfDingbats before falling back to the Adobe glyph list.
System.Diagnostics.Debug.Assert(!(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")));
}
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)

View File

@ -37,6 +37,8 @@
private readonly TransformationMatrix fontMatrix;
private readonly bool isZapfDingbats;
public NameToken Name { get; }
public bool IsVertical { get; } = false;
@ -80,6 +82,7 @@
Name = name;
Details = fontDescriptor?.ToDetails(name?.Data)
?? FontDetails.GetDefault(name?.Data);
isZapfDingbats = encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats");
}
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
@ -124,6 +127,14 @@
try
{
if (isZapfDingbats)
{
value = GlyphList.ZapfDingbats.NameToUnicode(name);
if (value is not null)
{
return true;
}
}
value = GlyphList.AdobeGlyphList.NameToUnicode(name);
}
catch

View File

@ -18,6 +18,7 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
{
private readonly AdobeFontMetrics standardFontMetrics;
private readonly Encoding encoding;
private readonly bool isZapfDingbats;
public NameToken Name { get; }
@ -39,6 +40,7 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
standardFontMetrics.Weight == "Bold",
standardFontMetrics.Weight == "Bold" ? 700 : FontDetails.DefaultWeight,
standardFontMetrics.ItalicAngle != 0);
isZapfDingbats = encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats");
}
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
@ -49,39 +51,35 @@ namespace UglyToad.PdfPig.PdfFonts.Simple
// Maps a character code to a Unicode string: the encoding supplies a glyph name, which is then
// looked up in a glyph list. Returns false when the name is the "not defined" glyph.
// NOTE(review): this hunk is rendered without +/- markers, so the lines removed by the commit
// appear interleaved with the lines that replace them and the body below is not valid as shown.
// Presumably the removed lines are the `encoding is ...` branches, the `listed` temporaries and
// their `return true` statements, and the surviving version is the try/catch around the
// `isZapfDingbats` lookup followed by `return value is not null` - confirm against the repository.
public bool TryGetUnicode(int characterCode, [NotNullWhen(true)] out string? value)
{
value = null;
var name = encoding.GetName(characterCode);
// The "not defined" glyph name has no Unicode mapping; the comparison ignores case.
if (string.Equals(name, GlyphList.NotDefined, StringComparison.OrdinalIgnoreCase))
{
value = null;
return false;
}
if (encoding is ZapfDingbatsEncoding)
try
{
var listed = GlyphList.ZapfDingbats.NameToUnicode(name);
// The ZapfDingbats glyph list is consulted first; a null result falls through to the Adobe glyph list.
if (isZapfDingbats)
{
value = GlyphList.ZapfDingbats.NameToUnicode(name);
value = listed;
if (value is not null)
{
return true;
}
}
return true;
value = GlyphList.AdobeGlyphList.NameToUnicode(name);
}
catch
{
// Any exception thrown by the glyph-list lookup is reported as "no mapping".
return false;
}
if (encoding is StandardEncoding || encoding is SymbolEncoding)
{
var listed = GlyphList.AdobeGlyphList.NameToUnicode(name);
value = listed;
return true;
}
else
{
Debug.WriteLine($"Warning: Type1Standard14Font with unexpected encoding: '{encoding.EncodingName}' Expected: 'ZapfDingbatsEncoding','SymbolEncoding' or 'StandardEncoding' . Font: '{standardFontMetrics.FontName}'");
var listed = GlyphList.AdobeGlyphList.NameToUnicode(name);
value = listed;
return true;
}
return value is not null;
}
public CharacterBoundingBox GetBoundingBox(int characterCode)

View File

@ -42,6 +42,9 @@
this.widths = widths;
this.toUnicodeCMap = new ToUnicodeCMap(toUnicodeCMap);
Details = FontDetails.GetDefault(name?.Data);
// Debug-only guard: this font type is assumed never to be ZapfDingbats. If the assert fires, glyph names must be resolved through GlyphList.ZapfDingbats before falling back to the Adobe glyph list.
System.Diagnostics.Debug.Assert(!(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")));
}
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)