Properly handle ZapfDingbats font for TrueTypeSimpleFont and add tests

This commit is contained in:
BobLd 2025-01-05 17:27:01 +00:00
parent 4430a01e43
commit 53cf4f2ced
3 changed files with 26 additions and 7 deletions

View File

@ -4,6 +4,17 @@
public class ZapfDingbatsTests
{
[Fact]
public void TrueTypeSimpleFont1()
{
    var path = IntegrationHelpers.GetDocumentPath("capas");

    using (var pdf = PdfDocument.Open(path))
    {
        // The ZapfDingbats characters on this page are expected to be read as spaces.
        var letterValues = pdf.GetPage(18).Letters.Select(letter => letter.Value);

        Assert.Contains(" ", letterValues);
    }
}
[Fact]
public void Type1Standard14Font1()
{
@ -17,7 +28,6 @@
[Fact]
public void Type1Standard14Font2()
{
// This document does not actually contain circular references
using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-LINK-5251-1")))
{
var page = document.GetPage(1);
@ -33,7 +43,6 @@
[Fact]
public void Type1FontSimple1()
{
// This document does not actually contain circular references
using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("MOZILLA-2775-1")))
{
var page = document.GetPage(11);
@ -44,7 +53,6 @@
[Fact]
public void Type1FontSimple2()
{
// This document does not actually contain circular references
using (var document = PdfDocument.Open(IntegrationHelpers.GetDocumentPath("PDFBOX-492-4.jar-8")))
{
var page = document.GetPage(1);

View File

@ -31,6 +31,8 @@
private readonly double[] widths;
private readonly bool isZapfDingbats;
#nullable disable
public NameToken Name { get; }
#nullable enable
@ -63,8 +65,7 @@
Details = descriptor?.ToDetails(Name?.Data)
?? FontDetails.GetDefault(Name?.Data);
// Assumption is ZapfDingbats is not possible here. We need to change the behaviour if not the case
System.Diagnostics.Debug.Assert(!(encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats")));
isZapfDingbats = encoding is ZapfDingbatsEncoding || Details.Name.Contains("ZapfDingbats");
}
public int ReadCharacterCode(IInputBytes bytes, out int codeLength)
@ -100,12 +101,22 @@
// If the font is a simple font that uses one of the predefined encodings MacRomanEncoding, MacExpertEncoding, or WinAnsiEncoding...
// Map the character code to a character name.
var encodedCharacterName = encoding.GetName(characterCode);
var name = encoding.GetName(characterCode);
// Look up the character name in the Adobe Glyph List or additional Glyph List.
try
{
value = GlyphList.AdobeGlyphList.NameToUnicode(encodedCharacterName);
if (isZapfDingbats)
{
value = GlyphList.ZapfDingbats.NameToUnicode(name);
if (value is not null)
{
return true;
}
}
value = GlyphList.AdobeGlyphList.NameToUnicode(name);
}
catch
{