#9 fix bug with truetype fonts and start adding support for cid fonts using compact font format

This commit is contained in:
Eliot Jones 2018-12-28 22:34:47 +00:00
parent d9052e1388
commit 47e49c4044
17 changed files with 1273 additions and 1105 deletions

View File

@ -41,7 +41,7 @@ New in v0.0.5 - To create documents use the class ```PdfDocumentBuilder```. Thou
byte[] documentBytes = builder.Build();
File.WriteAllBytes(@"C:\\git\newPdf.pdf");
File.WriteAllBytes(@"C:\git\newPdf.pdf", documentBytes);
Each font must be registered with the PdfDocumentBuilder prior to use, which enables pages to share the font resources. Currently only Standard 14 fonts and TrueType fonts (.ttf) are supported.

Binary file not shown.

File diff suppressed because it is too large Load Diff

View File

@ -10,7 +10,10 @@
// NOTE(review): placeholder test - the body is entirely commented out, so this test
// exercises nothing and asserts nothing. The disabled snippet opens a PDF from a
// machine-specific path with lenient parsing switched off and fetches page 1.
[Fact]
public void Tests()
{
//using (var document = PdfDocument.Open(File.ReadAllBytes(@"C:\Users\eliot\Downloads\Motor Insurance claim form.pdf"), new ParsingOptions{UseLenientParsing = false}))
//{
// var page1 = document.GetPage(1);
//}
}
}
}

View File

@ -33,6 +33,7 @@
<ItemGroup>
<EmbeddedResource Remove="Fonts\TrueType\Andada-Regular.ttf" />
<EmbeddedResource Remove="Fonts\TrueType\google-simple-doc.ttf" />
<EmbeddedResource Remove="Fonts\TrueType\PMingLiU.ttf" />
<EmbeddedResource Remove="Fonts\TrueType\Roboto-Regular.ttf" />
<EmbeddedResource Remove="Fonts\Type1\AdobeUtopia.pfa" />
<EmbeddedResource Remove="Fonts\Type1\CMBX10.pfa" />
@ -43,6 +44,7 @@
<ItemGroup>
<None Remove="Fonts\CompactFontFormat\MinionPro.bin" />
<None Remove="Fonts\TrueType\Roboto-Regular.GlyphData.txt" />
</ItemGroup>
<ItemGroup>
@ -55,6 +57,12 @@
<Content Include="Fonts\TrueType\google-simple-doc.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Fonts\TrueType\PMingLiU.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Fonts\TrueType\Roboto-Regular.GlyphData.txt">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>
<Content Include="Fonts\TrueType\Roboto-Regular.ttf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</Content>

View File

@ -11,6 +11,7 @@
/// </summary>
internal class Type0CidFont : ICidFont
{
private readonly ICidFontProgram fontProgram;
public NameToken Type { get; }
public NameToken SubType { get; }
public NameToken BaseFont { get; }
@ -19,9 +20,18 @@
public CidFontType CidFontType => CidFontType.Type0;
public FontDescriptor Descriptor { get; }
public Type0CidFont()
public Type0CidFont(ICidFontProgram fontProgram, NameToken type, NameToken subType, NameToken baseFont,
CharacterIdentifierSystemInfo systemInfo,
FontDescriptor descriptor)
{
throw new System.NotImplementedException();
this.fontProgram = fontProgram;
Type = type;
SubType = subType;
BaseFont = baseFont;
SystemInfo = systemInfo;
var scale = 1 / (decimal)(fontProgram?.GetFontMatrixMultiplier() ?? 1000);
FontMatrix = TransformationMatrix.FromValues(scale, 0, 0, scale, 0, 0);
Descriptor = descriptor;
}
public decimal GetWidthFromFont(int characterCode)

View File

@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using System.Linq;
using CidFonts;
using Core;
using Geometry;
using Util.JetBrains.Annotations;
@ -11,7 +12,7 @@
/// A Compact Font Format (CFF) font program as described in The Compact Font Format specification (Adobe Technical Note #5176).
/// A CFF font may contain multiple fonts and achieves compression by sharing details between fonts in the set.
/// </summary>
internal class CompactFontFormatFontProgram
internal class CompactFontFormatFontProgram : ICidFontProgram
{
/// <summary>
/// The decoded header table for this font.
@ -58,5 +59,30 @@
#endif
return Fonts.First().Value;
}
/// <summary>
/// Stub: bounding box lookup by character identifier is not implemented for
/// Compact Font Format programs yet, so this call never returns normally.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up (currently unused).</param>
/// <param name="boundingBox">Never assigned, because the method always throws.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingBox(int characterIdentifier, out PdfRectangle boundingBox)
    => throw new NotImplementedException();
/// <summary>
/// Stub: bounding box lookup through a caller-supplied identifier mapping is not
/// implemented for Compact Font Format programs yet, so this call never returns normally.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up (currently unused).</param>
/// <param name="characterIdentifierToGlyphIndex">
/// Mapping function supplied by the caller (currently unused).
/// NOTE(review): presumably maps a character identifier to a glyph index - confirm against the interface.
/// </param>
/// <param name="boundingBox">Never assigned, because the method always throws.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingBox(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out PdfRectangle boundingBox)
    => throw new NotImplementedException();
/// <summary>
/// Stub: advance width lookup through a caller-supplied identifier mapping is not
/// implemented for Compact Font Format programs yet, so this call never returns normally.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up (currently unused).</param>
/// <param name="characterIdentifierToGlyphIndex">
/// Mapping function supplied by the caller (currently unused).
/// NOTE(review): presumably maps a character identifier to a glyph index - confirm against the interface.
/// </param>
/// <param name="width">Never assigned, because the method always throws.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, Func<int, int> characterIdentifierToGlyphIndex, out decimal width)
    => throw new NotImplementedException();
/// <summary>
/// Stub: advance width lookup by character identifier is not implemented for
/// Compact Font Format programs yet, so this call never returns normally.
/// </summary>
/// <param name="characterIdentifier">The character identifier to look up (currently unused).</param>
/// <param name="width">Never assigned, because the method always throws.</param>
/// <exception cref="NotImplementedException">Always thrown.</exception>
public bool TryGetBoundingAdvancedWidth(int characterIdentifier, out decimal width)
    => throw new NotImplementedException();
/// <summary>
/// Gets the font matrix multiplier for this font program.
/// </summary>
/// <returns>Always the constant 1000.</returns>
public int GetFontMatrixMultiplier() => 1000;
}
}

View File

@ -59,6 +59,8 @@
public bool IsCidFont { get; set; }
public CidFontOperators CidFontOperators { get; set; } = new CidFontOperators();
public struct SizeAndOffset
{
public int Size { get; }
@ -78,6 +80,36 @@
}
}
/// <summary>
/// Holds the CID-specific entries read from a Compact Font Format top level dictionary.
/// Properties which are never assigned keep the defaults declared here.
/// </summary>
internal class CidFontOperators
{
    /// <summary>
    /// The registry, ordering and supplement describing the character collection.
    /// </summary>
    public RegistryOrderingSupplement Ros { get; set; }

    /// <summary>
    /// The CID font version. Defaults to 0.
    /// </summary>
    public int Version { get; set; }

    /// <summary>
    /// The CID font revision. Defaults to 0.
    /// </summary>
    public int Revision { get; set; }

    /// <summary>
    /// The CID font type. Defaults to 0.
    /// </summary>
    public int Type { get; set; }

    /// <summary>
    /// The CID count. Defaults to 8720.
    /// </summary>
    public int Count { get; set; } = 8720;

    /// <summary>
    /// The UID base value.
    /// </summary>
    public decimal UidBase { get; set; }

    /// <summary>
    /// The font dictionary array value.
    /// NOTE(review): presumably the FDArray offset from the CFF specification - confirm.
    /// </summary>
    public decimal FontDictionaryArray { get; set; }

    /// <summary>
    /// The font dictionary select value.
    /// NOTE(review): presumably the FDSelect offset from the CFF specification - confirm.
    /// </summary>
    public decimal FontDictionarySelect { get; set; }

    /// <summary>
    /// The font name recorded for the CID font.
    /// </summary>
    public string FontName { get; set; }
}
/// <summary>
/// The registry, ordering and supplement (ROS) triple stored for a CID font.
/// </summary>
internal class RegistryOrderingSupplement
{
/// <summary>
/// The registry name.
/// </summary>
public string Registry { get; set; }
/// <summary>
/// The ordering name.
/// </summary>
public string Ordering { get; set; }
/// <summary>
/// The supplement number.
/// </summary>
public decimal Supplement { get; set; }
}
/// <summary>
/// Defines the format of the CharString data contained within a Compact Font Format font.
/// </summary>

View File

@ -96,22 +96,50 @@
break;
// TODO: CID Font Stuff
case 30:
var registry = GetString(operands, stringIndex);
operands.RemoveAt(0);
var ordering = GetString(operands, stringIndex);
operands.RemoveAt(0);
var supplement = GetIntOrDefault(operands);
dictionary.CidFontOperators.Ros = new RegistryOrderingSupplement
{
Registry = registry,
Ordering = ordering,
Supplement = supplement
};
dictionary.IsCidFont = true;
break;
case 31:
dictionary.CidFontOperators.Version = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 32:
dictionary.CidFontOperators.Revision = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 33:
dictionary.CidFontOperators.Type = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 34:
dictionary.CidFontOperators.Count = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 35:
dictionary.CidFontOperators.UidBase = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 36:
dictionary.CidFontOperators.FontDictionaryArray = GetIntOrDefault(operands);
dictionary.IsCidFont = true;
break;
case 37:
dictionary.CidFontOperators.FontDictionarySelect = operands[0].Decimal;
dictionary.IsCidFont = true;
break;
case 38:
dictionary.CidFontOperators.FontName = GetString(operands, stringIndex);
dictionary.IsCidFont = true;
break;
}
}

View File

@ -2,11 +2,16 @@
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using CidFonts;
using CompactFontFormat;
using Core;
using Exceptions;
using Filters;
using Geometry;
using IO;
using PdfPig.Exceptions;
using PdfPig.Parser.Parts;
using Tokenization.Scanner;
using Tokens;
@ -18,16 +23,19 @@
{
private readonly FontDescriptorFactory descriptorFactory;
private readonly TrueTypeFontParser trueTypeFontParser;
private readonly CompactFontFormatParser compactFontFormatParser;
private readonly IFilterProvider filterProvider;
private readonly IPdfTokenScanner pdfScanner;
/// <summary>
/// Creates a <see cref="CidFontFactory"/> from the collaborators it needs to locate and parse CID font programs.
/// </summary>
/// <param name="pdfScanner">Scanner used to resolve indirect objects such as the font file stream.</param>
/// <param name="descriptorFactory">Factory used to build font descriptors.</param>
/// <param name="trueTypeFontParser">Parser used for embedded TrueType font files.</param>
/// <param name="compactFontFormatParser">Parser used for embedded Compact Font Format font files.</param>
/// <param name="filterProvider">Provides the filters used to decode font file streams.</param>
public CidFontFactory(IPdfTokenScanner pdfScanner, FontDescriptorFactory descriptorFactory, TrueTypeFontParser trueTypeFontParser,
    CompactFontFormatParser compactFontFormatParser,
    IFilterProvider filterProvider)
{
    // Each dependency is stored exactly once; the scanner was previously assigned twice.
    this.pdfScanner = pdfScanner;
    this.descriptorFactory = descriptorFactory;
    this.trueTypeFontParser = trueTypeFontParser;
    this.compactFontFormatParser = compactFontFormatParser;
    this.filterProvider = filterProvider;
}
public ICidFont Generate(DictionaryToken dictionary, bool isLenientParsing)
@ -56,7 +64,7 @@
var subType = dictionary.GetNameOrDefault(NameToken.Subtype);
if (NameToken.CidFontType0.Equals(subType))
{
//return new PDCIDFontType0(dictionary, parent);
return new Type0CidFont(fontProgram, type, subType, baseFont, systemInfo, descriptor);
}
if (NameToken.CidFontType2.Equals(subType))
@ -106,6 +114,40 @@
case DescriptorFontFile.FontFileType.TrueType:
var input = new TrueTypeDataBytes(new ByteArrayInputBytes(fontFile));
return trueTypeFontParser.Parse(input);
case DescriptorFontFile.FontFileType.FromSubtype:
{
if (!DirectObjectFinder.TryGet(descriptor.FontFile.ObjectKey, pdfScanner, out StreamToken str))
{
throw new NotSupportedException("Cannot read CID font from subtype.");
}
if (!str.StreamDictionary.TryGet(NameToken.Subtype, out NameToken subtypeName))
{
throw new PdfDocumentFormatException($"The font file stream did not contain a subtype entry: {str.StreamDictionary}.");
}
if (subtypeName == NameToken.CidFontType0C)
{
var bytes = str.Decode(filterProvider);
var font = compactFontFormatParser.Parse(new CompactFontFormatData(bytes));
return font;
}
if (subtypeName == NameToken.Type1C)
{
}
else if (subtypeName == NameToken.OpenType)
{
}
else
{
throw new PdfDocumentFormatException($"Unexpected subtype for CID font: {subtypeName}.");
}
throw new NotSupportedException("Cannot read CID font from subtype.");
}
default:
throw new NotSupportedException("Currently only TrueType fonts are supported.");
}

View File

@ -1,11 +1,63 @@
namespace UglyToad.PdfPig.Fonts.TrueType.Parser
{
using System;
using System.Text;
using Names;
using Tables;
using Util;
using Util.JetBrains.Annotations;
/// <summary>
/// Parses the horizontal header table of a TrueType font into a <see cref="HorizontalHeaderTable"/>.
/// </summary>
internal class HorizontalHeaderTableParser : ITrueTypeTableParser<HorizontalHeaderTable>
{
    // Count of reserved 16-bit values sitting between the caret offset and the metric data format.
    private const int ReservedValueCount = 4;

    /// <summary>
    /// Seeks to the table's offset and reads its fields sequentially.
    /// </summary>
    /// <param name="header">The directory entry for the table; supplies the offset to seek to.</param>
    /// <param name="data">The font data to read from.</param>
    /// <param name="register">Not used by this parser.</param>
    /// <returns>The parsed horizontal header table.</returns>
    /// <exception cref="NotSupportedException">The metric data format is not 0.</exception>
    public HorizontalHeaderTable Parse(TrueTypeHeaderTable header, TrueTypeDataBytes data, TableRegister.Builder register)
    {
        data.Seek(header.Offset);

        // The fields are laid out back to back, so the order of these reads must not change.
        var versionMajor = data.ReadUnsignedShort();
        var versionMinor = data.ReadUnsignedShort();

        var ascent = data.ReadSignedShort();
        var descent = data.ReadSignedShort();
        var gap = data.ReadSignedShort();

        var maxAdvanceWidth = data.ReadUnsignedShort();

        var minLeftBearing = data.ReadSignedShort();
        var minRightBearing = data.ReadSignedShort();
        var maxExtentX = data.ReadSignedShort();

        var slopeRise = data.ReadSignedShort();
        var slopeRun = data.ReadSignedShort();
        var offsetOfCaret = data.ReadSignedShort();

        // Skip over the reserved section; the values are read and discarded.
        for (var i = 0; i < ReservedValueCount; i++)
        {
            data.ReadSignedShort();
        }

        var format = data.ReadSignedShort();

        if (format != 0)
        {
            throw new NotSupportedException("The metric data format for a horizontal header table should be 0.");
        }

        var metricCount = data.ReadUnsignedShort();

        return new HorizontalHeaderTable(
            header,
            versionMajor,
            versionMinor,
            ascent,
            descent,
            gap,
            maxAdvanceWidth,
            minLeftBearing,
            minRightBearing,
            maxExtentX,
            slopeRise,
            slopeRun,
            offsetOfCaret,
            format,
            metricCount);
    }
}
internal class NameTableParser : ITrueTypeTableParser<NameTable>
{
public NameTable Parse(TrueTypeHeaderTable header, TrueTypeDataBytes data, TableRegister.Builder register)

View File

@ -7,6 +7,7 @@
{
private static readonly CMapTableParser CMapTableParser = new CMapTableParser();
private static readonly HorizontalMetricsTableParser HorizontalMetricsTableParser = new HorizontalMetricsTableParser();
private static readonly HorizontalHeaderTableParser HorizontalHeaderTableParser = new HorizontalHeaderTableParser();
private static readonly NameTableParser NameTableParser = new NameTableParser();
private static readonly Os2TableParser Os2TableParser = new Os2TableParser();
@ -32,6 +33,11 @@
return (T)(object)Os2TableParser.Parse(table, data, register);
}
if (typeof(T) == typeof(HorizontalHeaderTable))
{
return (T) (object) HorizontalHeaderTableParser.Parse(table, data, register);
}
throw new NotImplementedException();
}
}

View File

@ -74,7 +74,7 @@
}
// hhea
builder.HorizontalHeaderTable = HorizontalHeaderTable.Load(data, hHead);
builder.HorizontalHeaderTable = TableParser.Parse<HorizontalHeaderTable>(hHead, data, builder);
if (!tables.TryGetValue(TrueTypeHeaderTable.Maxp, out var maxHeaderTable))
{

View File

@ -100,52 +100,5 @@
MetricDataFormat = metricDataFormat;
NumberOfHeaderMetrics = numberOfHeaderMetrics;
}
/// <summary>
/// Reads a <see cref="HorizontalHeaderTable"/> from the font data, starting at the offset
/// recorded in <paramref name="table"/>.
/// </summary>
/// <param name="data">The font data to read from; it is repositioned to the table's offset.</param>
/// <param name="table">The directory entry for the table; supplies the offset to seek to.</param>
/// <returns>The parsed horizontal header table.</returns>
/// <exception cref="NotSupportedException">The metric data format is not 0.</exception>
public static HorizontalHeaderTable Load(TrueTypeDataBytes data, TrueTypeHeaderTable table)
{
    data.Seek(table.Offset);

    // The fields are laid out back to back, so the order of these reads must not change.
    var majorVersion = data.ReadUnsignedShort();
    var minorVersion = data.ReadUnsignedShort();
    var ascender = data.ReadSignedShort();
    var descender = data.ReadSignedShort();
    var lineGap = data.ReadSignedShort();
    var advancedWidthMax = data.ReadUnsignedShort();
    var minLeftSideBearing = data.ReadSignedShort();
    var minRightSideBearing = data.ReadSignedShort();
    var xMaxExtent = data.ReadSignedShort();
    var caretSlopeRise = data.ReadSignedShort();
    var caretSlopeRun = data.ReadSignedShort();
    var caretOffset = data.ReadSignedShort();

    // Reserved section
    data.ReadSignedShort();
    data.ReadSignedShort();
    data.ReadSignedShort();
    data.ReadSignedShort();

    var metricDataFormat = data.ReadSignedShort();
    if (metricDataFormat != 0)
    {
        throw new NotSupportedException("The metric data format for a horizontal header table should be 0.");
    }

    // The number of horizontal metrics is an unsigned 16-bit field. Reading it as signed
    // turns any count above 32767 into a negative number, which breaks fonts with many glyphs.
    var numberOfHeaderMetrics = data.ReadUnsignedShort();

    return new HorizontalHeaderTable(table, majorVersion, minorVersion, ascender,
        descender, lineGap, advancedWidthMax,
        minLeftSideBearing,
        minRightSideBearing,
        xMaxExtent,
        caretSlopeRise,
        caretSlopeRun,
        caretOffset,
        metricDataFormat,
        numberOfHeaderMetrics);
}
}
}

View File

@ -99,20 +99,19 @@
var trueTypeFontParser = new TrueTypeFontParser();
var fontDescriptorFactory = new FontDescriptorFactory();
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var compactFontFormatIndexReader = new CompactFontFormatIndexReader();
var compactFontFormatParser = new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader);
var cidFontFactory = new CidFontFactory(pdfScanner, fontDescriptorFactory, trueTypeFontParser, compactFontFormatParser, filterProvider);
var encodingReader = new EncodingReader(pdfScanner);
var fontFactory = new FontFactory(log, new Type0FontHandler(cidFontFactory,
cMapCache,
filterProvider, pdfScanner),
new TrueTypeFontHandler(log, pdfScanner, filterProvider, cMapCache, fontDescriptorFactory, trueTypeFontParser, encodingReader, new SystemFontFinder(new TrueTypeFontParser())),
new Type1FontHandler(pdfScanner, cMapCache, filterProvider, fontDescriptorFactory, encodingReader,
new Type1FontParser(new Type1EncryptedPortionParser()),
new CompactFontFormatParser(new CompactFontFormatIndividualFontParser(compactFontFormatIndexReader, new CompactFontFormatTopLevelDictionaryReader(),
new CompactFontFormatPrivateDictionaryReader()), compactFontFormatIndexReader)),
new Type1FontParser(new Type1EncryptedPortionParser()), compactFontFormatParser),
new Type3FontHandler(pdfScanner, cMapCache, filterProvider, encodingReader));
var resourceContainer = new ResourceContainer(pdfScanner, fontFactory);

View File

@ -96,6 +96,7 @@
public static readonly NameToken CharSet = new NameToken("CharSet");
public static readonly NameToken CiciSignit = new NameToken("CICI.SignIt");
public static readonly NameToken CidFontType0 = new NameToken("CIDFontType0");
public static readonly NameToken CidFontType0C = new NameToken("CIDFontType0C");
public static readonly NameToken CidFontType2 = new NameToken("CIDFontType2");
public static readonly NameToken CidToGidMap = new NameToken("CIDToGIDMap");
public static readonly NameToken CidSet = new NameToken("CIDSet");