From b9c8e152c104b1f22e5734af56b9e1b966242080 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Thu, 22 Nov 2018 19:32:16 +0000 Subject: [PATCH] #16 change letter api to match the actual information --- .../Integration/AssertablePositionData.cs | 14 ++ .../Integration/LaTexTests.cs | 138 ++++++++++++++++++ ...SinglePageNonLatinAcrobatDistillerTests.cs | 8 +- .../SinglePageSimpleGoogleChromeTests.cs | 12 +- src/UglyToad.PdfPig/Content/Letter.cs | 20 ++- .../Fonts/Composite/Type0Font.cs | 3 +- src/UglyToad.PdfPig/Fonts/IFont.cs | 6 +- .../Fonts/Simple/TrueTypeSimpleFont.cs | 8 +- .../Fonts/Simple/Type1FontSimple.cs | 4 +- .../Fonts/Simple/Type1Standard14Font.cs | 4 +- src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs | 8 +- .../Graphics/ContentStreamProcessor.cs | 14 +- 12 files changed, 199 insertions(+), 40 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs b/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs index 162626ff..562fe203 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/AssertablePositionData.cs @@ -1,6 +1,8 @@ namespace UglyToad.PdfPig.Tests.Integration { using System; + using Content; + using Xunit; public class AssertablePositionData { @@ -40,5 +42,17 @@ Height = height }; } + + public void AssertWithinTolerance(Letter letter, Page page, bool includeHeight = true) + { + Assert.Equal(Text, letter.Value); + Assert.Equal(FontName, letter.FontName); + Assert.Equal(X, letter.Position.X, 1); + Assert.Equal(Width, letter.Width, 1); + if (includeHeight) + { + Assert.Equal(Height, letter.GlyphRectangle.Height, 1); + } + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs index 0d6e7a7f..d1dd9610 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/LaTexTests.cs @@ -1,7 +1,9 @@ namespace UglyToad.PdfPig.Tests.Integration { using System; + using System.Collections.Generic; using System.IO; + using System.Linq; using Xunit; public class LaTexTests @@ -47,5 +49,141 @@ Assert.Equal(8, document.NumberOfPages); } } + + [Fact] + public void LettersHaveCorrectPositionsXfinium() + { + var positions = GetXfiniumPositionData(); + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + for (var i = 0; i < page.Letters.Count; i++) + { + if (i >= positions.Count) + { + break; + } + var letter = page.Letters[i]; + var expected = positions[i]; + + expected.AssertWithinTolerance(letter, page, false); + } + } + } + + [Fact] + public void LettersHaveCorrectPositionsPdfBox() + { + var positions = GetPdfBoxPositionData(); + using (var document = PdfDocument.Open(GetFilename())) + { + var page = document.GetPage(1); + + for (var i = 0; i < page.Letters.Count; i++) + { + if (i >= positions.Count) + { + break; + } + var letter = page.Letters[i]; + var expected = positions[i]; + + expected.AssertWithinTolerance(letter, page); + } + } + } + + private static IReadOnlyList GetPdfBoxPositionData() + { + const string data = @"75.731 698.917 11.218573 T 14.346 WDKAAR+CMBX12 9.712242 +85.615395 698.917 7.8472624 a 14.346 WDKAAR+CMBX12 6.584814 +93.46266 698.917 7.173 c 14.346 WDKAAR+CMBX12 6.584814 +100.17659 698.917 8.521524 k 14.346 WDKAAR+CMBX12 9.956124 +108.698105 698.917 4.4902983 l 14.346 WDKAAR+CMBX12 9.956124 +113.18841 698.917 4.4902983 i 14.346 WDKAAR+CMBX12 9.97047 +117.67871 698.917 8.966249 n 14.346 WDKAAR+CMBX12 6.4557 +126.64496 698.917 8.076798 g 14.346 WDKAAR+CMBX12 9.425322 +140.08716 698.917 6.2835484 t 14.346 WDKAAR+CMBX12 9.195786 +146.3707 698.917 8.966249 h 14.346 WDKAAR+CMBX12 9.956124 +155.33694 698.917 7.359498 e 14.346 WDKAAR+CMBX12 6.584814 +168.06186 698.917 11.032075 P 14.346 WDKAAR+CMBX12 9.841356 +178.6492 698.917 8.076798 o 14.346 WDKAAR+CMBX12 6.584814 +187.15637 698.917 8.076798 o 14.346 WDKAAR+CMBX12 6.584814 +195.23318 698.917 6.584814 r 14.346 WDKAAR+CMBX12 6.4557 +207.19774 698.917 12.1941 A 14.346 WDKAAR+CMBX12 10.0422 +219.39185 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +225.76147 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +232.1311 698.917 8.966249 u 14.346 WDKAAR+CMBX12 6.541776 +241.09735 698.917 13.456548 m 14.346 WDKAAR+CMBX12 6.4557 +254.5539 698.917 8.966249 p 14.346 WDKAAR+CMBX12 9.238825 +263.52014 698.917 6.2835484 t 14.346 WDKAAR+CMBX12 9.195786 +269.8037 698.917 4.4902983 i 14.346 WDKAAR+CMBX12 9.97047 +274.294 698.917 8.076798 o 14.346 WDKAAR+CMBX12 6.584814 +282.3708 698.917 8.966249 n 14.346 WDKAAR+CMBX12 6.4557 +291.33704 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +303.0434 698.917 8.076798 o 14.346 WDKAAR+CMBX12 6.584814 +311.12018 698.917 4.9350243 f 14.346 WDKAAR+CMBX12 10.0422 +321.43494 698.917 12.62448 N 14.346 WDKAAR+CMBX12 9.841356 +334.05945 698.917 7.8472624 a 14.346 WDKAAR+CMBX12 6.584814 +341.90668 698.917 4.4902983 i 14.346 WDKAAR+CMBX12 9.97047 +346.39697 698.917 8.521524 v 14.346 WDKAAR+CMBX12 6.4413543 +354.44507 698.917 7.359498 e 14.346 WDKAAR+CMBX12 6.584814 +367.18433 698.917 11.4768 B 14.346 WDKAAR+CMBX12 9.841356 +378.66113 698.917 7.8472624 a 14.346 WDKAAR+CMBX12 6.584814 +386.06366 698.917 8.521524 y 14.346 WDKAAR+CMBX12 9.238825 +394.1261 698.917 7.359498 e 14.346 WDKAAR+CMBX12 6.584814 +401.4856 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +413.235 698.917 11.218573 T 14.346 WDKAAR+CMBX12 9.712242 +423.1194 698.917 7.359498 e 14.346 WDKAAR+CMBX12 6.584814 +430.47888 698.917 8.521524 x 14.346 WDKAAR+CMBX12 6.3696246 +439.00043 698.917 6.2835484 t 14.346 WDKAAR+CMBX12 9.195786 +450.6637 698.917 11.663298 C 14.346 WDKAAR+CMBX12 10.18566 +462.32703 698.917 4.4902983 l 14.346 WDKAAR+CMBX12 9.956124 +466.81732 698.917 7.8472624 a 14.346 WDKAAR+CMBX12 6.584814 +474.66455 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +481.03418 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +487.4038 698.917 4.4902983 i 14.346 WDKAAR+CMBX12 9.97047 +491.8941 698.917 8.966249 fi 14.346 WDKAAR+CMBX12 10.0422 +500.86035 698.917 7.359498 e 14.346 WDKAAR+CMBX12 6.584814 +508.21985 698.917 6.584814 r 14.346 WDKAAR+CMBX12 6.4557 +514.8047 698.917 6.3696246 s 14.346 WDKAAR+CMBX12 6.584814 +55.440002 650.772 5.9180226 J 9.963 IYBKTJ+CMBX10 6.9442115 +61.358025 650.772 5.5693173 a 9.963 IYBKTJ+CMBX10 4.573017 +66.92734 650.772 4.5232024 s 9.963 IYBKTJ+CMBX10 4.573017 +71.45055 650.772 5.7287254 o 9.963 IYBKTJ+CMBX10 4.573017 +77.17927 650.772 6.366358 n 9.963 IYBKTJ+CMBX10 4.4833503 +87.36145 650.772 8.787367 D 9.963 IYBKTJ+CMBX10 6.8346186 +96.14882 650.772 3.1781971 . 9.963 IYBKTJ+CMBX10 1.5542281 +103.1528 650.772 10.879597 M 9.963 IYBKTJ+CMBX10 6.8346186 +114.0324 650.772 3.1781971 . 9.963 IYBKTJ+CMBX10 1.5542281 +121.02643 650.772 8.588107 R 9.963 IYBKTJ+CMBX10 6.9442115 +129.61453 650.772 5.250501 e 9.963 IYBKTJ+CMBX10 4.573017 +134.86504 650.772 6.366358 n 9.963 IYBKTJ+CMBX10 4.4833503 +141.23138 650.772 6.366358 n 9.963 IYBKTJ+CMBX10 4.4833503 +147.59775 650.772 3.1781971 i 9.963 IYBKTJ+CMBX10 6.924286 +150.77594 650.772 5.250501 e 9.963 IYBKTJ+CMBX10 4.573017"; + + var result = data.Split(new[] { "\r", "\n", "\r\n" }, StringSplitOptions.RemoveEmptyEntries) + .Select(AssertablePositionData.Parse) + .ToList(); + + return result; + } + + private static IReadOnlyList GetXfiniumPositionData() + { + const string data = @"75.731 83.12866 11.218572 T 14.346 WDKAAR+CMBX12 9.956124 +85.6153934 83.123866 7.847262 a 11.218572 WDKAAR+CMBX12 9.956124 +93.462656 83.123866 7.173 c 11.218572 WDKAAR+CMBX12 9.956124 +100.176584 83.123866 8.521524 k 11.218572 WDKAAR+CMBX12 9.956124 +108.698108 83.123866 4.490298 l 11.218572 WDKAAR+CMBX12 9.956124"; + + var result = data.Split(new[] { "\r", "\n", "\r\n" }, StringSplitOptions.RemoveEmptyEntries) + .Select(AssertablePositionData.Parse) + .ToList(); + + return result; + } } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs index a5b10d70..762bc609 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageNonLatinAcrobatDistillerTests.cs @@ -70,7 +70,7 @@ break; } - var myX = pageLetter.CharacterRectangle.Left; + var myX = pageLetter.Position.X; var theirX = pdfBoxData[index].X; var myLetter = pageLetter.Value; @@ -87,7 +87,7 @@ Assert.Equal(theirX, myX, comparer); - Assert.Equal(pdfBoxData[index].Width, pageLetter.CharacterRectangle.Width, comparer); + Assert.Equal(pdfBoxData[index].Width, pageLetter.Width, comparer); index++; } @@ -113,7 +113,7 @@ break; } - var myX = pageLetter.CharacterRectangle.Left; + var myX = pageLetter.Position.X; var theirX = positions[index].X; var myLetter = pageLetter.Value; @@ -127,7 +127,7 @@ Assert.Equal(theirLetter, myLetter); Assert.Equal(theirX, myX, comparer); - Assert.Equal(positions[index].Width, pageLetter.CharacterRectangle.Width, 1); + Assert.Equal(positions[index].Width, pageLetter.Width, 1); index++; } diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs index bbf32308..4df917bd 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageSimpleGoogleChromeTests.cs @@ -134,12 +134,12 @@ namespace UglyToad.PdfPig.Tests.Integration } Assert.Equal(datum.Text, letter.Value); - Assert.Equal(datum.X, letter.CharacterRectangle.BottomLeft.X, 2); + Assert.Equal(datum.X, letter.Position.X, 2); - var transformed = page.Height - letter.CharacterRectangle.BottomLeft.Y; + var transformed = page.Height - letter.Position.Y; Assert.Equal(datum.Y, transformed, 2); - Assert.Equal(datum.Width, letter.CharacterRectangle.Width, 2); + Assert.Equal(datum.Width, letter.Width, 2); Assert.Equal(datum.FontName, letter.FontName); @@ -179,13 +179,13 @@ namespace UglyToad.PdfPig.Tests.Integration } Assert.Equal(datum.Text, letter.Value); - Assert.Equal(datum.X, letter.CharacterRectangle.BottomLeft.X, 2); + Assert.Equal(datum.X, letter.Position.X, 2); - var transformed = page.Height - letter.CharacterRectangle.BottomLeft.Y; + var transformed = page.Height - letter.Position.Y; Assert.Equal(datum.Y, transformed, 2); // Until we get width from glyphs we're a bit out. - Assert.True(Math.Abs(datum.Width - letter.CharacterRectangle.Width) < 0.03m); + Assert.True(Math.Abs(datum.Width - letter.Width) < 0.03m); index++; } diff --git a/src/UglyToad.PdfPig/Content/Letter.cs b/src/UglyToad.PdfPig/Content/Letter.cs index 26364b02..9dd25b28 100644 --- a/src/UglyToad.PdfPig/Content/Letter.cs +++ b/src/UglyToad.PdfPig/Content/Letter.cs @@ -12,16 +12,21 @@ /// public string Value { get; } + /// + /// The placement position of the character in PDF space. + /// + public PdfPoint Position { get; } + + /// + /// The width to advance the renderer once this character is drawn. + /// + public decimal Width { get; } + /// /// Position of the bounding box for the glyph. /// public PdfRectangle GlyphRectangle { get; } - /// - /// The bounding box for the entire character. - /// - public PdfRectangle CharacterRectangle { get; } - /// /// Size as defined in the PDF file. This is not equivalent to font size in points but is relative to other font sizes on the page. /// @@ -40,14 +45,15 @@ /// /// Create a new letter to represent some text drawn by the Tj operator. /// - internal Letter(string value, PdfRectangle glyphRectangle, PdfRectangle characterRectangle, decimal fontSize, string fontName, decimal pointSize) + internal Letter(string value, PdfRectangle glyphRectangle, PdfPoint position, decimal width, decimal fontSize, string fontName, decimal pointSize) { Value = value; GlyphRectangle = glyphRectangle; FontSize = fontSize; FontName = fontName; PointSize = pointSize; - CharacterRectangle = characterRectangle; + Position = position; + Width = width; } /// diff --git a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs index 7d0e06d8..0dc74db4 100644 --- a/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Composite/Type0Font.cs @@ -81,8 +81,7 @@ var width = CidFont.GetWidthFromFont(characterIdentifier); - var advanceWidth = new PdfRectangle(0, 0, width, 0); - advanceWidth = matrix.Transform(advanceWidth); + var advanceWidth = matrix.Transform(new PdfPoint(width, 0)).X; return new CharacterBoundingBox(boundingBox, advanceWidth); } diff --git a/src/UglyToad.PdfPig/Fonts/IFont.cs b/src/UglyToad.PdfPig/Fonts/IFont.cs index 7e36a115..331aa8db 100644 --- a/src/UglyToad.PdfPig/Fonts/IFont.cs +++ b/src/UglyToad.PdfPig/Fonts/IFont.cs @@ -24,12 +24,12 @@ { public PdfRectangle GlyphBounds { get; } - public PdfRectangle CharacterBounds { get; } + public decimal Width { get; } - public CharacterBoundingBox(PdfRectangle glyphBounds, PdfRectangle characterBounds) + public CharacterBoundingBox(PdfRectangle glyphBounds, decimal width) { GlyphBounds = glyphBounds; - CharacterBounds = characterBounds; + Width = width; } } } diff --git a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs index 40d46454..4cd88a7c 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/TrueTypeSimpleFont.cs @@ -120,18 +120,16 @@ } } - var advancedRectangle = new PdfRectangle(0, 0, width, 0); - if (fromFont) { - advancedRectangle = fontMatrix.Transform(advancedRectangle); + width = fontMatrix.Transform(new PdfVector(width, 0)).X; } else { - advancedRectangle = DefaultTransformation.Transform(advancedRectangle); + width = DefaultTransformation.Transform(new PdfVector(width, 0)).X; } - return new CharacterBoundingBox(boundingBox, advancedRectangle); + return new CharacterBoundingBox(boundingBox, width); } private PdfRectangle GetBoundingBoxInGlyphSpace(int characterCode, out bool fromFont) diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs index 36018518..5ea404a6 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1FontSimple.cs @@ -113,7 +113,9 @@ boundingBox = matrix.Transform(boundingBox); - return new CharacterBoundingBox(boundingBox, boundingBox); + var width = matrix.Transform(new PdfVector(widths[characterCode - firstChar], 0)).X; + + return new CharacterBoundingBox(boundingBox, width); } private TransformationMatrix GetFontMatrixInternal() diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs index a1e5be8e..743565d2 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type1Standard14Font.cs @@ -50,7 +50,7 @@ boundingBox = fontMatrix.Transform(boundingBox); - return new CharacterBoundingBox(boundingBox, boundingBox); + return new CharacterBoundingBox(boundingBox, boundingBox.Width); } private PdfRectangle GetBoundingBoxInGlyphSpace(int characterCode) @@ -62,7 +62,7 @@ return new PdfRectangle(0, 0, 250, 0); } - return new PdfRectangle(0, 0, metrics.WidthX, 0); + return new PdfRectangle(0, 0, metrics.WidthX, metrics.WidthY); } public TransformationMatrix GetFontMatrix() diff --git a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs index 6b162aa2..82787bd0 100644 --- a/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs +++ b/src/UglyToad.PdfPig/Fonts/Simple/Type3Font.cs @@ -65,11 +65,13 @@ public CharacterBoundingBox GetBoundingBox(int characterCode) { - var boundingBox = GetBoundingBoxInGlyphSpace(characterCode); + var characterBoundingBox = GetBoundingBoxInGlyphSpace(characterCode); - boundingBox = fontMatrix.Transform(boundingBox); + characterBoundingBox = fontMatrix.Transform(characterBoundingBox); - return new CharacterBoundingBox(boundingBox, boundingBox); + var width = fontMatrix.Transform(new PdfVector(widths[characterCode - firstChar], 0)).X; + + return new CharacterBoundingBox(characterBoundingBox, width); } private PdfRectangle GetBoundingBoxInGlyphSpace(int characterCode) diff --git a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs index c38b418c..bd12a19d 100644 --- a/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs +++ b/src/UglyToad.PdfPig/Graphics/ContentStreamProcessor.cs @@ -144,21 +144,21 @@ .Transform(TextMatrices.TextMatrix .Transform(renderingMatrix .Transform(boundingBox.GlyphBounds))); - var transformedGlyphOrigin = transformationMatrix + var transformedPdfBounds = transformationMatrix .Transform(TextMatrices.TextMatrix - .Transform(renderingMatrix.Transform(boundingBox.CharacterBounds))); + .Transform(renderingMatrix.Transform(new PdfRectangle(0, 0, boundingBox.Width, 0)))); - ShowGlyph(font, transformedGlyphBounds, transformedGlyphOrigin, unicode, fontSize, pointSize); + ShowGlyph(font, transformedGlyphBounds, transformedPdfBounds.BottomLeft, transformedPdfBounds.Width, unicode, fontSize, pointSize); decimal tx, ty; if (font.IsVertical) { tx = 0; - ty = boundingBox.CharacterBounds.Height * fontSize + characterSpacing + wordSpacing; + ty = boundingBox.GlyphBounds.Height * fontSize + characterSpacing + wordSpacing; } else { - tx = (boundingBox.CharacterBounds.Width * fontSize + characterSpacing + wordSpacing) * horizontalScaling; + tx = (boundingBox.Width * fontSize + characterSpacing + wordSpacing) * horizontalScaling; ty = 0; } @@ -254,9 +254,9 @@ TextMatrices.TextMatrix = newMatrix; } - private void ShowGlyph(IFont font, PdfRectangle glyphRectangle, PdfRectangle characterRectangle, string unicode, decimal fontSize, decimal pointSize) + private void ShowGlyph(IFont font, PdfRectangle glyphRectangle, PdfPoint position, decimal width, string unicode, decimal fontSize, decimal pointSize) { - var letter = new Letter(unicode, glyphRectangle, characterRectangle, fontSize, font.Name.Data, pointSize); + var letter = new Letter(unicode, glyphRectangle, position, width, fontSize, font.Name.Data, pointSize); Letters.Add(letter); }