From ba8d2f5b1d737ccb3d401bc885c1cf3553cc1563 Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Wed, 10 Jan 2018 22:15:29 +0000 Subject: [PATCH] fix a bug with tokenization without spaces before string --- .../Scanner/CoreTokenScannerTests.cs | 23 +++++++++++++++++++ .../Fonts/Parser/Handlers/Type1FontHandler.cs | 3 ++- ...ReflectionGraphicsStateOperationFactory.cs | 12 +++++++++- src/UglyToad.PdfPig/PdfDocument.cs | 2 +- .../Tokenization/PlainTokenizer.cs | 3 ++- tools/build.cmd | 1 + 6 files changed, 40 insertions(+), 4 deletions(-) create mode 100644 tools/build.cmd diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs index 338f1b16..ccd58e04 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/CoreTokenScannerTests.cs @@ -147,6 +147,29 @@ endobj"; AssertCorrectToken(array.Data[array.Data.Count - 1], ")"); AssertCorrectToken(array.Data[array.Data.Count - 2], 1.9m); } + + [Fact] + public void ScansStringWithoutWhitespacePreceding() + { + const string s = @"T*() Tj +-91"; + + var tokens = new List(); + + var scanner = scannerFactory(StringBytesTestConverter.Convert(s, false).Bytes); + + while (scanner.MoveNext()) + { + tokens.Add(scanner.CurrentToken); + } + + Assert.Equal(4, tokens.Count); + + AssertCorrectToken(tokens[0], "T*"); + AssertCorrectToken(tokens[1], ""); + AssertCorrectToken(tokens[2], "Tj"); + AssertCorrectToken(tokens[3], -91); + } private static void AssertCorrectToken(IToken token, TData expected) where T : IDataToken { diff --git a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs index 77591713..fd622567 100644 --- a/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/Fonts/Parser/Handlers/Type1FontHandler.cs @@ -31,10 +31,11 @@ public IFont Generate(PdfDictionary dictionary, IRandomAccessRead reader, bool isLenientParsing) { - var usingStandard14Only = !dictionary.ContainsKey(CosName.FIRST_CHAR); + var usingStandard14Only = !dictionary.ContainsKey(CosName.FIRST_CHAR) || !dictionary.ContainsKey(CosName.WIDTHS); if (usingStandard14Only) { + // TODO: some fonts combine standard 14 font with other metrics. if (!dictionary.TryGetName(CosName.BASE_FONT, out var standard14Name)) { throw new InvalidFontFormatException($"The Type 1 font did not contain a first character entry but also did not reference a standard 14 font: {dictionary}"); diff --git a/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs b/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs index 2c8b6346..e2843cc3 100644 --- a/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs +++ b/src/UglyToad.PdfPig/Graphics/ReflectionGraphicsStateOperationFactory.cs @@ -5,8 +5,10 @@ namespace UglyToad.PdfPig.Graphics using System.Linq; using System.Reflection; using Cos; + using Exceptions; using Operations; using Operations.TextShowing; + using Operations.TextState; using Tokenization.Tokens; internal class ReflectionGraphicsStateOperationFactory : IGraphicsStateOperationFactory @@ -75,6 +77,14 @@ namespace UglyToad.PdfPig.Graphics var array = operands.ToArray(); return new ShowTextsWithPositioning(array); + case SetFontAndSize.Symbol: + if (operands.Count == 2 && operands[0] is NameToken name && operands[1] is NumericToken size) + { + return new SetFontAndSize(name.Data, size.Data); + } + + var information = string.Join(", ", operands.Select(x => x.ToString())); + throw new PdfDocumentFormatException($"Attempted to set font with wrong number of parameters: [{information}]"); } if (!operations.TryGetValue(op.Data, out Type operationType)) @@ -109,7 +119,7 @@ namespace UglyToad.PdfPig.Graphics { if (operands[offset] is NumericToken numeric) { - arguments.Add(numeric.Data); + arguments.Add(numeric.Data); } else { diff --git a/src/UglyToad.PdfPig/PdfDocument.cs b/src/UglyToad.PdfPig/PdfDocument.cs index 619bf26e..841b7b10 100644 --- a/src/UglyToad.PdfPig/PdfDocument.cs +++ b/src/UglyToad.PdfPig/PdfDocument.cs @@ -10,7 +10,7 @@ /// /// - /// Provides access to document level information for this PDF document as well as access to the s contained in the document. + /// Provides access to document level information for this PDF document as well as access to the s contained in the document. /// public class PdfDocument : IDisposable { diff --git a/src/UglyToad.PdfPig/Tokenization/PlainTokenizer.cs b/src/UglyToad.PdfPig/Tokenization/PlainTokenizer.cs index cb925576..a5e75b01 100644 --- a/src/UglyToad.PdfPig/Tokenization/PlainTokenizer.cs +++ b/src/UglyToad.PdfPig/Tokenization/PlainTokenizer.cs @@ -29,7 +29,8 @@ if (inputBytes.CurrentByte == '<' || inputBytes.CurrentByte == '[' || inputBytes.CurrentByte == '/' || inputBytes.CurrentByte == ']' - || inputBytes.CurrentByte == '>') + || inputBytes.CurrentByte == '>' || inputBytes.CurrentByte == '(' + || inputBytes.CurrentByte == ')') { break; } diff --git a/tools/build.cmd b/tools/build.cmd new file mode 100644 index 00000000..f0535ae9 --- /dev/null +++ b/tools/build.cmd @@ -0,0 +1 @@ +msbuild /t:pack "../src/UglyToad.PdfPig/UglyToad.PdfPig.csproj" /p:Configuration=Release /p:PackageOutputPath="../../releases" \ No newline at end of file