diff --git a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs index a067b423..4b72a319 100644 --- a/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/Tokenization/Scanner/PdfTokenScannerTests.cs @@ -50,6 +50,36 @@ endobj"; Assert.Equal(new IndirectReference(12, 7), reference.Data); } + [Fact] + public void ReadsObjectWithUndefinedIndirectReference() + { + const string s = @" +5 0 obj +<< +/XObject << +/Pic1 7 0 R +>> +/ProcSet [/PDF /Text /ImageC ] +/Font << +/F0 8 0 R +/F1 9 0 R +/F2 10 0 R +/F3 0 0 R +>> +>> +endobj"; + + var scanner = GetScanner(s); + + ReadToEnd(scanner); + + var token = scanner.Get(new IndirectReference(5, 0)); + Assert.NotNull(token); + + token = scanner.Get(new IndirectReference(0, 0)); + Assert.Null(token); + } + [Fact] public void ReadsNumericObjectWithComment() { diff --git a/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs b/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs index 3bd3baf5..f8e68068 100644 --- a/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs +++ b/src/UglyToad.PdfPig/AcroForms/AcroFormFactory.cs @@ -162,6 +162,10 @@ } var kidObject = tokenScanner.Get(kidReferenceToken.Data); + if (kidObject is null) + { + throw new InvalidOperationException($"Could not find the object with reference: {kidReferenceToken.Data}."); + } if (kidObject.Data is DictionaryToken kidDictionaryToken) { diff --git a/src/UglyToad.PdfPig/Content/ResourceStore.cs b/src/UglyToad.PdfPig/Content/ResourceStore.cs index ec605730..3b27c34d 100644 --- a/src/UglyToad.PdfPig/Content/ResourceStore.cs +++ b/src/UglyToad.PdfPig/Content/ResourceStore.cs @@ -148,7 +148,8 @@ if (fontObject == null) { - throw new InvalidOperationException($"Could not retrieve the font with name: {pair.Key} which should have been object {objectKey}"); + //This is a valid use case + continue; } loadedFonts[reference] = fontFactory.Get(fontObject); diff --git a/src/UglyToad.PdfPig/Parser/Parts/DirectObjectFinder.cs b/src/UglyToad.PdfPig/Parser/Parts/DirectObjectFinder.cs index 39180a4e..f3fc637a 100644 --- a/src/UglyToad.PdfPig/Parser/Parts/DirectObjectFinder.cs +++ b/src/UglyToad.PdfPig/Parser/Parts/DirectObjectFinder.cs @@ -43,9 +43,13 @@ return false; } - public static T Get(IndirectReference reference, IPdfTokenScanner scanner) where T : IToken + public static T Get(IndirectReference reference, IPdfTokenScanner scanner) where T : class, IToken { var temp = scanner.Get(reference); + if (temp is null) + { + return null; + } if (temp.Data is T locatedResult) { @@ -75,7 +79,7 @@ throw new PdfDocumentFormatException($"Could not find the object number {reference} with type {typeof(T).Name}."); } - public static T Get(IToken token, IPdfTokenScanner scanner) where T : IToken + public static T Get(IToken token, IPdfTokenScanner scanner) where T : class, IToken { if (token is T result) { @@ -84,32 +88,7 @@ if (token is IndirectReferenceToken reference) { - var temp = scanner.Get(reference.Data); - - if (temp.Data is T locatedResult) - { - return locatedResult; - } - - if (temp.Data is IndirectReferenceToken nestedReference) - { - return Get(nestedReference, scanner); - } - - if (temp.Data is ArrayToken array && array.Data.Count == 1) - { - var arrayElement = array.Data[0]; - - if (arrayElement is IndirectReferenceToken arrayReference) - { - return Get(arrayReference, scanner); - } - - if (arrayElement is T arrayToken) - { - return arrayToken; - } - } + return Get(reference.Data, scanner); } throw new PdfDocumentFormatException($"Could not find the object {token} with type {typeof(T).Name}."); diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs index f9ab7682..93bffa7e 100644 --- a/src/UglyToad.PdfPig/PdfExtensions.cs +++ b/src/UglyToad.PdfPig/PdfExtensions.cs @@ -32,7 +32,7 @@ return true; } - internal static T Get(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner scanner) where T : IToken + internal static T Get(this DictionaryToken dictionary, NameToken name, IPdfTokenScanner scanner) where T : class, IToken { if (!dictionary.TryGet(name, out var token) || !(token is T typedToken)) { diff --git a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs index 048d09af..4b187770 100644 --- a/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs +++ b/src/UglyToad.PdfPig/PdfFonts/Parser/Handlers/Type1FontHandler.cs @@ -138,7 +138,7 @@ try { - if (!(pdfScanner.Get(descriptor.FontFile.ObjectKey.Data).Data is StreamToken stream)) + if (!(pdfScanner.Get(descriptor.FontFile.ObjectKey.Data)?.Data is StreamToken stream)) { return null; } diff --git a/src/UglyToad.PdfPig/Structure.cs b/src/UglyToad.PdfPig/Structure.cs index 71c83654..3d280008 100644 --- a/src/UglyToad.PdfPig/Structure.cs +++ b/src/UglyToad.PdfPig/Structure.cs @@ -45,7 +45,7 @@ /// The tokenized PDF object from the file. public ObjectToken GetObject(IndirectReference reference) { - return TokenScanner.Get(reference); + return TokenScanner.Get(reference) ?? throw new InvalidOperationException($"Could not find the object with reference: {reference}."); } } } diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/IPdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/IPdfTokenScanner.cs index 93282f93..907de186 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/IPdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/IPdfTokenScanner.cs @@ -11,6 +11,7 @@ { /// /// Tokenize the object with a given object number. + /// May return null when the reference is undefined /// /// The object number for the object to tokenize. /// The tokenized object. diff --git a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs index f9bd430a..941be3a8 100644 --- a/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs +++ b/src/UglyToad.PdfPig/Tokenization/Scanner/PdfTokenScanner.cs @@ -664,7 +664,7 @@ if (!objectLocationProvider.TryGetOffset(reference, out var offset)) { - throw new InvalidOperationException($"Could not find the object with reference: {reference}."); + return null; } // Negative offsets refer to a stream with that number.