Support trailer info to be a dictionary instead of an indirect reference

This commit is contained in:
Arnaud TAMAILLON 2024-08-30 16:06:41 +02:00 committed by BobLd
parent b4649758c6
commit b824fb2b14
6 changed files with 80 additions and 10 deletions

View File

@ -96,5 +96,23 @@
Assert.Equal("D:20190306232856Z00'00'", information.CreationDate);
}
}
[Fact]
public void CanReadDocumentInfromationDirectory()
{
    // Regression test for issue 884: the fixture's trailer stores the Info entry
    // as a direct dictionary rather than as an indirect reference.
    var pdfPath = IntegrationHelpers.GetSpecificTestDocumentPath("info_dictionary.pdf");

    // Lenient parsing on: the document opens and the producer can be read.
    using (var lenientDocument = PdfDocument.Open(pdfPath))
    {
        Assert.Equal("SumatraPDF 3.2", lenientDocument.Information.Producer);
    }

    // Lenient parsing off: opening the same file is rejected with a format exception.
    const string expectedMessage = "The info token in the trailer dictionary should only contain indirect references, instead got: <Producer, (SumatraPDF 3.2)>.";
    var strictFailure = Assert.Throws<PdfDocumentFormatException>(
        () => PdfDocument.Open(pdfPath, ParsingOptions.LenientParsingOff));
    Assert.Equal(expectedMessage, strictFailure.Message);
}
}
}

View File

@ -0,0 +1,42 @@
%PDF-1.0
%µ¶
1 0 obj
<</Type/Catalog/Pages 2 0 R>>
endobj
2 0 obj
<</Kids[3 0 R]/Count 1/Type/Pages/MediaBox[0 0 595 792]>>
endobj
3 0 obj
<</Type/Page/Parent 2 0 R/Contents 4 0 R/Resources<<>>>>
endobj
4 0 obj
<</Length 58>>
stream
q
BT
/ 96 Tf
1 0 0 1 36 684 Tm
(Hello World!) Tj
ET
Q
endstream
endobj
xref
0 5
0000000000 65536 f
0000000016 00000 n
0000000062 00000 n
0000000136 00000 n
0000000209 00000 n
trailer
<</Size 5/Root 1 0 R/Info <</Producer(SumatraPDF 3.2)>>>>
startxref
316
%%EOF

View File

@ -28,7 +28,7 @@
parts.Add(part);
}
public CrossReferenceTable Build(long firstCrossReferenceOffset, long offsetCorrection, ILog log)
public CrossReferenceTable Build(long firstCrossReferenceOffset, long offsetCorrection, bool isLenientParsing, ILog log)
{
CrossReferenceType type = CrossReferenceType.Table;
DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>());
@ -118,7 +118,7 @@
}
}
return new CrossReferenceTable(type, objectOffsets, new TrailerDictionary(trailerDictionary),
return new CrossReferenceTable(type, objectOffsets, new TrailerDictionary(trailerDictionary, isLenientParsing),
parts.Select(x =>
{
var prev = x.GetPreviousOffset();

View File

@ -35,7 +35,7 @@
/// <summary>
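/// The token for the document's information dictionary if the trailer contains one: usually an indirect reference, but it may also be the dictionary itself when the file stores it directly.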
/// </summary>
public IndirectReference? Info { get; }
public IToken? Info { get; }
/// <summary>
/// A list containing two-byte string tokens which act as file identifiers.
@ -51,7 +51,8 @@
/// Create a new <see cref="TrailerDictionary"/>.
/// </summary>
/// <param name="dictionary">The parsed dictionary from the document.</param>
internal TrailerDictionary(DictionaryToken dictionary)
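/// <param name="isLenientParsing">Whether parsing is in lenient mode. When <c>false</c>, an Info entry that is not an indirect reference causes a <see cref="PdfDocumentFormatException"/> to be thrown.</param>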
internal TrailerDictionary(DictionaryToken dictionary, bool isLenientParsing)
{
if (dictionary is null)
{
@ -68,9 +69,13 @@
Root = rootReference.Data;
if (dictionary.TryGet(NameToken.Info, out IndirectReferenceToken reference))
if (dictionary.TryGet(NameToken.Info, out var infoToken))
{
Info = reference.Data;
if (!isLenientParsing && infoToken is not IndirectReferenceToken)
{
throw new PdfDocumentFormatException($"The info token in the trailer dictionary should only contain indirect references, instead got: {infoToken}.");
}
Info = infoToken;
}
if (dictionary.TryGet(NameToken.Id, out ArrayToken arr))

View File

@ -17,12 +17,17 @@
/// </summary>
public static DocumentInformation Create(IPdfTokenScanner pdfTokenScanner, TrailerDictionary trailer, bool isLenientParsing)
{
if (!trailer.Info.HasValue)
var token = trailer.Info;
if (token is IndirectReferenceToken reference)
{
token = DirectObjectFinder.Get<IToken>(reference.Data, pdfTokenScanner);
}
if (token == null)
{
return DocumentInformation.Default;
}
var token = DirectObjectFinder.Get<IToken>(trailer.Info.Value, pdfTokenScanner);
if (token is DictionaryToken infoParsed)
{
var title = GetEntryOrDefault(infoParsed, NameToken.Title, pdfTokenScanner);

View File

@ -238,7 +238,7 @@
throw new PdfDocumentFormatException("The cross reference was not found.");
}
var resolved = table.Build(crossReferenceLocation, offsetCorrection, log);
var resolved = table.Build(crossReferenceLocation, offsetCorrection, isLenientParsing, log);
// check the offsets of all referenced objects
if (!CrossReferenceObjectOffsetValidator.ValidateCrossReferenceOffsets(bytes, resolved, log, out var actualOffsets))