mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
Support reading files with missing white space after xref in lenient mode (#906)
Support missing white space after xref

Co-authored-by: Arnaud TAMAILLON <arnaud.tamaillon@younited-credit.fr>
This commit is contained in:
parent
09bddba778
commit
4845f43696
@ -0,0 +1,13 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
{
    /// <summary>
    /// Integration tests that open sample documents through <c>PdfDocument.Open</c>.
    /// </summary>
    public class CrossReferenceParserTests
    {
        /// <summary>
        /// Opens the "xref-with-no-whitespace.pdf" sample and checks that it reports three pages.
        /// </summary>
        [Fact]
        public void CanReadDocumentWithMissingWhitespaceAfterXRef()
        {
            var documentPath = IntegrationHelpers.GetSpecificTestDocumentPath("xref-with-no-whitespace.pdf");

            using (var pdf = PdfDocument.Open(documentPath))
            {
                Assert.Equal(3, pdf.NumberOfPages);
            }
        }
    }
}
|
Binary file not shown.
@ -300,18 +300,42 @@ trailer
|
||||
trailer
|
||||
<< >>";
|
||||
// Strict parsing
|
||||
var input = StringBytesTestConverter.Scanner(data);
|
||||
var act = () => CrossReferenceTableParser.Parse(input.scanner, 0, false);
|
||||
var input = GetReader(data);
|
||||
var act = () => CrossReferenceTableParser.Parse(input, 0, false);
|
||||
var ex = Assert.Throws<PdfDocumentFormatException>(act);
|
||||
Assert.Equal("Found a line with 2 unexpected entries in the cross reference table: 127, 0.", ex.Message);
|
||||
|
||||
// Lenient Parsing
|
||||
input = StringBytesTestConverter.Scanner(data);
|
||||
var result = CrossReferenceTableParser.Parse(input.scanner, 0, true);
|
||||
input = GetReader(data);
|
||||
var result = CrossReferenceTableParser.Parse(input, 0, true);
|
||||
|
||||
Assert.Equal(6, result.ObjectOffsets.Count);
|
||||
}
|
||||
|
||||
/// <summary>
/// Feeds the parser a table whose keyword is fused to the subsection header ("xref15 2").
/// Strict mode must throw; lenient mode must produce two object offsets.
/// </summary>
[Fact]
public void ParsesMissingWhitespaceAfterXref()
{
    const string table = @"xref15 2
0000000190 00000 n
0000000250 00032 n

trailer
<<>>";

    // Strict parsing: the fused "xref15" operator is reported as unexpected.
    var strictScanner = GetReader(table);
    var strictAttempt = () => CrossReferenceTableParser.Parse(strictScanner, 0, false);

    var failure = Assert.Throws<PdfDocumentFormatException>(strictAttempt);
    Assert.Equal("Unexpected operator in xref position: xref15.", failure.Message);

    // Lenient parsing: a fresh scanner is used so the strict attempt cannot affect the position.
    var lenientScanner = GetReader(table);
    var parsed = CrossReferenceTableParser.Parse(lenientScanner, 0, true);

    Assert.Equal(2, parsed.ObjectOffsets.Count);
}
|
||||
|
||||
private static CoreTokenScanner GetReader(string input)
|
||||
{
|
||||
return StringBytesTestConverter.Scanner(input).scanner;
|
||||
|
@ -59,7 +59,7 @@
|
||||
|
||||
tokenScanner.MoveNext();
|
||||
|
||||
if (tokenScanner.CurrentToken is OperatorToken tableToken && tableToken.Data == "xref")
|
||||
if (CrossReferenceTableParser.IsCrossReferenceMarker(tokenScanner, isLenientParsing))
|
||||
{
|
||||
missedAttempts = 0;
|
||||
log.Debug("Element was cross reference table.");
|
||||
|
@ -13,7 +13,7 @@
|
||||
{
|
||||
private const string InUseEntry = "n";
|
||||
private const string FreeEntry = "f";
|
||||
|
||||
|
||||
public static CrossReferenceTablePart Parse(ISeekableTokenScanner scanner, long offset, bool isLenientParsing)
|
||||
{
|
||||
var builder = new CrossReferenceTablePartBuilder
|
||||
@ -31,10 +31,22 @@
|
||||
|
||||
if (scanner.CurrentToken is OperatorToken operatorToken)
|
||||
{
|
||||
if (operatorToken.Data == "xref")
|
||||
if (operatorToken.Data == OperatorToken.Xref.Data)
|
||||
{
|
||||
scanner.MoveNext();
|
||||
}
|
||||
else if (isLenientParsing)
|
||||
{
|
||||
if (operatorToken.Data.StartsWith(OperatorToken.Xref.Data))
|
||||
{
|
||||
scanner.Seek(scanner.CurrentPosition - operatorToken.Data.Length + OperatorToken.Xref.Data.Length);
|
||||
scanner.MoveNext();
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
throw new PdfDocumentFormatException($"Unexpected operator in xref position: {operatorToken}.");
|
||||
@ -106,6 +118,15 @@
|
||||
return builder.Build();
|
||||
}
|
||||
|
||||
/// <summary>
/// Determines whether the scanner's current token marks the start of a cross reference table.
/// </summary>
/// <param name="scanner">
/// Scanner whose <c>CurrentToken</c> is inspected. This method only reads the token; it does not
/// advance or seek the scanner.
/// </param>
/// <param name="isLenientParsing">
/// When <see langword="true"/>, an operator consisting of the xref keyword immediately followed by
/// an integer (for example <c>xref15</c>, where the separating white space is missing) is also accepted.
/// </param>
/// <returns>
/// <see langword="true"/> if the current token is an <c>OperatorToken</c> that is exactly the xref keyword,
/// or, in lenient mode, the keyword directly followed by an integer; otherwise <see langword="false"/>.
/// </returns>
public static bool IsCrossReferenceMarker(ISeekableTokenScanner scanner, bool isLenientParsing)
{
    if (!(scanner.CurrentToken is OperatorToken operatorToken))
    {
        return false;
    }

    var keyword = OperatorToken.Xref.Data;
    var data = operatorToken.Data;

    if (data == keyword)
    {
        return true;
    }

    if (!isLenientParsing)
    {
        return false;
    }

    // The keyword is a fixed token from the file, not linguistic text: compare ordinally so the
    // result does not depend on the current culture and the Substring offset below is guaranteed
    // to line up with the matched prefix.
    if (!data.StartsWith(keyword, System.StringComparison.Ordinal))
    {
        return false;
    }

    // Parse the fused suffix with the invariant culture for the same reason; the value itself is
    // not needed here, only whether it is an integer.
    return int.TryParse(
        data.Substring(keyword.Length),
        System.Globalization.NumberStyles.Integer,
        System.Globalization.CultureInfo.InvariantCulture,
        out _);
}
|
||||
|
||||
private static int ProcessTokens(ReadOnlySpan<IToken> tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
|
||||
int objectCount, ref TableSubsectionDefinition definition)
|
||||
{
|
||||
|
Loading…
Reference in New Issue
Block a user