mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
fix #176, allow startxref to appear earlier in the document
This commit is contained in:
parent
4312aa470e
commit
bf45602ac5
@ -90,5 +90,21 @@
|
||||
Assert.False(document.TryGetBookmarks(out _));
|
||||
}
|
||||
}
|
||||
|
||||
[Fact]
public void StartXRefNotNearEnd()
{
    // Padding placed after the real document content: one byte of value 10
    // followed by 2025 zero bytes (arrays are zero-initialised).
    // NOTE(review): presumably 2026 was chosen to exceed the end-of-file search range - confirm.
    var padding = new byte[2026];
    padding[0] = 10;

    // Original file bytes with the padding appended, so the document's own
    // trailing content is no longer at the very end of the input.
    var paddedDocument = File.ReadAllBytes(GetFilename())
        .Concat(padding)
        .ToArray();

    // Strict parsing must still open the document and report its single page.
    using (var document = PdfDocument.Open(paddedDocument, ParsingOptions.LenientParsingOff))
    {
        Assert.Equal(1, document.NumberOfPages);
    }
}
|
||||
}
|
||||
}
|
||||
|
@ -38,7 +38,7 @@
|
||||
(byte) 'e',
|
||||
(byte) 'f'
|
||||
};
|
||||
|
||||
|
||||
public static long GetFirstCrossReferenceOffset(IInputBytes bytes, ISeekableTokenScanner scanner, bool isLenientParsing)
|
||||
{
|
||||
if (bytes == null)
|
||||
@ -55,10 +55,6 @@
|
||||
|
||||
var offsetFromEnd = fileLength < EndOfFileSearchRange ? (int)fileLength : EndOfFileSearchRange;
|
||||
|
||||
var startPosition = fileLength - offsetFromEnd;
|
||||
|
||||
bytes.Seek(startPosition);
|
||||
|
||||
var startXrefPosition = GetStartXrefPosition(bytes, offsetFromEnd);
|
||||
|
||||
scanner.Seek(startXrefPosition);
|
||||
@ -96,38 +92,48 @@
|
||||
var startXrefs = new List<int>();
|
||||
|
||||
var index = 0;
|
||||
var offset = 0;
|
||||
|
||||
// Starting scanning the last 1024 bytes.
|
||||
while (bytes.MoveNext())
|
||||
|
||||
var fileLength = bytes.Length;
|
||||
var multiple = 1;
|
||||
|
||||
var actualStartOffset = Math.Max(0, fileLength - (offsetFromEnd * multiple));
|
||||
do
|
||||
{
|
||||
offset++;
|
||||
if (bytes.CurrentByte == StartXRefBytes[index])
|
||||
multiple *= 2;
|
||||
bytes.Seek(actualStartOffset);
|
||||
|
||||
// Starting scanning the file bytes.
|
||||
while (bytes.MoveNext())
|
||||
{
|
||||
// We might be reading "startxref".
|
||||
index++;
|
||||
}
|
||||
else
|
||||
{
|
||||
index = 0;
|
||||
if (bytes.CurrentByte == StartXRefBytes[index])
|
||||
{
|
||||
// We might be reading "startxref".
|
||||
index++;
|
||||
}
|
||||
else
|
||||
{
|
||||
index = 0;
|
||||
}
|
||||
|
||||
if (index == StartXRefBytes.Length)
|
||||
{
|
||||
// Add this "startxref" (position from the start of the document to the first 's').
|
||||
startXrefs.Add((int)bytes.CurrentOffset - StartXRefBytes.Length);
|
||||
|
||||
// Continue scanning in case there are further "startxref"s. Not sure if this ever happens.
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
if (index == StartXRefBytes.Length)
|
||||
{
|
||||
// Add this "startxref" (position from the end of the document to the first 's').
|
||||
startXrefs.Add(offsetFromEnd - (offset - StartXRefBytes.Length));
|
||||
|
||||
// Continue scanning in case there are further "startxref"s. Not sure if this ever happens.
|
||||
index = 0;
|
||||
}
|
||||
}
|
||||
|
||||
actualStartOffset = Math.Max(0, fileLength - (offsetFromEnd * multiple));
|
||||
} while (startXrefs.Count == 0 && actualStartOffset > 0);
|
||||
|
||||
if (startXrefs.Count == 0)
|
||||
{
|
||||
throw new PdfDocumentFormatException("Could not find the startxref within the last 1024 characters.");
|
||||
throw new PdfDocumentFormatException($"Could not find the startxref within the last {offsetFromEnd} characters.");
|
||||
}
|
||||
|
||||
return bytes.Length - startXrefs[startXrefs.Count - 1];
|
||||
return startXrefs[startXrefs.Count - 1];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user