mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
#434 ensure companion stream is added to cross reference on building
This commit is contained in:
parent
abcbdc55e3
commit
2fd46571b3
@ -1,39 +1,39 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
//using System;
|
||||
//using System.Diagnostics;
|
||||
//using System.IO;
|
||||
//using Xunit;
|
||||
using System;
|
||||
using System.Diagnostics;
|
||||
using System.IO;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
|
||||
/// A class for testing files which are not checked in to source control.
|
||||
/// </summary>
|
||||
public class LocalTests
|
||||
{
|
||||
//[Fact]
|
||||
//public void Tests()
|
||||
//{
|
||||
// var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
|
||||
[Fact]
|
||||
public void Tests()
|
||||
{
|
||||
var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
|
||||
|
||||
// foreach (var file in files)
|
||||
// {
|
||||
// try
|
||||
// {
|
||||
// using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
|
||||
// {
|
||||
// for (var i = 1; i <= document.NumberOfPages; i++)
|
||||
// {
|
||||
// var page = document.GetPage(i);
|
||||
// var text = page.Text;
|
||||
// Trace.WriteLine(text);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// catch (Exception ex)
|
||||
// {
|
||||
// throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
|
||||
// }
|
||||
// }
|
||||
//}
|
||||
foreach (var file in files)
|
||||
{
|
||||
try
|
||||
{
|
||||
using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
|
||||
{
|
||||
for (var i = 1; i <= document.NumberOfPages; i++)
|
||||
{
|
||||
var page = document.GetPage(i);
|
||||
var text = page.Text;
|
||||
Trace.WriteLine(text);
|
||||
}
|
||||
}
|
||||
}
|
||||
catch (Exception ex)
|
||||
{
|
||||
throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -34,28 +34,35 @@
|
||||
DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary<NameToken, IToken>());
|
||||
Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();
|
||||
|
||||
List<long> xrefSeqBytePos = new List<long>();
|
||||
var xrefPartToBytePositionOrder = new List<long>();
|
||||
|
||||
var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);
|
||||
|
||||
if (currentPart == null)
|
||||
{
|
||||
// no XRef at given position
|
||||
log.Warn("Did not found XRef object at specified startxref position " + firstCrossReferenceOffset);
|
||||
log.Warn($"Did not find an XRef object at the specified startxref position {firstCrossReferenceOffset}");
|
||||
|
||||
// use all objects in byte position order (last entries overwrite previous ones)
|
||||
xrefSeqBytePos.AddRange(parts.Select(x => x.Offset));
|
||||
xrefSeqBytePos.Sort();
|
||||
xrefPartToBytePositionOrder.AddRange(parts.Select(x => x.Offset));
|
||||
xrefPartToBytePositionOrder.Sort();
|
||||
}
|
||||
else
|
||||
{
|
||||
// copy xref type
|
||||
type = currentPart.Type;
|
||||
|
||||
|
||||
// found starting Xref object
|
||||
// add this and follow chain defined by 'Prev' keys
|
||||
xrefSeqBytePos.Add(firstCrossReferenceOffset);
|
||||
xrefPartToBytePositionOrder.Add(firstCrossReferenceOffset);
|
||||
|
||||
// Get any streams that are tied to this table.
|
||||
var activePart = currentPart;
|
||||
var dependents = parts.Where(x => x.TiedToXrefAtOffset == activePart.Offset);
|
||||
foreach (var dependent in dependents)
|
||||
{
|
||||
xrefPartToBytePositionOrder.Add(dependent.Offset);
|
||||
}
|
||||
|
||||
while (currentPart.Dictionary != null)
|
||||
{
|
||||
@ -72,21 +79,21 @@
|
||||
break;
|
||||
}
|
||||
|
||||
xrefSeqBytePos.Add(prevBytePos);
|
||||
xrefPartToBytePositionOrder.Add(prevBytePos);
|
||||
|
||||
// sanity check to prevent infinite loops
|
||||
if (xrefSeqBytePos.Count >= parts.Count)
|
||||
if (xrefPartToBytePositionOrder.Count >= parts.Count)
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// have to reverse order so that later XRefs will overwrite previous ones
|
||||
xrefSeqBytePos.Reverse();
|
||||
xrefPartToBytePositionOrder.Reverse();
|
||||
}
|
||||
|
||||
// merge used and sorted XRef/trailer
|
||||
foreach (long bPos in xrefSeqBytePos)
|
||||
foreach (long bPos in xrefPartToBytePositionOrder)
|
||||
{
|
||||
var currentObject = parts.First(x => x.Offset == bPos || x.Offset == bPos + offsetCorrection);
|
||||
if (currentObject.Dictionary != null)
|
||||
|
@ -33,13 +33,24 @@
|
||||
|
||||
public CrossReferenceType Type { get; }
|
||||
|
||||
public CrossReferenceTablePart(IReadOnlyDictionary<IndirectReference, long> objectOffsets, long offset, long previous, DictionaryToken dictionary, CrossReferenceType type)
|
||||
/// <summary>
|
||||
/// For an Xref stream referenced from an Xref table, the offset of that table; the stream and the table should be used together when constructing the final table.
|
||||
/// </summary>
|
||||
public long? TiedToXrefAtOffset { get; }
|
||||
|
||||
public CrossReferenceTablePart(
|
||||
IReadOnlyDictionary<IndirectReference, long> objectOffsets,
|
||||
long offset, long previous,
|
||||
DictionaryToken dictionary,
|
||||
CrossReferenceType type,
|
||||
long? tiedToXrefAtOffset)
|
||||
{
|
||||
ObjectOffsets = objectOffsets;
|
||||
Offset = offset;
|
||||
Previous = previous;
|
||||
Dictionary = dictionary;
|
||||
Type = type;
|
||||
TiedToXrefAtOffset = tiedToXrefAtOffset;
|
||||
}
|
||||
|
||||
public void FixOffset(long offset)
|
||||
|
@ -15,7 +15,9 @@
|
||||
public DictionaryToken Dictionary { get; set; }
|
||||
|
||||
public CrossReferenceType XRefType { get; set; }
|
||||
|
||||
|
||||
public long? TiedToPreviousAtOffset { get; set; }
|
||||
|
||||
public void Add(long objectId, int generationNumber, long offset)
|
||||
{
|
||||
IndirectReference objKey = new IndirectReference(objectId, generationNumber);
|
||||
@ -28,7 +30,7 @@
|
||||
|
||||
public CrossReferenceTablePart Build()
|
||||
{
|
||||
return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary, XRefType);
|
||||
return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary, XRefType, TiedToPreviousAtOffset);
|
||||
}
|
||||
}
|
||||
}
|
@ -82,9 +82,11 @@
|
||||
// check for an XRef stream; it may contain some object ids of compressed objects
|
||||
if (tableDictionary.ContainsKey(NameToken.XrefStm))
|
||||
{
|
||||
log.Debug("Cross reference table contained referenced to stream. Reading the stream.");
|
||||
log.Debug("Cross reference table contained reference to stream. Reading the stream.");
|
||||
|
||||
int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;
|
||||
var tiedToTableAtOffset = tablePart.Offset;
|
||||
|
||||
int streamOffset = ((NumericToken) tableDictionary.Data[NameToken.XrefStm]).Int;
|
||||
|
||||
// check the xref stream reference
|
||||
fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
|
||||
@ -96,8 +98,13 @@
|
||||
|
||||
// Update the cross reference table to be a stream instead.
|
||||
tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
|
||||
tablePart = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
|
||||
tablePart.Previous, tableDictionary, tablePart.Type);
|
||||
tablePart = new CrossReferenceTablePart(
|
||||
tablePart.ObjectOffsets,
|
||||
streamOffset,
|
||||
tablePart.Previous,
|
||||
tableDictionary,
|
||||
tablePart.Type,
|
||||
tiedToTableAtOffset);
|
||||
}
|
||||
|
||||
// Read the stream from the table.
|
||||
@ -105,7 +112,7 @@
|
||||
{
|
||||
try
|
||||
{
|
||||
TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
|
||||
TryParseCrossReferenceStream(streamOffset, pdfScanner, tiedToTableAtOffset, out streamPart);
|
||||
}
|
||||
catch (InvalidOperationException ex)
|
||||
{
|
||||
@ -149,7 +156,7 @@
|
||||
tokenScanner.Seek(previousCrossReferenceLocation);
|
||||
|
||||
// parse xref stream
|
||||
if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, out var tablePart))
|
||||
if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, null, out var tablePart))
|
||||
{
|
||||
if (!TryBruteForceXrefTableLocate(bytes, previousCrossReferenceLocation, out var actualOffset))
|
||||
{
|
||||
@ -218,7 +225,10 @@
|
||||
return resolved;
|
||||
}
|
||||
|
||||
private bool TryParseCrossReferenceStream(long objByteOffset, IPdfTokenScanner pdfScanner,
|
||||
private bool TryParseCrossReferenceStream(
|
||||
long objByteOffset,
|
||||
IPdfTokenScanner pdfScanner,
|
||||
long? fromTableAtOffset,
|
||||
out CrossReferenceTablePart xrefTablePart)
|
||||
{
|
||||
xrefTablePart = null;
|
||||
@ -236,7 +246,7 @@
|
||||
return false;
|
||||
}
|
||||
|
||||
xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, objectStream);
|
||||
xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, fromTableAtOffset, objectStream);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
@ -19,7 +19,7 @@
|
||||
/// <summary>
|
||||
/// Parses the decoded cross reference stream and returns the resulting cross reference table part.
|
||||
/// </summary>
|
||||
public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
|
||||
public CrossReferenceTablePart Parse(long streamOffset, long? fromTableAtOffset, StreamToken stream)
|
||||
{
|
||||
var decoded = stream.Decode(filterProvider);
|
||||
|
||||
@ -38,7 +38,8 @@
|
||||
Offset = streamOffset,
|
||||
Previous = previousOffset,
|
||||
Dictionary = stream.StreamDictionary,
|
||||
XRefType = CrossReferenceType.Stream
|
||||
XRefType = CrossReferenceType.Stream,
|
||||
TiedToPreviousAtOffset = fromTableAtOffset
|
||||
};
|
||||
|
||||
var objectNumbers = GetObjectNumbers(stream.StreamDictionary);
|
||||
|
Loading…
Reference in New Issue
Block a user