diff --git a/src/UglyToad.PdfPig.Tests/Integration/LocalTests.cs b/src/UglyToad.PdfPig.Tests/Integration/LocalTests.cs
index 94545f2f..253fc776 100644
--- a/src/UglyToad.PdfPig.Tests/Integration/LocalTests.cs
+++ b/src/UglyToad.PdfPig.Tests/Integration/LocalTests.cs
@@ -1,39 +1,39 @@
namespace UglyToad.PdfPig.Tests.Integration
{
- //using System;
- //using System.Diagnostics;
- //using System.IO;
- //using Xunit;
+ using System;
+ using System.Diagnostics;
+ using System.IO;
+ using Xunit;
///
/// A class for testing files which are not checked in to source control.
///
public class LocalTests
{
- //[Fact]
- //public void Tests()
- //{
- // var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
+ [Fact]
+ public void Tests()
+ {
+ var files = Directory.GetFiles(@"C:\temp\pdfs", "*.pdf");
- // foreach (var file in files)
- // {
- // try
- // {
- // using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
- // {
- // for (var i = 1; i <= document.NumberOfPages; i++)
- // {
- // var page = document.GetPage(i);
- // var text = page.Text;
- // Trace.WriteLine(text);
- // }
- // }
- // }
- // catch (Exception ex)
- // {
- // throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
- // }
- // }
- //}
+ foreach (var file in files)
+ {
+ try
+ {
+ using (var document = PdfDocument.Open(file, new ParsingOptions { UseLenientParsing = false }))
+ {
+ for (var i = 1; i <= document.NumberOfPages; i++)
+ {
+ var page = document.GetPage(i);
+ var text = page.Text;
+ Trace.WriteLine(text);
+ }
+ }
+ }
+ catch (Exception ex)
+ {
+ throw new InvalidOperationException($"Error parsing: {Path.GetFileName(file)}.", ex);
+ }
+ }
+ }
}
}
diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs
index dc43a446..8b5c2a62 100644
--- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs
+++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTableBuilder.cs
@@ -34,28 +34,35 @@
DictionaryToken trailerDictionary = new DictionaryToken(new Dictionary());
Dictionary objectOffsets = new Dictionary();
- List xrefSeqBytePos = new List();
+ var xrefPartToBytePositionOrder = new List();
var currentPart = parts.FirstOrDefault(x => x.Offset == firstCrossReferenceOffset);
if (currentPart == null)
{
// no XRef at given position
- log.Warn("Did not found XRef object at specified startxref position " + firstCrossReferenceOffset);
+ log.Warn($"Did not find an XRef object at the specified startxref position {firstCrossReferenceOffset}");
// use all objects in byte position order (last entries overwrite previous ones)
- xrefSeqBytePos.AddRange(parts.Select(x => x.Offset));
- xrefSeqBytePos.Sort();
+ xrefPartToBytePositionOrder.AddRange(parts.Select(x => x.Offset));
+ xrefPartToBytePositionOrder.Sort();
}
else
{
// copy xref type
type = currentPart.Type;
-
// found starting Xref object
// add this and follow chain defined by 'Prev' keys
- xrefSeqBytePos.Add(firstCrossReferenceOffset);
+ xrefPartToBytePositionOrder.Add(firstCrossReferenceOffset);
+
+ // Also add any Xref streams that are tied to this table so they are merged along with it.
+ var activePart = currentPart;
+ var dependents = parts.Where(x => x.TiedToXrefAtOffset == activePart.Offset);
+ foreach (var dependent in dependents)
+ {
+ xrefPartToBytePositionOrder.Add(dependent.Offset);
+ }
while (currentPart.Dictionary != null)
{
@@ -72,21 +79,21 @@
break;
}
- xrefSeqBytePos.Add(prevBytePos);
+ xrefPartToBytePositionOrder.Add(prevBytePos);
// sanity check to prevent infinite loops
- if (xrefSeqBytePos.Count >= parts.Count)
+ if (xrefPartToBytePositionOrder.Count >= parts.Count)
{
break;
}
}
// have to reverse order so that later XRefs will overwrite previous ones
- xrefSeqBytePos.Reverse();
+ xrefPartToBytePositionOrder.Reverse();
}
// merge used and sorted XRef/trailer
- foreach (long bPos in xrefSeqBytePos)
+ foreach (long bPos in xrefPartToBytePositionOrder)
{
var currentObject = parts.First(x => x.Offset == bPos || x.Offset == bPos + offsetCorrection);
if (currentObject.Dictionary != null)
diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs
index 02b01b48..69a87fe5 100644
--- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs
+++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePart.cs
@@ -33,13 +33,24 @@
public CrossReferenceType Type { get; }
- public CrossReferenceTablePart(IReadOnlyDictionary objectOffsets, long offset, long previous, DictionaryToken dictionary, CrossReferenceType type)
+ /// <summary>
+ /// For an Xref stream indicated by an Xref table, the offset of that table. The stream and the table should be used together when constructing the final table.
+ /// </summary>
+ public long? TiedToXrefAtOffset { get; }
+
+ public CrossReferenceTablePart(
+ IReadOnlyDictionary objectOffsets,
+ long offset, long previous,
+ DictionaryToken dictionary,
+ CrossReferenceType type,
+ long? tiedToXrefAtOffset)
{
ObjectOffsets = objectOffsets;
Offset = offset;
Previous = previous;
Dictionary = dictionary;
Type = type;
+ TiedToXrefAtOffset = tiedToXrefAtOffset;
}
public void FixOffset(long offset)
diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs
index 48ff1f44..d2ca13af 100644
--- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs
+++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTablePartBuilder.cs
@@ -15,7 +15,9 @@
public DictionaryToken Dictionary { get; set; }
public CrossReferenceType XRefType { get; set; }
-
+
+ public long? TiedToPreviousAtOffset { get; set; }
+
public void Add(long objectId, int generationNumber, long offset)
{
IndirectReference objKey = new IndirectReference(objectId, generationNumber);
@@ -28,7 +30,7 @@
public CrossReferenceTablePart Build()
{
- return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary, XRefType);
+ return new CrossReferenceTablePart(objects, Offset, Previous, Dictionary, XRefType, TiedToPreviousAtOffset);
}
}
}
\ No newline at end of file
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceParser.cs b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceParser.cs
index 0f63cece..70cae4df 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceParser.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceParser.cs
@@ -82,9 +82,11 @@
// check for a XRef stream, it may contain some object ids of compressed objects
if (tableDictionary.ContainsKey(NameToken.XrefStm))
{
- log.Debug("Cross reference table contained referenced to stream. Reading the stream.");
+ log.Debug("Cross reference table contained reference to stream. Reading the stream.");
- int streamOffset = ((NumericToken)tableDictionary.Data[NameToken.XrefStm]).Int;
+ var tiedToTableAtOffset = tablePart.Offset;
+
+ int streamOffset = ((NumericToken) tableDictionary.Data[NameToken.XrefStm]).Int;
// check the xref stream reference
fixedOffset = offsetValidator.CheckXRefOffset(streamOffset, tokenScanner, bytes, isLenientParsing);
@@ -96,8 +98,13 @@
// Update the cross reference table to be a stream instead.
tableDictionary = tableDictionary.With(NameToken.XrefStm, new NumericToken(streamOffset));
- tablePart = new CrossReferenceTablePart(tablePart.ObjectOffsets, streamOffset,
- tablePart.Previous, tableDictionary, tablePart.Type);
+ tablePart = new CrossReferenceTablePart(
+ tablePart.ObjectOffsets,
+ streamOffset,
+ tablePart.Previous,
+ tableDictionary,
+ tablePart.Type,
+ tiedToTableAtOffset);
}
// Read the stream from the table.
@@ -105,7 +112,7 @@
{
try
{
- TryParseCrossReferenceStream(streamOffset, pdfScanner, out streamPart);
+ TryParseCrossReferenceStream(streamOffset, pdfScanner, tiedToTableAtOffset, out streamPart);
}
catch (InvalidOperationException ex)
{
@@ -149,7 +156,7 @@
tokenScanner.Seek(previousCrossReferenceLocation);
// parse xref stream
- if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, out var tablePart))
+ if (!TryParseCrossReferenceStream(previousCrossReferenceLocation, pdfScanner, null, out var tablePart))
{
if (!TryBruteForceXrefTableLocate(bytes, previousCrossReferenceLocation, out var actualOffset))
{
@@ -218,7 +225,10 @@
return resolved;
}
- private bool TryParseCrossReferenceStream(long objByteOffset, IPdfTokenScanner pdfScanner,
+ private bool TryParseCrossReferenceStream(
+ long objByteOffset,
+ IPdfTokenScanner pdfScanner,
+ long? fromTableAtOffset,
out CrossReferenceTablePart xrefTablePart)
{
xrefTablePart = null;
@@ -236,7 +246,7 @@
return false;
}
- xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, objectStream);
+ xrefTablePart = crossReferenceStreamParser.Parse(objByteOffset, fromTableAtOffset, objectStream);
return true;
}
diff --git a/src/UglyToad.PdfPig/Parser/Parts/CrossReference/CrossReferenceStreamParser.cs b/src/UglyToad.PdfPig/Parser/Parts/CrossReference/CrossReferenceStreamParser.cs
index 02904ed7..e27d74a9 100644
--- a/src/UglyToad.PdfPig/Parser/Parts/CrossReference/CrossReferenceStreamParser.cs
+++ b/src/UglyToad.PdfPig/Parser/Parts/CrossReference/CrossReferenceStreamParser.cs
@@ -19,7 +19,7 @@
///
/// Parses through the unfiltered stream and populates the xrefTable HashMap.
///
- public CrossReferenceTablePart Parse(long streamOffset, StreamToken stream)
+ public CrossReferenceTablePart Parse(long streamOffset, long? fromTableAtOffset, StreamToken stream)
{
var decoded = stream.Decode(filterProvider);
@@ -38,7 +38,8 @@
Offset = streamOffset,
Previous = previousOffset,
Dictionary = stream.StreamDictionary,
- XRefType = CrossReferenceType.Stream
+ XRefType = CrossReferenceType.Stream,
+ TiedToPreviousAtOffset = fromTableAtOffset
};
var objectNumbers = GetObjectNumbers(stream.StreamDictionary);