Annotations named destinations (#579)

* Add Named Destinations to Catalog so that bookmarks and links can access
them.

The named destinations require access to page nodes, so created Pages object
that is made using PagesFactory (which contains the page-related code from
Catalog).

* Further implementation of destinations:
- Implement NamedDestinations in AnnotationProvider, so that we can look
  up named destinations for annotations and turn them into explicit destinations.
  Reused existing code inside BookmarksProvider to get destinations/actions.
- Added GoToE action
- According to the PDF reference, destinations are also required for
  external destinations and hence for ExternalBookmarkNode. This allows us
  to push up DocumentBookmarkNode.Destination to BookmarkNode.

* Implemented stateful appearance streams and integration test

* Added AppearanceStream to public API because it is used in the (public)
Annotation constructor

* After #552, must push down ExplicitDestination to DocumentBookmarkNode since it
does not apply to UriBookmarkNode.

* Added actions, which fits the PDF model better and works well with the
new bookmarks code (after PR #552)

* Rename Action to PdfAction + removed unused using in ActionProvider.cs

---------

Co-authored-by: mvantzet <mark@radialsg.com>
This commit is contained in:
mvantzet 2023-04-10 18:14:14 +02:00 committed by GitHub
parent 2e0622b652
commit 0e39bc0b76
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
37 changed files with 1263 additions and 625 deletions

View File

@ -13,7 +13,7 @@
using (var document = PdfDocument.Open(path))
{
var pg = document.Structure.Catalog.GetPageNode(1).NodeDictionary;
var pg = document.Structure.Catalog.Pages.GetPageNode(1).NodeDictionary;
var contents = pg.Data[NameToken.Contents] as IndirectReferenceToken;
document.Advanced.ReplaceIndirectObject(contents.Data, tk =>
{
@ -39,7 +39,7 @@
dict[NameToken.Length] = new NumericToken(0);
var replacement = new StreamToken(new DictionaryToken(dict), new List<byte>());
var pg = document.Structure.Catalog.GetPageNode(1).NodeDictionary;
var pg = document.Structure.Catalog.Pages.GetPageNode(1).NodeDictionary;
var contents = pg.Data[NameToken.Contents] as IndirectReferenceToken;
document.Advanced.ReplaceIndirectObject(contents.Data, replacement);

View File

@ -0,0 +1,51 @@
namespace UglyToad.PdfPig.Tests.Integration
{
    using Actions;
    using System.Linq;
    using Xunit;

    /// <summary>
    /// Integration tests covering annotation actions and stateful appearance streams.
    /// </summary>
    public class AnnotationsTest
    {
        /// <summary>
        /// Every annotation on page 1 of the "toc" document must carry a GoTo action
        /// whose destination has a positive page number.
        /// </summary>
        [Fact]
        public void AnnotationsHaveActions()
        {
            var pdf = IntegrationHelpers.GetDocumentPath("toc");
            using (var doc = PdfDocument.Open(pdf))
            {
                var annots = doc.GetPage(1).ExperimentalAccess.GetAnnotations().ToArray();
                Assert.Equal(5, annots.Length);
                Assert.All(annots, a =>
                {
                    Assert.NotNull(a.Action);
                    // Assert.IsType returns the typed instance, so no unchecked 'as' cast is needed.
                    var goTo = Assert.IsType<GoToAction>(a.Action);
                    Assert.True(goTo.Destination.PageNumber > 0);
                });
            }
        }

        /// <summary>
        /// The single annotation in the "appearances" document has stateful normal and down
        /// appearances with states "Off" and "Yes", and its current appearance state is "Off".
        /// </summary>
        [Fact]
        public void CheckAnnotationAppearanceStreams()
        {
            var pdf = IntegrationHelpers.GetSpecificTestDocumentPath("appearances");
            using (var doc = PdfDocument.Open(pdf))
            {
                var annotations = doc.GetPage(1).ExperimentalAccess.GetAnnotations().ToArray();
                // Assert.Single both checks the count and hands back the only element.
                var annotation = Assert.Single(annotations);
                Assert.True(annotation.HasDownAppearance);
                Assert.True(annotation.HasNormalAppearance);
                Assert.False(annotation.HasRollOverAppearance);
                Assert.False(annotation.downAppearanceStream.IsStateless);
                Assert.False(annotation.normalAppearanceStream.IsStateless);
                Assert.Contains("Off", annotation.downAppearanceStream.GetStates);
                Assert.Contains("Yes", annotation.downAppearanceStream.GetStates);
                Assert.Contains("Off", annotation.normalAppearanceStream.GetStates);
                Assert.Contains("Yes", annotation.normalAppearanceStream.GetStates);
                Assert.Equal("Off", annotation.appearanceState);
            }
        }
    }
}

View File

@ -58,11 +58,19 @@
"UglyToad.PdfPig.AcroForms.Fields.AcroSignatureField",
"UglyToad.PdfPig.AcroForms.Fields.AcroTextField",
"UglyToad.PdfPig.AcroForms.Fields.AcroTextFieldFlags",
"UglyToad.PdfPig.Actions.AbstractGoToAction",
"UglyToad.PdfPig.Actions.PdfAction",
"UglyToad.PdfPig.Actions.ActionType",
"UglyToad.PdfPig.Actions.GoToAction",
"UglyToad.PdfPig.Actions.GoToEAction",
"UglyToad.PdfPig.Actions.GoToRAction",
"UglyToad.PdfPig.Actions.UriAction",
"UglyToad.PdfPig.AdvancedPdfDocumentAccess",
"UglyToad.PdfPig.Annotations.Annotation",
"UglyToad.PdfPig.Annotations.AnnotationBorder",
"UglyToad.PdfPig.Annotations.AnnotationFlags",
"UglyToad.PdfPig.Annotations.AnnotationType",
"UglyToad.PdfPig.Annotations.AppearanceStream",
"UglyToad.PdfPig.Annotations.QuadPointsQuadrilateral",
"UglyToad.PdfPig.Content.ArtifactMarkedContentElement",
"UglyToad.PdfPig.Content.Catalog",
@ -207,6 +215,7 @@
"UglyToad.PdfPig.Outline.Bookmarks",
"UglyToad.PdfPig.Outline.BookmarkNode",
"UglyToad.PdfPig.Outline.DocumentBookmarkNode",
"UglyToad.PdfPig.Outline.EmbeddedBookmarkNode",
"UglyToad.PdfPig.Outline.ExternalBookmarkNode",
"UglyToad.PdfPig.Outline.UriBookmarkNode",
"UglyToad.PdfPig.Outline.Destinations.ExplicitDestination",

View File

@ -255,6 +255,7 @@
public static readonly NameToken G = new NameToken("G");
public static readonly NameToken Gamma = new NameToken("Gamma");
public static readonly NameToken GoTo = new NameToken("GoTo");
public static readonly NameToken GoToE = new NameToken("GoToE");
public static readonly NameToken GoToR = new NameToken("GoToR");
public static readonly NameToken Group = new NameToken("Group");
public static readonly NameToken GtsPdfa1 = new NameToken("GTS_PDFA1");

View File

@ -187,7 +187,7 @@
int? pageNumber = null;
if (fieldDictionary.TryGet(NameToken.P, tokenScanner, out IndirectReferenceToken pageReference))
{
pageNumber = catalog.GetPageByReference(pageReference.Data)?.PageNumber;
pageNumber = catalog.Pages.GetPageByReference(pageReference.Data)?.PageNumber;
}
PdfRectangle? bounds = null;

View File

@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Actions;
using Outline.Destinations;

/// <summary>
/// Common base for the GoTo family of actions (GoTo, GoToE, GoToR), each of which carries a destination.
/// </summary>
public abstract class AbstractGoToAction : PdfAction
{
    /// <summary>
    /// The destination this GoTo-type action navigates to.
    /// </summary>
    public ExplicitDestination Destination { get; }

    /// <summary>
    /// Initialises the action with its type and destination.
    /// </summary>
    /// <param name="type">The action type, forwarded to <see cref="PdfAction"/>.</param>
    /// <param name="destination">The destination stored in <see cref="Destination"/>.</param>
    protected AbstractGoToAction(ActionType type, ExplicitDestination destination) : base(type)
        => Destination = destination;
}

View File

@ -0,0 +1,107 @@
namespace UglyToad.PdfPig.Actions
{
    using Core;
    using Logging;
    using Outline;
    using Tokenization.Scanner;
    using Tokens;
    using Outline.Destinations;
    using Util;

    internal static class ActionProvider
    {
        /// <summary>
        /// Reads the action (/A) entry of <paramref name="dictionary"/> and converts it to a <see cref="PdfAction"/>.
        /// GoTo, GoToR and GoToE actions additionally have their destination (/D) resolved; URI actions carry their URI.
        /// </summary>
        /// <param name="dictionary">Dictionary that may contain an /A entry.</param>
        /// <param name="namedDestinations">Named destinations passed through to the destination lookup.</param>
        /// <param name="pdfScanner">Scanner used to resolve tokens.</param>
        /// <param name="log">Log passed through to the destination lookup.</param>
        /// <param name="result">The action on success, otherwise <c>null</c>.</param>
        /// <returns>
        /// <c>true</c> if an action was created; <c>false</c> if there is no /A entry, the action type is not
        /// one of GoTo/GoToR/GoToE/URI, or a required entry could not be read.
        /// </returns>
        /// <exception cref="PdfDocumentFormatException">The action dictionary has no /S (type) entry.</exception>
        internal static bool TryGetAction(DictionaryToken dictionary,
            NamedDestinations namedDestinations,
            IPdfTokenScanner pdfScanner,
            ILog log,
            out PdfAction result)
        {
            result = null;

            if (!dictionary.TryGet(NameToken.A, pdfScanner, out DictionaryToken actionDictionary))
            {
                return false;
            }

            if (!actionDictionary.TryGet(NameToken.S, pdfScanner, out NameToken actionType))
            {
                throw new PdfDocumentFormatException($"No action type (/S) specified for action: {actionDictionary}.");
            }

            if (actionType.Equals(NameToken.GoTo))
            {
                // GoTo: the destination (/D) is mandatory.
                // NOTE(review): the boolean passed to TryGetDestination is false here and true for
                // GoToR/GoToE; its exact meaning is defined in DestinationProvider.
                if (!DestinationProvider.TryGetDestination(actionDictionary, NameToken.D, namedDestinations,
                        pdfScanner, log, false, out var localTarget))
                {
                    return false;
                }

                result = new GoToAction(localTarget);
                return true;
            }

            if (actionType.Equals(NameToken.GoToR))
            {
                // GoToR: both the file (/F) and the destination (/D) are mandatory; /F is read first.
                if (!actionDictionary.TryGetOptionalStringDirect(NameToken.F, pdfScanner, out var remoteFile)
                    || !DestinationProvider.TryGetDestination(actionDictionary, NameToken.D, namedDestinations,
                        pdfScanner, log, true, out var remoteTarget))
                {
                    return false;
                }

                result = new GoToRAction(remoteTarget, remoteFile);
                return true;
            }

            if (actionType.Equals(NameToken.GoToE))
            {
                // GoToE: the destination (/D) is mandatory.
                if (!DestinationProvider.TryGetDestination(actionDictionary, NameToken.D, namedDestinations,
                        pdfScanner, log, true, out var embeddedTarget))
                {
                    return false;
                }

                // The file specification (/F) is optional; fall back to null when it is absent.
                if (!actionDictionary.TryGetOptionalStringDirect(NameToken.F, pdfScanner, out var embeddedFile))
                {
                    embeddedFile = null;
                }

                result = new GoToEAction(embeddedTarget, embeddedFile);
                return true;
            }

            if (actionType.Equals(NameToken.Uri))
            {
                // A URI action is created even when the URI entry cannot be read (Uri is then null).
                if (!actionDictionary.TryGetOptionalStringDirect(NameToken.Uri, pdfScanner, out var target))
                {
                    target = null;
                }

                result = new UriAction(target);
                return true;
            }

            // Any other action type is not converted.
            return false;
        }
    }
}

View File

@ -0,0 +1,80 @@
namespace UglyToad.PdfPig.Actions;
/// <summary>
/// Action types (PDF reference 8.5.3).
/// </summary>
public enum ActionType
{
/// <summary>
/// Go to a destination in the current document.
/// </summary>
GoTo,
/// <summary>
/// (“Go-to remote”) Go to a destination in another document.
/// </summary>
GoToR,
/// <summary>
/// (“Go-to embedded”; PDF 1.6) Go to a destination in an embedded file.
/// </summary>
GoToE,
/// <summary>
/// Launch an application, usually to open a file.
/// </summary>
Launch,
/// <summary>
/// Begin reading an article thread.
/// </summary>
Thread,
/// <summary>
/// Resolve a uniform resource identifier.
/// </summary>
URI,
/// <summary>
/// (PDF 1.2) Play a sound.
/// </summary>
Sound,
/// <summary>
/// (PDF 1.2) Play a movie.
/// </summary>
Movie,
/// <summary>
/// (PDF 1.2) Set an annotation's Hidden flag.
/// </summary>
Hide,
/// <summary>
/// (PDF 1.2) Execute an action predefined by the viewer application.
/// </summary>
Named,
/// <summary>
/// (PDF 1.2) Send data to a uniform resource locator.
/// </summary>
SubmitForm,
/// <summary>
/// (PDF 1.2) Set fields to their default values.
/// </summary>
ResetForm,
/// <summary>
/// (PDF 1.2) Import field values from a file.
/// </summary>
ImportData,
/// <summary>
/// (PDF 1.3) Execute a JavaScript script.
/// </summary>
JavaScript,
/// <summary>
/// (PDF 1.5) Set the states of optional content groups.
/// </summary>
SetOCGState,
/// <summary>
/// (PDF 1.5) Controls the playing of multimedia content.
/// </summary>
Rendition,
/// <summary>
/// (PDF 1.5) Updates the display of a document, using a transition dictionary.
/// </summary>
Trans,
/// <summary>
/// (PDF 1.6) Set the current view of a 3D annotation.
/// </summary>
GoTo3DView
}

View File

@ -0,0 +1,17 @@
namespace UglyToad.PdfPig.Actions;
using Outline.Destinations;
/// <summary>
/// GoTo action (with a destination inside the current document)
/// </summary>
public class GoToAction : AbstractGoToAction
{
/// <summary>
/// Creates a GoTo action; the action type is always <see cref="ActionType.GoTo"/>.
/// </summary>
/// <param name="destination">Destination inside the current document</param>
public GoToAction(ExplicitDestination destination) : base(ActionType.GoTo, destination)
{
}
}

View File

@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Actions;
using Outline.Destinations;

/// <summary>
/// GoToE action: navigates to a destination inside a file embedded within the PDF.
/// </summary>
public class GoToEAction : AbstractGoToAction
{
    /// <summary>
    /// File specification identifying the embedded file.
    /// </summary>
    public string FileSpecification { get; }

    /// <summary>
    /// Creates a GoToE action; the action type is always <see cref="ActionType.GoToE"/>.
    /// </summary>
    /// <param name="destination">Destination within the embedded file</param>
    /// <param name="fileSpecification">Specification of the embedded file</param>
    public GoToEAction(ExplicitDestination destination, string fileSpecification)
        : base(ActionType.GoToE, destination)
        => FileSpecification = fileSpecification;
}

View File

@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Actions;
using Outline.Destinations;

/// <summary>
/// GoToR action: navigates to a destination in a remote PDF.
/// </summary>
public class GoToRAction : AbstractGoToAction
{
    /// <summary>
    /// Filename of the remote PDF.
    /// </summary>
    public string Filename { get; }

    /// <summary>
    /// Creates a GoToR action; the action type is always <see cref="ActionType.GoToR"/>.
    /// </summary>
    /// <param name="destination">Destination within the remote PDF</param>
    /// <param name="filename">Filename of the remote PDF</param>
    public GoToRAction(ExplicitDestination destination, string filename)
        : base(ActionType.GoToR, destination)
        => Filename = filename;
}

View File

@ -0,0 +1,22 @@
namespace UglyToad.PdfPig.Actions
{
    /// <summary>
    /// Base type for PDF actions (PDF reference 8.5).
    /// </summary>
    public class PdfAction
    {
        /// <summary>
        /// The kind of action this instance represents.
        /// </summary>
        public ActionType Type { get; }

        /// <summary>
        /// Initialises the action with its type. Protected, so only derived classes can call it.
        /// </summary>
        /// <param name="type">The action type stored in <see cref="Type"/>.</param>
        protected PdfAction(ActionType type) => Type = type;
    }
}

View File

@ -0,0 +1,21 @@
namespace UglyToad.PdfPig.Actions;

/// <summary>
/// Action that opens a URI.
/// </summary>
public class UriAction : PdfAction
{
    /// <summary>
    /// The URI to open.
    /// </summary>
    public string Uri { get; }

    /// <summary>
    /// Creates a URI action; the action type is always <see cref="ActionType.URI"/>.
    /// </summary>
    /// <param name="uri">URI to open</param>
    public UriAction(string uri) : base(ActionType.URI) => Uri = uri;
}

View File

@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using Core;
using Actions;
using Tokens;
using Util.JetBrains.Annotations;
@ -11,9 +12,10 @@
/// </summary>
public class Annotation
{
private readonly StreamToken normalAppearanceStream;
private readonly StreamToken rollOverAppearanceStream;
private readonly StreamToken downAppearanceStream;
internal readonly AppearanceStream normalAppearanceStream;
internal readonly AppearanceStream rollOverAppearanceStream;
internal readonly AppearanceStream downAppearanceStream;
internal readonly string appearanceState;
/// <summary>
/// The underlying PDF dictionary which this annotation was created from.
@ -66,6 +68,16 @@
/// </summary>
public IReadOnlyList<QuadPointsQuadrilateral> QuadPoints { get; }
/// <summary>
/// Action for this annotation, if any (can be null)
/// </summary>
public PdfAction Action { get; }
/// <summary>
/// Indicates if a normal appearance is present for this annotation
/// </summary>
public bool HasNormalAppearance => normalAppearanceStream != null;
/// <summary>
/// Indicates if a roll over appearance is present for this annotation (shown when you hover over this annotation)
/// </summary>
@ -79,9 +91,12 @@
/// <summary>
/// Create a new <see cref="Annotation"/>.
/// </summary>
public Annotation(DictionaryToken annotationDictionary, AnnotationType type, PdfRectangle rectangle, string content, string name, string modifiedDate,
public Annotation(DictionaryToken annotationDictionary, AnnotationType type, PdfRectangle rectangle,
string content, string name, string modifiedDate,
AnnotationFlags flags, AnnotationBorder border, IReadOnlyList<QuadPointsQuadrilateral> quadPoints,
StreamToken normalAppearanceStream, StreamToken rollOverAppearanceStream, StreamToken downAppearanceStream)
PdfAction action,
AppearanceStream normalAppearanceStream, AppearanceStream rollOverAppearanceStream,
AppearanceStream downAppearanceStream, string appearanceState)
{
AnnotationDictionary = annotationDictionary ?? throw new ArgumentNullException(nameof(annotationDictionary));
Type = type;
@ -92,9 +107,11 @@
Flags = flags;
Border = border;
QuadPoints = quadPoints ?? EmptyArray<QuadPointsQuadrilateral>.Instance;
Action = action;
this.normalAppearanceStream = normalAppearanceStream;
this.rollOverAppearanceStream = rollOverAppearanceStream;
this.downAppearanceStream = downAppearanceStream;
this.appearanceState = appearanceState;
}
/// <inheritdoc />

View File

@ -1,9 +1,13 @@
namespace UglyToad.PdfPig.Annotations
{
using Actions;
using System;
using System.Collections.Generic;
using System.Linq;
using Core;
using Logging;
using Outline;
using Outline.Destinations;
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
@ -13,14 +17,18 @@
{
private readonly IPdfTokenScanner tokenScanner;
private readonly DictionaryToken pageDictionary;
private readonly NamedDestinations namedDestinations;
private readonly ILog log;
private readonly TransformationMatrix matrix;
public AnnotationProvider(IPdfTokenScanner tokenScanner, DictionaryToken pageDictionary,
TransformationMatrix matrix)
TransformationMatrix matrix, NamedDestinations namedDestinations, ILog log)
{
this.matrix = matrix;
this.tokenScanner = tokenScanner ?? throw new ArgumentNullException(nameof(tokenScanner));
this.pageDictionary = pageDictionary ?? throw new ArgumentNullException(nameof(pageDictionary));
this.namedDestinations = namedDestinations;
this.log = log;
}
public IEnumerable<Annotation> GetAnnotations()
@ -38,10 +46,9 @@
}
var type = annotationDictionary.Get<NameToken>(NameToken.Subtype, tokenScanner);
var annotationType = type.ToAnnotationType();
var action = GetAction(annotationDictionary);
var rectangle = matrix.Transform(annotationDictionary.Get<ArrayToken>(NameToken.Rect, tokenScanner).ToRectangle(tokenScanner));
var contents = GetNamedString(NameToken.Contents, annotationDictionary);
var name = GetNamedString(NameToken.Nm, annotationDictionary);
// As indicated in PDF reference 8.4.1, the modified date can be anything, but is usually a date formatted according to sec. 3.8.3
@ -98,32 +105,67 @@
}
}
StreamToken normalAppearanceStream = null, downAppearanceStream = null, rollOverAppearanceStream = null;
AppearanceStream normalAppearanceStream = null;
AppearanceStream downAppearanceStream = null;
AppearanceStream rollOverAppearanceStream = null;
if (annotationDictionary.TryGet(NameToken.Ap, out DictionaryToken appearanceDictionary))
{
// The normal appearance of this annotation
if (appearanceDictionary.TryGet(NameToken.N, out IndirectReferenceToken normalAppearanceRef))
if (AppearanceStreamFactory.TryCreate(appearanceDictionary, NameToken.N, tokenScanner, out AppearanceStream stream))
{
normalAppearanceStream = tokenScanner.Get(normalAppearanceRef.Data)?.Data as StreamToken;
normalAppearanceStream = stream;
}
// If present, the 'roll over' appearance of this annotation (when hovering the mouse pointer over this annotation)
if (appearanceDictionary.TryGet(NameToken.R, out IndirectReferenceToken rollOverAppearanceRef))
if (AppearanceStreamFactory.TryCreate(appearanceDictionary, NameToken.R, tokenScanner, out stream))
{
rollOverAppearanceStream = tokenScanner.Get(rollOverAppearanceRef.Data)?.Data as StreamToken;
rollOverAppearanceStream = stream;
}
// If present, the 'down' appearance of this annotation (when you click on it)
if (appearanceDictionary.TryGet(NameToken.D, out IndirectReferenceToken downAppearanceRef))
if (AppearanceStreamFactory.TryCreate(appearanceDictionary, NameToken.D, tokenScanner, out stream))
{
downAppearanceStream = tokenScanner.Get(downAppearanceRef.Data)?.Data as StreamToken;
downAppearanceStream = stream;
}
}
string appearanceState = null;
if (annotationDictionary.TryGet(NameToken.As, out NameToken appearanceStateToken))
{
appearanceState = appearanceStateToken.Data;
}
yield return new Annotation(annotationDictionary, annotationType, rectangle,
contents, name, modifiedDate, flags, border, quadPointRectangles,
normalAppearanceStream, rollOverAppearanceStream, downAppearanceStream);
contents, name, modifiedDate, flags, border, quadPointRectangles, action,
normalAppearanceStream, rollOverAppearanceStream, downAppearanceStream, appearanceState);
}
}
/// <summary>
/// Determines the action for an annotation. A direct destination (/Dest) takes precedence and is
/// wrapped in a <see cref="GoToAction"/>; otherwise the annotation's action (/A) entry is used.
/// </summary>
/// <param name="annotationDictionary">The annotation dictionary to inspect.</param>
/// <returns>The action, or <c>null</c> when neither a destination nor an action is found.</returns>
private PdfAction GetAction(DictionaryToken annotationDictionary)
{
    // A direct destination on the annotation is exposed as a GoTo action.
    var hasDirectDestination = DestinationProvider.TryGetDestination(
        annotationDictionary, NameToken.Dest, namedDestinations, tokenScanner, log, false,
        out var directDestination);

    if (hasDirectDestination)
    {
        return new GoToAction(directDestination);
    }

    // Otherwise fall back to the action dictionary; null signals "no action or destination".
    return ActionProvider.TryGetAction(annotationDictionary, namedDestinations, tokenScanner, log, out var parsedAction)
        ? parsedAction
        : null;
}
private string GetNamedString(NameToken name, DictionaryToken dictionary)
{
string content = null;

View File

@ -0,0 +1,66 @@
namespace UglyToad.PdfPig.Annotations;
using System;
using System.Collections.Generic;
using Tokens;

/// <summary>
/// Appearance stream (PDF Reference 8.4.4) that describes what an annotation looks like. Each stream is a Form XObject.
/// The appearance stream is either stateless (in which case <see cref="IsStateless"/> is true)
/// or stateful, in which case <see cref="IsStateless"/> is false and the states can be retrieved via <see cref="GetStates"/>.
/// The states can then be used to retrieve the state-specific appearances using <see cref="Get"/>.
/// </summary>
public class AppearanceStream
{
    private readonly IDictionary<string, StreamToken> appearanceStreamsByState;
    private readonly StreamToken statelessAppearanceStream;

    /// <summary>
    /// Indicates if this appearance stream is stateless, or whether you can get appearances by state.
    /// True when a (non-null) stateless stream was supplied at construction.
    /// </summary>
    public bool IsStateless => statelessAppearanceStream != null;

    /// <summary>
    /// Get list of states. If this is a stateless appearance stream, an empty collection is returned.
    /// </summary>
    public ICollection<string> GetStates => appearanceStreamsByState != null ? appearanceStreamsByState.Keys : new string[0];

    /// <summary>
    /// Constructor for stateless appearance stream
    /// </summary>
    /// <param name="streamToken">The single appearance stream.</param>
    internal AppearanceStream(StreamToken streamToken)
    {
        statelessAppearanceStream = streamToken;
    }

    /// <summary>
    /// Constructor for stateful appearance stream
    /// </summary>
    /// <param name="appearanceStreamsByState">Appearance streams keyed by state name.</param>
    internal AppearanceStream(IDictionary<string, StreamToken> appearanceStreamsByState)
    {
        this.appearanceStreamsByState = appearanceStreamsByState;
    }

    /// <summary>
    /// Get appearance stream for particular state
    /// </summary>
    /// <param name="state">Name of the state, one of <see cref="GetStates"/>.</param>
    /// <returns>The appearance stream registered for <paramref name="state"/>.</returns>
    /// <exception cref="InvalidOperationException">This appearance stream has no per-state appearances.</exception>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="state"/> is not a known state.</exception>
    public StreamToken Get(string state)
    {
        if (appearanceStreamsByState == null)
        {
            // InvalidOperationException derives from Exception, so existing catch blocks keep working.
            throw new InvalidOperationException("Cannot get appearance by state when this is a stateless appearance stream");
        }

        // Single lookup instead of ContainsKey followed by the indexer.
        if (!appearanceStreamsByState.TryGetValue(state, out var stream))
        {
            throw new ArgumentOutOfRangeException(nameof(state), $"Appearance stream does not have state '{state}' (available states: {string.Join(",", appearanceStreamsByState.Keys)})");
        }

        return stream;
    }
}

View File

@ -0,0 +1,42 @@
namespace UglyToad.PdfPig.Annotations;
using System.Collections.Generic;
using Tokenization.Scanner;
using Tokens;

internal static class AppearanceStreamFactory
{
    /// <summary>
    /// Builds the <see cref="AppearanceStream"/> stored under <paramref name="name"/> in an annotation's
    /// appearance dictionary. The entry is either a stream (stateless appearance) or a dictionary mapping
    /// state names to streams (stateful appearance).
    /// </summary>
    /// <param name="appearanceDictionary">The annotation's appearance (/AP) dictionary.</param>
    /// <param name="name">The entry to read (the caller passes N, R or D).</param>
    /// <param name="tokenScanner">Scanner used to resolve indirect references.</param>
    /// <param name="appearanceStream">The created appearance stream, or <c>null</c> on failure.</param>
    /// <returns>
    /// <c>true</c> if at least one stream could be resolved; <c>false</c> if the entry is missing,
    /// has an unexpected type, or none of its references resolve to a stream.
    /// </returns>
    public static bool TryCreate(DictionaryToken appearanceDictionary, NameToken name, IPdfTokenScanner tokenScanner, out AppearanceStream appearanceStream)
    {
        appearanceStream = null;

        if (appearanceDictionary.TryGet(name, out IndirectReferenceToken appearanceReference))
        {
            var resolved = tokenScanner.Get(appearanceReference.Data)?.Data;

            if (resolved is StreamToken statelessStream)
            {
                appearanceStream = new AppearanceStream(statelessStream);
                return true;
            }

            // The state dictionary itself may be stored as an indirect object.
            if (resolved is DictionaryToken indirectStates)
            {
                return TryCreateStateful(indirectStates, tokenScanner, out appearanceStream);
            }

            // An unresolvable reference means "no appearance", not an appearance wrapping null.
            return false;
        }

        if (appearanceDictionary.TryGet(name, out DictionaryToken stateDictionary))
        {
            return TryCreateStateful(stateDictionary, tokenScanner, out appearanceStream);
        }

        return false;
    }

    /// <summary>
    /// Creates a stateful appearance from a dictionary of state name to stream reference, keeping only
    /// the states whose reference resolves to a stream.
    /// </summary>
    private static bool TryCreateStateful(DictionaryToken stateDictionary, IPdfTokenScanner tokenScanner, out AppearanceStream appearanceStream)
    {
        var streamsByState = new Dictionary<string, StreamToken>();

        foreach (var entry in stateDictionary.Data)
        {
            if (entry.Value is IndirectReferenceToken stateReference
                && tokenScanner.Get(stateReference.Data)?.Data is StreamToken stateStream)
            {
                streamsByState[entry.Key] = stateStream;
            }
        }

        if (streamsByState.Count > 0)
        {
            appearanceStream = new AppearanceStream(streamsByState);
            return true;
        }

        appearanceStream = null;
        return false;
    }
}

View File

@ -1,8 +1,7 @@
namespace UglyToad.PdfPig.Content
{
using System;
using System.Collections.Generic;
using Core;
using Outline;
using Tokens;
using Util.JetBrains.Annotations;
@ -12,90 +11,24 @@
/// </summary>
public class Catalog
{
private readonly IReadOnlyDictionary<int, PageTreeNode> pagesByNumber;
/// <summary>
/// The catalog dictionary containing assorted information.
/// </summary>
[NotNull]
public DictionaryToken CatalogDictionary { get; }
/// <summary>
/// Defines the page tree node which is the root of the pages tree for the document.
/// </summary>
[NotNull]
public DictionaryToken PagesDictionary { get; }
/// <summary>
/// The page tree for this document containing all pages, page numbers and their dictionaries.
/// </summary>
public PageTreeNode PageTree { get; }
internal NamedDestinations NamedDestinations { get; }
/// <summary>
/// Number of discovered pages.
/// </summary>
public int? NumberOfDiscoveredPages => pagesByNumber?.Count;
internal Pages Pages { get; }
/// <summary>
/// Create a new <see cref="CatalogDictionary"/>.
/// </summary>
internal Catalog(DictionaryToken catalogDictionary, DictionaryToken pagesDictionary,
PageTreeNode pageTree)
internal Catalog(DictionaryToken catalogDictionary, Pages pages, NamedDestinations namedDestinations)
{
CatalogDictionary = catalogDictionary ?? throw new ArgumentNullException(nameof(catalogDictionary));
PagesDictionary = pagesDictionary ?? throw new ArgumentNullException(nameof(pagesDictionary));
PageTree = pageTree ?? throw new ArgumentNullException(nameof(pageTree));
if (!pageTree.IsRoot)
{
throw new ArgumentException("Page tree must be the root page tree node.", nameof(pageTree));
}
var byNumber = new Dictionary<int, PageTreeNode>();
PopulatePageByNumberDictionary(pageTree, byNumber);
pagesByNumber = byNumber;
}
private static void PopulatePageByNumberDictionary(PageTreeNode node, Dictionary<int, PageTreeNode> result)
{
if (node.IsPage)
{
if (!node.PageNumber.HasValue)
{
throw new InvalidOperationException($"Node was page but did not have page number: {node}.");
}
result[node.PageNumber.Value] = node;
return;
}
foreach (var child in node.Children)
{
PopulatePageByNumberDictionary(child, result);
}
}
internal PageTreeNode GetPageNode(int pageNumber)
{
if (!pagesByNumber.TryGetValue(pageNumber, out var node))
{
throw new InvalidOperationException($"Could not find page node by number for: {pageNumber}.");
}
return node;
}
internal PageTreeNode GetPageByReference(IndirectReference reference)
{
foreach (var page in pagesByNumber)
{
if (page.Value.Reference.Equals(reference))
{
return page.Value;
}
}
return null;
Pages = pages ?? throw new ArgumentNullException(nameof(pages));
NamedDestinations = namedDestinations;
}
}
}

View File

@ -1,13 +1,14 @@
namespace UglyToad.PdfPig.Content
{
using Outline;
using Tokens;
internal interface IPageFactory
{
Page Create(
int number,
Page Create(int number,
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers,
NamedDestinations annotationProvider,
InternalParsingOptions parsingOptions);
}
}

View File

@ -1,5 +1,8 @@
namespace UglyToad.PdfPig.Content
{
using Annotations;
using Core;
using Outline;
using System;
using System.Collections.Generic;
using Tokenization.Scanner;
@ -8,29 +11,25 @@
internal class Pages
{
private readonly Catalog catalog;
private readonly IPageFactory pageFactory;
private readonly IPdfTokenScanner pdfScanner;
private readonly Dictionary<int, PageTreeNode> pagesByNumber;
public int Count => pagesByNumber.Count;
/// <summary>
/// The page tree for this document containing all pages, page numbers and their dictionaries.
/// </summary>
public PageTreeNode PageTree { get; }
public int Count { get; }
internal Pages(Catalog catalog, IPageFactory pageFactory, IPdfTokenScanner pdfScanner)
internal Pages(IPageFactory pageFactory, IPdfTokenScanner pdfScanner, PageTreeNode pageTree, Dictionary<int, PageTreeNode> pagesByNumber)
{
this.catalog = catalog ?? throw new ArgumentNullException(nameof(catalog));
this.pageFactory = pageFactory ?? throw new ArgumentNullException(nameof(pageFactory));
this.pdfScanner = pdfScanner ?? throw new ArgumentNullException(nameof(pdfScanner));
Count = catalog.PagesDictionary.GetIntOrDefault(NameToken.Count);
var CountOfPagesByPagesTree = catalog.PageTree.Children.Count;
var numberOfDiscoveredPages = catalog.NumberOfDiscoveredPages;
if (numberOfDiscoveredPages is null == false && Count != numberOfDiscoveredPages)
{
//log.Warning($"Dictionary Page Count {Count} different to discovered pages {numberOfDiscoveredPages}. Using {numberOfDiscoveredPages}.");
Count = numberOfDiscoveredPages.Value;
}
this.pagesByNumber = pagesByNumber;
PageTree = pageTree;
}
public Page GetPage(int pageNumber, InternalParsingOptions parsingOptions)
internal Page GetPage(int pageNumber, NamedDestinations namedDestinations, InternalParsingOptions parsingOptions)
{
if (pageNumber <= 0 || pageNumber > Count)
{
@ -40,7 +39,7 @@
$"Page number {pageNumber} invalid, must be between 1 and {Count}.");
}
var pageNode = catalog.GetPageNode(pageNumber);
var pageNode = GetPageNode(pageNumber);
var pageStack = new Stack<PageTreeNode>();
var currentNode = pageNode;
@ -71,14 +70,37 @@
pageTreeMembers.Rotation = rotateToken.Int;
}
}
var page = pageFactory.Create(
pageNumber,
pageNode.NodeDictionary,
pageTreeMembers,
namedDestinations,
parsingOptions);
return page;
}
/// <summary>
/// Looks up the page tree node for a page number.
/// </summary>
/// <param name="pageNumber">The page number used as key in the page lookup.</param>
/// <returns>The node registered for <paramref name="pageNumber"/>.</returns>
/// <exception cref="InvalidOperationException">No node is registered for the page number.</exception>
internal PageTreeNode GetPageNode(int pageNumber)
{
    if (pagesByNumber.TryGetValue(pageNumber, out var found))
    {
        return found;
    }

    throw new InvalidOperationException($"Could not find page node by number for: {pageNumber}.");
}
/// <summary>
/// Finds the page node whose indirect reference equals <paramref name="reference"/>.
/// Performs a linear scan over all known page nodes.
/// </summary>
/// <param name="reference">The indirect reference to search for.</param>
/// <returns>The first matching page node, or <c>null</c> if none matches.</returns>
internal PageTreeNode GetPageByReference(IndirectReference reference)
{
    foreach (var candidate in pagesByNumber.Values)
    {
        if (candidate.Reference.Equals(reference))
        {
            return candidate;
        }
    }

    return null;
}
}
}

View File

@ -0,0 +1,241 @@
namespace UglyToad.PdfPig.Content;
using Core;
using Logging;
using Parser.Parts;
using System;
using System.Collections.Generic;
using System.Linq;
using Tokenization.Scanner;
using Tokens;
using Util;
internal class PagesFactory
{
/// <summary>
/// Mutable counter shared while walking the page tree; starts at zero and goes up by one per call
/// to <see cref="Increment"/>.
/// </summary>
private class PageCounter
{
    /// <summary>
    /// Number of times <see cref="Increment"/> has been called.
    /// </summary>
    public int PageCount { get; private set; }

    /// <summary>
    /// Adds one to <see cref="PageCount"/>.
    /// </summary>
    public void Increment() => PageCount++;
}
/// <summary>
/// Builds the <see cref="Pages"/> object for a document: walks the page tree from the given root,
/// indexes the discovered pages by number and warns when the declared /Count disagrees.
/// </summary>
/// <param name="pagesReference">Indirect reference of the root pages node.</param>
/// <param name="pagesDictionary">Dictionary of the root pages node.</param>
/// <param name="scanner">Scanner used while walking the tree and later by <see cref="Pages"/>.</param>
/// <param name="pageFactory">Factory handed to the resulting <see cref="Pages"/>.</param>
/// <param name="log">Receives a warning when the declared and discovered page counts differ.</param>
/// <param name="isLenientParsing">Passed through to the page tree walk.</param>
/// <returns>The populated <see cref="Pages"/> instance.</returns>
/// <exception cref="ArgumentException">The processed tree is not a root node.</exception>
public static Pages Create(IndirectReference pagesReference, DictionaryToken pagesDictionary, IPdfTokenScanner scanner, IPageFactory pageFactory, ILog log, bool isLenientParsing)
{
    var counter = new PageCounter();

    // NOTE(review): (1, 0) is supplied as the parent reference of the root node; it looks like a placeholder - confirm.
    var rootParent = new IndirectReference(1, 0);
    var pageTree = ProcessPagesNode(pagesReference, pagesDictionary, rootParent, true, scanner, isLenientParsing, counter);

    if (!pageTree.IsRoot)
    {
        throw new ArgumentException("Page tree must be the root page tree node.", nameof(pageTree));
    }

    var pagesByNumber = new Dictionary<int, PageTreeNode>();
    PopulatePageByNumberDictionary(pageTree, pagesByNumber);

    // The discovered pages are authoritative; a mismatch with the declared /Count is only logged.
    var declaredCount = pagesDictionary.GetIntOrDefault(NameToken.Count);
    var discoveredCount = pagesByNumber.Count;
    if (declaredCount != discoveredCount)
    {
        log.Warn($"Dictionary Page Count {declaredCount} different to discovered pages {discoveredCount}. Using {discoveredCount}.");
    }

    return new Pages(pageFactory, scanner, pageTree, pagesByNumber);
}
/// <summary>
/// Build the <see cref="PageTreeNode"/> tree rooted at the given pages-tree node.
/// The tree is walked iteratively with a queue; child lists are attached and page
/// numbers are assigned only after the whole walk has finished.
/// </summary>
/// <param name="referenceInput">Indirect reference of the node to start from.</param>
/// <param name="nodeDictionaryInput">Dictionary of the node to start from.</param>
/// <param name="parentReferenceInput">Reference passed to the parent check for the starting node.</param>
/// <param name="isRoot">Passed to <c>CheckIfIsPage</c> for the starting node; that method skips the parent check when it is true.</param>
/// <param name="pdfTokenScanner">Scanner used to resolve indirect objects.</param>
/// <param name="isLenientParsing">When false, a node without a /Kids array causes an exception instead of being treated as empty.</param>
/// <param name="pageNumber">Shared counter; incremented once for every page that receives a number here.</param>
/// <returns>The node for <paramref name="referenceInput"/>: a leaf page node, or a container node with its descendants attached.</returns>
/// <exception cref="PdfDocumentFormatException">
/// Thrown when /Kids is missing in strict mode, when a /Kids entry is not an indirect reference,
/// or when the dictionary for a /Kids entry cannot be found.
/// </exception>
private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput,
DictionaryToken nodeDictionaryInput,
IndirectReference parentReferenceInput,
bool isRoot,
IPdfTokenScanner pdfTokenScanner,
bool isLenientParsing,
PageCounter pageNumber)
{
bool isPage = CheckIfIsPage(nodeDictionaryInput, parentReferenceInput, isRoot, pdfTokenScanner, isLenientParsing);
if (isPage)
{
// The starting node is itself a page: number it and return it as a leaf with no children.
pageNumber.Increment();
return new PageTreeNode(nodeDictionaryInput, referenceInput, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
}
// If we got here the node is a container, so we iterate until the work queue is empty.
// Attempt to detect (and break) any infinite loop (IL) by recording the ids of the last 1000 (by default) tokens processed.
const int InfiniteLoopWorkingWindow = 1000;
var visitedTokens = new Dictionary<long, HashSet<int>>(); // Quick lookup containing ids (object number, generation) of tokens already processed (trimmed as we go to last 1000 (by default))
var visitedTokensWorkingWindow = new Queue<(long ObjectNumber, int Generation)>(InfiniteLoopWorkingWindow);
// Work queue of container nodes whose /Kids still have to be read.
var toProcess =
new Queue<(PageTreeNode thisPage, IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference,
List<PageTreeNode> nodeChildren)>();
var firstPage = new PageTreeNode(nodeDictionaryInput, referenceInput, false, null);
// Deferred actions: each one attaches a (by then fully populated) child list to its container node.
var setChildren = new List<Action>();
var firstPageChildren = new List<PageTreeNode>();
setChildren.Add(() => firstPage.WithChildren(firstPageChildren));
toProcess.Enqueue(
(thisPage: firstPage, reference: referenceInput, nodeDictionary: nodeDictionaryInput, parentReference: parentReferenceInput,
nodeChildren: firstPageChildren));
do
{
var current = toProcess.Dequeue();
#region Break any potential infinite loop
// Remember the last 1000 (by default) tokens and if we attempt to process again break out of loop
var currentReferenceObjectNumber = current.reference.ObjectNumber;
var currentReferenceGeneration = current.reference.Generation;
if (visitedTokens.ContainsKey(currentReferenceObjectNumber))
{
var generations = visitedTokens[currentReferenceObjectNumber];
if (generations.Contains(currentReferenceGeneration))
{
// The values computed here are only used for the diagnostic message below.
var listOfLastVisitedToken = visitedTokensWorkingWindow.ToList();
var indexOfCurrentTokenInListOfLastVisitedToken = listOfLastVisitedToken.IndexOf((currentReferenceObjectNumber, currentReferenceGeneration));
var howManyTokensBack = Math.Abs(indexOfCurrentTokenInListOfLastVisitedToken - listOfLastVisitedToken.Count); // e.g. the infinite loop is taking us back to the last token, or five tokens back
System.Diagnostics.Debug.WriteLine($"Break infinite loop while processing page {pageNumber.PageCount+1} tokens. Token with object number {currentReferenceObjectNumber} and generation {currentReferenceGeneration} processed {howManyTokensBack} token(s) back. ");
continue; // don't reprocess token already processed. break infinite loop. Issue #519
}
else
{
// Same object number seen before, but with a different generation: record the new generation.
// NOTE(review): this branch does not enqueue the id into the working window, so it is never evicted by the trimming below - confirm that is intended.
generations.Add(currentReferenceGeneration);
visitedTokens[currentReferenceObjectNumber] = generations;
}
}
else
{
visitedTokens.Add(currentReferenceObjectNumber, new HashSet<int>() { currentReferenceGeneration });
visitedTokensWorkingWindow.Enqueue((currentReferenceObjectNumber, currentReferenceGeneration));
if (visitedTokensWorkingWindow.Count >= InfiniteLoopWorkingWindow)
{
// The window is full: forget the oldest id so that the lookup stays bounded.
var toBeRemovedFromWorkingHashset = visitedTokensWorkingWindow.Dequeue();
var toBeRemovedObjectNumber = toBeRemovedFromWorkingHashset.ObjectNumber;
var toBeRemovedGeneration = toBeRemovedFromWorkingHashset.Generation;
var generations = visitedTokens[toBeRemovedObjectNumber];
generations.Remove(toBeRemovedGeneration);
if (generations.Count == 0)
{
visitedTokens.Remove(toBeRemovedObjectNumber);
}
else
{
visitedTokens[toBeRemovedObjectNumber] = generations;
}
}
}
#endregion
if (!current.nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
{
if (!isLenientParsing)
{
throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {current.nodeDictionary}.");
}
// Lenient mode: treat a container without /Kids as having no children.
kids = new ArrayToken(EmptyArray<IToken>.Instance);
}
foreach (var kid in kids.Data)
{
// These two checks throw regardless of isLenientParsing.
if (!(kid is IndirectReferenceToken kidRef))
{
throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
}
if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
{
throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
}
bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);
if (isChildPage)
{
// The counter is not incremented here, so this page number is provisional;
// pages reachable from the first node are renumbered after the loop.
var kidPageNode =
new PageTreeNode(kidDictionaryToken, kidRef.Data, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
current.nodeChildren.Add(kidPageNode);
}
else
{
// Nested container: queue it for processing and defer attaching its children.
var kidChildNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, false, null);
var kidChildren = new List<PageTreeNode>();
toProcess.Enqueue(
(thisPage: kidChildNode, reference: kidRef.Data, nodeDictionary: kidDictionaryToken, parentReference: current.reference,
nodeChildren: kidChildren));
setChildren.Add(() => kidChildNode.WithChildren(kidChildren));
current.nodeChildren.Add(kidChildNode);
}
}
} while (toProcess.Count > 0);
// All child lists are complete now, so attach them to their container nodes.
foreach (var action in setChildren)
{
action();
}
// Assign the final page numbers in the order produced by ToRecursiveOrderList
// (presumably document order - confirm against that helper).
foreach (var child in firstPage.Children.ToRecursiveOrderList(x=>x.Children).Where(child => child.IsPage))
{
pageNumber.Increment();
child.PageNumber = pageNumber.PageCount;
}
return firstPage;
}
/// <summary>
/// Decide whether a node of the pages tree is a page (leaf) rather than a pages (container) node,
/// validating the node in strict mode.
/// </summary>
/// <param name="nodeDictionary">Dictionary of the node being classified.</param>
/// <param name="parentReference">Reference the node's /Parent entry is expected to equal (checked in strict mode only).</param>
/// <param name="isRoot">When true the /Parent check is skipped.</param>
/// <param name="pdfTokenScanner">Scanner used to resolve indirect objects.</param>
/// <param name="isLenientParsing">When true no validation exception is thrown and a missing /Type is inferred from the absence of /Kids.</param>
/// <returns>True when the node is a page.</returns>
/// <exception cref="PdfDocumentFormatException">
/// Strict mode only: /Type is missing or is neither /Page nor /Pages, or /Parent is missing or differs from <paramref name="parentReference"/>.
/// </exception>
private static bool CheckIfIsPage(DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
{
    bool nodeIsPage;

    if (nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken nodeType))
    {
        nodeIsPage = nodeType.Equals(NameToken.Page);

        var isRecognisedType = nodeIsPage || nodeType.Equals(NameToken.Pages);
        if (!isRecognisedType && !isLenientParsing)
        {
            throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}.");
        }
    }
    else
    {
        if (!isLenientParsing)
        {
            throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}.");
        }

        // No /Type entry: a node without a /Kids array is taken to be a page.
        nodeIsPage = !nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _);
    }

    // The parent link is only verified for non-root nodes in strict mode.
    if (isLenientParsing || isRoot)
    {
        return nodeIsPage;
    }

    if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken declaredParent))
    {
        throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}.");
    }

    if (!declaredParent.Data.Equals(parentReference))
    {
        throw new PdfDocumentFormatException($"Pages tree node parent reference {declaredParent.Data} did not match actual parent {parentReference}.");
    }

    return nodeIsPage;
}
/// <summary>
/// Recursively walk <paramref name="node"/> and record every page node in <paramref name="result"/>, keyed by its page number.
/// </summary>
/// <param name="node">Node to start from; a page is recorded directly, any other node has its children visited.</param>
/// <param name="result">Dictionary that receives the pages; an existing entry with the same number is overwritten.</param>
/// <exception cref="InvalidOperationException">A page node has no page number.</exception>
private static void PopulatePageByNumberDictionary(PageTreeNode node, Dictionary<int, PageTreeNode> result)
{
    if (!node.IsPage)
    {
        // Container node: descend into each child in order.
        foreach (var descendant in node.Children)
        {
            PopulatePageByNumberDictionary(descendant, result);
        }

        return;
    }

    var number = node.PageNumber;
    if (number == null)
    {
        throw new InvalidOperationException($"Node was page but did not have page number: {node}.");
    }

    result[number.Value] = node;
}
}

View File

@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Outline
{
using Destinations;
using System;
using System.Collections.Generic;

View File

@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Outline
{
using Actions;
using Content;
using Destinations;
using Logging;
@ -41,14 +42,12 @@
return null;
}
var namedDestinations = ReadNamedDestinations(catalog, pdfScanner, log);
var roots = new List<BookmarkNode>();
var seen = new HashSet<IndirectReference>();
while (next != null)
{
ReadBookmarksRecursively(next, 0, false, seen, namedDestinations, catalog, roots);
ReadBookmarksRecursively(next, 0, false, seen, catalog.NamedDestinations, roots);
if (!next.TryGet(NameToken.Next, out IndirectReferenceToken nextReference)
|| !seen.Add(nextReference.Data))
@ -66,8 +65,7 @@
/// Extract bookmarks recursively.
/// </summary>
private void ReadBookmarksRecursively(DictionaryToken nodeDictionary, int level, bool readSiblings, HashSet<IndirectReference> seen,
IReadOnlyDictionary<string, ExplicitDestination> namedDestinations,
Catalog catalog,
NamedDestinations namedDestinations,
List<BookmarkNode> list)
{
// 12.3 Document-Level Navigation
@ -82,37 +80,37 @@
var children = new List<BookmarkNode>();
if (nodeDictionary.TryGet(NameToken.First, pdfScanner, out DictionaryToken firstChild))
{
ReadBookmarksRecursively(firstChild, level + 1, true, seen, namedDestinations, catalog, children);
ReadBookmarksRecursively(firstChild, level + 1, true, seen, namedDestinations, children);
}
BookmarkNode bookmark;
if (nodeDictionary.TryGet(NameToken.Dest, pdfScanner, out ArrayToken destArray)
&& TryGetExplicitDestination(destArray, catalog, log, out var destination))
if (DestinationProvider.TryGetDestination(nodeDictionary, NameToken.Dest, namedDestinations, pdfScanner, log, false, out var destination))
{
bookmark = new DocumentBookmarkNode(title, level, destination, children);
}
else if (nodeDictionary.TryGet(NameToken.Dest, pdfScanner, out IDataToken<string> destStringToken))
else if (ActionProvider.TryGetAction(nodeDictionary, namedDestinations, pdfScanner, log, out var actionResult))
{
// 12.3.2.3 Named Destinations
if (namedDestinations.TryGetValue(destStringToken.Data, out destination))
if (actionResult is GoToRAction goToRAction)
{
bookmark = new DocumentBookmarkNode(title, level, destination, children);
bookmark = new ExternalBookmarkNode(title, level, goToRAction.Destination, children, goToRAction.Filename);
}
else if (actionResult is GoToAction goToAction)
{
bookmark = new DocumentBookmarkNode(title, level, goToAction.Destination, children);
}
else if (actionResult is UriAction uriAction)
{
bookmark = new UriBookmarkNode(title, level, uriAction.Uri, children);
}
else
{
return;
}
}
else if (nodeDictionary.TryGet(NameToken.A, pdfScanner, out DictionaryToken actionDictionary)
&& TryGetAction(actionDictionary, catalog, pdfScanner, namedDestinations, log, title, level, children, out var actionResult))
{
bookmark = actionResult;
}
else
{
log.Error($"No /Dest(ination) or /A(ction) entry found for bookmark node: {nodeDictionary}.");
return;
}
@ -140,267 +138,8 @@
break;
}
ReadBookmarksRecursively(current, level, false, seen, namedDestinations, catalog, list);
ReadBookmarksRecursively(current, level, false, seen, namedDestinations, list);
}
}
#region Named Destinations
/// <summary>
/// Read the named destinations of the document, either from the catalog's /Dests dictionary
/// or, when that is absent, from the /Dests name tree of the catalog's /Names dictionary.
/// </summary>
/// <returns>The destinations that could be converted, keyed by name; empty when none are declared.</returns>
private static IReadOnlyDictionary<string, ExplicitDestination> ReadNamedDestinations(Catalog catalog, IPdfTokenScanner pdfScanner,
    ILog log)
{
    var destinationsByName = new Dictionary<string, ExplicitDestination>();
    var catalogDictionary = catalog.CatalogDictionary;

    if (catalogDictionary.TryGet(NameToken.Dests, pdfScanner, out DictionaryToken dests))
    {
        /*
         * In PDF 1.1, the correspondence between name objects and destinations is defined by the /Dests entry in the document catalog.
         * The value of this entry is a dictionary in which each key is a destination name and the corresponding value is either an array
         * defining the destination, using the explicit destination syntax, or a dictionary with a /D entry whose value is such an array.
         */
        foreach (var entry in dests.Data)
        {
            if (TryReadExplicitDestination(entry.Value, catalog, pdfScanner, log, out var parsed))
            {
                destinationsByName[entry.Key] = parsed;
            }
        }

        return destinationsByName;
    }

    if (catalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken names)
        && names.TryGet(NameToken.Dests, pdfScanner, out dests))
    {
        /*
         * In PDF 1.2, the correspondence between strings and destinations is defined by the /Dests entry in the document's name dictionary.
         * The value of the /Dests entry is a name tree mapping name strings to destinations.
         * The keys in the name tree may be treated as text strings for display purposes.
         * The destination value associated with a key in the name tree may be either an array or a dictionary.
         */
        NameTreeParser.FlattenNameTree(
            dests,
            pdfScanner,
            value => TryReadExplicitDestination(value, catalog, pdfScanner, log, out var parsed) ? parsed : null,
            destinationsByName);
    }

    return destinationsByName;
}
/// <summary>
/// Convert the value of a named destination into an <see cref="ExplicitDestination"/>.
/// The value may be a destination array, or a dictionary whose /D entry is such an array.
/// </summary>
/// <returns>True when a destination was produced.</returns>
private static bool TryReadExplicitDestination(IToken value, Catalog catalog, IPdfTokenScanner pdfScanner,
    ILog log, out ExplicitDestination destination)
{
    destination = null;

    // Form 1: the value is the destination array itself.
    if (DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken directArray)
        && TryGetExplicitDestination(directArray, catalog, log, out destination))
    {
        return true;
    }

    // Form 2: the value is a dictionary holding the array under /D.
    return DirectObjectFinder.TryGet(value, pdfScanner, out DictionaryToken wrapper)
           && wrapper.TryGet(NameToken.D, pdfScanner, out ArrayToken wrappedArray)
           && TryGetExplicitDestination(wrappedArray, catalog, log, out destination);
}
/// <summary>
/// Convert an explicit destination array (<c>[page /Type operands...]</c>) into an <see cref="ExplicitDestination"/>.
/// </summary>
/// <param name="explicitDestinationArray">The destination array; null or empty yields false.</param>
/// <param name="catalog">Used to resolve an indirect page reference to a page number.</param>
/// <param name="log">Receives an error when the page or the destination type cannot be read.</param>
/// <param name="destination">The resulting destination, or null when the method returns false.</param>
/// <returns>
/// True when a destination was produced. A missing or non-name destination type is logged and
/// falls back to a fit-page destination; an unrecognised destination type yields false.
/// </returns>
private static bool TryGetExplicitDestination(ArrayToken explicitDestinationArray, Catalog catalog,
    ILog log,
    out ExplicitDestination destination)
{
    destination = null;

    if (explicitDestinationArray == null || explicitDestinationArray.Length == 0)
    {
        return false;
    }

    // Operands are optional in malformed files: never index past the end of the array.
    IToken ElementOrNull(int index)
    {
        return index < explicitDestinationArray.Length ? explicitDestinationArray[index] : null;
    }

    NumericToken NumberOrNull(int index)
    {
        return ElementOrNull(index) as NumericToken;
    }

    int pageNumber;
    var pageToken = explicitDestinationArray[0];
    if (pageToken is IndirectReferenceToken pageIndirectReferenceToken)
    {
        var page = catalog.GetPageByReference(pageIndirectReferenceToken.Data);
        if (page?.PageNumber == null)
        {
            return false;
        }

        pageNumber = page.PageNumber.Value;
    }
    else if (pageToken is NumericToken pageNumericToken)
    {
        // A numeric page entry is zero-based; page numbers here are one-based.
        pageNumber = pageNumericToken.Int + 1;
    }
    else
    {
        var errorMessage = $"{nameof(TryGetExplicitDestination)} No page number given in 'Dest': '{explicitDestinationArray}'.";
        log.Error(errorMessage);
        return false;
    }

    var destTypeToken = ElementOrNull(1) as NameToken;
    if (destTypeToken == null)
    {
        var errorMessage = $"Missing name token as second argument to explicit destination: {explicitDestinationArray}.";
        log.Error(errorMessage);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage, ExplicitDestinationCoordinates.Empty);
        return true;
    }

    if (destTypeToken.Equals(NameToken.XYZ))
    {
        // [page /XYZ left top zoom]
        var left = NumberOrNull(2);
        var top = NumberOrNull(3);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.XyzCoordinates,
            new ExplicitDestinationCoordinates(left?.Data, top?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.Fit))
    {
        // [page /Fit]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage,
            ExplicitDestinationCoordinates.Empty);
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitH))
    {
        // [page /FitH top]
        var top = NumberOrNull(2);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitHorizontally,
            new ExplicitDestinationCoordinates(null, top?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitV))
    {
        // [page /FitV left]
        var left = NumberOrNull(2);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitVertically,
            new ExplicitDestinationCoordinates(left?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitR))
    {
        // [page /FitR left bottom right top]
        var left = NumberOrNull(2);
        var bottom = NumberOrNull(3);
        var right = NumberOrNull(4);
        var top = NumberOrNull(5);
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitRectangle,
            new ExplicitDestinationCoordinates(left?.Data, top?.Data, right?.Data, bottom?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitB))
    {
        // [page /FitB]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBox,
            ExplicitDestinationCoordinates.Empty);
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitBH))
    {
        // [page /FitBH top]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxHorizontally,
            new ExplicitDestinationCoordinates(null, NumberOrNull(2)?.Data));
        return true;
    }

    if (destTypeToken.Equals(NameToken.FitBV))
    {
        // [page /FitBV left]
        destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxVertically,
            new ExplicitDestinationCoordinates(NumberOrNull(2)?.Data));
        return true;
    }

    return false;
}
#endregion
/// <summary>
/// Turn an action dictionary (/A) of an outline item into the matching bookmark node.
/// Handles /GoTo (explicit or named destination), /GoToR and /URI actions.
/// </summary>
/// <returns>True when <paramref name="result"/> was set; false for other action types or an unresolvable /GoTo destination.</returns>
/// <exception cref="PdfDocumentFormatException">The action dictionary has no /S entry.</exception>
private static bool TryGetAction(DictionaryToken actionDictionary, Catalog catalog, IPdfTokenScanner pdfScanner,
    IReadOnlyDictionary<string, ExplicitDestination> namedDestinations,
    ILog log, string title, int level, List<BookmarkNode> children, out BookmarkNode result)
{
    result = null;

    if (!actionDictionary.TryGet(NameToken.S, pdfScanner, out NameToken actionType))
    {
        throw new PdfDocumentFormatException($"No action type (/S) specified for action: {actionDictionary}.");
    }

    if (actionType.Equals(NameToken.GoToR))
    {
        // A missing file name still produces a node, with an empty name.
        var hasFilename = actionDictionary.TryGetOptionalStringDirect(NameToken.F, pdfScanner, out var filename);
        result = new ExternalBookmarkNode(title, level, hasFilename ? filename : string.Empty, children);
        return true;
    }

    if (actionType.Equals(NameToken.Uri))
    {
        // A missing URI still produces a node, with an empty URI.
        var hasUri = actionDictionary.TryGetOptionalStringDirect(NameToken.Uri, pdfScanner, out var uri);
        result = new UriBookmarkNode(title, level, hasUri ? uri : string.Empty, children);
        return true;
    }

    if (!actionType.Equals(NameToken.GoTo))
    {
        return false;
    }

    // /GoTo: /D is either an explicit destination array ...
    if (actionDictionary.TryGet(NameToken.D, pdfScanner, out ArrayToken destinationArray)
        && TryGetExplicitDestination(destinationArray, catalog, log, out var explicitTarget))
    {
        result = new DocumentBookmarkNode(title, level, explicitTarget, children);
        return true;
    }

    // ... or the name of a named destination.
    if (actionDictionary.TryGet(NameToken.D, pdfScanner, out IDataToken<string> destinationName)
        && namedDestinations.TryGetValue(destinationName.Data, out var namedTarget))
    {
        result = new DocumentBookmarkNode(title, level, namedTarget, children);
        return true;
    }

    return false;
}
}
}

View File

@ -0,0 +1,36 @@
namespace UglyToad.PdfPig.Outline.Destinations
{
    using Logging;
    using Tokenization.Scanner;
    using Tokens;

    /// <summary>
    /// Resolves the destination entry of a dictionary to an <see cref="ExplicitDestination"/>.
    /// </summary>
    internal static class DestinationProvider
    {
        /// <summary>
        /// Get explicit destination or a named destination (Ref 12.3.2.3) from dictionary
        /// </summary>
        /// <param name="dictionary">Dictionary that holds the destination entry.</param>
        /// <param name="destinationToken">Token name, can be D or Dest</param>
        /// <param name="namedDestinations">Used to look up a destination by name and to convert destination arrays.</param>
        /// <param name="pdfScanner">Scanner used to resolve indirect objects.</param>
        /// <param name="log">Log passed on to the destination array conversion.</param>
        /// <param name="isRemoteDestination">in case we are looking up a destination for a GoToR (Go To Remote) action: pass in true
        /// to enforce a check for indirect page references (which is not allowed for GoToR)</param>
        /// <param name="destination">The resolved destination, or null when the method returns false.</param>
        /// <returns>True when the entry exists as an array or a string and could be resolved.</returns>
        internal static bool TryGetDestination(DictionaryToken dictionary, NameToken destinationToken, NamedDestinations namedDestinations, IPdfTokenScanner pdfScanner, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
        {
            destination = null;

            // An array is an explicit destination.
            if (dictionary.TryGet(destinationToken, pdfScanner, out ArrayToken explicitArray))
            {
                return namedDestinations.TryGetExplicitDestination(explicitArray, log, isRemoteDestination, out destination);
            }

            // A string is the name of a named destination.
            return dictionary.TryGet(destinationToken, pdfScanner, out IDataToken<string> destinationName)
                   && namedDestinations.TryGet(destinationName.Data, out destination);
        }
    }
}

View File

@ -6,7 +6,7 @@
public class ExplicitDestination
{
/// <summary>
/// The page number of the destination.
/// The page number (1-based) of the destination.
/// </summary>
public int PageNumber { get; }

View File

@ -0,0 +1,44 @@
namespace UglyToad.PdfPig.Outline;
using Content;
using Destinations;
using Logging;
using System.Collections.Generic;
using Tokens;
/// <summary>
/// Named destinations in a PDF document
/// </summary>
internal class NamedDestinations
{
    /// <summary>
    /// Explicit destinations of the document, keyed by destination name.
    /// </summary>
    private readonly IReadOnlyDictionary<string, ExplicitDestination> destinationsByName;

    /// <summary>
    /// Pages of the document; passed on when converting destination arrays.
    /// </summary>
    private readonly Pages documentPages;

    /// <summary>
    /// Create the named destinations from an already populated lookup.
    /// </summary>
    /// <param name="namedDestinations">Explicit destinations keyed by name.</param>
    /// <param name="pages">Pages of the document.</param>
    internal NamedDestinations(IReadOnlyDictionary<string, ExplicitDestination> namedDestinations, Pages pages)
    {
        destinationsByName = namedDestinations;
        documentPages = pages;
    }

    /// <summary>
    /// Look up a destination by name.
    /// </summary>
    internal bool TryGet(string name, out ExplicitDestination destination)
        => destinationsByName.TryGetValue(name, out destination);

    /// <summary>
    /// Convert an explicit destination array using the pages of this document.
    /// </summary>
    internal bool TryGetExplicitDestination(ArrayToken explicitDestinationArray, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
        => NamedDestinationsProvider.TryGetExplicitDestination(explicitDestinationArray, documentPages, log, isRemoteDestination, out destination);
}

View File

@ -0,0 +1,220 @@
namespace UglyToad.PdfPig.Outline;
using Content;
using Destinations;
using Logging;
using Parser.Parts;
using System.Collections.Generic;
using Tokenization.Scanner;
using Tokens;
/// <summary>
/// Reads the named destinations of a document and converts explicit destination arrays
/// into <see cref="ExplicitDestination"/> instances.
/// </summary>
internal static class NamedDestinationsProvider
{
    /// <summary>
    /// Read the named destinations from the catalog's /Dests dictionary or, when that is absent,
    /// from the /Dests name tree of the catalog's /Names dictionary.
    /// </summary>
    /// <param name="catalogDictionary">The document catalog dictionary.</param>
    /// <param name="pdfScanner">Scanner used to resolve indirect objects.</param>
    /// <param name="pages">Pages used to resolve indirect page references.</param>
    /// <param name="log">Optional log; may be null.</param>
    /// <returns>The named destinations; contains no entries when the document declares none.</returns>
    internal static NamedDestinations Read(DictionaryToken catalogDictionary, IPdfTokenScanner pdfScanner, Pages pages, ILog log)
    {
        var destinationsByName = new Dictionary<string, ExplicitDestination>();

        if (catalogDictionary.TryGet(NameToken.Dests, pdfScanner, out DictionaryToken destinations))
        {
            /*
             * In PDF 1.1, the correspondence between name objects and destinations is defined by the /Dests entry in the document catalog.
             * The value of this entry is a dictionary in which each key is a destination name and the corresponding value is either an array
             * defining the destination, using the explicit destination syntax, or a dictionary with a /D entry whose value is such an array.
             */
            foreach (var kvp in destinations.Data)
            {
                var value = kvp.Value;
                if (TryReadExplicitDestination(value, pdfScanner, pages, log, false, out var destination))
                {
                    destinationsByName[kvp.Key] = destination;
                }
            }
        }
        else if (catalogDictionary.TryGet(NameToken.Names, pdfScanner, out DictionaryToken names)
                 && names.TryGet(NameToken.Dests, pdfScanner, out destinations))
        {
            /*
             * In PDF 1.2, the correspondence between strings and destinations is defined by the /Dests entry in the document's name dictionary.
             * The value of the /Dests entry is a name tree mapping name strings to destinations.
             * The keys in the name tree may be treated as text strings for display purposes.
             * The destination value associated with a key in the name tree may be either an array or a dictionary.
             */
            NameTreeParser.FlattenNameTree(destinations, pdfScanner, value =>
            {
                if (TryReadExplicitDestination(value, pdfScanner, pages, log, false, out var destination))
                {
                    return destination;
                }

                return null;
            }, destinationsByName);
        }

        return new NamedDestinations(destinationsByName, pages);
    }

    /// <summary>
    /// Convert the value of a named destination, which is either a destination array
    /// or a dictionary whose /D entry is such an array.
    /// </summary>
    private static bool TryReadExplicitDestination(IToken value, IPdfTokenScanner pdfScanner, Pages pages, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
    {
        destination = null;

        if (DirectObjectFinder.TryGet(value, pdfScanner, out ArrayToken valueArray)
            && TryGetExplicitDestination(valueArray, pages, log, isRemoteDestination, out destination))
        {
            return true;
        }

        if (DirectObjectFinder.TryGet(value, pdfScanner, out DictionaryToken valueDictionary)
            && valueDictionary.TryGet(NameToken.D, pdfScanner, out valueArray)
            && TryGetExplicitDestination(valueArray, pages, log, isRemoteDestination, out destination))
        {
            return true;
        }

        return false;
    }

    /// <summary>
    /// Return the numeric token at <paramref name="index"/>, or null when the array is too short
    /// or the element is not numeric.
    /// </summary>
    private static NumericToken GetNumericOrNull(ArrayToken array, int index)
    {
        return index < array.Length ? array[index] as NumericToken : null;
    }

    /// <summary>
    /// Convert an explicit destination array (<c>[page /Type operands...]</c>) into an <see cref="ExplicitDestination"/>.
    /// </summary>
    /// <param name="explicitDestinationArray">The destination array; null or empty yields false.</param>
    /// <param name="pages">Used to resolve an indirect page reference to a page number.</param>
    /// <param name="log">Optional log; may be null.</param>
    /// <param name="isRemoteDestination">When true, an indirect page reference is rejected.</param>
    /// <param name="destination">The resulting destination, or null when the method returns false.</param>
    /// <returns>
    /// True when a destination was produced. A missing or non-name destination type is logged and
    /// falls back to a fit-page destination; an unrecognised destination type yields false.
    /// Missing coordinate operands are treated as absent values instead of being read past the end of the array.
    /// </returns>
    internal static bool TryGetExplicitDestination(ArrayToken explicitDestinationArray, Pages pages, ILog log, bool isRemoteDestination, out ExplicitDestination destination)
    {
        destination = null;

        if (explicitDestinationArray == null || explicitDestinationArray.Length == 0)
        {
            return false;
        }

        int pageNumber;
        var pageToken = explicitDestinationArray[0];
        if (pageToken is IndirectReferenceToken pageIndirectReferenceToken)
        {
            if (isRemoteDestination)
            {
                // Table 8.50 Remote Go-To Actions
                var errorMessage = $"{nameof(TryGetExplicitDestination)} Cannot use indirect reference for remote destination.";
                log?.Error(errorMessage);
                return false;
            }

            var page = pages.GetPageByReference(pageIndirectReferenceToken.Data);
            if (page?.PageNumber == null)
            {
                return false;
            }

            pageNumber = page.PageNumber.Value;
        }
        else if (pageToken is NumericToken pageNumericToken)
        {
            // A numeric page entry is zero-based; ExplicitDestination page numbers are one-based.
            pageNumber = pageNumericToken.Int + 1;
        }
        else
        {
            var errorMessage = $"{nameof(TryGetExplicitDestination)} No page number given in 'Dest': '{explicitDestinationArray}'.";
            log?.Error(errorMessage);
            return false;
        }

        NameToken destTypeToken = null;
        if (explicitDestinationArray.Length > 1)
        {
            destTypeToken = explicitDestinationArray[1] as NameToken;
        }

        if (destTypeToken == null)
        {
            var errorMessage = $"Missing name token as second argument to explicit destination: {explicitDestinationArray}.";
            log?.Error(errorMessage);
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage, ExplicitDestinationCoordinates.Empty);
            return true;
        }

        if (destTypeToken.Equals(NameToken.XYZ))
        {
            // [page /XYZ left top zoom]
            var left = GetNumericOrNull(explicitDestinationArray, 2);
            var top = GetNumericOrNull(explicitDestinationArray, 3);
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.XyzCoordinates,
                new ExplicitDestinationCoordinates(left?.Data, top?.Data));
            return true;
        }

        if (destTypeToken.Equals(NameToken.Fit))
        {
            // [page /Fit]
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitPage,
                ExplicitDestinationCoordinates.Empty);
            return true;
        }

        if (destTypeToken.Equals(NameToken.FitH))
        {
            // [page /FitH top]
            var top = GetNumericOrNull(explicitDestinationArray, 2);
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitHorizontally,
                new ExplicitDestinationCoordinates(null, top?.Data));
            return true;
        }

        if (destTypeToken.Equals(NameToken.FitV))
        {
            // [page /FitV left]
            var left = GetNumericOrNull(explicitDestinationArray, 2);
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitVertically,
                new ExplicitDestinationCoordinates(left?.Data));
            return true;
        }

        if (destTypeToken.Equals(NameToken.FitR))
        {
            // [page /FitR left bottom right top]
            var left = GetNumericOrNull(explicitDestinationArray, 2);
            var bottom = GetNumericOrNull(explicitDestinationArray, 3);
            var right = GetNumericOrNull(explicitDestinationArray, 4);
            var top = GetNumericOrNull(explicitDestinationArray, 5);
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitRectangle,
                new ExplicitDestinationCoordinates(left?.Data, top?.Data, right?.Data, bottom?.Data));
            return true;
        }

        if (destTypeToken.Equals(NameToken.FitB))
        {
            // [page /FitB]
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBox,
                ExplicitDestinationCoordinates.Empty);
            return true;
        }

        if (destTypeToken.Equals(NameToken.FitBH))
        {
            // [page /FitBH top]
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxHorizontally,
                new ExplicitDestinationCoordinates(null, GetNumericOrNull(explicitDestinationArray, 2)?.Data));
            return true;
        }

        if (destTypeToken.Equals(NameToken.FitBV))
        {
            // [page /FitBV left]
            destination = new ExplicitDestination(pageNumber, ExplicitDestinationType.FitBoundingBoxVertically,
                new ExplicitDestinationCoordinates(GetNumericOrNull(explicitDestinationArray, 2)?.Data));
            return true;
        }

        return false;
    }
}

View File

@ -1,6 +1,5 @@
namespace UglyToad.PdfPig.Outline
{
using System;
using System.Collections.Generic;
using Destinations;
@ -14,7 +13,7 @@
/// <summary>
/// The page number where the bookmark is located.
/// </summary>
public int PageNumber { get; }
public int PageNumber => Destination.PageNumber;
/// <summary>
/// The destination of the bookmark in the current document.
@ -28,8 +27,7 @@
public DocumentBookmarkNode(string title, int level, ExplicitDestination destination, IReadOnlyList<BookmarkNode> children)
: base(title, level, children)
{
Destination = destination ?? throw new ArgumentNullException(nameof(destination));
PageNumber = destination.PageNumber;
Destination = destination;
}
/// <inheritdoc />

View File

@ -0,0 +1,33 @@
namespace UglyToad.PdfPig.Outline;
using Destinations;
using System;
using System.Collections.Generic;
/// <inheritdoc />
/// <summary>
/// A node in the <see cref="Bookmarks" /> of a PDF document which corresponds
/// to a location in an embedded file.
/// </summary>
public class EmbeddedBookmarkNode : DocumentBookmarkNode
{
    /// <summary>
    /// The file specification for the embedded file
    /// </summary>
    public string FileSpecification { get; }

    /// <inheritdoc />
    /// <summary>
    /// Create a new <see cref="EmbeddedBookmarkNode" />.
    /// </summary>
    /// <exception cref="ArgumentNullException"><paramref name="fileSpecification"/> is null.</exception>
    public EmbeddedBookmarkNode(
        string title,
        int level,
        ExplicitDestination destination,
        IReadOnlyList<BookmarkNode> children,
        string fileSpecification)
        : base(title, level, destination, children)
    {
        if (fileSpecification == null)
        {
            throw new ArgumentNullException(nameof(fileSpecification));
        }

        FileSpecification = fileSpecification;
    }

    /// <inheritdoc />
    public override string ToString() => $"Embedded file '{FileSpecification}', {Level}, {Title}";
}

View File

@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Outline
{
using Destinations;
using System;
using System.Collections.Generic;
@ -8,7 +9,7 @@
/// A node in the <see cref="Bookmarks" /> of a PDF document which corresponds
/// to a location in an external file.
/// </summary>
public class ExternalBookmarkNode : BookmarkNode
public class ExternalBookmarkNode : DocumentBookmarkNode
{
/// <summary>
/// The name of the file containing this bookmark.
@ -19,7 +20,7 @@
/// <summary>
/// Create a new <see cref="ExternalBookmarkNode" />.
/// </summary>
public ExternalBookmarkNode(string title, int level, string fileName, IReadOnlyList<BookmarkNode> children) : base(title, level, children)
public ExternalBookmarkNode(string title, int level, ExplicitDestination destination, IReadOnlyList<BookmarkNode> children, string fileName) : base(title, level, destination, children)
{
FileName = fileName ?? throw new ArgumentNullException(nameof(fileName));
}

View File

@ -1,30 +1,18 @@
namespace UglyToad.PdfPig.Parser
{
using System;
using System.Collections.Generic;
using Content;
using Core;
using Logging;
using Outline;
using Parts;
using System.Linq;
using Tokenization.Scanner;
using Tokens;
using Util;
internal static class CatalogFactory
{
private class PageCounter
{
public int PageCount { get; private set; }
public void Increment()
{
PageCount++;
}
}
public static Catalog Create(IndirectReference rootReference, DictionaryToken dictionary,
IPdfTokenScanner scanner,
bool isLenientParsing)
IPdfTokenScanner scanner, PageFactory pageFactory, ILog log, bool isLenientParsing)
{
if (dictionary == null)
{
@ -41,203 +29,27 @@
throw new PdfDocumentFormatException($"No pages entry was found in the catalog dictionary: {dictionary}.");
}
DictionaryToken pages;
DictionaryToken pagesDictionary;
var pagesReference = rootReference;
if (value is IndirectReferenceToken pagesRef)
{
pagesReference = pagesRef.Data;
pages = DirectObjectFinder.Get<DictionaryToken>(pagesRef, scanner);
pagesDictionary = DirectObjectFinder.Get<DictionaryToken>(pagesRef, scanner);
}
else if (value is DictionaryToken pagesDict)
{
pages = pagesDict;
pagesDictionary = pagesDict;
}
else
{
pages = DirectObjectFinder.Get<DictionaryToken>(value, scanner);
pagesDictionary = DirectObjectFinder.Get<DictionaryToken>(value, scanner);
}
var pageNumber = new PageCounter();
var pages = PagesFactory.Create(pagesReference, pagesDictionary, scanner, pageFactory, log, isLenientParsing);
var namedDestinations = NamedDestinationsProvider.Read(dictionary, scanner, pages, null);
var pageTree = ProcessPagesNode(pagesReference, pages, new IndirectReference(1, 0), true,
scanner, isLenientParsing, pageNumber);
return new Catalog(dictionary, pages, pageTree);
}
/// <summary>
/// Builds the <see cref="PageTreeNode"/> hierarchy rooted at the given pages-tree node.
/// </summary>
/// <remarks>
/// A node that is itself a page is returned immediately as a leaf. Otherwise the tree is walked
/// breadth-first with an explicit queue (no recursion); children are attached to their parent nodes
/// once the walk has finished, and page numbers are then assigned to the page nodes in the order
/// produced by <c>ToRecursiveOrderList</c>.
/// </remarks>
/// <param name="referenceInput">Indirect reference of the node to process.</param>
/// <param name="nodeDictionaryInput">Dictionary of the node to process.</param>
/// <param name="parentReferenceInput">Reference of the node's parent, used when validating the node.</param>
/// <param name="isRoot">Whether the node is the root of the pages tree.</param>
/// <param name="pdfTokenScanner">Scanner used to resolve indirect references.</param>
/// <param name="isLenientParsing">When <c>true</c>, a missing kids array is treated as empty instead of throwing.</param>
/// <param name="pageNumber">Counter incremented once per page found; its current count becomes the page number.</param>
/// <returns>The node for <paramref name="referenceInput"/> with all reachable descendants attached.</returns>
/// <exception cref="PdfDocumentFormatException">
/// A non-page node has no kids array (strict parsing only), a kids entry is not an indirect reference,
/// the dictionary for a kid cannot be found, or node validation in <c>CheckIfIsPage</c> fails.
/// </exception>
private static PageTreeNode ProcessPagesNode(IndirectReference referenceInput,
    DictionaryToken nodeDictionaryInput,
    IndirectReference parentReferenceInput,
    bool isRoot,
    IPdfTokenScanner pdfTokenScanner,
    bool isLenientParsing,
    PageCounter pageNumber)
{
    bool isPage = CheckIfIsPage(nodeDictionaryInput, parentReferenceInput, isRoot, pdfTokenScanner, isLenientParsing);

    if (isPage)
    {
        pageNumber.Increment();

        return new PageTreeNode(nodeDictionaryInput, referenceInput, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
    }

    // If we got here, we have to iterate until the queue of pending nodes is empty.

    // Attempt to detect (and break) any infinite loop by recording the ids of the last 1000 (by default) tokens processed.
    const int InfiniteLoopWorkingWindow = 1000;

    // Quick lookup: object number -> generations already processed (trimmed as we go to the working window).
    var visitedTokens = new Dictionary<long, HashSet<int>>();

    // The same ids in processing order, so that the oldest one can be evicted from the lookup.
    var visitedTokensWorkingWindow = new Queue<(long ObjectNumber, int Generation)>(InfiniteLoopWorkingWindow);

    var toProcess =
        new Queue<(PageTreeNode thisPage, IndirectReference reference, DictionaryToken nodeDictionary, IndirectReference parentReference,
            List<PageTreeNode> nodeChildren)>();

    var firstPage = new PageTreeNode(nodeDictionaryInput, referenceInput, false, null);

    // Attaching children is deferred until every child list has been completely filled.
    var setChildren = new List<Action>();
    var firstPageChildren = new List<PageTreeNode>();

    setChildren.Add(() => firstPage.WithChildren(firstPageChildren));

    toProcess.Enqueue(
        (thisPage: firstPage, reference: referenceInput, nodeDictionary: nodeDictionaryInput, parentReference: parentReferenceInput,
            nodeChildren: firstPageChildren));

    do
    {
        var current = toProcess.Dequeue();

        #region Break any potential infinite loop
        // Remember the last 1000 (by default) tokens and skip any token we are asked to process again.
        var currentReferenceObjectNumber = current.reference.ObjectNumber;
        var currentReferenceGeneration = current.reference.Generation;

        if (visitedTokens.TryGetValue(currentReferenceObjectNumber, out var generations))
        {
            // HashSet.Add returns false when the generation is already recorded, i.e. the token was seen before.
            if (!generations.Add(currentReferenceGeneration))
            {
                var listOfLastVisitedToken = visitedTokensWorkingWindow.ToList();
                var indexOfCurrentTokenInListOfLastVisitedToken = listOfLastVisitedToken.IndexOf((currentReferenceObjectNumber, currentReferenceGeneration));
                var howManyTokensBack = Math.Abs(indexOfCurrentTokenInListOfLastVisitedToken - listOfLastVisitedToken.Count); // e.g. the loop is taking us back to the last token or five tokens back
                System.Diagnostics.Debug.WriteLine($"Break infinite loop while processing page {pageNumber.PageCount+1} tokens. Token with object number {currentReferenceObjectNumber} and generation {currentReferenceGeneration} processed {howManyTokensBack} token(s) back. ");

                continue; // don't reprocess token already processed. break infinite loop. Issue #519
            }
        }
        else
        {
            visitedTokens.Add(currentReferenceObjectNumber, new HashSet<int> { currentReferenceGeneration });
        }

        // Every newly recorded id enters the working window, including a new generation of an already
        // known object number, so that each recorded id is eventually evicted again.
        visitedTokensWorkingWindow.Enqueue((currentReferenceObjectNumber, currentReferenceGeneration));

        if (visitedTokensWorkingWindow.Count >= InfiniteLoopWorkingWindow)
        {
            var oldest = visitedTokensWorkingWindow.Dequeue();

            if (visitedTokens.TryGetValue(oldest.ObjectNumber, out var oldestGenerations))
            {
                oldestGenerations.Remove(oldest.Generation);

                if (oldestGenerations.Count == 0)
                {
                    visitedTokens.Remove(oldest.ObjectNumber);
                }
            }
        }
        #endregion

        if (!current.nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken kids))
        {
            if (!isLenientParsing)
            {
                throw new PdfDocumentFormatException($"Pages node in the document pages tree did not define a kids array: {current.nodeDictionary}.");
            }

            kids = new ArrayToken(EmptyArray<IToken>.Instance);
        }

        foreach (var kid in kids.Data)
        {
            if (!(kid is IndirectReferenceToken kidRef))
            {
                throw new PdfDocumentFormatException($"Kids array contained invalid entry (must be indirect reference): {kid}.");
            }

            if (!DirectObjectFinder.TryGet(kidRef, pdfTokenScanner, out DictionaryToken kidDictionaryToken))
            {
                throw new PdfDocumentFormatException($"Could not find dictionary associated with reference in pages kids array: {kidRef}.");
            }

            bool isChildPage = CheckIfIsPage(kidDictionaryToken, current.reference, false, pdfTokenScanner, isLenientParsing);

            if (isChildPage)
            {
                // The number given here is provisional; it is overwritten in the numbering pass below.
                var kidPageNode =
                    new PageTreeNode(kidDictionaryToken, kidRef.Data, true, pageNumber.PageCount).WithChildren(EmptyArray<PageTreeNode>.Instance);
                current.nodeChildren.Add(kidPageNode);
            }
            else
            {
                var kidChildNode = new PageTreeNode(kidDictionaryToken, kidRef.Data, false, null);
                var kidChildren = new List<PageTreeNode>();
                toProcess.Enqueue(
                    (thisPage: kidChildNode, reference: kidRef.Data, nodeDictionary: kidDictionaryToken, parentReference: current.reference,
                        nodeChildren: kidChildren));

                setChildren.Add(() => kidChildNode.WithChildren(kidChildren));
                current.nodeChildren.Add(kidChildNode);
            }
        }
    } while (toProcess.Count > 0);

    // All child lists are complete now; attach them to their nodes.
    foreach (var action in setChildren)
    {
        action();
    }

    // Number the pages in the order in which the flattened tree lists them.
    foreach (var child in firstPage.Children.ToRecursiveOrderList(x=>x.Children).Where(child => child.IsPage))
    {
        pageNumber.Increment();
        child.PageNumber = pageNumber.PageCount;
    }

    return firstPage;
}
/// <summary>
/// Decides whether a node of the pages tree is a page, validating the node's type and parent
/// entries when parsing is not lenient.
/// </summary>
/// <param name="nodeDictionary">Dictionary of the node being inspected.</param>
/// <param name="parentReference">Reference the node's parent entry is expected to equal.</param>
/// <param name="isRoot">When <c>true</c>, the parent entry is not checked.</param>
/// <param name="pdfTokenScanner">Scanner passed to the dictionary lookups.</param>
/// <param name="isLenientParsing">When <c>true</c>, missing or invalid entries do not throw.</param>
/// <returns><c>true</c> if the node is considered a page; otherwise <c>false</c>.</returns>
/// <exception cref="PdfDocumentFormatException">
/// With strict parsing: the node has no type, has a type other than Page or Pages, has no parent
/// reference (non-root nodes), or its parent reference differs from <paramref name="parentReference"/>.
/// </exception>
private static bool CheckIfIsPage(DictionaryToken nodeDictionary, IndirectReference parentReference, bool isRoot, IPdfTokenScanner pdfTokenScanner, bool isLenientParsing)
{
var isPage = false;
// No type entry: an error when strict; when lenient, a node without a kids array is treated as a page.
if (!nodeDictionary.TryGet(NameToken.Type, pdfTokenScanner, out NameToken type))
{
if (!isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree did not define a type: {nodeDictionary}."); }
if (!nodeDictionary.TryGet(NameToken.Kids, pdfTokenScanner, out ArrayToken _)) { isPage = true; }
}
else
{
// A type entry exists: only Page counts as a page; anything other than Page/Pages is rejected when strict.
isPage = type.Equals(NameToken.Page);
if (!isPage && !type.Equals(NameToken.Pages) && !isLenientParsing) { throw new PdfDocumentFormatException($"Node in the document pages tree defined invalid type: {nodeDictionary}."); }
}
// Strict parsing only: every non-root node must point back at the parent it was reached from.
if (!isLenientParsing && !isRoot)
{
if (!nodeDictionary.TryGet(NameToken.Parent, pdfTokenScanner, out IndirectReferenceToken parentReferenceToken)) { throw new PdfDocumentFormatException($"Could not find parent indirect reference token on pages tree node: {nodeDictionary}."); }
if (!parentReferenceToken.Data.Equals(parentReference)) { throw new PdfDocumentFormatException($"Pages tree node parent reference {parentReferenceToken.Data} did not match actual parent {parentReference}."); }
}
return isPage;
// NOTE(review): the statement below follows an unconditional return and yields a Catalog from a
// bool method; it uses dictionary, pages and namedDestinations, none of which are declared here.
// It looks like a line that belongs to a different method — confirm and relocate.
return new Catalog(dictionary, pages, namedDestinations);
}
}
}

View File

@ -10,6 +10,7 @@
using Graphics;
using Graphics.Operations;
using Logging;
using Outline;
using Parts;
using Tokenization.Scanner;
using Tokens;
@ -21,20 +22,24 @@
private readonly IResourceStore resourceStore;
private readonly ILookupFilterProvider filterProvider;
private readonly IPageContentParser pageContentParser;
private readonly ILog log;
public PageFactory(
IPdfTokenScanner pdfScanner,
IResourceStore resourceStore,
ILookupFilterProvider filterProvider,
IPageContentParser pageContentParser)
IPageContentParser pageContentParser,
ILog log)
{
this.resourceStore = resourceStore;
this.filterProvider = filterProvider;
this.pageContentParser = pageContentParser;
this.pdfScanner = pdfScanner;
this.log = log;
}
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers, InternalParsingOptions parsingOptions)
public Page Create(int number, DictionaryToken dictionary, PageTreeMembers pageTreeMembers,
NamedDestinations namedDestinations, InternalParsingOptions parsingOptions)
{
if (dictionary == null)
{
@ -48,8 +53,8 @@
parsingOptions.Logger.Error($"Page {number} had its type specified as {type} rather than 'Page'.");
}
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers, parsingOptions.Logger);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox, parsingOptions.Logger);
MediaBox mediaBox = GetMediaBox(number, dictionary, pageTreeMembers);
CropBox cropBox = GetCropBox(dictionary, pageTreeMembers, mediaBox);
var rotation = new PageRotationDegrees(pageTreeMembers.Rotation);
if (dictionary.TryGet(NameToken.Rotate, pdfScanner, out NumericToken rotateToken))
@ -133,11 +138,9 @@
content = GetContent(number, bytes, cropBox, userSpaceUnit, rotation, mediaBox, parsingOptions);
}
var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, parsingOptions.Logger);
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content,
new AnnotationProvider(pdfScanner, dictionary, initialMatrix),
pdfScanner);
var initialMatrix = ContentStreamProcessor.GetInitialMatrix(userSpaceUnit, mediaBox, cropBox, rotation, log);
var annotationProvider = new AnnotationProvider(pdfScanner, dictionary, initialMatrix, namedDestinations, log);
var page = new Page(number, dictionary, mediaBox, cropBox, rotation, content, annotationProvider, pdfScanner);
for (var i = 0; i < stackDepth; i++)
{
@ -187,8 +190,7 @@
private CropBox GetCropBox(
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers,
MediaBox mediaBox,
ILog log)
MediaBox mediaBox)
{
CropBox cropBox;
if (dictionary.TryGet(NameToken.CropBox, out var cropBoxObject) &&
@ -216,23 +218,22 @@
private MediaBox GetMediaBox(
int number,
DictionaryToken dictionary,
PageTreeMembers pageTreeMembers,
ILog log)
PageTreeMembers pageTreeMembers)
{
MediaBox mediaBox;
if (dictionary.TryGet(NameToken.MediaBox, out var mediaboxObject)
&& DirectObjectFinder.TryGet(mediaboxObject, pdfScanner, out ArrayToken mediaboxArray))
if (dictionary.TryGet(NameToken.MediaBox, out var mediaBoxObject)
&& DirectObjectFinder.TryGet(mediaBoxObject, pdfScanner, out ArrayToken mediaBoxArray))
{
if (mediaboxArray.Length != 4)
if (mediaBoxArray.Length != 4)
{
log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaboxArray}. Defaulting to US Letter.");
log.Error($"The MediaBox was the wrong length in the dictionary: {dictionary}. Array was: {mediaBoxArray}. Defaulting to US Letter.");
mediaBox = MediaBox.Letter;
return mediaBox;
}
mediaBox = new MediaBox(mediaboxArray.ToRectangle(pdfScanner));
mediaBox = new MediaBox(mediaBoxArray.ToRectangle(pdfScanner));
}
else
{

View File

@ -176,14 +176,18 @@
crossReferenceTable.Trailer,
parsingOptions.UseLenientParsing);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
new PageContentParser(new ReflectionGraphicsStateOperationFactory()), parsingOptions.Logger);
var catalog = CatalogFactory.Create(
rootReference,
rootDictionary,
pdfScanner,
pageFactory,
parsingOptions.Logger,
parsingOptions.UseLenientParsing);
var pageFactory = new PageFactory(pdfScanner, resourceContainer, filterProvider,
new PageContentParser(new ReflectionGraphicsStateOperationFactory()));
var acroFormFactory = new AcroFormFactory(pdfScanner, filterProvider, crossReferenceTable);
var bookmarksProvider = new BookmarksProvider(parsingOptions.Logger, pdfScanner);

View File

@ -14,6 +14,8 @@
using Tokenization.Scanner;
using Tokens;
using Outline;
using Outline.Destinations;
using System.Linq;
using Util.JetBrains.Annotations;
/// <inheritdoc />
@ -42,6 +44,7 @@
[NotNull]
private readonly Pages pages;
private readonly NamedDestinations namedDestinations;
/// <summary>
/// The metadata associated with this document.
@ -75,13 +78,12 @@
/// </summary>
public bool IsEncrypted => encryptionDictionary != null;
internal PdfDocument(
IInputBytes inputBytes,
HeaderVersion version,
internal PdfDocument(IInputBytes inputBytes,
HeaderVersion version,
CrossReferenceTable crossReferenceTable,
IPageFactory pageFactory,
Catalog catalog,
DocumentInformation information,
DocumentInformation information,
EncryptionDictionary encryptionDictionary,
IPdfTokenScanner pdfScanner,
ILookupFilterProvider filterProvider,
@ -98,7 +100,8 @@
this.parsingOptions = parsingOptions;
Information = information ?? throw new ArgumentNullException(nameof(information));
pages = new Pages(catalog, pageFactory, pdfScanner);
pages = catalog.Pages;
namedDestinations = catalog.NamedDestinations;
Structure = new Structure(catalog, crossReferenceTable, pdfScanner);
Advanced = new AdvancedPdfDocumentAccess(pdfScanner, filterProvider, catalog);
documentForm = new Lazy<AcroForm>(() => acroFormFactory.GetAcroForm(catalog));
@ -148,7 +151,7 @@
try
{
return pages.GetPage(pageNumber, parsingOptions);
return pages.GetPage(pageNumber, namedDestinations, parsingOptions);
}
catch (Exception ex)
{

View File

@ -276,7 +276,6 @@ namespace UglyToad.PdfPig.Writer
return AddPage(rectangle.Width, rectangle.Height);
}
internal IToken CopyToken(IPdfTokenScanner source, IToken token)
{
if (!existingCopies.TryGetValue(source, out var refs))
@ -288,15 +287,18 @@ namespace UglyToad.PdfPig.Writer
return WriterUtil.CopyToken(context, token, source, refs);
}
internal class PageInfo
private class PageInfo
{
public DictionaryToken Page { get; set; }
public IReadOnlyList<DictionaryToken> Parents { get; set; }
}
private readonly ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>> existingCopies =
new ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>>();
private readonly ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>> existingTrees =
new ConditionalWeakTable<PdfDocument, Dictionary<int, PageInfo>>();
/// <summary>
/// Add a new page with the specified size, this page will be included in the output when <see cref="Build"/> is called.
/// </summary>
@ -315,7 +317,7 @@ namespace UglyToad.PdfPig.Writer
{
pagesInfos = new Dictionary<int, PageInfo>();
int i = 1;
foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.Pages.PageTree))
{
pagesInfos[i] = new PageInfo
{