mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
Merge pull request #359 from plaisted/master
link annotation fix for PdfDocumentBuilder
This commit is contained in:
commit
df3552c38e
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf
Normal file
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf
Normal file
Binary file not shown.
@ -101,6 +101,33 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
[Fact]
public void CanFastAddPageAndStripLinkAnnots()
{
    // Read the source fixture; page 1 is expected to carry at least one link annotation.
    var sourceBytes = File.ReadAllBytes(IntegrationHelpers.GetDocumentPath("outline.pdf"));

    int originalAnnotationCount;
    byte[] builtBytes;
    using (var sourceDocument = PdfDocument.Open(sourceBytes, ParsingOptions.LenientParsingOff))
    {
        using (var builder = new PdfDocumentBuilder())
        {
            // Copy page 1 through the builder and serialise the new document.
            builder.AddPage(sourceDocument, 1);
            builtBytes = builder.Build();

            // Record how many annotations the source page has and confirm a link is among them.
            var originalAnnotations = sourceDocument.GetPage(1).ExperimentalAccess.GetAnnotations().ToList();
            originalAnnotationCount = originalAnnotations.Count;
            Assert.Contains(originalAnnotations, a => a.Type == Annotations.AnnotationType.Link);
        }
    }

    using (var rebuiltDocument = PdfDocument.Open(builtBytes, ParsingOptions.LenientParsingOff))
    {
        // The rebuilt page must have exactly one annotation fewer and no link annotation.
        var rebuiltAnnotations = rebuiltDocument.GetPage(1).ExperimentalAccess.GetAnnotations().ToList();
        Assert.Equal(originalAnnotationCount - 1, rebuiltAnnotations.Count);
        Assert.DoesNotContain(rebuiltAnnotations, a => a.Type == Annotations.AnnotationType.Link);
    }
}
|
||||||
|
|
||||||
[Fact]
|
[Fact]
|
||||||
public void CanReadSingleBlankPage()
|
public void CanReadSingleBlankPage()
|
||||||
|
@ -8,6 +8,13 @@
|
|||||||
|
|
||||||
internal interface IPdfStreamWriter : IDisposable
|
internal interface IPdfStreamWriter : IDisposable
|
||||||
{
|
{
|
||||||
|
/// <summary>
|
||||||
|
/// Sets if the stream writer should attempt to deduplicate objects.
|
||||||
|
/// May not have any effect if <see cref="IPdfStreamWriter"/> does not
|
||||||
|
/// support deduplication.
|
||||||
|
/// </summary>
|
||||||
|
bool AttemptDeduplication { get; set; }
|
||||||
|
|
||||||
/// <summary>
|
/// <summary>
|
||||||
/// The underlying stream used by the writer.
|
/// The underlying stream used by the writer.
|
||||||
/// </summary>
|
/// </summary>
|
||||||
|
@ -23,13 +23,16 @@
|
|||||||
ms.SetLength(0);
|
ms.SetLength(0);
|
||||||
TokenWriter.WriteToken(token, ms);
|
TokenWriter.WriteToken(token, ms);
|
||||||
var contents = ms.ToArray();
|
var contents = ms.ToArray();
|
||||||
if (hashes.TryGetValue(contents, out var value))
|
if (AttemptDeduplication && hashes.TryGetValue(contents, out var value))
|
||||||
{
|
{
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
|
||||||
var ir = ReserveObjectNumber();
|
var ir = ReserveObjectNumber();
|
||||||
hashes.Add(contents, ir);
|
if (AttemptDeduplication)
|
||||||
|
{
|
||||||
|
hashes.Add(contents, ir);
|
||||||
|
}
|
||||||
|
|
||||||
offsets.Add(ir.Data, Stream.Position);
|
offsets.Add(ir.Data, Stream.Position);
|
||||||
TokenWriter.WriteObject(ir.Data.ObjectNumber, ir.Data.Generation, contents, Stream);
|
TokenWriter.WriteObject(ir.Data.ObjectNumber, ir.Data.Generation, contents, Stream);
|
||||||
|
@ -330,6 +330,10 @@ namespace UglyToad.PdfPig.Writer
|
|||||||
var streams = new List<PdfPageBuilder.CopiedContentStream>();
|
var streams = new List<PdfPageBuilder.CopiedContentStream>();
|
||||||
if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
|
if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
|
||||||
{
|
{
|
||||||
|
// Adobe Acrobat errors if content streams ref'd by multiple pages, turn off
|
||||||
|
// dedup if on to avoid issues
|
||||||
|
var prev = context.AttemptDeduplication;
|
||||||
|
context.AttemptDeduplication = false;
|
||||||
if (contentsToken is ArrayToken array)
|
if (contentsToken is ArrayToken array)
|
||||||
{
|
{
|
||||||
foreach (var item in array.Data)
|
foreach (var item in array.Data)
|
||||||
@ -347,6 +351,7 @@ namespace UglyToad.PdfPig.Writer
|
|||||||
streams.Add(new PdfPageBuilder.CopiedContentStream(
|
streams.Add(new PdfPageBuilder.CopiedContentStream(
|
||||||
WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken));
|
WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken));
|
||||||
}
|
}
|
||||||
|
context.AttemptDeduplication = prev;
|
||||||
}
|
}
|
||||||
|
|
||||||
// manually copy page dict / resources as we need to modify some
|
// manually copy page dict / resources as we need to modify some
|
||||||
@ -379,15 +384,55 @@ namespace UglyToad.PdfPig.Writer
|
|||||||
{
|
{
|
||||||
if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
|
if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
|
||||||
{
|
{
|
||||||
|
// don't copy these as they'll be handled during page tree writing
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (kvp.Key == NameToken.Resources)
|
if (kvp.Key == NameToken.Resources)
|
||||||
{
|
{
|
||||||
|
// merge parent resources into child
|
||||||
CopyResourceDict(kvp.Value, resources);
|
CopyResourceDict(kvp.Value, resources);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (kvp.Key == NameToken.Annots)
|
||||||
|
{
|
||||||
|
var val = kvp.Value;
|
||||||
|
if (kvp.Value is IndirectReferenceToken ir)
|
||||||
|
{
|
||||||
|
val = document.Structure.TokenScanner.Get(ir.Data).Data;
|
||||||
|
}
|
||||||
|
|
||||||
|
if (!(val is ArrayToken arr))
|
||||||
|
{
|
||||||
|
// should be array... ignore and remove bad dict
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// -> ignore links to resolve issues with referencing non-existing pages
|
||||||
|
// at some point should add support for copying the links if the
|
||||||
|
// pages are copied as well but for now just fix corruption
|
||||||
|
var toAdd = new List<IToken>();
|
||||||
|
foreach (var annot in arr.Data)
|
||||||
|
{
|
||||||
|
DictionaryToken tk = GetRemoteDict(annot);
|
||||||
|
if (tk == null)
|
||||||
|
{
|
||||||
|
// malformed
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
if (tk.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
|
||||||
|
{
|
||||||
|
// link -> ignore
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
toAdd.Add(WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs));
|
||||||
|
}
|
||||||
|
// copy rest
|
||||||
|
copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
copiedPageDict[NameToken.Create(kvp.Key)] =
|
copiedPageDict[NameToken.Create(kvp.Key)] =
|
||||||
WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs);
|
WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs);
|
||||||
}
|
}
|
||||||
@ -508,10 +553,14 @@ namespace UglyToad.PdfPig.Writer
|
|||||||
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
|
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Adobe Acrobat errors if content streams ref'd by multiple pages, turn off
|
||||||
|
// dedup if on to avoid issues
|
||||||
|
var prev = context.AttemptDeduplication;
|
||||||
|
context.AttemptDeduplication = false;
|
||||||
|
|
||||||
var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
|
var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
|
||||||
if (toWrite.Count == 0)
|
if (toWrite.Count == 0)
|
||||||
{
|
{
|
||||||
// write empty
|
|
||||||
pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context);
|
pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context);
|
||||||
}
|
}
|
||||||
else if (toWrite.Count == 1)
|
else if (toWrite.Count == 1)
|
||||||
@ -529,7 +578,7 @@ namespace UglyToad.PdfPig.Writer
|
|||||||
}
|
}
|
||||||
pageDictionary[NameToken.Contents] = new ArrayToken(streams);
|
pageDictionary[NameToken.Contents] = new ArrayToken(streams);
|
||||||
}
|
}
|
||||||
|
context.AttemptDeduplication = prev;;
|
||||||
|
|
||||||
leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary)));
|
leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary)));
|
||||||
|
|
||||||
|
@ -30,8 +30,9 @@
|
|||||||
DisposeStream = disposeStream;
|
DisposeStream = disposeStream;
|
||||||
}
|
}
|
||||||
|
|
||||||
public Stream Stream { get; protected set; }
|
public Stream Stream { get; protected set; }
|
||||||
|
public bool AttemptDeduplication { get; set; } = true;
|
||||||
|
|
||||||
public virtual IndirectReferenceToken WriteToken(IToken token)
|
public virtual IndirectReferenceToken WriteToken(IToken token)
|
||||||
{
|
{
|
||||||
if (!Initialized)
|
if (!Initialized)
|
||||||
|
@ -80,7 +80,11 @@
|
|||||||
/// <param name="token">The token to write to the stream.</param>
|
/// <param name="token">The token to write to the stream.</param>
|
||||||
/// <param name="outputStream">The stream to write the token to.</param>
|
/// <param name="outputStream">The stream to write the token to.</param>
|
||||||
public static void WriteToken(IToken token, Stream outputStream)
|
public static void WriteToken(IToken token, Stream outputStream)
|
||||||
{
|
{
|
||||||
|
if (token == null)
|
||||||
|
{
|
||||||
|
throw new ArgumentNullException(nameof(token));
|
||||||
|
}
|
||||||
switch (token)
|
switch (token)
|
||||||
{
|
{
|
||||||
case ArrayToken array:
|
case ArrayToken array:
|
||||||
@ -119,7 +123,9 @@
|
|||||||
break;
|
break;
|
||||||
case StringToken stringToken:
|
case StringToken stringToken:
|
||||||
WriteString(stringToken, outputStream);
|
WriteString(stringToken, outputStream);
|
||||||
break;
|
break;
|
||||||
|
default:
|
||||||
|
throw new PdfDocumentFormatException($"Attempted to write token type of {token.GetType()} but was not known.");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -294,8 +300,16 @@
|
|||||||
|
|
||||||
foreach (var pair in dictionary.Data)
|
foreach (var pair in dictionary.Data)
|
||||||
{
|
{
|
||||||
WriteName(pair.Key, outputStream);
|
WriteName(pair.Key, outputStream);
|
||||||
WriteToken(pair.Value, outputStream);
|
|
||||||
|
// handle scenario where PdfPig has a null value under some circumstances
|
||||||
|
if (pair.Value == null)
|
||||||
|
{
|
||||||
|
WriteToken(NullToken.Instance, outputStream);
|
||||||
|
} else
|
||||||
|
{
|
||||||
|
WriteToken(pair.Value, outputStream);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
outputStream.Write(DictionaryEnd, 0, DictionaryEnd.Length);
|
outputStream.Write(DictionaryEnd, 0, DictionaryEnd.Length);
|
||||||
|
Loading…
Reference in New Issue
Block a user