mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
Merge pull request #359 from plaisted/master
link annotation fix for PdfDocumentBuilder
This commit is contained in:
commit
df3552c38e
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf
Normal file
BIN
src/UglyToad.PdfPig.Tests/Integration/Documents/outline.pdf
Normal file
Binary file not shown.
@ -101,6 +101,33 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <summary>
/// Copies page 1 of "outline.pdf" into a new document via <c>AddPage</c> and checks that
/// the rebuilt page has exactly one annotation fewer than the source page and no longer
/// contains any annotation of type <c>Link</c>.
/// </summary>
[Fact]
public void CanFastAddPageAndStripLinkAnnots()
{
    var sourcePath = IntegrationHelpers.GetDocumentPath("outline.pdf");
    var sourceBytes = File.ReadAllBytes(sourcePath);

    int originalAnnotationCount;
    byte[] rebuiltBytes;

    using (var source = PdfDocument.Open(sourceBytes, ParsingOptions.LenientParsingOff))
    using (var builder = new PdfDocumentBuilder())
    {
        // Copy the page and build the output before inspecting the source annotations.
        builder.AddPage(source, 1);
        rebuiltBytes = builder.Build();

        var sourceAnnotations = source.GetPage(1).ExperimentalAccess.GetAnnotations().ToList();
        originalAnnotationCount = sourceAnnotations.Count;

        // Precondition: the source page must actually carry a link annotation.
        Assert.Contains(sourceAnnotations, a => a.Type == Annotations.AnnotationType.Link);
    }

    using (var reopened = PdfDocument.Open(rebuiltBytes, ParsingOptions.LenientParsingOff))
    {
        var copiedAnnotations = reopened.GetPage(1).ExperimentalAccess.GetAnnotations().ToList();

        // The test expects exactly one annotation (the link) to have been dropped by the copy.
        var expectedCount = originalAnnotationCount - 1;
        Assert.Equal(expectedCount, copiedAnnotations.Count);
        Assert.DoesNotContain(copiedAnnotations, a => a.Type == Annotations.AnnotationType.Link);
    }
}
|
||||
|
||||
[Fact]
|
||||
public void CanReadSingleBlankPage()
|
||||
|
@ -8,6 +8,13 @@
|
||||
|
||||
internal interface IPdfStreamWriter : IDisposable
|
||||
{
|
||||
/// <summary>
|
||||
/// Sets if the stream writer should attempt to deduplicate objects.
|
||||
/// May not have any effect if <see cref="IPdfStreamWriter"/> does not
|
||||
/// support deduplication.
|
||||
/// </summary>
|
||||
bool AttemptDeduplication { get; set; }
|
||||
|
||||
/// <summary>
|
||||
/// The underlying stream used by the writer.
|
||||
/// </summary>
|
||||
|
@ -23,13 +23,16 @@
|
||||
ms.SetLength(0);
|
||||
TokenWriter.WriteToken(token, ms);
|
||||
var contents = ms.ToArray();
|
||||
if (hashes.TryGetValue(contents, out var value))
|
||||
if (AttemptDeduplication && hashes.TryGetValue(contents, out var value))
|
||||
{
|
||||
return value;
|
||||
}
|
||||
|
||||
var ir = ReserveObjectNumber();
|
||||
hashes.Add(contents, ir);
|
||||
if (AttemptDeduplication)
|
||||
{
|
||||
hashes.Add(contents, ir);
|
||||
}
|
||||
|
||||
offsets.Add(ir.Data, Stream.Position);
|
||||
TokenWriter.WriteObject(ir.Data.ObjectNumber, ir.Data.Generation, contents, Stream);
|
||||
|
@ -330,6 +330,10 @@ namespace UglyToad.PdfPig.Writer
|
||||
var streams = new List<PdfPageBuilder.CopiedContentStream>();
|
||||
if (pageInfo.Page.TryGet(NameToken.Contents, out IToken contentsToken))
|
||||
{
|
||||
// Adobe Acrobat errors if content streams ref'd by multiple pages, turn off
|
||||
// dedup if on to avoid issues
|
||||
var prev = context.AttemptDeduplication;
|
||||
context.AttemptDeduplication = false;
|
||||
if (contentsToken is ArrayToken array)
|
||||
{
|
||||
foreach (var item in array.Data)
|
||||
@ -347,6 +351,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
streams.Add(new PdfPageBuilder.CopiedContentStream(
|
||||
WriterUtil.CopyToken(context, ir, document.Structure.TokenScanner, refs) as IndirectReferenceToken));
|
||||
}
|
||||
context.AttemptDeduplication = prev;
|
||||
}
|
||||
|
||||
// manually copy page dict / resources as we need to modify some
|
||||
@ -379,15 +384,55 @@ namespace UglyToad.PdfPig.Writer
|
||||
{
|
||||
if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
|
||||
{
|
||||
// don't copy these as they'll be handled during page tree writing
|
||||
continue;
|
||||
}
|
||||
|
||||
if (kvp.Key == NameToken.Resources)
|
||||
{
|
||||
// merge parent resources into child
|
||||
CopyResourceDict(kvp.Value, resources);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (kvp.Key == NameToken.Annots)
|
||||
{
|
||||
var val = kvp.Value;
|
||||
if (kvp.Value is IndirectReferenceToken ir)
|
||||
{
|
||||
val = document.Structure.TokenScanner.Get(ir.Data).Data;
|
||||
}
|
||||
|
||||
if (!(val is ArrayToken arr))
|
||||
{
|
||||
// should be array... ignore and remove bad dict
|
||||
continue;
|
||||
}
|
||||
|
||||
// -> ignore links to resolve issues with referencing non-existing pages
|
||||
// at some point should add support for copying the links if the
|
||||
// pages are copied as well but for now just fix corruption
|
||||
var toAdd = new List<IToken>();
|
||||
foreach (var annot in arr.Data)
|
||||
{
|
||||
DictionaryToken tk = GetRemoteDict(annot);
|
||||
if (tk == null)
|
||||
{
|
||||
// malformed
|
||||
continue;
|
||||
}
|
||||
if (tk.TryGet(NameToken.Subtype, out var st) && st is NameToken nm && nm == NameToken.Link)
|
||||
{
|
||||
// link -> ignore
|
||||
continue;
|
||||
}
|
||||
toAdd.Add(WriterUtil.CopyToken(context, tk, document.Structure.TokenScanner, refs));
|
||||
}
|
||||
// copy rest
|
||||
copiedPageDict[NameToken.Annots] = new ArrayToken(toAdd);
|
||||
continue;
|
||||
}
|
||||
|
||||
copiedPageDict[NameToken.Create(kvp.Key)] =
|
||||
WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs);
|
||||
}
|
||||
@ -508,10 +553,14 @@ namespace UglyToad.PdfPig.Writer
|
||||
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
|
||||
}
|
||||
|
||||
// Adobe Acrobat errors if content streams ref'd by multiple pages, turn off
|
||||
// dedup if on to avoid issues
|
||||
var prev = context.AttemptDeduplication;
|
||||
context.AttemptDeduplication = false;
|
||||
|
||||
var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
|
||||
if (toWrite.Count == 0)
|
||||
{
|
||||
// write empty
|
||||
pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context);
|
||||
}
|
||||
else if (toWrite.Count == 1)
|
||||
@ -529,7 +578,7 @@ namespace UglyToad.PdfPig.Writer
|
||||
}
|
||||
pageDictionary[NameToken.Contents] = new ArrayToken(streams);
|
||||
}
|
||||
|
||||
context.AttemptDeduplication = prev;;
|
||||
|
||||
leafChildren[leafNum].Add(context.WriteToken(new DictionaryToken(pageDictionary)));
|
||||
|
||||
|
@ -30,8 +30,9 @@
|
||||
DisposeStream = disposeStream;
|
||||
}
|
||||
|
||||
public Stream Stream { get; protected set; }
|
||||
|
||||
public Stream Stream { get; protected set; }
|
||||
public bool AttemptDeduplication { get; set; } = true;
|
||||
|
||||
public virtual IndirectReferenceToken WriteToken(IToken token)
|
||||
{
|
||||
if (!Initialized)
|
||||
|
@ -80,7 +80,11 @@
|
||||
/// <param name="token">The token to write to the stream.</param>
|
||||
/// <param name="outputStream">The stream to write the token to.</param>
|
||||
public static void WriteToken(IToken token, Stream outputStream)
|
||||
{
|
||||
{
|
||||
if (token == null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(token));
|
||||
}
|
||||
switch (token)
|
||||
{
|
||||
case ArrayToken array:
|
||||
@ -119,7 +123,9 @@
|
||||
break;
|
||||
case StringToken stringToken:
|
||||
WriteString(stringToken, outputStream);
|
||||
break;
|
||||
break;
|
||||
default:
|
||||
throw new PdfDocumentFormatException($"Attempted to write token type of {token.GetType()} but was not known.");
|
||||
}
|
||||
}
|
||||
|
||||
@ -294,8 +300,16 @@
|
||||
|
||||
foreach (var pair in dictionary.Data)
|
||||
{
|
||||
WriteName(pair.Key, outputStream);
|
||||
WriteToken(pair.Value, outputStream);
|
||||
WriteName(pair.Key, outputStream);
|
||||
|
||||
// handle scenario where PdfPig has a null value under some circumstances
|
||||
if (pair.Value == null)
|
||||
{
|
||||
WriteToken(NullToken.Instance, outputStream);
|
||||
} else
|
||||
{
|
||||
WriteToken(pair.Value, outputStream);
|
||||
}
|
||||
}
|
||||
|
||||
outputStream.Write(DictionaryEnd, 0, DictionaryEnd.Length);
|
||||
|
Loading…
Reference in New Issue
Block a user