refactored previous work to fit pr #250

This commit is contained in:
Plaisted 2021-02-06 12:24:53 -06:00
parent 75d511440b
commit 7f42ad0af9
21 changed files with 1249 additions and 568 deletions

View File

@ -209,6 +209,7 @@
"UglyToad.PdfPig.Writer.PdfAStandard",
"UglyToad.PdfPig.Writer.PdfDocumentBuilder",
"UglyToad.PdfPig.Writer.PdfMerger",
"UglyToad.PdfPig.Writer.PdfWriterType",
"UglyToad.PdfPig.Writer.PdfPageBuilder",
"UglyToad.PdfPig.Writer.TokenWriter",
"UglyToad.PdfPig.XObjects.XObjectImage"

View File

@ -45,6 +45,11 @@ namespace UglyToad.PdfPig.Tests.Tokens
return Objects[reference];
}
// Token replacement is not implemented by this test scanner; any call fails immediately.
public void ReplaceToken(IndirectReference reference, IToken token) =>
    throw new NotImplementedException();
public void Dispose()
{
    // Intentionally empty: this method releases nothing.
}

View File

@ -689,6 +689,186 @@
Assert.Equal("Lorem ipsum dolor sit amet, consectetur adipiscing elit. ", page2.Text);
}
}
[Fact]
public void CanAddHelloWorldToSimplePage()
{
    // The assertions below expect the copied page text to be "I am a simple pdf." followed by the
    // added text, with the first added letter ("H") at index 18 of the letters read back.
    const int firstAddedLetterIndex = 18;

    var path = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");

    // Both the source document and the builder are disposable; release them when the test ends.
    using var doc = PdfDocument.Open(path);
    using var builder = new PdfDocumentBuilder();

    var page = builder.AddPage(doc, 1);

    page.DrawLine(new PdfPoint(30, 520), new PdfPoint(360, 520));
    page.DrawLine(new PdfPoint(360, 520), new PdfPoint(360, 250));

    page.SetStrokeColor(250, 132, 131);
    page.DrawLine(new PdfPoint(25, 70), new PdfPoint(100, 70), 3);
    page.ResetColor();
    page.DrawRectangle(new PdfPoint(30, 200), 250, 100, 0.5m);
    page.DrawRectangle(new PdfPoint(30, 100), 250, 100, 0.5m);

    var file = TrueTypeTestHelper.GetFileBytes("Andada-Regular.ttf");
    var font = builder.AddTrueTypeFont(file);

    var letters = page.AddText("Hello World!", 12, new PdfPoint(30, 50), font);

    Assert.NotEmpty(page.CurrentStream.Operations);

    var b = builder.Build();

    // Write under this test's own name so the output of another test is not overwritten.
    WriteFile(nameof(CanAddHelloWorldToSimplePage), b);

    Assert.NotEmpty(b);

    using (var document = PdfDocument.Open(b))
    {
        var page1 = document.GetPage(1);

        Assert.Equal("I am a simple pdf.Hello World!", page1.Text);

        var h = page1.Letters[firstAddedLetterIndex];

        Assert.Equal("H", h.Value);
        Assert.Equal("Andada-Regular", h.FontName);

        var comparer = new DoubleComparer(0.01);
        var pointComparer = new PointComparer(comparer);

        // Every letter reported by the writer must be read back with the same value and geometry.
        for (int i = 0; i < letters.Count; i++)
        {
            var readerLetter = page1.Letters[i + firstAddedLetterIndex];
            var writerLetter = letters[i];

            Assert.Equal(readerLetter.Value, writerLetter.Value);
            Assert.Equal(readerLetter.Location, writerLetter.Location, pointComparer);
            Assert.Equal(readerLetter.FontSize, writerLetter.FontSize, comparer);
            Assert.Equal(readerLetter.GlyphRectangle.Width, writerLetter.GlyphRectangle.Width, comparer);
            Assert.Equal(readerLetter.GlyphRectangle.Height, writerLetter.GlyphRectangle.Height, comparer);
            Assert.Equal(readerLetter.GlyphRectangle.BottomLeft, writerLetter.GlyphRectangle.BottomLeft, pointComparer);
        }
    }
}
[Fact]
public void CanMerge2SimpleDocumentsReversed_Builder()
{
    var one = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");
    var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");

    using var docOne = PdfDocument.Open(one);
    using var docTwo = PdfDocument.Open(two);

    // The builder is disposable; dispose it like the other builder tests in this class do.
    using var builder = new PdfDocumentBuilder();
    builder.AddPage(docOne, 1);
    builder.AddPage(docTwo, 1);

    var result = builder.Build();

    // Version checking is switched off for builder output (last argument).
    using var resultStream = new MemoryStream(result);
    PdfMergerTests.CanMerge2SimpleDocumentsAssertions(resultStream, "I am a simple pdf.", "Write something inInkscape", false);
}
[Fact]
public void CanMerge2SimpleDocuments_Builder()
{
    var one = IntegrationHelpers.GetDocumentPath("Single Page Simple - from inkscape.pdf");
    var two = IntegrationHelpers.GetDocumentPath("Single Page Simple - from open office.pdf");

    using var docOne = PdfDocument.Open(one);
    using var docTwo = PdfDocument.Open(two);

    // The builder is disposable; dispose it like the other builder tests in this class do.
    using var builder = new PdfDocumentBuilder();
    builder.AddPage(docOne, 1);
    builder.AddPage(docTwo, 1);

    var result = builder.Build();

    // Version checking is switched off for builder output (last argument).
    using var resultStream = new MemoryStream(result);
    PdfMergerTests.CanMerge2SimpleDocumentsAssertions(resultStream, "Write something inInkscape", "I am a simple pdf.", false);
}
[Fact]
public void CanDedupObjectsFromSameDoc_Builder()
{
    var sourcePath = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");

    using var source = PdfDocument.Open(sourcePath);
    using var builder = new PdfDocumentBuilder();

    // Add the same page of the same open document twice.
    builder.AddPage(source, 1);
    builder.AddPage(source, 1);

    var bytes = builder.Build();

    using var written = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff);

    Assert.Equal(2, written.NumberOfPages);

    // 45 objects with duplicates, 29 with correct re-use
    var objectCount = written.Structure.CrossReferenceTable.ObjectOffsets.Count;
    Assert.True(objectCount <= 29, "Expected object count to be lower than 30");
}
[Fact]
public void CanDedupObjectsFromDifferentDoc_HashBuilder()
{
    var sourcePath = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");

    // The same file is opened twice, giving two separate document instances.
    using var first = PdfDocument.Open(sourcePath);
    using var second = PdfDocument.Open(sourcePath);
    using var builder = new PdfDocumentBuilder(new MemoryStream(), true, PdfWriterType.ObjectInMemoryDedup);

    builder.AddPage(first, 1);
    builder.AddPage(second, 1);

    var bytes = builder.Build();

    using var written = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff);

    Assert.Equal(2, written.NumberOfPages);

    // 45 objects with duplicates, 29 with correct re-use
    var objectCount = written.Structure.CrossReferenceTable.ObjectOffsets.Count;
    Assert.True(objectCount <= 29, "Expected object count to be lower than 30");
}
[InlineData("Single Page Simple - from google drive.pdf")]
[InlineData("Old Gutnish Internet Explorer.pdf")]
[InlineData("68-1990-01_A.pdf")]
[InlineData("Multiple Page - from Mortality Statistics.pdf")]
[Theory]
public void CopiedPagesResultInSameData(string name)
{
    var sourcePath = IntegrationHelpers.GetDocumentPath(name);

    using var source = PdfDocument.Open(sourcePath, ParsingOptions.LenientParsingOff);
    var expected = Summarize(source);

    // Copy every page of the source, in order, into a fresh builder.
    using var builder = new PdfDocumentBuilder();
    var pageNumber = 1;
    while (pageNumber <= source.NumberOfPages)
    {
        builder.AddPage(source, pageNumber);
        pageNumber++;
    }

    var bytes = builder.Build();

    using (var copy = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
    {
        var actual = Summarize(copy);

        Assert.Equal(expected.LetterCount, actual.LetterCount);
        Assert.Equal(expected.LocationSum, actual.LocationSum);
    }

    // Reduces a document to its number of letters and the running sum of all letter X and Y coordinates.
    (int LetterCount, double LocationSum) Summarize(PdfDocument toCount)
    {
        var letterCount = 0;
        var locationSum = 0d;

        foreach (var page in toCount.GetPages())
        {
            foreach (var letter in page.Letters)
            {
                unchecked
                {
                    letterCount += 1;

                    // X is added before Y, one addition at a time, so the floating point sum is reproducible.
                    locationSum += letter.Location.X;
                    locationSum += letter.Location.Y;
                }
            }
        }

        return (letterCount, locationSum);
    }
}
private static void WriteFile(string name, byte[] bytes, string extension = "pdf")

View File

@ -47,13 +47,16 @@
CanMerge2SimpleDocumentsAssertions(new MemoryStream(result), "I am a simple pdf.", "Write something inInkscape");
}
private void CanMerge2SimpleDocumentsAssertions(Stream stream, string page1Text, string page2Text)
internal static void CanMerge2SimpleDocumentsAssertions(Stream stream, string page1Text, string page2Text, bool checkVersion=true)
{
stream.Position = 0;
using (var document = PdfDocument.Open(stream, ParsingOptions.LenientParsingOff))
{
Assert.Equal(2, document.NumberOfPages);
Assert.Equal(1.5m, document.Version);
if (checkVersion)
{
Assert.Equal(1.5m, document.Version);
}
var page1 = document.GetPage(1);
Assert.Equal(page1Text, page1.Text);
@ -105,7 +108,7 @@
[Fact]
public void DedupsObjectsFromSameDoc()
{
var one = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
var one = IntegrationHelpers.GetDocumentPath("Multiple Page - from Mortality Statistics.pdf");
var result = PdfMerger.Merge(new List<byte[]> { File.ReadAllBytes(one) }, new List<IReadOnlyList<int>> { new List<int> { 1, 2} });

View File

@ -3,6 +3,7 @@
using System;
using System.Collections.Generic;
using Content;
using Core;
using Filters;
using Parser.Parts;
using Tokenization.Scanner;
@ -82,6 +83,30 @@
return embeddedFiles.Count > 0;
}
/// <summary>
/// Replaces the token in an internal cache that will be returned instead of
/// scanning the source PDF data for future requests.
/// </summary>
/// <param name="reference">The indirect reference identifying the object to replace.</param>
/// <param name="replacer">Func that takes existing token as input and return new token.</param>
/// <exception cref="ArgumentNullException"><paramref name="replacer"/> is <see langword="null"/>.</exception>
public void ReplaceIndirectObject(IndirectReference reference, Func<IToken, IToken> replacer)
{
    // Fail before touching the scanner rather than with a NullReferenceException after the lookup.
    if (replacer == null)
    {
        throw new ArgumentNullException(nameof(replacer));
    }

    // The replacer receives the data of the object currently returned for this reference.
    var obj = pdfScanner.Get(reference);
    var replacement = replacer(obj.Data);
    pdfScanner.ReplaceToken(reference, replacement);
}
/// <summary>
/// Stores <paramref name="replacement"/> in the scanner's internal cache so that it is
/// returned for <paramref name="reference"/> instead of scanning the source PDF data
/// on future requests.
/// </summary>
/// <param name="reference">The indirect reference identifying the object to replace.</param>
/// <param name="replacement">Replacement token to use.</param>
public void ReplaceIndirectObject(IndirectReference reference, IToken replacement)
    => pdfScanner.ReplaceToken(reference, replacement);
private void GuardDisposed()
{
if (isDisposed)

View File

@ -16,5 +16,13 @@
/// <param name="reference">The object number for the object to tokenize.</param>
/// <returns>The tokenized object.</returns>
ObjectToken Get(IndirectReference reference);
/// <summary>
/// Adds the token to an internal cache so that it is returned for the given reference
/// instead of scanning the source PDF data.
/// </summary>
/// <param name="reference">The indirect reference identifying the object to replace.</param>
/// <param name="token">The token to replace the existing data.</param>
void ReplaceToken(IndirectReference reference, IToken token);
}
}

View File

@ -29,6 +29,9 @@
private bool isDisposed;
private bool isBruteForcing;
private readonly Dictionary<IndirectReference, ObjectToken> overwrittenTokens =
new Dictionary<IndirectReference, ObjectToken>();
/// <summary>
/// Stores tokens encountered between obj - endobj markers for each <see cref="MoveNext"/> call.
/// Cleared after each operation.
@ -670,6 +673,11 @@
throw new ObjectDisposedException(nameof(PdfTokenScanner));
}
if (overwrittenTokens.TryGetValue(reference, out var value))
{
return value;
}
if (objectLocationProvider.TryGetCached(reference, out var objectToken))
{
return objectToken;
@ -705,6 +713,13 @@
return BruteForceFileToFindReference(reference);
}
/// <summary>
/// Registers a replacement for the object identified by <paramref name="reference"/>; the
/// replacement is stored in <c>overwrittenTokens</c>, which is consulted before any other lookup.
/// </summary>
/// <param name="reference">The indirect reference identifying the object to replace.</param>
/// <param name="token">The token to return in place of the object's existing data.</param>
/// <exception cref="ObjectDisposedException">The scanner has been disposed.</exception>
/// <exception cref="ArgumentNullException"><paramref name="token"/> is <see langword="null"/>.</exception>
public void ReplaceToken(IndirectReference reference, IToken token)
{
    // Same disposed check that guards lookups on this scanner.
    if (isDisposed)
    {
        throw new ObjectDisposedException(nameof(PdfTokenScanner));
    }

    if (token == null)
    {
        throw new ArgumentNullException(nameof(token));
    }

    // Using 0 position as it isn't written to stream and this value doesn't
    // seem to be used by any callers. In future may need to revisit this.
    overwrittenTokens[reference] = new ObjectToken(0, reference, token);
}
private ObjectToken BruteForceFileToFindReference(IndirectReference reference)
{
try

View File

@ -9,7 +9,7 @@
private const string SrgbIec61966OutputCondition = "sRGB IEC61966-2.1";
private const string RegistryName = "http://www.color.org";
public static ArrayToken GetOutputIntentsArray(Func<IToken, ObjectToken> objectWriter)
public static ArrayToken GetOutputIntentsArray(Func<IToken, IndirectReferenceToken> objectWriter)
{
var rgbColorCondition = new StringToken(SrgbIec61966OutputCondition);
@ -38,7 +38,7 @@
{NameToken.OutputConditionIdentifier, rgbColorCondition},
{NameToken.RegistryName, new StringToken(RegistryName)},
{NameToken.Info, rgbColorCondition},
{NameToken.DestOutputProfile, new IndirectReferenceToken(written.Number)}
{NameToken.DestOutputProfile, written}
}),
});
}

View File

@ -16,7 +16,7 @@
TransformationMatrix GetFontMatrix();
ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context);
IndirectReferenceToken WriteFont(IPdfStreamWriter writer, NameToken fontKeyName);
byte GetValueForCharacter(char character);
}

View File

@ -55,7 +55,7 @@
return TransformationMatrix.FromValues(1/1000.0, 0, 0, 1/1000.0, 0, 0);
}
public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
public IndirectReferenceToken WriteFont(IPdfStreamWriter writer, NameToken fontKeyName)
{
var dictionary = new Dictionary<NameToken, IToken>
{
@ -68,7 +68,7 @@
var token = new DictionaryToken(dictionary);
var result = context.WriteObject(outputStream, token);
var result = writer.WriteToken(token);
return result;
}
@ -92,46 +92,5 @@
return result;
}
}
internal class BuilderContext
{
private readonly List<int> reservedNumbers = new List<int>();
public int CurrentNumber { get; private set; } = 1;
private readonly Dictionary<IndirectReference, long> objectOffsets = new Dictionary<IndirectReference, long>();
public IReadOnlyDictionary<IndirectReference, long> ObjectOffsets => objectOffsets;
public ObjectToken WriteObject(Stream stream, IToken token, int? reservedNumber = null)
{
int number;
if (reservedNumber.HasValue)
{
if (!reservedNumbers.Remove(reservedNumber.Value))
{
throw new InvalidOperationException();
}
number = reservedNumber.Value;
}
else
{
number = CurrentNumber++;
}
var reference = new IndirectReference(number, 0);
var obj = new ObjectToken(stream.Position, reference, token);
objectOffsets.Add(reference, obj.Position);
TokenWriter.WriteToken(obj, stream);
return obj;
}
public int ReserveNumber()
{
var reserved = CurrentNumber;
reservedNumbers.Add(reserved);
CurrentNumber++;
return reserved;
}
}
}

View File

@ -47,14 +47,14 @@
return TransformationMatrix.FromValues(1.0 / unitsPerEm, 0, 0, 1.0 / unitsPerEm, 0, 0);
}
public ObjectToken WriteFont(NameToken fontKeyName, Stream outputStream, BuilderContext context)
public IndirectReferenceToken WriteFont(IPdfStreamWriter writer, NameToken fontKeyName)
{
var newEncoding = new TrueTypeSubsetEncoding(characterMapping.Keys.ToList());
var subsetBytes = TrueTypeSubsetter.Subset(fontFileBytes.ToArray(), newEncoding);
var embeddedFile = DataCompresser.CompressToStream(subsetBytes);
var fileRef = context.WriteObject(outputStream, embeddedFile);
var fileRef = writer.WriteToken(embeddedFile);
var baseFont = NameToken.Create(font.TableRegister.NameTable.GetPostscriptName());
@ -76,7 +76,7 @@
{ NameToken.Descent, new NumericToken(Math.Round(hhead.Descent * scaling, 2)) },
{ NameToken.CapHeight, new NumericToken(90) },
{ NameToken.StemV, new NumericToken(90) },
{ NameToken.FontFile2, new IndirectReferenceToken(fileRef.Number) }
{ NameToken.FontFile2, fileRef }
};
var os2 = font.TableRegister.Os2Table;
@ -108,27 +108,27 @@
widths.Add(new NumericToken(width));
}
var descriptor = context.WriteObject(outputStream, new DictionaryToken(descriptorDictionary));
var descriptor = writer.WriteToken(new DictionaryToken(descriptorDictionary));
var toUnicodeCMap = ToUnicodeCMapBuilder.ConvertToCMapStream(characterMapping);
var toUnicodeStream = DataCompresser.CompressToStream(toUnicodeCMap);
var toUnicode = context.WriteObject(outputStream, toUnicodeStream);
var toUnicode = writer.WriteToken(toUnicodeStream);
var dictionary = new Dictionary<NameToken, IToken>
{
{ NameToken.Type, NameToken.Font },
{ NameToken.Subtype, NameToken.TrueType },
{ NameToken.BaseFont, baseFont },
{ NameToken.FontDescriptor, new IndirectReferenceToken(descriptor.Number) },
{ NameToken.FontDescriptor, descriptor },
{ NameToken.FirstChar, new NumericToken(0) },
{ NameToken.LastChar, new NumericToken(lastCharacter) },
{ NameToken.Widths, new ArrayToken(widths) },
{NameToken.ToUnicode, new IndirectReferenceToken(toUnicode.Number) }
{NameToken.ToUnicode, toUnicode }
};
var token = new DictionaryToken(dictionary);
var result = context.WriteObject(outputStream, token);
var result = writer.WriteToken(token);
return result;
}

View File

@ -0,0 +1,51 @@
namespace UglyToad.PdfPig.Writer
{
using System;
using System.Collections.Generic;
using System.IO;
using System.Text;
using Tokens;
/// <summary>
/// Writes the objects of a PDF document, together with its header and closing
/// cross-reference data, to an output <see cref="Stream"/>.
/// </summary>
internal interface IPdfStreamWriter : IDisposable
{
/// <summary>
/// The stream the PDF data is written to.
/// </summary>
Stream Stream { get; }
/// <summary>
/// Writes the token as an indirect object and returns a reference to it.
/// NOTE(review): an implementation may return the reference of an identical object it has
/// already written instead of writing a new one (the de-duplicating writer does this).
/// </summary>
/// <param name="token">The token to write.</param>
/// <returns>A reference to the object holding the token.</returns>
IndirectReferenceToken WriteToken(IToken token);
/// <summary>
/// Writes the token as the indirect object identified by the supplied reference, for example
/// one previously obtained from <see cref="ReserveObjectNumber"/>.
/// </summary>
/// <param name="token">The token to write.</param>
/// <param name="indirectReference">The reference the written object must use.</param>
/// <returns>The reference the object was written under.</returns>
IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference);
/// <summary>
/// Reserves an object number so that it can be referenced before the object itself is written.
/// </summary>
/// <returns>A reference carrying the reserved object number.</returns>
IndirectReferenceToken ReserveObjectNumber();
/// <summary>
/// Writes the start of the PDF file for the given version.
/// </summary>
/// <param name="version">The PDF version to declare, for example 1.7.</param>
void InitializePdf(decimal version);
/// <summary>
/// Finishes the PDF by writing the cross-reference data for the objects written so far.
/// </summary>
/// <param name="catalogReference">Reference to the document catalog object.</param>
/// <param name="documentInformationReference">Optional reference to the document information object.</param>
void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference=null);
}
}

View File

@ -8,14 +8,14 @@ namespace UglyToad.PdfPig.Writer
{
internal static class PdfABaselineRuleBuilder
{
public static void Obey(Dictionary<NameToken, IToken> catalog, Func<IToken, ObjectToken> writerFunc,
public static void Obey(Dictionary<NameToken, IToken> catalog, Func<IToken, IndirectReferenceToken> writerFunc,
PdfDocumentBuilder.DocumentInformationBuilder documentInformationBuilder,
PdfAStandard archiveStandard)
{
catalog[NameToken.OutputIntents] = OutputIntentsFactory.GetOutputIntentsArray(writerFunc);
var xmpStream = XmpWriter.GenerateXmpStream(documentInformationBuilder, 1.7m, archiveStandard);
var xmpObj = writerFunc(xmpStream);
catalog[NameToken.Metadata] = new IndirectReferenceToken(xmpObj.Number);
catalog[NameToken.Metadata] = xmpObj;
}
}
}

View File

@ -0,0 +1,200 @@
namespace UglyToad.PdfPig.Writer
{
using Core;
using Graphics.Operations;
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;
using Tokens;
/// <summary>
/// An <see cref="IPdfStreamWriter"/> that remembers the serialized bytes of every object it
/// writes. When asked to write a token whose bytes equal those of an earlier object, it returns
/// the earlier reference instead of writing the content again.
/// </summary>
internal class PdfDedupStreamWriter : IPdfStreamWriter
{
    // Version used when a token is written before InitializePdf has been called explicitly.
    private const decimal DefaultVersion = 1.2m;

    // Stream position of every object written, keyed by its reference; used for the cross-reference table.
    private readonly Dictionary<IndirectReference, long> offsets = new Dictionary<IndirectReference, long>();

    // Serialized object content -> reference of the object holding that content.
    private readonly Dictionary<byte[], IndirectReferenceToken> hashes =
        new Dictionary<byte[], IndirectReferenceToken>(new FNVByteComparison());

    // Scratch buffer reused to serialize each token before it is compared and written.
    private readonly MemoryStream ms = new MemoryStream();

    /// <summary>
    /// The stream the PDF data is written to.
    /// </summary>
    public Stream Stream { get; }

    private int CurrentNumber { get; set; } = 1;

    private bool DisposeStream { get; set; }

    private bool Initialized { get; set; }

    /// <summary>
    /// Creates a writer over the supplied stream.
    /// </summary>
    /// <param name="stream">The stream to write to.</param>
    /// <param name="dispose">Whether <paramref name="stream"/> is disposed together with this writer.</param>
    /// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
    public PdfDedupStreamWriter(Stream stream, bool dispose)
    {
        Stream = stream ?? throw new ArgumentNullException(nameof(stream));
        DisposeStream = dispose;
    }

    /// <summary>
    /// Writes the token as a new indirect object, unless an object with identical serialized
    /// content has already been written, in which case that object's reference is returned.
    /// </summary>
    /// <param name="token">The token to write.</param>
    /// <returns>A reference to the object holding the token's content.</returns>
    public IndirectReferenceToken WriteToken(IToken token)
    {
        EnsureInitialized();

        var contents = Serialize(token);
        if (hashes.TryGetValue(contents, out var value))
        {
            return value;
        }

        var ir = ReserveObjectNumber();
        hashes.Add(contents, ir);
        offsets.Add(ir.Data, Stream.Position);
        TokenWriter.WriteObject(ir.Data.ObjectNumber, ir.Data.Generation, contents, Stream);
        return ir;
    }

    /// <summary>
    /// Writes the token as the indirect object identified by <paramref name="indirectReference"/>.
    /// The object is always written, even when identical content already exists, because the
    /// caller requires this particular reference to resolve.
    /// </summary>
    /// <param name="token">The token to write.</param>
    /// <param name="indirectReference">The reference the written object must use.</param>
    /// <returns><paramref name="indirectReference"/>.</returns>
    public IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference)
    {
        EnsureInitialized();

        var contents = Serialize(token);

        // Identical content may already be registered under another reference. Keep that mapping
        // rather than throwing on a duplicate key, since the earlier reference may already be in use.
        if (!hashes.ContainsKey(contents))
        {
            hashes.Add(contents, indirectReference);
        }

        offsets.Add(indirectReference.Data, Stream.Position);
        TokenWriter.WriteObject(indirectReference.Data.ObjectNumber, indirectReference.Data.Generation, contents, Stream);
        return indirectReference;
    }

    /// <summary>
    /// Reserves the next object number (generation 0) without writing anything.
    /// </summary>
    /// <returns>A reference carrying the reserved object number.</returns>
    public IndirectReferenceToken ReserveObjectNumber()
    {
        return new IndirectReferenceToken(new IndirectReference(CurrentNumber++, 0));
    }

    /// <summary>
    /// Writes the PDF header line for the given version followed by a binary comment line.
    /// </summary>
    /// <param name="version">The PDF version to declare; formatted with one decimal place.</param>
    public void InitializePdf(decimal version)
    {
        WriteString($"%PDF-{version.ToString("0.0", CultureInfo.InvariantCulture)}", Stream);

        // Second comment line: '%' followed by four bytes with values above 127.
        Stream.WriteText("%");
        Stream.WriteByte(169);
        Stream.WriteByte(205);
        Stream.WriteByte(196);
        Stream.WriteByte(210);
        Stream.WriteNewLine();

        Initialized = true;
    }

    /// <summary>
    /// Writes the cross-reference table for every object written so far.
    /// </summary>
    /// <param name="catalogReference">Reference to the document catalog object.</param>
    /// <param name="documentInformationReference">Optional reference to the document information object.</param>
    public void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference=null)
    {
        TokenWriter.WriteCrossReferenceTable(offsets, catalogReference.Data, Stream, documentInformationReference?.Data);
    }

    /// <summary>
    /// Disposes the scratch buffer, clears the de-duplication map and, when requested at
    /// construction, disposes the output stream.
    /// </summary>
    public void Dispose()
    {
        if (DisposeStream)
        {
            Stream.Dispose();
        }

        ms.Dispose();
        hashes.Clear();
    }

    // Writes the header with the default version if nothing has initialized the output yet.
    private void EnsureInitialized()
    {
        if (!Initialized)
        {
            InitializePdf(DefaultVersion);
        }
    }

    // Serializes the token into the reusable scratch buffer and returns a copy of the bytes.
    private byte[] Serialize(IToken token)
    {
        ms.SetLength(0);
        TokenWriter.WriteToken(token, ms);
        return ms.ToArray();
    }

    // Writes the text as Latin-1 bytes followed by a new line.
    private static void WriteString(string text, Stream stream)
    {
        var bytes = OtherEncodings.StringAsLatin1Bytes(text);
        stream.Write(bytes, 0, bytes.Length);
        stream.WriteNewLine();
    }

    /// <summary>
    /// Compares byte arrays by content and hashes them with <see cref="FnvHash"/>.
    /// </summary>
    class FNVByteComparison : IEqualityComparer<byte[]>
    {
        public bool Equals(byte[] x, byte[] y)
        {
            if (ReferenceEquals(x, y))
            {
                return true;
            }

            if (x == null || y == null || x.Length != y.Length)
            {
                return false;
            }

            for (var i = 0; i < x.Length; i++)
            {
                if (x[i] != y[i])
                {
                    return false;
                }
            }

            return true;
        }

        public int GetHashCode(byte[] obj)
        {
            if (obj == null)
            {
                throw new ArgumentNullException(nameof(obj));
            }

            var hash = FnvHash.Create();
            foreach (var t in obj)
            {
                hash.Combine(t);
            }

            return hash.HashCode;
        }
    }

    /// <summary>
    /// A hash combiner that is implemented with the Fowler/Noll/Vo algorithm (FNV-1a). This is a mutable struct for performance reasons.
    /// </summary>
    struct FnvHash
    {
        /// <summary>
        /// The starting point of the FNV hash.
        /// </summary>
        public const int Offset = unchecked((int)2166136261);

        /// <summary>
        /// The prime number used to compute the FNV hash.
        /// </summary>
        private const int Prime = 16777619;

        /// <summary>
        /// Gets the current result of the hash function.
        /// </summary>
        public int HashCode { get; private set; }

        /// <summary>
        /// Creates a new FNV hash initialized to <see cref="Offset"/>.
        /// </summary>
        public static FnvHash Create()
        {
            var result = new FnvHash();
            result.HashCode = Offset;
            return result;
        }

        /// <summary>
        /// Adds the specified byte to the hash.
        /// </summary>
        /// <param name="data">The byte to hash.</param>
        public void Combine(byte data)
        {
            unchecked
            {
                HashCode ^= data;
                HashCode *= Prime;
            }
        }

        /// <summary>
        /// Adds the specified integer to this hash, in little-endian order.
        /// </summary>
        /// <param name="data">The integer to hash.</param>
        public void Combine(int data)
        {
            Combine(unchecked((byte)data));
            Combine(unchecked((byte)(data >> 8)));
            Combine(unchecked((byte)(data >> 16)));
            Combine(unchecked((byte)(data >> 24)));
        }
    }
}
}

View File

@ -14,6 +14,7 @@ namespace UglyToad.PdfPig.Writer
using Parser.Parts;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Fonts.TrueType.Parser;
using System.Runtime.CompilerServices;
using Tokenization.Scanner;
using Tokens;
@ -22,9 +23,9 @@ namespace UglyToad.PdfPig.Writer
/// <summary>
/// Provides methods to construct new PDF documents.
/// </summary>
public class PdfDocumentBuilder
public class PdfDocumentBuilder : IDisposable
{
private readonly BuilderContext context = new BuilderContext();
private readonly IPdfStreamWriter context;
private readonly Dictionary<int, PdfPageBuilder> pages = new Dictionary<int, PdfPageBuilder>();
private readonly Dictionary<Guid, FontStored> fonts = new Dictionary<Guid, FontStored>();
private readonly Dictionary<Guid, ImageStored> images = new Dictionary<Guid, ImageStored>();
@ -63,6 +64,36 @@ namespace UglyToad.PdfPig.Writer
/// </summary>
internal IReadOnlyDictionary<Guid, ImageStored> Images => images;
/// <summary>
/// Creates a document builder that writes to a new in-memory stream, which is
/// disposed together with the builder's writer.
/// </summary>
public PdfDocumentBuilder() : this(new MemoryStream(), true)
{
    // The stream-based constructor creates the default writer and writes the 1.7 header.
}
/// <summary>
/// Creates a document builder using the supplied stream.
/// </summary>
/// <param name="stream">Stream to write pdf to.</param>
/// <param name="disposeStream">If stream should be disposed when builder is.</param>
/// <param name="type">Type of pdf stream writer to use</param>
/// <exception cref="ArgumentNullException"><paramref name="stream"/> is <see langword="null"/>.</exception>
public PdfDocumentBuilder(Stream stream, bool disposeStream=false, PdfWriterType type=PdfWriterType.Default)
{
    // Reject a missing stream here instead of failing later on the first write.
    if (stream == null)
    {
        throw new ArgumentNullException(nameof(stream));
    }

    switch (type)
    {
        case PdfWriterType.ObjectInMemoryDedup:
            context = new PdfDedupStreamWriter(stream, disposeStream);
            break;
        default:
            context = new PdfStreamWriter(stream, disposeStream);
            break;
    }

    // The header is written immediately, so the output always declares version 1.7.
    context.InitializePdf(1.7m);
}
/// <summary>
/// Determines whether the bytes of the TrueType font file provided can be used in a PDF document.
/// </summary>
@ -158,15 +189,10 @@ namespace UglyToad.PdfPig.Writer
return added;
}
internal IndirectReference AddImage(DictionaryToken dictionary, byte[] bytes)
internal IndirectReferenceToken AddImage(DictionaryToken dictionary, byte[] bytes)
{
var reserved = context.ReserveNumber();
var stored = new ImageStored(dictionary, bytes, reserved);
images[stored.Id] = stored;
return new IndirectReference(reserved, 0);
var streamToken = new StreamToken(dictionary, bytes);
return context.WriteToken(streamToken);
}
/// <summary>
@ -235,275 +261,310 @@ namespace UglyToad.PdfPig.Writer
return AddPage(rectangle.Width, rectangle.Height);
}
// Copies a token read from the given source scanner into this builder's output. The map of
// references already copied from that scanner is fetched (or created on first use) so it is
// shared by every copy made from the same source.
internal IToken CopyToken(IPdfTokenScanner source, IToken token)
{
    var copiedReferences = existingCopies.GetOrCreateValue(source);

    return WriterUtil.CopyToken(context, token, source, copiedReferences);
}
private readonly ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>> existingCopies =
new ConditionalWeakTable<IPdfTokenScanner, Dictionary<IndirectReference, IndirectReferenceToken>>();
/// <summary>
/// Copies a page from an existing document into this builder, this page will be included in the output when <see cref="Build"/> is called.
/// </summary>
/// <param name="document">Source document.</param>
/// <param name="pageNumber">Page to copy; the first page is number 1.</param>
/// <returns>A builder for editing the page.</returns>
/// <exception cref="ArgumentNullException"><paramref name="document"/> is <see langword="null"/>.</exception>
/// <exception cref="KeyNotFoundException">The source document has no page with the given number.</exception>
public PdfPageBuilder AddPage(PdfDocument document, int pageNumber)
{
    if (document == null)
    {
        throw new ArgumentNullException(nameof(document));
    }

    // References already copied from this source are remembered per scanner and reused.
    if (!existingCopies.TryGetValue(document.Structure.TokenScanner, out var refs))
    {
        refs = new Dictionary<IndirectReference, IndirectReferenceToken>();
        existingCopies.Add(document.Structure.TokenScanner, refs);
    }

    int i = 1;
    foreach (var (pageDict, parents) in WriterUtil.WalkTree(document.Structure.Catalog.PageTree))
    {
        if (i == pageNumber)
        {
            // copy content streams
            var streams = new List<PdfPageBuilder.CopiedContentStream>();
            if (pageDict.ContainsKey(NameToken.Contents))
            {
                var contents = pageDict.Data[NameToken.Contents];
                if (contents is ArrayToken contentsArray)
                {
                    foreach (var item in contentsArray.Data)
                    {
                        if (item is IndirectReferenceToken itemReference)
                        {
                            streams.Add(CopyContentStream(itemReference));
                        }
                    }
                }
                else if (contents is IndirectReferenceToken contentsReference)
                {
                    streams.Add(CopyContentStream(contentsReference));
                }
            }

            // manually copy page dict / resources as we need to modify some
            var copiedPageDict = new Dictionary<NameToken, IToken>();
            var resources = new Dictionary<NameToken, IToken>();

            // just put all parent resources into new page
            foreach (var parent in parents)
            {
                if (parent.TryGet(NameToken.Resources, out var parentResources))
                {
                    CopyResourceDict(parentResources, resources);
                }
            }

            foreach (var kvp in pageDict.Data)
            {
                // Contents were copied above; Parent and Type are skipped here.
                if (kvp.Key == NameToken.Contents || kvp.Key == NameToken.Parent || kvp.Key == NameToken.Type)
                {
                    continue;
                }

                if (kvp.Key == NameToken.Resources)
                {
                    CopyResourceDict(kvp.Value, resources);
                    continue;
                }

                copiedPageDict[NameToken.Create(kvp.Key)] =
                    WriterUtil.CopyToken(context, kvp.Value, document.Structure.TokenScanner, refs);
            }

            var builder = new PdfPageBuilder(pages.Count + 1, this, streams, resources, copiedPageDict);
            pages[builder.PageNumber] = builder;
            return builder;
        }

        i++;
    }

    throw new KeyNotFoundException($"Page {pageNumber} was not found in the source document.");

    // Copies one content stream reference into the output and wraps the resulting reference.
    PdfPageBuilder.CopiedContentStream CopyContentStream(IndirectReferenceToken streamReference)
    {
        return new PdfPageBuilder.CopiedContentStream(
            WriterUtil.CopyToken(context, streamReference, document.Structure.TokenScanner, refs) as IndirectReferenceToken);
    }

    // Merges the entries of a source resource dictionary into destinationDict.
    void CopyResourceDict(IToken resourceToken, Dictionary<NameToken, IToken> destinationDict)
    {
        DictionaryToken dict = GetRemoteDict(resourceToken);
        if (dict == null)
        {
            return;
        }

        foreach (var item in dict.Data)
        {
            var key = NameToken.Create(item.Key);

            if (!destinationDict.TryGetValue(key, out var existing))
            {
                // First occurrence of this entry: an indirect value is resolved first so its
                // data, not the reference, is what gets copied and stored.
                var toCopy = item.Value is IndirectReferenceToken valueReference
                    ? document.Structure.TokenScanner.Get(valueReference.Data).Data
                    : item.Value;

                destinationDict[key] = WriterUtil.CopyToken(context, toCopy, document.Structure.TokenScanner, refs);
                continue;
            }

            var subDict = GetRemoteDict(item.Value);
            var destSubDict = existing as DictionaryToken;
            if (destSubDict == null || subDict == null)
            {
                // not a dict.. just overwrite with more important one? should maybe check arrays?
                destinationDict[key] = WriterUtil.CopyToken(context, item.Value, document.Structure.TokenScanner, refs);
                continue;
            }

            // Both sides are dictionaries: merge the new entries INTO the existing sub-dictionary
            // and store the merged result back under the same key, so the entries do not end up
            // at the top level of the resource dictionary.
            var merged = new Dictionary<NameToken, IToken>();
            foreach (var existingEntry in destSubDict.Data)
            {
                merged[NameToken.Create(existingEntry.Key)] = existingEntry.Value;
            }

            foreach (var subItem in subDict.Data)
            {
                // last copied most important
                merged[NameToken.Create(subItem.Key)] = WriterUtil.CopyToken(context, subItem.Value,
                    document.Structure.TokenScanner, refs);
            }

            destinationDict[key] = new DictionaryToken(merged);
        }
    }

    // Returns the token as a dictionary, resolving an indirect reference through the source
    // scanner; returns null when the token is neither of those or does not resolve to a dictionary.
    DictionaryToken GetRemoteDict(IToken candidate)
    {
        if (candidate is IndirectReferenceToken candidateReference)
        {
            return document.Structure.TokenScanner.Get(candidateReference.Data).Data as DictionaryToken;
        }

        return candidate as DictionaryToken;
    }
}
/// <summary>
/// Writes everything that is only known once the document is finished - fonts, page
/// dictionaries, the page tree root, the catalog and optional document information - and
/// then asks the writer to complete the PDF.
/// </summary>
private void CompleteDocument()
{
    var fontsWritten = new Dictionary<Guid, IndirectReferenceToken>();
    foreach (var font in fonts)
    {
        var fontObj = font.Value.FontProgram.WriteFont(context, font.Value.FontKey.Name);
        fontsWritten.Add(font.Key, fontObj);
    }

    var procSet = new List<NameToken>
    {
        NameToken.Create("PDF"),
        NameToken.Text,
        NameToken.ImageB,
        NameToken.ImageC,
        NameToken.ImageI
    };

    var resources = new Dictionary<NameToken, IToken>
    {
        { NameToken.ProcSet, new ArrayToken(procSet) }
    };

    if (fontsWritten.Count > 0)
    {
        var fontsDictionary = new DictionaryToken(fontsWritten.Select(x =>
                (fonts[x.Key].FontKey.Name, (IToken)x.Value))
            .ToDictionary(x => x.Item1, x => x.Item2));

        var fontsDictionaryRef = context.WriteToken(fontsDictionary);

        resources.Add(NameToken.Font, fontsDictionaryRef);
    }

    // The page tree root is written after the pages, but every page needs a reference to it,
    // so its object number is reserved up front.
    var parentIndirect = context.ReserveObjectNumber();

    var pageReferences = new List<IndirectReferenceToken>();

    // Dictionary enumeration order is unspecified; order by page number explicitly so the
    // Kids array always lists the pages in the intended order.
    foreach (var page in pages.OrderBy(x => x.Key))
    {
        var pageDictionary = page.Value.additionalPageProperties;
        pageDictionary[NameToken.Type] = NameToken.Page;
        pageDictionary[NameToken.Parent] = parentIndirect;

        // A MediaBox already present in the page properties takes precedence over the builder's page size.
        if (!pageDictionary.ContainsKey(NameToken.MediaBox))
        {
            pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
        }

        pageDictionary[NameToken.Resources] = new DictionaryToken(page.Value.Resources);

        if (page.Value.contentStreams.Count == 1)
        {
            pageDictionary[NameToken.Contents] = page.Value.contentStreams[0].Write(context);
        }
        else
        {
            var streams = new List<IToken>();
            foreach (var stream in page.Value.contentStreams)
            {
                streams.Add(stream.Write(context));
            }

            pageDictionary[NameToken.Contents] = new ArrayToken(streams);
        }

        var pageRef = context.WriteToken(new DictionaryToken(pageDictionary));

        pageReferences.Add(pageRef);
    }

    var pagesDictionaryData = new Dictionary<NameToken, IToken>
    {
        {NameToken.Type, NameToken.Pages},
        {NameToken.Kids, new ArrayToken(pageReferences)},
        {NameToken.Resources, new DictionaryToken(resources)},
        {NameToken.Count, new NumericToken(pageReferences.Count)}
    };

    var pagesDictionary = new DictionaryToken(pagesDictionaryData);

    // Written under the number reserved above so the pages' Parent entries resolve.
    var pagesRef = context.WriteToken(pagesDictionary, parentIndirect);

    var catalogDictionary = new Dictionary<NameToken, IToken>
    {
        {NameToken.Type, NameToken.Catalog},
        {NameToken.Pages, pagesRef}
    };

    if (ArchiveStandard != PdfAStandard.None)
    {
        Func<IToken, IndirectReferenceToken> writerFunc = x => context.WriteToken(x);

        PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard);

        switch (ArchiveStandard)
        {
            case PdfAStandard.A1A:
            case PdfAStandard.A2A:
                PdfA1ARuleBuilder.Obey(catalogDictionary);
                break;
            case PdfAStandard.A2B:
                break;
        }
    }

    var catalog = new DictionaryToken(catalogDictionary);

    var catalogRef = context.WriteToken(catalog);

    // Document information is optional and is left out when it has no entries.
    var informationReference = default(IndirectReferenceToken);
    if (IncludeDocumentInformation)
    {
        var informationDictionary = DocumentInformation.ToDictionary();
        if (informationDictionary.Count > 0)
        {
            var dictionary = new DictionaryToken(informationDictionary);
            informationReference = context.WriteToken(dictionary);
        }
    }

    context.CompletePdf(catalogRef, informationReference);
}
/// <summary>
/// Builds a PDF document from the current content of this builder and its pages.
/// </summary>
/// <returns>The bytes of the resulting PDF document.</returns>
public byte[] Build()
{
var fontsWritten = new Dictionary<Guid, ObjectToken>();
using (var memory = new MemoryStream())
CompleteDocument();
if (context.Stream is MemoryStream ms)
{
// Header
WriteString("%PDF-1.7", memory);
// Files with binary data should contain a 2nd comment line followed by 4 bytes with values > 127
memory.WriteText("%");
memory.WriteByte(169);
memory.WriteByte(205);
memory.WriteByte(196);
memory.WriteByte(210);
memory.WriteNewLine();
// Body
foreach (var font in fonts)
{
var fontObj = font.Value.FontProgram.WriteFont(font.Value.FontKey.Name, memory, context);
fontsWritten.Add(font.Key, fontObj);
}
foreach (var image in images)
{
var streamToken = new StreamToken(image.Value.StreamDictionary, image.Value.StreamData);
context.WriteObject(memory, streamToken, image.Value.ObjectNumber);
}
foreach (var tokenSet in unwrittenTokens)
{
context.WriteObject(memory, tokenSet.Value, (int)tokenSet.Key.Data.ObjectNumber);
}
var procSet = new List<NameToken>
{
NameToken.Create("PDF"),
NameToken.Text,
NameToken.ImageB,
NameToken.ImageC,
NameToken.ImageI
};
var resources = new Dictionary<NameToken, IToken>
{
{ NameToken.ProcSet, new ArrayToken(procSet) }
};
if (fontsWritten.Count > 0)
{
var fontsDictionary = new DictionaryToken(fontsWritten.Select(x => (fonts[x.Key].FontKey.Name, (IToken)new IndirectReferenceToken(x.Value.Number)))
.ToDictionary(x => x.Item1, x => x.Item2));
resources.Add(NameToken.Font, fontsDictionary);
}
var reserved = context.ReserveNumber();
var parentIndirect = new IndirectReferenceToken(new IndirectReference(reserved, 0));
var pageReferences = new List<IndirectReferenceToken>();
foreach (var page in pages)
{
var individualResources = new Dictionary<NameToken, IToken>(resources);
var pageDictionary = new Dictionary<NameToken, IToken>
{
{NameToken.Type, NameToken.Page},
{NameToken.MediaBox, RectangleToArray(page.Value.PageSize)},
{NameToken.Parent, parentIndirect}
};
if (page.Value.Resources.Count > 0)
{
foreach (var kvp in page.Value.Resources)
{
var value = kvp.Value;
if (individualResources.TryGetValue(kvp.Key, out var pageToken))
{
if (pageToken is DictionaryToken leftDictionary && value is DictionaryToken rightDictionary)
{
var merged = leftDictionary.Data.ToDictionary(k => NameToken.Create(k.Key), v => v.Value);
foreach (var set in rightDictionary.Data)
{
merged[NameToken.Create(set.Key)] = set.Value;
}
value = new DictionaryToken(merged);
}
// Else override
}
individualResources[kvp.Key] = value;
}
}
pageDictionary[NameToken.Resources] = new DictionaryToken(individualResources);
if (page.Value.ContentStreams.Count == 1)
{
var contentStream = WriteContentStream(page.Value.CurrentStream.Operations);
var contentStreamObj = context.WriteObject(memory, contentStream);
pageDictionary[NameToken.Contents] = new IndirectReferenceToken(contentStreamObj.Number);
}
else if (page.Value.ContentStreams.Count > 1)
{
var streamTokens = page.Value.ContentStreams.Select(contentStream =>
{
var streamToken = WriteContentStream(contentStream.Operations);
var contentStreamObj = context.WriteObject(memory, streamToken);
return new IndirectReferenceToken(contentStreamObj.Number);
}).ToList();
pageDictionary[NameToken.Contents] = new ArrayToken(streamTokens);
}
var pageRef = context.WriteObject(memory, new DictionaryToken(pageDictionary));
pageReferences.Add(new IndirectReferenceToken(pageRef.Number));
}
var pagesDictionaryData = new Dictionary<NameToken, IToken>
{
{NameToken.Type, NameToken.Pages},
{NameToken.Kids, new ArrayToken(pageReferences)},
{NameToken.Count, new NumericToken(pageReferences.Count)}
};
var pagesDictionary = new DictionaryToken(pagesDictionaryData);
var pagesRef = context.WriteObject(memory, pagesDictionary, reserved);
var catalogDictionary = new Dictionary<NameToken, IToken>
{
{NameToken.Type, NameToken.Catalog},
{NameToken.Pages, new IndirectReferenceToken(pagesRef.Number)}
};
if (ArchiveStandard != PdfAStandard.None)
{
Func<IToken, ObjectToken> writerFunc = x => context.WriteObject(memory, x);
PdfABaselineRuleBuilder.Obey(catalogDictionary, writerFunc, DocumentInformation, ArchiveStandard);
switch (ArchiveStandard)
{
case PdfAStandard.A1A:
PdfA1ARuleBuilder.Obey(catalogDictionary);
break;
case PdfAStandard.A2B:
break;
case PdfAStandard.A2A:
PdfA1ARuleBuilder.Obey(catalogDictionary);
break;
}
}
var catalog = new DictionaryToken(catalogDictionary);
var catalogRef = context.WriteObject(memory, catalog);
var informationReference = default(IndirectReference?);
if (IncludeDocumentInformation)
{
var informationDictionary = DocumentInformation.ToDictionary();
if (informationDictionary.Count > 0)
{
var dictionary = new DictionaryToken(informationDictionary);
informationReference = context.WriteObject(memory, dictionary).Number;
}
}
TokenWriter.WriteCrossReferenceTable(context.ObjectOffsets, catalogRef, memory, informationReference);
return memory.ToArray();
}
}
/// <summary>
/// The purpose of this method is to resolve indirect reference. That mean copy the reference's content to the new document's stream
/// and replace the indirect reference with the correct/new one
/// </summary>
/// <param name="tokenToCopy">Token to inspect for reference</param>
/// <param name="tokenScanner">scanner get the content from the original document</param>
/// <returns>A reference of the token that was copied. With all the reference updated</returns>
internal IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner)
{
// This token need to be deep copied, because they could contain reference. So we have to update them.
switch (tokenToCopy)
{
case DictionaryToken dictionaryToken:
{
var newContent = new Dictionary<NameToken, IToken>();
foreach (var setPair in dictionaryToken.Data)
{
var name = setPair.Key;
var token = setPair.Value;
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner));
}
return new DictionaryToken(newContent);
}
case ArrayToken arrayToken:
{
var newArray = new List<IToken>(arrayToken.Length);
foreach (var token in arrayToken.Data)
{
newArray.Add(CopyToken(token, tokenScanner));
}
return new ArrayToken(newArray);
}
case IndirectReferenceToken referenceToken:
{
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
Debug.Assert(!(tokenObject is IndirectReferenceToken));
var newToken = CopyToken(tokenObject, tokenScanner);
var reserved = context.ReserveNumber();
var newReference = new IndirectReferenceToken(new IndirectReference(reserved, 0));
unwrittenTokens.Add(newReference, newToken);
return newReference;
}
case StreamToken streamToken:
{
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner) as DictionaryToken;
Debug.Assert(properties != null);
var bytes = streamToken.Data;
return new StreamToken(properties, bytes);
}
case ObjectToken _:
{
// Since we don't write token directly to the stream.
// We can't know the offset. Therefore the token would be invalid
throw new NotSupportedException("Copying a Object token is not supported");
}
return ms.ToArray();
}
return tokenToCopy;
}
private static StreamToken WriteContentStream(IReadOnlyList<IGraphicsStateOperation> content)
{
using (var memoryStream = new MemoryStream())
if (!context.Stream.CanSeek)
{
foreach (var operation in content)
{
operation.Write(memoryStream);
}
throw new InvalidOperationException("PdfDocument.Build() called with non-seekable stream.");
}
var bytes = memoryStream.ToArray();
var stream = DataCompresser.CompressToStream(bytes);
return stream;
using (var temp = new MemoryStream())
{
context.Stream.Seek(0, SeekOrigin.Begin);
context.Stream.CopyTo(temp);
return temp.ToArray();
}
}
@ -661,5 +722,13 @@ namespace UglyToad.PdfPig.Writer
return result;
}
}
/// <summary>
/// Disposes the writer context held by this builder; the context in turn disposes
/// the underlying stream if it was set to do so.
/// </summary>
public void Dispose() => context.Dispose();
}
}

View File

@ -129,7 +129,21 @@
{
const bool isLenientParsing = false;
var documentBuilder = new DocumentMerger(output);
var writer = new PdfStreamWriter(output, false);
var documentBuilder = new DocumentMerger(writer);
var maxVersion = 1.2m;
var infos = new List<(CoreTokenScanner CoreScanner, HeaderVersion Version)>();
foreach (var fileIndex in Enumerable.Range(0, files.Count))
{
var inputBytes = files[fileIndex];
var coreScanner = new CoreTokenScanner(inputBytes);
var version = FileHeaderParser.Parse(coreScanner, isLenientParsing, Log);
maxVersion = Math.Max(maxVersion, version.Version);
infos.Add((coreScanner, version));
}
writer.InitializePdf(maxVersion);
foreach (var fileIndex in Enumerable.Range(0, files.Count))
{
@ -140,9 +154,7 @@
}
var inputBytes = files[fileIndex];
var coreScanner = new CoreTokenScanner(inputBytes);
var version = FileHeaderParser.Parse(coreScanner, isLenientParsing, Log);
var (coreScanner, version) = infos[fileIndex];
var crossReferenceParser = new CrossReferenceParser(Log, new XrefOffsetValidator(Log),
new Parser.Parts.CrossReference.CrossReferenceStreamParser(FilterProvider));
@ -165,7 +177,7 @@
var documentCatalog = CatalogFactory.Create(crossReference.Trailer.Root, catalogDictionaryToken, pdfScanner, isLenientParsing);
documentBuilder.AppendDocument(documentCatalog, version.Version, pdfScanner, pages);
documentBuilder.AppendDocument(documentCatalog, pdfScanner, pages);
}
documentBuilder.Build();
@ -201,24 +213,21 @@
private class DocumentMerger
{
private const decimal DefaultVersion = 1.2m;
private const int ARTIFICIAL_NODE_LIMIT = 100;
private readonly PdfStreamWriter context;
private readonly IPdfStreamWriter context;
private readonly List<IndirectReferenceToken> pagesTokenReferences = new List<IndirectReferenceToken>();
private readonly IndirectReferenceToken rootPagesReference;
private decimal currentVersion = DefaultVersion;
private int pageCount = 0;
public DocumentMerger(Stream baseStream)
public DocumentMerger(IPdfStreamWriter writer)
{
context = new PdfStreamWriter(baseStream, false);
rootPagesReference = context.ReserveNumberToken();
context = writer;
rootPagesReference = context.ReserveObjectNumber();
}
public void AppendDocument(Catalog catalog, decimal version, IPdfTokenScanner tokenScanner, IReadOnlyList<int> pages)
public void AppendDocument(Catalog catalog, IPdfTokenScanner tokenScanner, IReadOnlyList<int> pages)
{
IEnumerable<int> pageIndices;
if (pages == null)
@ -240,11 +249,9 @@
pageIndices = pages;
}
currentVersion = Math.Max(version, currentVersion);
var referencesFromDocument = new Dictionary<IndirectReference, IndirectReferenceToken>();
var currentNodeReference = context.ReserveNumberToken();
var currentNodeReference = context.ReserveObjectNumber();
var pagesReferences = new List<IndirectReferenceToken>();
var resources = new Dictionary<string, IToken>();
@ -323,7 +330,8 @@
}
var pagesDictionary = new DictionaryToken(newPagesNode);
pagesTokenReferences.Add(context.WriteToken(pagesDictionary, (int)currentNodeReference.Data.ObjectNumber));
context.WriteToken(pagesDictionary, currentNodeReference);
pagesTokenReferences.Add(currentNodeReference);
pageCount += pagesReferences.Count;
};
@ -335,7 +343,7 @@
{
CreateTree();
currentNodeReference = context.ReserveNumberToken();
currentNodeReference = context.ReserveObjectNumber();
pagesReferences = new List<IndirectReferenceToken>();
resources = new Dictionary<string, IToken>();
}
@ -366,7 +374,7 @@
{ NameToken.Count, new NumericToken(pageCount) }
});
var pagesRef = context.WriteToken(pagesDictionary, (int)rootPagesReference.Data.ObjectNumber);
var pagesRef = context.WriteToken(pagesDictionary, rootPagesReference);
var catalog = new DictionaryToken(new Dictionary<NameToken, IToken>
{
@ -376,7 +384,7 @@
var catalogRef = context.WriteToken(catalog);
context.Flush(currentVersion, catalogRef);
context.CompletePdf(catalogRef);
Close();
}
@ -423,67 +431,7 @@
/// <returns>A reference of the token that was copied. With all the reference updated</returns>
private IToken CopyToken(IToken tokenToCopy, IPdfTokenScanner tokenScanner, IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument)
{
// This token need to be deep copied, because they could contain reference. So we have to update them.
switch (tokenToCopy)
{
case DictionaryToken dictionaryToken:
{
var newContent = new Dictionary<NameToken, IToken>();
foreach (var setPair in dictionaryToken.Data)
{
var name = setPair.Key;
var token = setPair.Value;
newContent.Add(NameToken.Create(name), CopyToken(token, tokenScanner, referencesFromDocument));
}
return new DictionaryToken(newContent);
}
case ArrayToken arrayToken:
{
var newArray = new List<IToken>(arrayToken.Length);
foreach (var token in arrayToken.Data)
{
newArray.Add(CopyToken(token, tokenScanner, referencesFromDocument));
}
return new ArrayToken(newArray);
}
case IndirectReferenceToken referenceToken:
{
if (referencesFromDocument.TryGetValue(referenceToken.Data, out var newReferenceToken))
{
return newReferenceToken;
}
//we add the token to referencesFromDocument to prevent stackoverflow on references cycles
newReferenceToken = context.ReserveNumberToken();
referencesFromDocument.Add(referenceToken.Data, newReferenceToken);
var tokenObject = DirectObjectFinder.Get<IToken>(referenceToken.Data, tokenScanner);
Debug.Assert(!(tokenObject is IndirectReferenceToken));
var newToken = CopyToken(tokenObject, tokenScanner, referencesFromDocument);
context.WriteToken(newReferenceToken, newToken);
return newReferenceToken;
}
case StreamToken streamToken:
{
var properties = CopyToken(streamToken.StreamDictionary, tokenScanner, referencesFromDocument) as DictionaryToken;
Debug.Assert(properties != null);
var bytes = streamToken.Data;
return new StreamToken(properties, bytes);
}
case ObjectToken _:
{
// Since we don't write token directly to the stream.
// We can't know the offset. Therefore the token would be invalid
throw new NotSupportedException("Copying a Object token is not supported");
}
}
return tokenToCopy;
return WriterUtil.CopyToken(context, tokenToCopy, tokenScanner, referencesFromDocument);
}
}
}

View File

@ -29,7 +29,9 @@
public class PdfPageBuilder
{
private readonly PdfDocumentBuilder documentBuilder;
private readonly List<ContentStream> contentStreams;
private IPageContentStream currentStream;
internal readonly List<IPageContentStream> contentStreams;
internal readonly Dictionary<NameToken, IToken> additionalPageProperties = new Dictionary<NameToken, IToken>();
private readonly Dictionary<NameToken, IToken> resourcesDictionary = new Dictionary<NameToken, IToken>();
// A sequence number for ShowText operations, used to determine whether letters belong to the same operation or not (letters that belong to different operations are less likely to belong to the same word).
@ -52,34 +54,44 @@
/// <summary>
/// Access to the underlying data structures for advanced use cases.
/// </summary>
public ContentStream CurrentStream { get; private set; }
public IContentStream CurrentStream => currentStream;
/// <summary>
/// Access to the list of content streams held by this page builder.
/// </summary>
public IReadOnlyList<ContentStream> ContentStreams { get; }
public IReadOnlyList<IContentStream> ContentStreams => contentStreams;
internal PdfPageBuilder(int number, PdfDocumentBuilder documentBuilder)
{
this.documentBuilder = documentBuilder ?? throw new ArgumentNullException(nameof(documentBuilder));
PageNumber = number;
CurrentStream = new ContentStream();
ContentStreams = contentStreams = new List<ContentStream>()
{
CurrentStream
};
currentStream = new DefaultContentStream();
contentStreams = new List<IPageContentStream>() {currentStream};
}
/// <summary>
/// Creates a page builder that starts from already existing content streams, resources and page dictionary entries.
/// A new, empty <see cref="DefaultContentStream"/> is appended after the supplied streams and becomes the current stream.
/// </summary>
/// <param name="number">The page number, stored in <c>PageNumber</c>.</param>
/// <param name="documentBuilder">The owning document builder.</param>
/// <param name="copied">Existing content streams placed ahead of the new stream; <c>null</c> is treated as none.</param>
/// <param name="existingResources">Resource dictionary used (not copied) by this page; <c>null</c> yields an empty dictionary.</param>
/// <param name="pageDict">Additional page dictionary entries; <c>null</c> yields an empty dictionary.</param>
/// <exception cref="ArgumentNullException"><paramref name="documentBuilder"/> is <c>null</c>.</exception>
internal PdfPageBuilder(int number, PdfDocumentBuilder documentBuilder, IEnumerable<CopiedContentStream> copied,
    Dictionary<NameToken, IToken> existingResources, Dictionary<NameToken, IToken> pageDict)
{
    this.documentBuilder = documentBuilder ?? throw new ArgumentNullException(nameof(documentBuilder));
    PageNumber = number;

    contentStreams = new List<IPageContentStream>();
    if (copied != null)
    {
        contentStreams.AddRange(copied);
    }

    // New operations always go to a fresh stream appended after the supplied ones.
    currentStream = new DefaultContentStream();
    contentStreams.Add(currentStream);

    additionalPageProperties = pageDict ?? new Dictionary<NameToken, IToken>();

    // Same null handling as pageDict: the resources dictionary is indexed by the image and copy
    // methods, so it must never be left null.
    resourcesDictionary = existingResources ?? new Dictionary<NameToken, IToken>();
}
/// <summary>
/// Allow to append a new content stream before the current one and select it
/// </summary>
public void NewContentStreamBefore()
{
var index = Math.Max(contentStreams.IndexOf(CurrentStream) - 1, 0);
var index = Math.Max(contentStreams.IndexOf(currentStream) - 1, 0);
CurrentStream = new ContentStream();
contentStreams.Insert(index, CurrentStream);
currentStream = new DefaultContentStream();
contentStreams.Insert(index, currentStream);
}
/// <summary>
@ -87,10 +99,10 @@
/// </summary>
public void NewContentStreamAfter()
{
var index = Math.Min(contentStreams.IndexOf(CurrentStream) + 1, contentStreams.Count);
var index = Math.Min(contentStreams.IndexOf(currentStream) + 1, contentStreams.Count);
CurrentStream = new ContentStream();
contentStreams.Insert(index, CurrentStream);
currentStream = new DefaultContentStream();
contentStreams.Insert(index, currentStream);
}
/// <summary>
@ -99,12 +111,12 @@
/// <param name="index">index of the content stream to be selected</param>
public void SelectContentStream(int index)
{
if (index < 0 || index >= ContentStreams.Count)
if (index < 0 || index >= contentStreams.Count)
{
throw new IndexOutOfRangeException(nameof(index));
}
CurrentStream = ContentStreams[index];
currentStream = contentStreams[index];
}
/// <summary>
@ -117,16 +129,16 @@
{
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(lineWidth));
currentStream.Add(new SetLineWidth(lineWidth));
}
CurrentStream.Add(new BeginNewSubpath((decimal)from.X, (decimal)from.Y));
CurrentStream.Add(new AppendStraightLineSegment((decimal)to.X, (decimal)to.Y));
CurrentStream.Add(StrokePath.Value);
currentStream.Add(new BeginNewSubpath((decimal)from.X, (decimal)from.Y));
currentStream.Add(new AppendStraightLineSegment((decimal)to.X, (decimal)to.Y));
currentStream.Add(StrokePath.Value);
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(1));
currentStream.Add(new SetLineWidth(1));
}
}
@ -142,23 +154,23 @@
{
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(lineWidth));
currentStream.Add(new SetLineWidth(lineWidth));
}
CurrentStream.Add(new AppendRectangle((decimal)position.X, (decimal)position.Y, width, height));
currentStream.Add(new AppendRectangle((decimal)position.X, (decimal)position.Y, width, height));
if (fill)
{
CurrentStream.Add(FillPathEvenOddRuleAndStroke.Value);
currentStream.Add(FillPathEvenOddRuleAndStroke.Value);
}
else
{
CurrentStream.Add(StrokePath.Value);
currentStream.Add(StrokePath.Value);
}
if (lineWidth != 1)
{
CurrentStream.Add(new SetLineWidth(lineWidth));
currentStream.Add(new SetLineWidth(lineWidth));
}
}
@ -170,8 +182,8 @@
/// <param name="b">Blue - 0 to 255</param>
public void SetStrokeColor(byte r, byte g, byte b)
{
CurrentStream.Add(Push.Value);
CurrentStream.Add(new SetStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
currentStream.Add(Push.Value);
currentStream.Add(new SetStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
}
/// <summary>
@ -182,8 +194,8 @@
/// <param name="b">Blue - 0 to 1</param>
internal void SetStrokeColorExact(decimal r, decimal g, decimal b)
{
CurrentStream.Add(Push.Value);
CurrentStream.Add(new SetStrokeColorDeviceRgb(CheckRgbDecimal(r, nameof(r)),
currentStream.Add(Push.Value);
currentStream.Add(new SetStrokeColorDeviceRgb(CheckRgbDecimal(r, nameof(r)),
CheckRgbDecimal(g, nameof(g)), CheckRgbDecimal(b, nameof(b))));
}
@ -195,8 +207,8 @@
/// <param name="b">Blue - 0 to 255</param>
public void SetTextAndFillColor(byte r, byte g, byte b)
{
CurrentStream.Add(Push.Value);
CurrentStream.Add(new SetNonStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
currentStream.Add(Push.Value);
currentStream.Add(new SetNonStrokeColorDeviceRgb(RgbToDecimal(r), RgbToDecimal(g), RgbToDecimal(b)));
}
/// <summary>
@ -204,7 +216,7 @@
/// </summary>
public void ResetColor()
{
CurrentStream.Add(Pop.Value);
currentStream.Add(Pop.Value);
}
/// <summary>
@ -294,15 +306,15 @@
var letters = DrawLetters(text, fontProgram, fm, fontSize, textMatrix);
CurrentStream.Add(BeginText.Value);
CurrentStream.Add(new SetFontAndSize(font.Name, fontSize));
CurrentStream.Add(new MoveToNextLineWithOffset((decimal)position.X, (decimal)position.Y));
currentStream.Add(BeginText.Value);
currentStream.Add(new SetFontAndSize(font.Name, fontSize));
currentStream.Add(new MoveToNextLineWithOffset((decimal)position.X, (decimal)position.Y));
var bytesPerShow = new List<byte>();
foreach (var letter in text)
{
if (char.IsWhiteSpace(letter))
{
CurrentStream.Add(new ShowText(bytesPerShow.ToArray()));
currentStream.Add(new ShowText(bytesPerShow.ToArray()));
bytesPerShow.Clear();
}
@ -312,10 +324,10 @@
if (bytesPerShow.Count > 0)
{
CurrentStream.Add(new ShowText(bytesPerShow.ToArray()));
currentStream.Add(new ShowText(bytesPerShow.ToArray()));
}
CurrentStream.Add(EndText.Value);
currentStream.Add(EndText.Value);
return letters;
}
@ -370,20 +382,20 @@
var key = NameToken.Create($"I{imageKey++}");
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(reference));
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, reference);
CurrentStream.Add(Push.Value);
currentStream.Add(Push.Value);
// This needs to be the placement rectangle.
CurrentStream.Add(new ModifyCurrentTransformationMatrix(new []
currentStream.Add(new ModifyCurrentTransformationMatrix(new []
{
(decimal)placementRectangle.Width, 0,
0, (decimal)placementRectangle.Height,
(decimal)placementRectangle.BottomLeft.X, (decimal)placementRectangle.BottomLeft.Y
}));
CurrentStream.Add(new InvokeNamedXObject(key));
CurrentStream.Add(Pop.Value);
currentStream.Add(new InvokeNamedXObject(key));
currentStream.Add(Pop.Value);
return new AddedImage(reference, info.Width, info.Height);
return new AddedImage(reference.Data, info.Width, info.Height);
}
/// <summary>
@ -411,16 +423,16 @@
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(image.Reference));
CurrentStream.Add(Push.Value);
currentStream.Add(Push.Value);
// This needs to be the placement rectangle.
CurrentStream.Add(new ModifyCurrentTransformationMatrix(new[]
currentStream.Add(new ModifyCurrentTransformationMatrix(new[]
{
(decimal)placementRectangle.Width, 0,
0, (decimal)placementRectangle.Height,
(decimal)placementRectangle.BottomLeft.X, (decimal)placementRectangle.BottomLeft.Y
}));
CurrentStream.Add(new InvokeNamedXObject(key));
CurrentStream.Add(Pop.Value);
currentStream.Add(new InvokeNamedXObject(key));
currentStream.Add(Pop.Value);
}
/// <summary>
@ -487,20 +499,20 @@
var key = NameToken.Create($"I{imageKey++}");
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, new IndirectReferenceToken(reference));
resourcesDictionary[NameToken.Xobject] = xobjects.With(key, reference);
CurrentStream.Add(Push.Value);
currentStream.Add(Push.Value);
// This needs to be the placement rectangle.
CurrentStream.Add(new ModifyCurrentTransformationMatrix(new[]
currentStream.Add(new ModifyCurrentTransformationMatrix(new[]
{
(decimal)placementRectangle.Width, 0,
0, (decimal)placementRectangle.Height,
(decimal)placementRectangle.BottomLeft.X, (decimal)placementRectangle.BottomLeft.Y
}));
CurrentStream.Add(new InvokeNamedXObject(key));
CurrentStream.Add(Pop.Value);
currentStream.Add(new InvokeNamedXObject(key));
currentStream.Add(Pop.Value);
return new AddedImage(reference, png.Width, png.Height);
return new AddedImage(reference.Data, png.Width, png.Height);
}
/// <summary>
@ -509,13 +521,12 @@
/// <param name="srcPage">Page to be copied</param>
public void CopyFrom(Page srcPage)
{
ContentStream destinationStream = null;
if (CurrentStream.Operations.Count > 0)
if (currentStream.Operations.Count > 0)
{
NewContentStreamAfter();
}
destinationStream = CurrentStream;
var destinationStream = currentStream;
if (!srcPage.Dictionary.TryGet(NameToken.Resources, srcPage.pdfScanner, out DictionaryToken srcResourceDictionary))
{
@ -547,7 +558,7 @@
{
// It means that this type of resources doesn't currently exist in the page, so we can copy it
// with no problem
resourcesDictionary[nameToken] = documentBuilder.CopyToken(set.Value, srcPage.pdfScanner);
resourcesDictionary[nameToken] = documentBuilder.CopyToken(srcPage.pdfScanner, set.Value);
continue;
}
@ -604,7 +615,7 @@
throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the font, got a {fontSet.Value.GetType().Name}");
}
pageFontsDictionary.Add(NameToken.Create(fontName), documentBuilder.CopyToken(fontReferenceToken, srcPage.pdfScanner));
pageFontsDictionary.Add(NameToken.Create(fontName), documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken));
}
resourcesDictionary[NameToken.Font] = new DictionaryToken(pageFontsDictionary);
@ -657,7 +668,7 @@
throw new PdfDocumentFormatException($"Expected a IndirectReferenceToken for the XObject, got a {xobjectSet.Value.GetType().Name}");
}
pageXobjectsDictionary.Add(NameToken.Create(xobjectName), documentBuilder.CopyToken(fontReferenceToken, srcPage.pdfScanner));
pageXobjectsDictionary.Add(NameToken.Create(xobjectName), documentBuilder.CopyToken(srcPage.pdfScanner, fontReferenceToken));
}
resourcesDictionary[NameToken.Xobject] = new DictionaryToken(pageXobjectsDictionary);
@ -741,30 +752,90 @@
return value;
}
/// <summary>
/// Provides access to the raw page data structures for advanced editing use cases.
/// </summary>
public class ContentStream
internal interface IPageContentStream : IContentStream
{
/// <summary>
/// The operations making up the page content stream.
/// </summary>
public List<IGraphicsStateOperation> Operations { get; }
bool ReadOnly { get; }
void Add(IGraphicsStateOperation operation);
IndirectReferenceToken Write(IPdfStreamWriter writer);
}
/// <summary>
/// Create a new <see cref="ContentStream"/>.
/// <summary>
/// Provides access to the raw page data structures for advanced editing use cases.
/// </summary>
public interface IContentStream
{
/// <summary>
/// The operations making up the page content stream.
/// </summary>
internal ContentStream()
List<IGraphicsStateOperation> Operations { get; }
}
internal class DefaultContentStream : IPageContentStream
{
private readonly List<IGraphicsStateOperation> operations;
public DefaultContentStream() : this(new List<IGraphicsStateOperation>())
{
Operations = new List<IGraphicsStateOperation>();
}
public DefaultContentStream(List<IGraphicsStateOperation> operations)
{
this.operations = operations;
}
internal void Add(IGraphicsStateOperation newOperation)
public bool ReadOnly => false;
public void Add(IGraphicsStateOperation operation)
{
Operations.Add(newOperation);
operations.Add(operation);
}
public List<IGraphicsStateOperation> Operations => operations;
public IndirectReferenceToken Write(IPdfStreamWriter writer)
{
using (var memoryStream = new MemoryStream())
{
foreach (var operation in operations)
{
operation.Write(memoryStream);
}
var bytes = memoryStream.ToArray();
var stream = DataCompresser.CompressToStream(bytes);
return writer.WriteToken(stream);
}
}
}
/// <summary>
/// A read-only <see cref="IPageContentStream"/> that is represented solely by an indirect reference
/// supplied at construction. Writing it emits nothing new; the stored reference is returned as-is.
/// </summary>
internal class CopiedContentStream : IPageContentStream
{
    private readonly IndirectReferenceToken token;

    /// <summary>
    /// Creates a content stream wrapper around an existing indirect reference.
    /// </summary>
    /// <param name="indirectReferenceToken">The reference returned by <see cref="Write"/>.</param>
    /// <exception cref="ArgumentNullException"><paramref name="indirectReferenceToken"/> is <c>null</c>.</exception>
    public CopiedContentStream(IndirectReferenceToken indirectReferenceToken)
    {
        // Fail at construction rather than handing a null reference back from Write later.
        token = indirectReferenceToken ?? throw new ArgumentNullException(nameof(indirectReferenceToken));
    }

    /// <summary>
    /// Always <c>true</c>: operations cannot be added to this stream.
    /// </summary>
    public bool ReadOnly => true;

    /// <summary>
    /// Returns the stored reference without writing anything to <paramref name="writer"/>.
    /// </summary>
    public IndirectReferenceToken Write(IPdfStreamWriter writer)
    {
        return token;
    }

    /// <summary>
    /// Not supported for this stream type.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Add(IGraphicsStateOperation operation)
    {
        throw new NotSupportedException("Writing to a copied content stream is not supported.");
    }

    /// <summary>
    /// Not supported for this stream type; the getter always throws <see cref="NotSupportedException"/>.
    /// </summary>
    public List<IGraphicsStateOperation> Operations =>
        throw new NotSupportedException("Reading raw operations is not supported from a copied content stream.");
}
/// <summary>
/// A key representing an image available to use for the current document builder.
/// Create it by adding an image to a page using <see cref="AddJpeg(byte[],PdfRectangle)"/>.

View File

@ -11,17 +11,16 @@
/// <summary>
/// This class would lazily flush all token. Allowing us to make changes to references without need to rewrite the whole stream
/// </summary>
internal class PdfStreamWriter : IDisposable
internal class PdfStreamWriter : IPdfStreamWriter
{
private readonly List<int> reservedNumbers = new List<int>();
private Dictionary<IndirectReference, long> offsets = new Dictionary<IndirectReference, long>();
private const decimal DefaultVersion = 1.2m;
private bool Initialized { get; set; }
private int CurrentNumber { get; set; } = 1;
private readonly Dictionary<IndirectReferenceToken, IToken> tokenReferences = new Dictionary<IndirectReferenceToken, IToken>();
public Stream Stream { get; set; }
public int CurrentNumber { get; private set; } = 1;
public Stream Stream { get; private set; }
public bool DisposeStream { get; set; }
private bool DisposeStream { get; set; }
public PdfStreamWriter(Stream baseStream, bool disposeStream = true)
{
@ -34,13 +33,8 @@
DisposeStream = disposeStream;
}
public void Flush(decimal version, IndirectReferenceToken catalogReference)
public void InitializePdf(decimal version)
{
if (catalogReference == null)
{
throw new ArgumentNullException(nameof(catalogReference));
}
WriteString($"%PDF-{version.ToString("0.0", CultureInfo.InvariantCulture)}", Stream);
Stream.WriteText("%");
@ -49,67 +43,6 @@
Stream.WriteByte(196);
Stream.WriteByte(210);
Stream.WriteNewLine();
var offsets = new Dictionary<IndirectReference, long>();
ObjectToken catalogToken = null;
foreach (var pair in tokenReferences)
{
var referenceToken = pair.Key;
var token = pair.Value;
var offset = Stream.Position;
var obj = new ObjectToken(offset, referenceToken.Data, token);
TokenWriter.WriteToken(obj, Stream);
offsets.Add(referenceToken.Data, offset);
if (catalogToken == null && referenceToken == catalogReference)
{
catalogToken = obj;
}
}
if (catalogToken == null)
{
throw new Exception("Catalog object wasn't found");
}
// TODO: Support document information
TokenWriter.WriteCrossReferenceTable(offsets, catalogToken, Stream, null);
}
public IndirectReferenceToken WriteToken(IToken token, int? reservedNumber = null)
{
if (!reservedNumber.HasValue)
{
return AddToken(token, CurrentNumber++);
}
if (!reservedNumbers.Remove(reservedNumber.Value))
{
throw new InvalidOperationException("You can't reuse a reserved number");
}
// When we end up writing this token, all of his child would already have been added and checked for duplicate
return AddToken(token, reservedNumber.Value);
}
public void WriteToken(IndirectReferenceToken referenceToken, IToken token)
{
tokenReferences.Add(referenceToken, token);
}
public int ReserveNumber()
{
var reserved = CurrentNumber;
reservedNumbers.Add(reserved);
CurrentNumber++;
return reserved;
}
public IndirectReferenceToken ReserveNumberToken()
{
return new IndirectReferenceToken(new IndirectReference(ReserveNumber(), 0));
}
public void Dispose()
@ -124,13 +57,6 @@
Stream = null;
}
private IndirectReferenceToken AddToken(IToken token, int reservedNumber)
{
var reference = new IndirectReference(reservedNumber, 0);
var referenceToken = new IndirectReferenceToken(reference);
tokenReferences.Add(referenceToken, token);
return referenceToken;
}
private static void WriteString(string text, Stream stream)
{
@ -138,5 +64,43 @@
stream.Write(bytes, 0, bytes.Length);
stream.WriteNewLine();
}
/// <summary>
/// Writes <paramref name="token"/> to the stream as an indirect object under a newly reserved object number.
/// </summary>
/// <param name="token">The token to wrap in an object and write.</param>
/// <returns>The reference under which the token was written.</returns>
public IndirectReferenceToken WriteToken(IToken token)
{
    // Reserving the number first is safe: header initialization does not consume object numbers.
    return WriteToken(token, ReserveObjectNumber());
}

/// <summary>
/// Writes <paramref name="token"/> to the stream as an indirect object under the given, previously
/// reserved reference and records the object's stream offset for the cross-reference table.
/// </summary>
/// <param name="token">The token to wrap in an object and write.</param>
/// <param name="indirectReference">The reference the object is written under.</param>
/// <returns><paramref name="indirectReference"/>, unchanged.</returns>
/// <exception cref="ArgumentNullException"><paramref name="indirectReference"/> is <c>null</c>.</exception>
public IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference)
{
    if (indirectReference == null)
    {
        throw new ArgumentNullException(nameof(indirectReference));
    }

    if (!Initialized)
    {
        InitializePdf(DefaultVersion);

        // Remember that the header has been emitted so it is not written again before every
        // object. NOTE(review): the flag is not visibly set anywhere else in this part of the
        // file - confirm whether InitializePdf should own this assignment instead.
        Initialized = true;
    }

    var position = Stream.Position;
    offsets.Add(indirectReference.Data, position);

    var obj = new ObjectToken(position, indirectReference.Data, token);
    TokenWriter.WriteToken(obj, Stream);

    return indirectReference;
}
/// <summary>
/// Hands out the current object number as a generation-0 indirect reference and advances the counter by one.
/// </summary>
/// <returns>A reference token for the reserved object number.</returns>
public IndirectReferenceToken ReserveObjectNumber()
{
    var number = CurrentNumber;
    CurrentNumber = number + 1;

    var reference = new IndirectReference(number, 0);
    return new IndirectReferenceToken(reference);
}
/// <summary>
/// Finishes the document by writing the cross-reference table for every object offset recorded so far.
/// </summary>
/// <param name="catalogReference">Reference to the document catalog object.</param>
/// <param name="documentInformationReference">Optional reference to the document information dictionary.</param>
/// <exception cref="ArgumentNullException"><paramref name="catalogReference"/> is <c>null</c>.</exception>
public void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference=null)
{
    // Without a catalog there is no valid root for the trailer; fail with a clear argument error
    // instead of a NullReferenceException on .Data.
    if (catalogReference == null)
    {
        throw new ArgumentNullException(nameof(catalogReference));
    }

    TokenWriter.WriteCrossReferenceTable(offsets, catalogReference.Data, Stream, documentInformationReference?.Data);
}
}
}

View File

@ -0,0 +1,21 @@
namespace UglyToad.PdfPig.Writer
{
    using System;
    using System.Collections.Generic;
    using System.Text;

    /// <summary>
    /// Selects which PDF writer implementation is used to produce output.
    /// </summary>
    public enum PdfWriterType
    {
        /// <summary>
        /// The default output writer.
        /// </summary>
        Default = 0,

        /// <summary>
        /// A writer that de-duplicates objects while writing, at the cost of keeping references in memory.
        /// </summary>
        ObjectInMemoryDedup = 1
    }
}

View File

@ -131,7 +131,7 @@
/// <param name="outputStream">The output stream to write to.</param>
/// <param name="documentInformationReference">The object reference for the document information dictionary if present.</param>
internal static void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets,
ObjectToken catalogToken,
IndirectReference catalogToken,
Stream outputStream,
IndirectReference? documentInformationReference)
{
@ -201,7 +201,7 @@
{
// 1 for the free entry.
{NameToken.Size, new NumericToken(objectOffsets.Count + 1)},
{NameToken.Root, new IndirectReferenceToken(catalogToken.Number)},
{NameToken.Root, new IndirectReferenceToken(catalogToken)},
{NameToken.Id, identifier}
};
@ -225,6 +225,32 @@
outputStream.Write(Eof, 0, Eof.Length);
}
/// <summary>
/// Writes a pre-serialized token to the output stream wrapped as an indirect object.
/// </summary>
/// <remarks>
/// The output is, in order: the object number, whitespace, the generation, whitespace, the object start marker
/// and a line break; then the bytes of <paramref name="data"/> exactly as given and a line break; then the object
/// end marker and a line break. No validation or transformation is applied to <paramref name="data"/> here.
/// </remarks>
/// <param name="objectNumber">Object number of the indirect object.</param>
/// <param name="generation">Generation of the indirect object.</param>
/// <param name="data">Pre-serialized object contents, written verbatim between the start and end markers.</param>
/// <param name="outputStream">The stream to write the object to.</param>
internal static void WriteObject(long objectNumber, int generation, byte[] data, Stream outputStream)
{
// Header line: "<objectNumber> <generation> " followed by the object start marker.
WriteLong(objectNumber, outputStream);
WriteWhitespace(outputStream);
WriteInt(generation, outputStream);
WriteWhitespace(outputStream);
outputStream.Write(ObjStart, 0, ObjStart.Length);
WriteLineBreak(outputStream);
// Body: the caller's bytes are copied as-is.
outputStream.Write(data, 0, data.Length);
WriteLineBreak(outputStream);
// Footer: the object end marker on its own line.
outputStream.Write(ObjEnd, 0, ObjEnd.Length);
WriteLineBreak(outputStream);
}
private static void WriteHex(HexToken hex, Stream stream)
{
stream.WriteByte(HexStart);

View File

@ -0,0 +1,135 @@
namespace UglyToad.PdfPig.Writer
{
    using Content;
    using Core;
    using Parser.Parts;
    using System;
    using System.Collections.Generic;
    using System.Diagnostics;
    using System.Linq;
    using System.Text;
    using Tokenization.Scanner;
    using Tokens;

    /// <summary>
    /// Helper methods for copying tokens into a PDF stream writer and for enumerating the pages of a page tree.
    /// </summary>
    internal class WriterUtil
    {
        /// <summary>
        /// Produces a deep copy of <paramref name="tokenToCopy"/> in which every indirect reference points at an
        /// object of the new document. Objects referenced by the token are read from the original document,
        /// copied recursively and written through <paramref name="writer"/>.
        /// </summary>
        /// <param name="writer">PDF stream writer that receives the copied objects.</param>
        /// <param name="tokenToCopy">Token to copy and inspect for references.</param>
        /// <param name="tokenScanner">Scanner used to read referenced objects from the original document.</param>
        /// <param name="referencesFromDocument">
        /// Map from references in the original document to the references of their copies. It is consulted before
        /// copying a referenced object and updated whenever one is copied.
        /// </param>
        /// <param name="callstack">
        /// References whose objects are currently being copied. An entry with a <c>null</c> value has no reserved
        /// number yet; a non-null value is the number reserved when the reference was met again while its object
        /// was still being copied (a reference cycle). A new map is created when this is <c>null</c>.
        /// </param>
        /// <returns>
        /// The copied token with all references updated. Token types that are not handled here are returned as-is.
        /// </returns>
        /// <exception cref="NotSupportedException">Thrown when asked to copy an <see cref="ObjectToken"/>.</exception>
        public static IToken CopyToken(IPdfStreamWriter writer, IToken tokenToCopy, IPdfTokenScanner tokenScanner,
            IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument, Dictionary<IndirectReference, IndirectReferenceToken> callstack = null)
        {
            if (callstack == null)
            {
                callstack = new Dictionary<IndirectReference, IndirectReferenceToken>();
            }

            // Container tokens may hold references, so they are rebuilt instead of reused.
            switch (tokenToCopy)
            {
                case DictionaryToken dictionary:
                    return CopyDictionary(writer, dictionary, tokenScanner, referencesFromDocument, callstack);
                case ArrayToken array:
                    return CopyArray(writer, array, tokenScanner, referencesFromDocument, callstack);
                case IndirectReferenceToken reference:
                    return CopyReference(writer, reference, tokenScanner, referencesFromDocument, callstack);
                case StreamToken stream:
                    return CopyStream(writer, stream, tokenScanner, referencesFromDocument, callstack);
                case ObjectToken _:
                    // Tokens are not written directly to the stream here, so the offset of the
                    // object cannot be known and the resulting token would be invalid.
                    throw new NotSupportedException("Copying a Object token is not supported");
                default:
                    return tokenToCopy;
            }
        }

        /// <summary>
        /// Rebuilds a dictionary token, copying each value in turn.
        /// </summary>
        private static DictionaryToken CopyDictionary(IPdfStreamWriter writer, DictionaryToken source, IPdfTokenScanner tokenScanner,
            IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument, Dictionary<IndirectReference, IndirectReferenceToken> callstack)
        {
            var copiedEntries = new Dictionary<NameToken, IToken>();
            foreach (var entry in source.Data)
            {
                copiedEntries.Add(
                    NameToken.Create(entry.Key),
                    CopyToken(writer, entry.Value, tokenScanner, referencesFromDocument, callstack));
            }

            return new DictionaryToken(copiedEntries);
        }

        /// <summary>
        /// Rebuilds an array token, copying each element in order.
        /// </summary>
        private static ArrayToken CopyArray(IPdfStreamWriter writer, ArrayToken source, IPdfTokenScanner tokenScanner,
            IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument, Dictionary<IndirectReference, IndirectReferenceToken> callstack)
        {
            var copiedItems = new List<IToken>(source.Length);
            foreach (var item in source.Data)
            {
                var copiedItem = CopyToken(writer, item, tokenScanner, referencesFromDocument, callstack);
                copiedItems.Add(copiedItem);
            }

            return new ArrayToken(copiedItems);
        }

        /// <summary>
        /// Copies the object behind an indirect reference into the new document and returns the new reference.
        /// </summary>
        private static IndirectReferenceToken CopyReference(IPdfStreamWriter writer, IndirectReferenceToken source, IPdfTokenScanner tokenScanner,
            IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument, Dictionary<IndirectReference, IndirectReferenceToken> callstack)
        {
            var original = source.Data;

            // Already copied (or already given a number): reuse the known reference.
            if (referencesFromDocument.TryGetValue(original, out var known))
            {
                return known;
            }

            // The object is still being copied further up the call chain, so this is a reference cycle.
            // Reserve a number now; the frame that started the copy writes the object under it later.
            if (callstack.TryGetValue(original, out var pending) && pending == null)
            {
                var reserved = writer.ReserveObjectNumber();
                callstack[original] = reserved;
                referencesFromDocument.Add(original, reserved);
                return reserved;
            }

            // Mark the reference as in progress before descending into the object it points to.
            callstack.Add(original, null);

            var target = DirectObjectFinder.Get<IToken>(original, tokenScanner);
            Debug.Assert(!(target is IndirectReferenceToken));
            var copy = CopyToken(writer, target, tokenScanner, referencesFromDocument, callstack);

            // A number reserved during the copy means a cycle was found; the object must use that number.
            var reservedDuringCopy = callstack[original];
            if (reservedDuringCopy != null)
            {
                return writer.WriteToken(copy, reservedDuringCopy);
            }

            var written = writer.WriteToken(copy);
            referencesFromDocument.Add(original, written);
            return written;
        }

        /// <summary>
        /// Rebuilds a stream token from a copy of its dictionary and its original data.
        /// </summary>
        private static StreamToken CopyStream(IPdfStreamWriter writer, StreamToken source, IPdfTokenScanner tokenScanner,
            IDictionary<IndirectReference, IndirectReferenceToken> referencesFromDocument, Dictionary<IndirectReference, IndirectReferenceToken> callstack)
        {
            var copiedDictionary = CopyToken(writer, source.StreamDictionary, tokenScanner, referencesFromDocument, callstack) as DictionaryToken;
            Debug.Assert(copiedDictionary != null);

            return new StreamToken(copiedDictionary, source.Data);
        }

        /// <summary>
        /// Lazily enumerates every page below <paramref name="node"/> together with the dictionaries of the
        /// non-page nodes that lead to it.
        /// </summary>
        /// <param name="node">The page tree node to start from.</param>
        /// <param name="parents">
        /// Dictionaries of the ancestors of <paramref name="node"/>, outermost first. An empty list is used when
        /// this is <c>null</c>. The list passed in is never modified.
        /// </param>
        /// <returns>
        /// One tuple per page: the page's dictionary and the list of ancestor dictionaries, outermost first.
        /// Pages that share the same parent node receive the same list instance.
        /// </returns>
        internal static IEnumerable<(DictionaryToken, List<DictionaryToken>)> WalkTree(PageTreeNode node, List<DictionaryToken> parents = null)
        {
            var ancestors = parents ?? new List<DictionaryToken>();

            if (!node.IsPage)
            {
                // Copy before appending so that sibling subtrees do not see each other's ancestors.
                var lineage = new List<DictionaryToken>(ancestors) { node.NodeDictionary };

                foreach (var child in node.Children)
                {
                    foreach (var page in WalkTree(child, lineage))
                    {
                        yield return page;
                    }
                }
            }
            else
            {
                yield return (node.NodeDictionary, ancestors);
            }
        }
    }
}