From c6ed29bda472bdfe116d017bc7b31fa84fddd9a4 Mon Sep 17 00:00:00 2001 From: Plaisted Date: Sun, 7 Feb 2021 10:37:31 -0600 Subject: [PATCH] cleanup stream writing to only write multiple when needed --- .../Writer/PdfDocumentBuilderTests.cs | 79 ++++++++++++++++++- .../Writer/IPdfStreamWriter.cs | 33 ++++---- .../Writer/PdfDocumentBuilder.cs | 30 ++++--- src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs | 40 +++++----- 4 files changed, 135 insertions(+), 47 deletions(-) diff --git a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs index 91e9420c..443a27cd 100644 --- a/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs +++ b/src/UglyToad.PdfPig.Tests/Writer/PdfDocumentBuilderTests.cs @@ -6,6 +6,7 @@ using Integration; using PdfPig.Core; using PdfPig.Fonts.Standard14Fonts; + using PdfPig.Tokens; using PdfPig.Writer; using Tests.Fonts.TrueType; using Xunit; @@ -845,7 +846,7 @@ builder.AddPage(PageSize.A4); } var result = builder.Build(); - WriteFile(nameof(CanCreatePageTree), result); + WriteFile(nameof(CanCreatePageTree), result); using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) { @@ -854,6 +855,82 @@ } } + [Fact] + public void CanWriteEmptyContentStream() + { + using (var builder = new PdfDocumentBuilder()) + { + builder.AddPage(PageSize.A4); + var result = builder.Build(); + using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + { + Assert.Equal(1, document.NumberOfPages); + var pg = document.GetPage(1); + // single empty page should result in single content stream + Assert.NotNull(pg.Dictionary.Data[NameToken.Contents] as IndirectReferenceToken); + } + } + } + + [Fact] + public void CanWriteSingleContentStream() + { + using (var builder = new PdfDocumentBuilder()) + { + var pb = builder.AddPage(PageSize.A4); + pb.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2)); + var result = builder.Build(); + using (var document = 
PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + { + Assert.Equal(1, document.NumberOfPages); + var pg = document.GetPage(1); + // page with a single non-empty stream should result in single content stream + Assert.NotNull(pg.Dictionary.Data[NameToken.Contents] as IndirectReferenceToken); + } + } + } + + [Fact] + public void CanWriteAndIgnoreEmptyContentStream() + { + using (var builder = new PdfDocumentBuilder()) + { + var pb = builder.AddPage(PageSize.A4); + pb.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2)); + pb.NewContentStreamAfter(); + var result = builder.Build(); + using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + { + Assert.Equal(1, document.NumberOfPages); + var pg = document.GetPage(1); + // empty stream should be ignored and resulting single stream should be written + Assert.NotNull(pg.Dictionary.Data[NameToken.Contents] as IndirectReferenceToken); + } + } + } + + [Fact] + public void CanWriteMultipleContentStream() + { + using (var builder = new PdfDocumentBuilder()) + { + var pb = builder.AddPage(PageSize.A4); + pb.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2)); + pb.NewContentStreamAfter(); + pb.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2)); + var result = builder.Build(); + using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff)) + { + Assert.Equal(1, document.NumberOfPages); + var pg = document.GetPage(1); + // multiple streams should be written to array + var streams = pg.Dictionary.Data[NameToken.Contents] as ArrayToken; + Assert.NotNull(streams); + Assert.Equal(2, streams.Length); + } + } + } + [InlineData("Single Page Simple - from google drive.pdf")] [InlineData("Old Gutnish Internet Explorer.pdf")] [InlineData("68-1990-01_A.pdf")] diff --git a/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs b/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs index 4cb86d06..13281f7a 100644 --- a/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs +++ b/src/UglyToad.PdfPig/Writer/IPdfStreamWriter.cs @@ -9,43 
+9,44 @@ internal interface IPdfStreamWriter : IDisposable { /// - /// + /// The underlying stream used by the writer. /// Stream Stream { get; } /// - /// + /// Writes a single token to the stream. /// - /// - /// + /// Token to write. + /// Indirect reference to the token. IndirectReferenceToken WriteToken(IToken token); /// - /// + /// Writes a token to a reserved object number. /// - /// - /// - /// + /// Token to write. + /// Reserved indirect reference. + /// Reserved indirect reference. IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference); /// - /// + /// Reserves an object number for an object to be written. + /// Useful with cyclic references where object number must be known before + /// writing. /// - /// + /// A reserved indirect reference. IndirectReferenceToken ReserveObjectNumber(); /// - /// + /// Initializes the PDF stream with the PDF header. /// - /// + /// Version of PDF. void InitializePdf(decimal version); /// - /// + /// Completes the PDF by writing the trailing PDF information. /// - /// - /// + /// Indirect reference of catalog. 
+ /// Reference to document information (optional) void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference=null); - } } diff --git a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs index 8eca2b99..e6370eab 100644 --- a/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfDocumentBuilder.cs @@ -446,7 +446,7 @@ namespace UglyToad.PdfPig.Writer } } private void CompleteDocument() - { + { // write fonts to reserved object numbers foreach (var font in fonts) { @@ -464,7 +464,7 @@ namespace UglyToad.PdfPig.Writer int desiredLeafSize = 25; - var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize)); + var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize)); var leafRefs = new List(); var leafChildren = new List>(); @@ -474,7 +474,7 @@ namespace UglyToad.PdfPig.Writer leafs.Add(new Dictionary() { {NameToken.Type, NameToken.Pages}, - }); + }); leafChildren.Add(new List()); leafRefs.Add(context.ReserveObjectNumber()); } @@ -492,7 +492,6 @@ namespace UglyToad.PdfPig.Writer pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize); } - // combine existing resources (if any) with added var pageResources = new Dictionary(); foreach (var existing in page.Value.Resources) @@ -503,18 +502,25 @@ namespace UglyToad.PdfPig.Writer pageResources[NameToken.Font] = new DictionaryToken(page.Value.fontDictionary); pageDictionary[NameToken.Resources] = new DictionaryToken(pageResources); - if (page.Value.contentStreams.Count == 1) - { - pageDictionary[NameToken.Contents] = page.Value.contentStreams[0].Write(context); + var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList(); + if (toWrite.Count == 0) + { + // write empty + pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context); } - else - { + else if (toWrite.Count == 1) + { + // write 
single + pageDictionary[NameToken.Contents] = toWrite[0].Write(context); + } + else + { + // write array var streams = new List(); - foreach (var stream in page.Value.contentStreams) + foreach (var stream in toWrite) { streams.Add(stream.Write(context)); } - pageDictionary[NameToken.Contents] = new ArrayToken(streams); } @@ -589,7 +595,7 @@ namespace UglyToad.PdfPig.Writer context.CompletePdf(catalogRef, informationReference); (int Count, IndirectReferenceToken Ref) CreatePageTree(List> pagesNodes, IndirectReferenceToken parent) - { + { // TODO shorten page tree when there is a single or small number of pages left in a branch var count = 0; var thisObj = context.ReserveObjectNumber(); diff --git a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs index 680d5e0d..103fc785 100644 --- a/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs +++ b/src/UglyToad.PdfPig/Writer/PdfPageBuilder.cs @@ -34,8 +34,8 @@ internal readonly Dictionary additionalPageProperties = new Dictionary(); private readonly Dictionary resourcesDictionary = new Dictionary(); internal Dictionary fontDictionary = new Dictionary(); - internal int nextFontId = 1; - private readonly Dictionary documentFonts = new Dictionary(); + internal int nextFontId = 1; + private readonly Dictionary documentFonts = new Dictionary(); //a sequence number of ShowText operation to determine whether letters belong to same operation or not (letters that belong to different operations have less changes to belong to same word) @@ -336,13 +336,13 @@ currentStream.Add(EndText.Value); return letters; - } - - private NameToken GetAddedFont(PdfDocumentBuilder.AddedFont font) + } + + private NameToken GetAddedFont(PdfDocumentBuilder.AddedFont font) { if (!documentFonts.TryGetValue(font.Id, out NameToken value)) { - value = NameToken.Create($"F{nextFontId++}"); + value = NameToken.Create($"F{nextFontId++}"); while (fontDictionary.ContainsKey(value)) { value = NameToken.Create($"F{nextFontId++}"); 
@@ -782,18 +782,20 @@ internal interface IPageContentStream : IContentStream { - bool ReadOnly { get; } + bool ReadOnly { get; } + bool HasContent { get; } void Add(IGraphicsStateOperation operation); - IndirectReferenceToken Write(IPdfStreamWriter writer); - } + IndirectReferenceToken Write(IPdfStreamWriter writer); - /// - /// Provides access to the raw page data structures for advanced editing use cases. + } + + /// + /// Provides access to the raw page data structures for advanced editing use cases. /// public interface IContentStream - { - /// - /// The operations making up the page content stream. + { + /// + /// The operations making up the page content stream. /// List Operations { get; } } @@ -812,6 +814,7 @@ } public bool ReadOnly => false; + public bool HasContent => operations.Any(); public void Add(IGraphicsStateOperation operation) { @@ -842,24 +845,25 @@ internal class CopiedContentStream : IPageContentStream { private readonly IndirectReferenceToken token; + public bool ReadOnly => true; + public bool HasContent => true; + public CopiedContentStream(IndirectReferenceToken indirectReferenceToken) { token = indirectReferenceToken; } - public bool ReadOnly => true; - public IndirectReferenceToken Write(IPdfStreamWriter writer) { return token; } - + public void Add(IGraphicsStateOperation operation) { throw new NotSupportedException("Writing to a copied content stream is not supported."); } - public List Operations => + public List Operations => throw new NotSupportedException("Reading raw operations is not supported from a copied content stream."); }