Clean up content stream writing so that multiple content streams are only written when needed

This commit is contained in:
Plaisted 2021-02-07 10:37:31 -06:00
parent 442fa8fb6d
commit c6ed29bda4
4 changed files with 135 additions and 47 deletions

View File

@ -6,6 +6,7 @@
using Integration;
using PdfPig.Core;
using PdfPig.Fonts.Standard14Fonts;
using PdfPig.Tokens;
using PdfPig.Writer;
using Tests.Fonts.TrueType;
using Xunit;
@ -845,7 +846,7 @@
builder.AddPage(PageSize.A4);
}
var result = builder.Build();
WriteFile(nameof(CanCreatePageTree), result);
WriteFile(nameof(CanCreatePageTree), result);
using (var document = PdfDocument.Open(result, ParsingOptions.LenientParsingOff))
{
@ -854,6 +855,82 @@
}
}
/// <summary>
/// A page with no drawing operations must still reference exactly one content stream.
/// </summary>
[Fact]
public void CanWriteEmptyContentStream()
{
    using (var documentBuilder = new PdfDocumentBuilder())
    {
        documentBuilder.AddPage(PageSize.A4);

        var bytes = documentBuilder.Build();

        using (var pdf = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, pdf.NumberOfPages);

            // An empty page is expected to point at a single stream (an indirect
            // reference) rather than an array of streams.
            var contents = pdf.GetPage(1).Dictionary.Data[NameToken.Contents];
            Assert.IsAssignableFrom<IndirectReferenceToken>(contents);
        }
    }
}
/// <summary>
/// A page whose only content stream has operations must reference that stream directly.
/// </summary>
[Fact]
public void CanWriteSingleContentStream()
{
    using (var documentBuilder = new PdfDocumentBuilder())
    {
        var pageBuilder = documentBuilder.AddPage(PageSize.A4);
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));

        var bytes = documentBuilder.Build();

        using (var pdf = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, pdf.NumberOfPages);

            // A page with one populated stream should be written as a single
            // indirect reference, not as an array.
            var contents = pdf.GetPage(1).Dictionary.Data[NameToken.Contents];
            Assert.IsAssignableFrom<IndirectReferenceToken>(contents);
        }
    }
}
/// <summary>
/// A trailing content stream that never receives operations must be dropped when writing.
/// </summary>
[Fact]
public void CanWriteAndIgnoreEmptyContentStream()
{
    using (var documentBuilder = new PdfDocumentBuilder())
    {
        var pageBuilder = documentBuilder.AddPage(PageSize.A4);
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));
        // Start a second stream but leave it empty.
        pageBuilder.NewContentStreamAfter();

        var bytes = documentBuilder.Build();

        using (var pdf = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, pdf.NumberOfPages);

            // The empty stream should be ignored, leaving a single stream that is
            // written as an indirect reference.
            var contents = pdf.GetPage(1).Dictionary.Data[NameToken.Contents];
            Assert.IsAssignableFrom<IndirectReferenceToken>(contents);
        }
    }
}
/// <summary>
/// Two content streams that both contain operations must be written as an array of two entries.
/// </summary>
[Fact]
public void CanWriteMultipleContentStream()
{
    using (var documentBuilder = new PdfDocumentBuilder())
    {
        var pageBuilder = documentBuilder.AddPage(PageSize.A4);
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));
        pageBuilder.NewContentStreamAfter();
        pageBuilder.DrawLine(new PdfPoint(1, 1), new PdfPoint(2, 2));

        var bytes = documentBuilder.Build();

        using (var pdf = PdfDocument.Open(bytes, ParsingOptions.LenientParsingOff))
        {
            Assert.Equal(1, pdf.NumberOfPages);

            // Several populated streams should be written as an array.
            var contents = pdf.GetPage(1).Dictionary.Data[NameToken.Contents];
            var streamArray = Assert.IsAssignableFrom<ArrayToken>(contents);
            Assert.Equal(2, streamArray.Length);
        }
    }
}
[InlineData("Single Page Simple - from google drive.pdf")]
[InlineData("Old Gutnish Internet Explorer.pdf")]
[InlineData("68-1990-01_A.pdf")]

View File

@ -9,43 +9,44 @@
/// <summary>
/// Contract for a writer that emits tokens, the PDF header and the trailing PDF information to a stream.
/// </summary>
internal interface IPdfStreamWriter : IDisposable
{
/// <summary>
/// The underlying stream used by the writer.
/// </summary>
Stream Stream { get; }
/// <summary>
/// Writes a single token to the stream.
/// </summary>
/// <param name="token">Token to write.</param>
/// <returns>Indirect reference to the token.</returns>
IndirectReferenceToken WriteToken(IToken token);
/// <summary>
/// Writes a token to a reserved object number.
/// </summary>
/// <param name="token">Token to write.</param>
/// <param name="indirectReference">Reserved indirect reference.</param>
/// <returns>Reserved indirect reference.</returns>
IndirectReferenceToken WriteToken(IToken token, IndirectReferenceToken indirectReference);
/// <summary>
/// Reserves an object number for an object to be written.
/// Useful with cyclic references where the object number must be known before
/// writing.
/// </summary>
/// <returns>A reserved indirect reference.</returns>
IndirectReferenceToken ReserveObjectNumber();
/// <summary>
/// Initializes the PDF stream with the PDF header.
/// </summary>
/// <param name="version">Version of PDF.</param>
void InitializePdf(decimal version);
/// <summary>
/// Completes the PDF by writing the trailing PDF information.
/// </summary>
/// <param name="catalogReference">Indirect reference of the catalog.</param>
/// <param name="documentInformationReference">Reference to the document information (optional, defaults to null).</param>
void CompletePdf(IndirectReferenceToken catalogReference, IndirectReferenceToken documentInformationReference=null);
}
}

View File

@ -446,7 +446,7 @@ namespace UglyToad.PdfPig.Writer
}
}
private void CompleteDocument()
{
{
// write fonts to reserved object numbers
foreach (var font in fonts)
{
@ -464,7 +464,7 @@ namespace UglyToad.PdfPig.Writer
int desiredLeafSize = 25;
var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize));
var numLeafs = (int) Math.Ceiling(Decimal.Divide(Pages.Count, desiredLeafSize));
var leafRefs = new List<IndirectReferenceToken>();
var leafChildren = new List<List<IndirectReferenceToken>>();
@ -474,7 +474,7 @@ namespace UglyToad.PdfPig.Writer
leafs.Add(new Dictionary<NameToken, IToken>()
{
{NameToken.Type, NameToken.Pages},
});
});
leafChildren.Add(new List<IndirectReferenceToken>());
leafRefs.Add(context.ReserveObjectNumber());
}
@ -492,7 +492,6 @@ namespace UglyToad.PdfPig.Writer
pageDictionary[NameToken.MediaBox] = RectangleToArray(page.Value.PageSize);
}
// combine existing resources (if any) with added
var pageResources = new Dictionary<NameToken, IToken>();
foreach (var existing in page.Value.Resources)
@ -503,18 +502,25 @@ namespace UglyToad.PdfPig.Writer
pageResources[NameToken.Font] = new DictionaryToken(page.Value.fontDictionary);
pageDictionary[NameToken.Resources] = new DictionaryToken(pageResources);
if (page.Value.contentStreams.Count == 1)
{
pageDictionary[NameToken.Contents] = page.Value.contentStreams[0].Write(context);
var toWrite = page.Value.contentStreams.Where(x => x.HasContent).ToList();
if (toWrite.Count == 0)
{
// write empty
pageDictionary[NameToken.Contents] = new PdfPageBuilder.DefaultContentStream().Write(context);
}
else
{
else if (toWrite.Count == 1)
{
// write single
pageDictionary[NameToken.Contents] = toWrite[0].Write(context);
}
else
{
// write array
var streams = new List<IToken>();
foreach (var stream in page.Value.contentStreams)
foreach (var stream in toWrite)
{
streams.Add(stream.Write(context));
}
pageDictionary[NameToken.Contents] = new ArrayToken(streams);
}
@ -589,7 +595,7 @@ namespace UglyToad.PdfPig.Writer
context.CompletePdf(catalogRef, informationReference);
(int Count, IndirectReferenceToken Ref) CreatePageTree(List<Dictionary<NameToken, IToken>> pagesNodes, IndirectReferenceToken parent)
{
{
// TODO shorten page tree when there is a single or small number of pages left in a branch
var count = 0;
var thisObj = context.ReserveObjectNumber();

View File

@ -34,8 +34,8 @@
internal readonly Dictionary<NameToken, IToken> additionalPageProperties = new Dictionary<NameToken, IToken>();
private readonly Dictionary<NameToken, IToken> resourcesDictionary = new Dictionary<NameToken, IToken>();
internal Dictionary<NameToken, IToken> fontDictionary = new Dictionary<NameToken, IToken>();
internal int nextFontId = 1;
private readonly Dictionary<Guid, NameToken> documentFonts = new Dictionary<Guid, NameToken>();
internal int nextFontId = 1;
private readonly Dictionary<Guid, NameToken> documentFonts = new Dictionary<Guid, NameToken>();
// A sequence number for the ShowText operation, used to determine whether letters belong to the same operation or not (letters that belong to different operations have less chance of belonging to the same word).
@ -336,13 +336,13 @@
currentStream.Add(EndText.Value);
return letters;
}
private NameToken GetAddedFont(PdfDocumentBuilder.AddedFont font)
}
private NameToken GetAddedFont(PdfDocumentBuilder.AddedFont font)
{
if (!documentFonts.TryGetValue(font.Id, out NameToken value))
{
value = NameToken.Create($"F{nextFontId++}");
value = NameToken.Create($"F{nextFontId++}");
while (fontDictionary.ContainsKey(value))
{
value = NameToken.Create($"F{nextFontId++}");
@ -782,18 +782,20 @@
/// <summary>
/// A content stream belonging to a page being built: operations can be added to it and it can be
/// written out through an <see cref="IPdfStreamWriter"/>.
/// </summary>
internal interface IPageContentStream : IContentStream
{
    /// <summary>
    /// Whether the stream is read-only. In the implementations visible alongside this interface,
    /// a read-only stream throws from <see cref="Add"/>.
    /// </summary>
    bool ReadOnly { get; }

    /// <summary>
    /// Whether the stream has any content. Streams reporting <c>false</c> are skipped when the
    /// page's contents entry is written.
    /// </summary>
    bool HasContent { get; }

    /// <summary>
    /// Appends an operation to the stream.
    /// </summary>
    /// <param name="operation">The graphics state operation to append.</param>
    void Add(IGraphicsStateOperation operation);

    /// <summary>
    /// Writes the stream using the supplied writer.
    /// </summary>
    /// <param name="writer">The writer to write the stream with.</param>
    /// <returns>The indirect reference identifying the stream.</returns>
    IndirectReferenceToken Write(IPdfStreamWriter writer);
}
/// <summary>
/// Provides access to the raw page data structures for advanced editing use cases.
/// </summary>
public interface IContentStream
{
    /// <summary>
    /// The operations making up the page content stream.
    /// </summary>
    List<IGraphicsStateOperation> Operations { get; }
}
@ -812,6 +814,7 @@
}
public bool ReadOnly => false;
public bool HasContent => operations.Any();
public void Add(IGraphicsStateOperation operation)
{
@ -842,24 +845,25 @@
/// <summary>
/// A content stream identified only by an existing indirect reference. It cannot be modified
/// or inspected; writing it simply returns the reference it was created with.
/// </summary>
internal class CopiedContentStream : IPageContentStream
{
    private readonly IndirectReferenceToken token;

    /// <summary>
    /// Always <c>true</c>: operations cannot be added to a copied stream.
    /// </summary>
    public bool ReadOnly => true;

    /// <summary>
    /// Always <c>true</c>: the stream's operations are not available here, so it is reported as having content.
    /// </summary>
    public bool HasContent => true;

    /// <summary>
    /// Creates a copied content stream wrapping an existing reference.
    /// </summary>
    /// <param name="indirectReferenceToken">Reference to the content stream object.</param>
    public CopiedContentStream(IndirectReferenceToken indirectReferenceToken)
    {
        token = indirectReferenceToken;
    }

    /// <summary>
    /// Returns the wrapped reference without using the writer.
    /// </summary>
    /// <param name="writer">Unused.</param>
    /// <returns>The reference supplied to the constructor.</returns>
    public IndirectReferenceToken Write(IPdfStreamWriter writer)
    {
        return token;
    }

    /// <summary>
    /// Not supported for copied content streams.
    /// </summary>
    /// <param name="operation">Ignored.</param>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public void Add(IGraphicsStateOperation operation)
    {
        throw new NotSupportedException("Writing to a copied content stream is not supported.");
    }

    /// <summary>
    /// Not supported for copied content streams.
    /// </summary>
    /// <exception cref="NotSupportedException">Always thrown.</exception>
    public List<IGraphicsStateOperation> Operations =>
        throw new NotSupportedException("Reading raw operations is not supported from a copied content stream.");
}