fix name output for merged documents

This commit is contained in:
Eliot Jones 2020-04-25 11:23:37 +01:00
parent 391b650e3c
commit 19047f62ae
3 changed files with 75 additions and 9 deletions

View File

@ -3,6 +3,7 @@
using System;
using System.IO;
using System.Linq;
using PdfPig;
using PdfPig.Writer;
internal static class MergePdfDocuments
@ -20,6 +21,11 @@
var output = Path.Combine(location, "outputOfMerge.pdf");
File.WriteAllBytes(output, resultFileBytes);
Console.WriteLine($"File output to: {output}");
using (var doc = PdfDocument.Open(resultFileBytes))
{
Console.WriteLine($"Generated document with {doc.NumberOfPages} pages.");
}
}
catch (Exception ex)
{

View File

@ -48,6 +48,28 @@
});
}
/// <summary>
/// Merge multiple PDF documents together with the pages in the order the file paths are provided.
/// </summary>
public static byte[] Merge(params string[] filePaths)
{
var bytes = new List<byte[]>(filePaths.Length);
for (var i = 0; i < filePaths.Length; i++)
{
var filePath = filePaths[i];
if (filePath == null)
{
throw new ArgumentNullException(nameof(filePaths), $"Null filepath at index {i}.");
}
bytes.Add(File.ReadAllBytes(filePath));
}
return Merge(bytes);
}
/// <summary>
/// Merge the set of PDF documents.
/// </summary>

View File

@ -3,11 +3,13 @@
using System;
using System.Collections.Generic;
using System.Globalization;
using System.Text;
using System.IO;
using System.Linq;
using Core;
using Graphics.Operations;
using Tokens;
using Util;
/// <summary>
/// Writes any type of <see cref="IToken"/> to the corresponding PDF document format output.
@ -21,7 +23,7 @@
private static readonly byte[] DictionaryEnd = OtherEncodings.StringAsLatin1Bytes(">>");
private static readonly byte Comment = GetByte("%");
private static readonly byte[] Eof = OtherEncodings.StringAsLatin1Bytes("%%EOF");
private static readonly byte[] FalseBytes = OtherEncodings.StringAsLatin1Bytes("false");
@ -56,6 +58,20 @@
private static readonly byte[] Xref = OtherEncodings.StringAsLatin1Bytes("xref");
private static readonly HashSet<char> DelimiterChars = new HashSet<char>
{
'(',
')',
'<',
'>',
'[',
']',
'{',
'}',
'/',
'%'
};
/// <summary>
/// Writes the given input token to the output stream with the correct PDF format and encoding including whitespace and line breaks as applicable.
/// </summary>
@ -112,7 +128,7 @@
/// <param name="catalogToken">The object representing the catalog dictionary which is referenced from the trailer dictionary.</param>
/// <param name="outputStream">The output stream to write to.</param>
/// <param name="documentInformationReference">The object reference for the document information dictionary if present.</param>
internal static void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets,
internal static void WriteCrossReferenceTable(IReadOnlyDictionary<IndirectReference, long> objectOffsets,
ObjectToken catalogToken,
Stream outputStream,
IndirectReference? documentInformationReference)
@ -143,7 +159,7 @@
WriteLineBreak(outputStream);
WriteFirstXrefEmptyEntry(outputStream);
foreach (var keyValuePair in objectOffsets.OrderBy(x => x.Key.ObjectNumber))
{
/*
@ -161,22 +177,22 @@
var generation = OtherEncodings.StringAsLatin1Bytes(keyValuePair.Key.Generation.ToString("D5"));
outputStream.Write(generation, 0, generation.Length);
WriteWhitespace(outputStream);
outputStream.WriteByte(InUseEntry);
WriteWhitespace(outputStream);
WriteLineBreak(outputStream);
}
outputStream.Write(Trailer, 0, Trailer.Length);
WriteLineBreak(outputStream);
var identifier = new ArrayToken(new IToken[]
{
new HexToken(Guid.NewGuid().ToString("N").ToCharArray()),
new HexToken(Guid.NewGuid().ToString("N").ToCharArray())
new HexToken(Guid.NewGuid().ToString("N").ToCharArray()),
new HexToken(Guid.NewGuid().ToString("N").ToCharArray())
});
var trailerDictionaryData = new Dictionary<NameToken, IToken>
@ -276,7 +292,29 @@
private static void WriteName(string name, Stream outputStream)
{
var bytes = OtherEncodings.StringAsLatin1Bytes(name);
/*
* Beginning with PDF 1.2, any character except null (character code 0) may be
* included in a name by writing its 2-digit hexadecimal code, preceded by the number sign character (#).
* This is required for delimiter and whitespace characters.
* This is recommended for characters whose codes are outside the range 33 (!) to 126 (~).
*/
var sb = new StringBuilder();
foreach (var c in name)
{
if (c < 33 || c > 126 || DelimiterChars.Contains(c))
{
var str = Hex.GetString(new[] { (byte)c });
sb.Append('#').Append(str);
}
else
{
sb.Append(c);
}
}
var bytes = OtherEncodings.StringAsLatin1Bytes(sb.ToString());
outputStream.WriteByte(NameStart);
outputStream.Write(bytes, 0, bytes.Length);