Spanify work 1 (#812)

* Add GetString(ReadOnlySpan<byte>) polyfill

* Add ArrayPoolBufferWriter

* Use Utf8.IsValid & char.IsAsciiHexDigit on NET8.0+

* Optimize HexTokenizer

* Eliminate various Tuple allocations

* Eliminate List allocation in CrossReferenceTable

* Eliminate various allocations in Ascii85Filter

* Spanify HexToken

* Spanify Palette

* Spanify various Cmap & font methods

* Spanify Type1Charstring classes

* Spanify PdfDocEncoding.TryConvertBytesToString

* Spanify OctalHelpers.FromOctalDigits

* Add missing braces

* React to HexToken.Byte type changes

* Cleanup

* [Tests] React to span changes

* Add ArgumentNullException check back to Type1CharstringDecryptedBytes

* Remove unsafe code

* Seal HexToken

* Avoid allocation when passing an empty span
This commit is contained in:
Jason Nelson 2024-04-01 01:18:01 -07:00 committed by GitHub
parent e789691100
commit f62929eb7c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
41 changed files with 434 additions and 271 deletions

View File

@ -0,0 +1,148 @@
using System;
using System.Buffers;
namespace UglyToad.PdfPig.Core;
/// <summary>
/// An <see cref="IBufferWriter{T}"/> whose backing array is rented from <see cref="ArrayPool{T}.Shared"/>.
/// Dispose the writer to return the rented array to the pool.
/// </summary>
/// <typeparam name="T">The type of the elements written to the buffer.</typeparam>
public sealed class ArrayPoolBufferWriter<T> : IBufferWriter<T>, IDisposable
{
    private const int DefaultBufferSize = 256;

    // Smallest size requested from the pool when the buffer has to grow.
    private const int MinimumGrowSize = 512;

    // Upper bound applied when doubling so the doubled size never overflows int.
    // NOTE(review): chosen to match Array.MaxLength on current runtimes - confirm if that matters for callers.
    private const int MaxArrayLength = 0x7FFFFFC7;

    // Non-primitive element types may hold object references. Clearing such arrays before
    // they go back to the pool stops the pool from keeping those objects alive.
    private static readonly bool ClearOnReturn = !typeof(T).IsPrimitive;

    private T[] buffer;
    private int position;

    /// <summary>
    /// Creates an <see cref="ArrayPoolBufferWriter{T}"/> with the default initial capacity.
    /// </summary>
    public ArrayPoolBufferWriter()
        : this(DefaultBufferSize)
    {
    }

    /// <summary>
    /// Creates an <see cref="ArrayPoolBufferWriter{T}"/> with at least the requested initial capacity.
    /// </summary>
    /// <param name="size">The minimum size of the initial buffer.</param>
    public ArrayPoolBufferWriter(int size)
    {
        buffer = ArrayPool<T>.Shared.Rent(size);
        position = 0;
    }

    /// <summary>
    /// Advances the current position, committing <paramref name="count"/> elements that were
    /// written into the span or memory obtained from <see cref="GetSpan"/> / <see cref="GetMemory"/>.
    /// </summary>
    /// <param name="count">The number of elements to commit.</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="count"/> is negative.</exception>
    /// <exception cref="InvalidOperationException">The position would move past the end of the buffer.</exception>
    public void Advance(int count)
    {
        if (count < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(count), count, "The count must not be negative.");
        }

        if (count > FreeCapacity)
        {
            throw new InvalidOperationException("Cannot advance past the end of the buffer.");
        }

        position += count;
    }

    /// <summary>
    /// Writes the provided value.
    /// </summary>
    /// <param name="value">The element to append.</param>
    public void Write(T value)
    {
        EnsureCapacity(1);
        buffer[position] = value;
        position++;
    }

    /// <summary>
    /// Writes the provided values.
    /// </summary>
    /// <param name="values">The elements to append; an empty span is a no-op.</param>
    public void Write(ReadOnlySpan<T> values)
    {
        if (values.IsEmpty)
        {
            // Nothing to copy, so do not force the buffer to grow.
            return;
        }

        EnsureCapacity(values.Length);
        values.CopyTo(buffer.AsSpan(position));
        position += values.Length;
    }

    /// <summary>
    /// Returns a writeable block of memory starting at the current position.
    /// </summary>
    /// <param name="sizeHint">The minimum number of free elements required; 0 means "at least one".</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="sizeHint"/> is negative.</exception>
    public Memory<T> GetMemory(int sizeHint = 0)
    {
        EnsureCapacity(sizeHint);
        return buffer.AsMemory(position);
    }

    /// <summary>
    /// Returns a writeable span starting at the current position.
    /// </summary>
    /// <param name="sizeHint">The minimum number of free elements required; 0 means "at least one".</param>
    /// <exception cref="ArgumentOutOfRangeException"><paramref name="sizeHint"/> is negative.</exception>
    public Span<T> GetSpan(int sizeHint = 0)
    {
        EnsureCapacity(sizeHint);
        return buffer.AsSpan(position);
    }

    /// <summary>
    /// Returns the number of elements written to the buffer.
    /// </summary>
    public int WrittenCount => position;

    /// <summary>
    /// Returns the committed data as Memory.
    /// </summary>
    public ReadOnlyMemory<T> WrittenMemory => buffer.AsMemory(0, position);

    /// <summary>
    /// Returns the committed data as a Span.
    /// </summary>
    public ReadOnlySpan<T> WrittenSpan => buffer.AsSpan(0, position);

    // Number of elements that can still be written without growing the buffer.
    private int FreeCapacity => buffer.Length - position;

    // Makes sure at least sizeHint (or one, when sizeHint is 0) free elements follow the current position.
    private void EnsureCapacity(int sizeHint)
    {
        if (sizeHint < 0)
        {
            throw new ArgumentOutOfRangeException(nameof(sizeHint), sizeHint, "The size hint must not be negative.");
        }

        if (sizeHint == 0)
        {
            sizeHint = 1;
        }

        if (sizeHint <= FreeCapacity)
        {
            return;
        }

        // Grow geometrically so a long run of small writes does not rent and copy a new
        // array for every write, regardless of how the pool rounds request sizes.
        var required = checked(position + sizeHint);
        var doubled = (int)Math.Min((long)buffer.Length * 2, MaxArrayLength);
        var newSize = Math.Max(Math.Max(required, doubled), MinimumGrowSize);

        var newBuffer = ArrayPool<T>.Shared.Rent(newSize);

        if (buffer.Length != 0)
        {
            Array.Copy(buffer, 0, newBuffer, 0, position);
            ArrayPool<T>.Shared.Return(buffer, ClearOnReturn);
        }

        buffer = newBuffer;
    }

    /// <summary>
    /// Resets the internal state so the instance can be reused before disposal.
    /// </summary>
    /// <param name="clearArray">When true the whole backing array is cleared as well.</param>
    public void Reset(bool clearArray = false)
    {
        position = 0;

        if (clearArray)
        {
            buffer.AsSpan().Clear();
        }
    }

    /// <summary>
    /// Disposes the buffer and returns any rented memory to the pool.
    /// Calling it more than once is safe.
    /// </summary>
    public void Dispose()
    {
        if (buffer.Length != 0)
        {
            ArrayPool<T>.Shared.Return(buffer, ClearOnReturn);
            buffer = [];
        }

        // Keep the written view consistent with the (now empty) buffer.
        position = 0;
    }
}

View File

@ -42,7 +42,7 @@
/// <summary>
/// Read an integer from octal digits.
/// </summary>
public static int FromOctalDigits(short[] octal)
public static int FromOctalDigits(ReadOnlySpan<short> octal)
{
int sum = 0;
for (int i = octal.Length - 1; i >= 0; i--)

View File

@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Core
{
using System.Collections.Generic;
using System.Linq;
using System;
using System.Text;
/// <summary>
@ -30,31 +29,8 @@
/// <summary>
/// Convert the bytes to string using the ISO 8859-1 encoding.
/// </summary>
public static string BytesAsLatin1String(IReadOnlyList<byte> bytes)
public static string BytesAsLatin1String(ReadOnlySpan<byte> bytes)
{
if (bytes == null)
{
return null;
}
if (bytes is byte[] arr)
{
return BytesAsLatin1String(arr);
}
return BytesAsLatin1String(bytes.ToArray());
}
/// <summary>
/// Convert the bytes to string using the ISO 8859-1 encoding.
/// </summary>
public static string BytesAsLatin1String(byte[] bytes)
{
if (bytes == null)
{
return null;
}
return Iso88591.GetString(bytes);
}
}

View File

@ -1,5 +1,6 @@
namespace UglyToad.PdfPig.Core
{
using System;
using System.Collections.Generic;
/// <summary>
@ -263,7 +264,7 @@
/// Try to convert raw bytes to a PdfDocEncoding encoded string. If unsupported characters are encountered
/// meaning we cannot safely round-trip the value to bytes this will instead return false.
/// </summary>
public static bool TryConvertBytesToString(byte[] bytes, out string result)
public static bool TryConvertBytesToString(ReadOnlySpan<byte> bytes, out string result)
{
result = null;
if (bytes.Length == 0)

View File

@ -0,0 +1,19 @@
#if NETFRAMEWORK || NETSTANDARD2_0
namespace System.Text;
/// <summary>
/// Polyfill giving <see cref="Encoding"/> a span-based GetString overload on targets that lack one.
/// </summary>
internal static class EncodingExtensions
{
    /// <summary>
    /// Decodes <paramref name="bytes"/> into a string using <paramref name="encoding"/>.
    /// </summary>
    /// <param name="encoding">The encoding used to decode the bytes.</param>
    /// <param name="bytes">The bytes to decode.</param>
    /// <returns>The decoded string, or <see cref="string.Empty"/> for an empty span.</returns>
    public static string GetString(this Encoding encoding, ReadOnlySpan<byte> bytes)
        // The empty case short-circuits so no temporary array is created for it.
        // NOTE: this can be made allocation free by introducing unsafe
        => bytes.IsEmpty
            ? string.Empty
            : encoding.GetString(bytes.ToArray());
}
#endif

View File

@ -5,6 +5,10 @@
using System.Globalization;
using System.Text;
#if NET8_0_OR_GREATER
using System.Text.Unicode;
#endif
/// <summary>
/// Helper methods for reading from PDF files.
/// </summary>
@ -20,8 +24,8 @@
/// </summary>
public const byte AsciiCarriageReturn = 13;
private static readonly HashSet<int> EndOfNameCharacters = new HashSet<int>
{
private static readonly HashSet<int> EndOfNameCharacters =
[
' ',
AsciiCarriageReturn,
AsciiLineFeed,
@ -35,7 +39,7 @@
'(',
0,
'\f'
};
];
private static readonly int MaximumNumberStringLength = long.MaxValue.ToString("D").Length;
@ -269,7 +273,11 @@
/// </summary>
public static bool IsHex(char ch)
{
#if NET8_0_OR_GREATER
    return char.IsAsciiHexDigit(ch);
#else
    // Compare against the ASCII range explicitly: char.IsDigit also accepts non-ASCII
    // Unicode decimal digits, which would make this branch disagree with
    // char.IsAsciiHexDigit on newer targets.
    return (ch >= '0' && ch <= '9') || (ch >= 'a' && ch <= 'f') || (ch >= 'A' && ch <= 'F');
#endif
}
/// <summary>
@ -277,6 +285,9 @@
/// </summary>
public static bool IsValidUtf8(byte[] input)
{
#if NET8_0_OR_GREATER
return Utf8.IsValid(input);
#else
try
{
var d = Encoding.UTF8.GetDecoder();
@ -290,6 +301,7 @@
{
return false;
}
#endif
}
private static StringBuilder ReadStringNumber(IInputBytes reader)

View File

@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<LangVersion>12</LangVersion>
@ -17,7 +17,10 @@
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net471'">
<PackageReference Include="Microsoft.Bcl.HashCode" Version="1.1.1" />
</ItemGroup>
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' or '$(TargetFramework)'=='net471'">
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>
<ItemGroup>
<None Include="..\pdfpig.snk" Link="pdfpig.snk" />
</ItemGroup>

View File

@ -17,12 +17,12 @@
/// <summary>
/// Functions used to define left, middle and right edges.
/// </summary>
private static readonly Tuple<EdgeType, Func<PdfRectangle, double>>[] edgesFuncs = new Tuple<EdgeType, Func<PdfRectangle, double>>[]
{
private static readonly Tuple<EdgeType, Func<PdfRectangle, double>>[] edgesFuncs =
[
Tuple.Create<EdgeType, Func<PdfRectangle, double>>(EdgeType.Left, x => Math.Round(x.Left, 0)), // use BoundingBox's left coordinate
Tuple.Create<EdgeType, Func<PdfRectangle, double>>(EdgeType.Mid, x => Math.Round(x.Left + x.Width / 2, 0)), // use BoundingBox's mid coordinate
Tuple.Create<EdgeType, Func<PdfRectangle, double>>(EdgeType.Right, x => Math.Round(x.Right, 0)) // use BoundingBox's right coordinate
};
];
/// <summary>
/// Get the text edges.

View File

@ -1,13 +1,13 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
{
using System;
using System.Collections.Generic;
using Commands;
using Commands.Arithmetic;
using Commands.Hint;
using Commands.PathConstruction;
using Commands.StartFinishOutline;
using Core;
using System;
using System.Collections.Generic;
/// <summary>
/// Decodes a set of CharStrings to their corresponding Type 1 BuildChar operations.
@ -73,11 +73,11 @@
return new Type1CharStrings(charStringResults, charStringIndexToName, subroutineResults);
}
private static IReadOnlyList<Union<double, LazyType1Command>> ParseSingle(IReadOnlyList<byte> charStringBytes)
private static IReadOnlyList<Union<double, LazyType1Command>> ParseSingle(ReadOnlySpan<byte> charStringBytes)
{
var interpreted = new List<Union<double, LazyType1Command>>();
for (var i = 0; i < charStringBytes.Count; i++)
for (var i = 0; i < charStringBytes.Length; i++)
{
var b = charStringBytes[i];
@ -104,7 +104,7 @@
return interpreted;
}
private static int InterpretNumber(byte b, IReadOnlyList<byte> bytes, ref int i)
private static int InterpretNumber(byte b, ReadOnlySpan<byte> bytes, ref int i)
{
if (b >= 32 && b <= 246)
{
@ -128,7 +128,7 @@
return result;
}
public static LazyType1Command GetCommand(byte v, IReadOnlyList<byte> bytes, ref int i)
public static LazyType1Command GetCommand(byte v, ReadOnlySpan<byte> bytes, ref int i)
{
switch (v)
{

View File

@ -1,12 +1,13 @@
namespace UglyToad.PdfPig.Fonts.Type1.CharStrings
{
using System;
using System.Collections.Generic;
using System.Globalization;
internal sealed class Type1CharstringDecryptedBytes
{
public IReadOnlyList<byte> Bytes { get; }
private readonly byte[] bytes;
public ReadOnlySpan<byte> Bytes => bytes;
public int Index { get; }
@ -14,17 +15,17 @@
public SourceType Source { get; }
public Type1CharstringDecryptedBytes(IReadOnlyList<byte> bytes, int index)
public Type1CharstringDecryptedBytes(byte[] bytes, int index)
{
Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
Index = index;
Name = GlyphList.NotDefined;
Source = SourceType.Subroutine;
}
public Type1CharstringDecryptedBytes(string name, IReadOnlyList<byte> bytes, int index)
public Type1CharstringDecryptedBytes(string name, byte[] bytes, int index)
{
Bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
Index = index;
Name = name ?? index.ToString(CultureInfo.InvariantCulture);
Source = SourceType.Charstring;
@ -38,7 +39,7 @@
public override string ToString()
{
return $"{Name} {Source} {Index} {Bytes.Count} bytes";
return $"{Name} {Source} {Index} {Bytes.Length} bytes";
}
}
}

View File

@ -14,7 +14,7 @@
private const int Password = 5839;
private const int CharstringEncryptionKey = 4330;
public (Type1PrivateDictionary, Type1CharStrings) Parse(IReadOnlyList<byte> bytes, bool isLenientParsing)
public (Type1PrivateDictionary, Type1CharStrings) Parse(ReadOnlySpan<byte> bytes, bool isLenientParsing)
{
if (!IsBinary(bytes))
{
@ -23,7 +23,7 @@
var decrypted = Decrypt(bytes, EexecEncryptionKey, EexecRandomBytes);
if (decrypted.Count == 0)
if (decrypted.Length == 0)
{
var defaultPrivateDictionary = new Type1PrivateDictionary(new Type1PrivateDictionary.Builder());
var defaultCharstrings = new Type1CharStrings(new Dictionary<string, Type1CharStrings.CommandSequence>(),
@ -32,7 +32,7 @@
return (defaultPrivateDictionary, defaultCharstrings);
}
var tokenizer = new Type1Tokenizer(new ByteArrayInputBytes(decrypted));
var tokenizer = new Type1Tokenizer(new ByteArrayInputBytes([.. decrypted]));
/*
* After 4 random characters follows the /Private dictionary and the /CharString dictionary.
@ -315,9 +315,9 @@
/// The first byte must not be whitespace.
/// One of the first four ciphertext bytes must not be an ASCII hex character.
/// </summary>
private static bool IsBinary(IReadOnlyList<byte> bytes)
private static bool IsBinary(ReadOnlySpan<byte> bytes)
{
if (bytes.Count < 4)
if (bytes.Length < 4)
{
return true;
}
@ -340,13 +340,14 @@
return false;
}
private static IReadOnlyList<byte> ConvertHexToBinary(IReadOnlyList<byte> bytes)
private static ReadOnlySpan<byte> ConvertHexToBinary(ReadOnlySpan<byte> bytes)
{
var result = new List<byte>(bytes.Count / 2);
var result = new byte[bytes.Length / 2];
int index = 0;
var last = '\0';
var offset = 0;
for (var i = 0; i < bytes.Count; i++)
for (var i = 0; i < bytes.Length; i++)
{
var c = (char)bytes[i];
if (!ReadHelper.IsHex(c))
@ -357,7 +358,7 @@
if (offset == 1)
{
result.Add(HexToken.Convert(last, c));
result[index++] = HexToken.ConvertPair(last, c);
offset = 0;
}
else
@ -371,7 +372,7 @@
return result;
}
private static IReadOnlyList<byte> Decrypt(IReadOnlyList<byte> bytes, int key, int randomBytes)
private static ReadOnlySpan<byte> Decrypt(ReadOnlySpan<byte> bytes, int key, int randomBytes)
{
/*
* We start with three constants R = 55665, c1 = 52845 and c2 = 22719.
@ -388,17 +389,17 @@
return bytes;
}
if (randomBytes > bytes.Count || bytes.Count == 0)
if (randomBytes > bytes.Length || bytes.Length == 0)
{
return new byte[0];
return [];
}
const int c1 = 52845;
const int c2 = 22719;
var plainBytes = new byte[bytes.Count - randomBytes];
var plainBytes = new byte[bytes.Length - randomBytes];
for (var i = 0; i < bytes.Count; i++)
for (var i = 0; i < bytes.Length; i++)
{
var cipher = bytes[i] & 0xFF;
var plain = cipher ^ key >> 8;
@ -681,13 +682,13 @@
throw new InvalidOperationException($"Found an unexpected token instead of subroutine charstring: {charstring}.");
}
if (!isLenientParsing && charstringToken.Data.Count != byteLength)
if (!isLenientParsing && charstringToken.Data.Length != byteLength)
{
throw new InvalidOperationException($"The subroutine charstring {charstringToken} did not have the expected length of {byteLength}.");
}
var subroutine = Decrypt(charstringToken.Data, CharstringEncryptionKey, lenIv);
subroutines.Add(new Type1CharstringDecryptedBytes(subroutine, index));
var subroutine = Decrypt(charstringToken.Data.Span, CharstringEncryptionKey, lenIv);
subroutines.Add(new Type1CharstringDecryptedBytes([.. subroutine], index));
ReadTillPut(tokenizer);
}
@ -732,14 +733,14 @@
throw new InvalidOperationException($"Got wrong type of token, expected charstring, instead got: {charstring}.");
}
if (!isLenientParsing && charstringToken.Data.Count != charstringLength)
if (!isLenientParsing && charstringToken.Data.Length != charstringLength)
{
throw new InvalidOperationException($"The charstring {charstringToken} did not have the expected length of {charstringLength}.");
}
var data = Decrypt(charstringToken.Data, CharstringEncryptionKey, lenIv);
var data = Decrypt(charstringToken.Data.Span, CharstringEncryptionKey, lenIv);
results.Add(new Type1CharstringDecryptedBytes(name, data, i));
results.Add(new Type1CharstringDecryptedBytes(name, [.. data], i));
ReadTillDef(tokenizer);
}

View File

@ -32,7 +32,7 @@
// Sometimes the entire PFB file including the header bytes can be included which prevents parsing in the normal way.
var isEntirePfbFile = inputBytes.Peek() == PfbFileIndicator;
IReadOnlyList<byte> eexecPortion = new byte[0];
ReadOnlySpan<byte> eexecPortion = [];
if (isEntirePfbFile)
{
@ -77,7 +77,7 @@
try
{
var tempEexecPortion = new List<byte>();
using var tempEexecPortion = new ArrayPoolBufferWriter<byte>();
var tokenSet = new PreviousTokenSet();
tokenSet.Add(scanner.CurrentToken);
while (scanner.MoveNext())
@ -100,7 +100,7 @@
{
for (int i = 0; i < offset; i++)
{
tempEexecPortion.Add((byte)ClearToMark[i]);
tempEexecPortion.Write((byte)ClearToMark[i]);
}
}
@ -117,7 +117,7 @@
continue;
}
tempEexecPortion.Add(inputBytes.CurrentByte);
tempEexecPortion.Write(inputBytes.CurrentByte);
}
}
else
@ -131,7 +131,7 @@
if (!isEntirePfbFile)
{
eexecPortion = tempEexecPortion;
eexecPortion = tempEexecPortion.WrittenSpan.ToArray();
}
}
finally

View File

@ -1,16 +1,15 @@
namespace UglyToad.PdfPig.Fonts.Type1.Parser
{
using System;
using System.Collections.Generic;
using System.Globalization;
internal class Type1DataToken : Type1Token
internal sealed class Type1DataToken : Type1Token
{
public IReadOnlyList<byte> Data { get; }
public ReadOnlyMemory<byte> Data { get; }
public override bool IsPrivateDictionary { get; } = false;
public Type1DataToken(TokenType type, IReadOnlyList<byte> data) : base(string.Empty, type)
public Type1DataToken(TokenType type, ReadOnlyMemory<byte> data) : base(string.Empty, type)
{
if (type != TokenType.Charstring)
{
@ -22,7 +21,7 @@
public override string ToString()
{
return $"Token[type = {Type}, data = {Data.Count} bytes]";
return $"Token[type = {Type}, data = {Data.Length} bytes]";
}
}

View File

@ -35,7 +35,7 @@
var result = new byte[hex.Length / 2];
for (var i = 0; i < hex.Length; i += 2)
{
result[i / 2] = HexToken.Convert(hex[i], hex[i + 1]);
result[i / 2] = HexToken.ConvertPair(hex[i], hex[i + 1]);
}
return result;

View File

@ -162,7 +162,7 @@ three %PDF-1.6";
const string hex =
@"00 0F 4A 43 42 31 33 36 36 31 32 32 37 2E 70 64 66 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 50 44 46 20 43 41 52 4F 01 00 FF FF FF FF 00 00 00 00 00 04 DF 28 00 00 00 00 AF 51 7E 82 AF 52 D7 09 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 81 81 03 0D 00 00 25 50 44 46 2D 31 2E 31 0A 25 E2 E3 CF D3 0D 0A 31 20 30 20 6F 62 6A";
var bytes = hex.Split(' ').Where(x => x.Length > 0).Select(x => HexToken.Convert(x[0], x[1]));
var bytes = hex.Split(' ').Where(x => x.Length > 0).Select(x => HexToken.ConvertPair(x[0], x[1]));
var str = OtherEncodings.BytesAsLatin1String(bytes.ToArray());

View File

@ -4,18 +4,10 @@
public class OtherEncodingsTests
{
[Fact]
public void BytesNullReturnsNullString()
{
var result = OtherEncodings.BytesAsLatin1String(null);
Assert.Null(result);
}
[Fact]
public void BytesEmptyReturnsEmptyString()
{
var result = OtherEncodings.BytesAsLatin1String(new byte[0]);
var result = OtherEncodings.BytesAsLatin1String([]);
Assert.Equal(string.Empty, result);
}

View File

@ -1,10 +1,9 @@
namespace UglyToad.PdfPig.Tokenization
{
using System.Collections.Generic;
using Core;
using Tokens;
internal class HexTokenizer : ITokenizer
internal sealed class HexTokenizer : ITokenizer
{
public bool ReadsNextByte { get; } = false;
@ -16,8 +15,8 @@
{
return false;
}
var characters = new List<char>();
using var charBuffer = new ArrayPoolBufferWriter<char>();
while (inputBytes.MoveNext())
{
@ -38,10 +37,10 @@
return false;
}
characters.Add((char)current);
charBuffer.Write((char)current);
}
token = new HexToken(characters);
token = new HexToken(charBuffer.WrittenSpan);
return true;
}

View File

@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.Tokenization
{
using System;
using System.Collections.Generic;
using System.Text;
using Core;
using Tokens;

View File

@ -10,7 +10,7 @@
/// PDF arrays may be heterogeneous; that is, an array's elements may be any combination of numbers, strings,
/// dictionaries, or any other objects, including other arrays.
/// </summary>
public class ArrayToken : IDataToken<IReadOnlyList<IToken>>
public sealed class ArrayToken : IDataToken<IReadOnlyList<IToken>>
{
/// <summary>
/// The tokens contained in this array.

View File

@ -2,16 +2,14 @@ namespace UglyToad.PdfPig.Tokens
{
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
/// <summary>
/// A token containing string data where the string is encoded as hexadecimal.
/// </summary>
public class HexToken : IDataToken<string>
public sealed class HexToken : IDataToken<string>
{
private static readonly Dictionary<char, byte> HexMap = new Dictionary<char, byte>
{
private static readonly Dictionary<char, byte> HexMap = new() {
{'0', 0x00 },
{'1', 0x01 },
{'2', 0x02 },
@ -42,29 +40,37 @@ namespace UglyToad.PdfPig.Tokens
/// </summary>
public string Data { get; }
private readonly byte[] _bytes;
/// <summary>
/// The bytes of the hex data.
/// </summary>
public IReadOnlyList<byte> Bytes { get; }
public ReadOnlySpan<byte> Bytes => _bytes;
/// <summary>
/// The memory of the hex data.
/// </summary>
public ReadOnlyMemory<byte> Memory => _bytes;
/// <summary>
/// Create a new <see cref="HexToken"/> from the provided hex characters.
/// </summary>
/// <param name="characters">A set of hex characters 0-9, A - F, a - f representing a string.</param>
public HexToken(IReadOnlyList<char> characters)
public HexToken(ReadOnlySpan<char> characters)
{
if (characters == null)
{
throw new ArgumentNullException(nameof(characters));
}
var bytes = new List<byte>();
// Round up: an odd trailing character is paired with '0' by the loop below and
// still produces a byte, so Length / 2 would be one element too small.
var bytes = new byte[(characters.Length + 1) / 2];
int index = 0;
for (var i = 0; i < characters.Count; i += 2)
for (var i = 0; i < characters.Length; i += 2)
{
char high = characters[i];
char low;
if (i == characters.Count - 1)
if (i == characters.Length - 1)
{
low = '0';
}
@ -73,14 +79,14 @@ namespace UglyToad.PdfPig.Tokens
low = characters[i + 1];
}
var b = Convert(high, low);
bytes.Add(b);
var b = ConvertPair(high, low);
bytes[index++] = b;
}
// Handle UTF-16BE format strings.
if (bytes.Count >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF)
if (bytes.Length >= 2 && bytes[0] == 0xFE && bytes[1] == 0xFF)
{
Data = Encoding.BigEndianUnicode.GetString(bytes.ToArray(), 2, bytes.Count - 2);
Data = Encoding.BigEndianUnicode.GetString(bytes, 2, bytes.Length - 2);
}
else
{
@ -97,7 +103,7 @@ namespace UglyToad.PdfPig.Tokens
Data = builder.ToString();
}
Bytes = bytes;
_bytes = bytes;
}
/// <summary>
@ -106,7 +112,7 @@ namespace UglyToad.PdfPig.Tokens
/// <param name="high">The high nibble.</param>
/// <param name="low">The low nibble.</param>
/// <returns>The byte.</returns>
public static byte Convert(char high, char low)
public static byte ConvertPair(char high, char low)
{
var highByte = HexMap[high];
var lowByte = HexMap[low];
@ -129,7 +135,7 @@ namespace UglyToad.PdfPig.Tokens
var bytes = token.Bytes;
var value = bytes[0] & 0xFF;
if (bytes.Count == 2)
if (bytes.Length == 2)
{
value <<= 8;
value += bytes[1] & 0xFF;
@ -159,7 +165,11 @@ namespace UglyToad.PdfPig.Tokens
/// </summary>
public string GetHexString()
{
#if NET5_0_OR_GREATER
    // Convert.ToHexString(ReadOnlySpan<byte>) exists from .NET 5 onwards, so every modern
    // target can skip the intermediate array and string the fallback needs.
    return Convert.ToHexString(Bytes);
#else
    // BitConverter yields "AA-BB-CC"; removing the dashes gives the same upper-case hex form.
    return BitConverter.ToString(Bytes.ToArray()).Replace("-", string.Empty);
#endif
}
}
}

View File

@ -1,4 +1,4 @@
<Project Sdk="Microsoft.NET.Sdk">
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net462;net471;net6.0;net8.0</TargetFrameworks>
<LangVersion>12</LangVersion>
@ -15,6 +15,9 @@
<ItemGroup Condition="'$(TargetFramework)'=='net462'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<ItemGroup Condition="'$(TargetFramework)'=='netstandard2.0' or '$(TargetFramework)'=='net462' or '$(TargetFramework)'=='net471'">
<PackageReference Include="System.Memory" Version="4.5.5" />
</ItemGroup>
<ItemGroup>
<None Include="..\pdfpig.snk" Link="pdfpig.snk" />
</ItemGroup>

View File

@ -67,30 +67,13 @@
}
}
Tuple<string, PdfRectangle> data;
switch (tempTextOrientation)
{
case TextOrientation.Horizontal:
data = GetBoundingBoxH(letters);
break;
case TextOrientation.Rotate180:
data = GetBoundingBox180(letters);
break;
case TextOrientation.Rotate90:
data = GetBoundingBox90(letters);
break;
case TextOrientation.Rotate270:
data = GetBoundingBox270(letters);
break;
case TextOrientation.Other:
default:
data = GetBoundingBoxOther(letters);
break;
}
var data = tempTextOrientation switch {
TextOrientation.Horizontal => GetBoundingBoxH(letters),
TextOrientation.Rotate180 => GetBoundingBox180(letters),
TextOrientation.Rotate90 => GetBoundingBox90(letters),
TextOrientation.Rotate270 => GetBoundingBox270(letters),
_ => GetBoundingBoxOther(letters),
};
Text = data.Item1;
BoundingBox = data.Item2;
@ -100,7 +83,7 @@
}
#region Bounding box
private Tuple<string, PdfRectangle> GetBoundingBoxH(IReadOnlyList<Letter> letters)
private (string, PdfRectangle) GetBoundingBoxH(IReadOnlyList<Letter> letters)
{
var builder = new StringBuilder();
@ -136,10 +119,10 @@
}
}
return new Tuple<string, PdfRectangle>(builder.ToString(), new PdfRectangle(blX, blY, trX, trY));
return new(builder.ToString(), new PdfRectangle(blX, blY, trX, trY));
}
private Tuple<string, PdfRectangle> GetBoundingBox180(IReadOnlyList<Letter> letters)
private (string, PdfRectangle) GetBoundingBox180(IReadOnlyList<Letter> letters)
{
var builder = new StringBuilder();
@ -175,10 +158,10 @@
}
}
return new Tuple<string, PdfRectangle>(builder.ToString(), new PdfRectangle(blX, blY, trX, trY));
return (builder.ToString(), new PdfRectangle(blX, blY, trX, trY));
}
private Tuple<string, PdfRectangle> GetBoundingBox90(IReadOnlyList<Letter> letters)
private (string, PdfRectangle) GetBoundingBox90(IReadOnlyList<Letter> letters)
{
var builder = new StringBuilder();
@ -214,12 +197,12 @@
}
}
return new Tuple<string, PdfRectangle>(builder.ToString(), new PdfRectangle(
return new (builder.ToString(), new PdfRectangle(
new PdfPoint(t, l), new PdfPoint(t, r),
new PdfPoint(b, l), new PdfPoint(b, r)));
}
private Tuple<string, PdfRectangle> GetBoundingBox270(IReadOnlyList<Letter> letters)
private (string, PdfRectangle) GetBoundingBox270(IReadOnlyList<Letter> letters)
{
var builder = new StringBuilder();
@ -255,12 +238,12 @@
}
}
return new Tuple<string, PdfRectangle>(builder.ToString(), new PdfRectangle(
return new(builder.ToString(), new PdfRectangle(
new PdfPoint(t, l), new PdfPoint(t, r),
new PdfPoint(b, l), new PdfPoint(b, r)));
}
private Tuple<string, PdfRectangle> GetBoundingBoxOther(IReadOnlyList<Letter> letters)
private (string, PdfRectangle) GetBoundingBoxOther(IReadOnlyList<Letter> letters)
{
var builder = new StringBuilder();
for (var i = 0; i < letters.Count; i++)
@ -270,7 +253,7 @@
if (letters.Count == 1)
{
return new Tuple<string, PdfRectangle>(builder.ToString(), letters[0].GlyphRectangle);
return new(builder.ToString(), letters[0].GlyphRectangle);
}
else
{
@ -367,7 +350,7 @@
obb = obb3;
}
return new Tuple<string, PdfRectangle>(builder.ToString(), obb);
return new(builder.ToString(), obb);
}
}
#endregion
@ -379,7 +362,10 @@
private static double BoundAngle180(double angle)
{
angle = (angle + 180) % 360;
if (angle < 0) angle += 360;
if (angle < 0)
{
angle += 360;
}
return angle - 180;
}

View File

@ -402,7 +402,7 @@ namespace UglyToad.PdfPig.Encryption
var decrypted = DecryptData(data, reference);
token = new HexToken(Hex.GetString(decrypted).ToCharArray());
token = new HexToken(Hex.GetString(decrypted).AsSpan());
break;
}

View File

@ -2,7 +2,7 @@
{
using System;
using System.Collections.Generic;
using System.IO;
using Core;
using Tokens;
/// <inheritdoc />
@ -35,77 +35,74 @@
var index = 0;
using (var stream = new MemoryStream())
using (var writer = new BinaryWriter(stream))
using var writer = new ArrayPoolBufferWriter<byte>();
for (var i = 0; i < input.Count; i++)
{
for (var i = 0; i < input.Count; i++)
var value = input[i];
if (IsWhiteSpace(value))
{
var value = input[i];
continue;
}
if (IsWhiteSpace(value))
{
continue;
}
if (value == EndOfDataBytes[0])
{
if (i == input.Count - 1 || input[i + 1] == EndOfDataBytes[1])
{
if (index > 0)
{
WriteData(asciiBuffer, index, writer);
}
index = 0;
// The end
break;
}
// TODO: this shouldn't be possible?
}
if (value == EmptyBlock)
if (value == EndOfDataBytes[0])
{
if (i == input.Count - 1 || input[i + 1] == EndOfDataBytes[1])
{
if (index > 0)
{
throw new InvalidOperationException("Encountered z within a 5 character block");
}
for (int j = 0; j < 4; j++)
{
writer.Write((byte)0);
WriteData(asciiBuffer, index, writer);
}
index = 0;
// We've completed our block.
}
else
{
asciiBuffer[index] = (byte) (value - Offset);
index++;
// The end
break;
}
if (index == 5)
{
WriteData(asciiBuffer, index, writer);
index = 0;
}
// TODO: this shouldn't be possible?
}
if (index > 0)
if (value == EmptyBlock)
{
if (index > 0)
{
throw new InvalidOperationException("Encountered z within a 5 character block");
}
for (int j = 0; j < 4; j++)
{
writer.Write(0);
}
index = 0;
// We've completed our block.
}
else
{
asciiBuffer[index] = (byte) (value - Offset);
index++;
}
if (index == 5)
{
WriteData(asciiBuffer, index, writer);
index = 0;
}
writer.Flush();
return stream.ToArray();
}
if (index > 0)
{
WriteData(asciiBuffer, index, writer);
}
return writer.WrittenSpan.ToArray();
}
private static void WriteData(byte[] ascii, int index, BinaryWriter writer)
private static void WriteData(Span<byte> ascii, int index, ArrayPoolBufferWriter<byte> writer)
{
if (index < 2)
{

View File

@ -382,10 +382,10 @@
}
else
{
IReadOnlyList<byte> bytes;
byte[] bytes;
if (token is HexToken hex)
{
bytes = hex.Bytes;
bytes = [.. hex.Bytes];
}
else
{

View File

@ -1,6 +1,8 @@
namespace UglyToad.PdfPig.Images.Png
{
internal class Palette
using System;
internal sealed class Palette
{
public bool HasAlphaValues { get; private set; }
@ -9,7 +11,7 @@
/// <summary>
/// Creates a palette object. Input palette data length from PLTE chunk must be a multiple of 3.
/// </summary>
public Palette(byte[] data)
public Palette(ReadOnlySpan<byte> data)
{
Data = new byte[data.Length * 4 / 3];
var dataIndex = 0;

View File

@ -1,6 +1,5 @@
namespace UglyToad.PdfPig.Outline
{
using Destinations;
using System;
using System.Collections.Generic;

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.Parser.FileStructure
{
using System.Collections.Generic;
using System;
using System.Linq;
using CrossReference;
using Core;
@ -55,8 +55,9 @@
scanner.RegisterCustomTokenizer((byte)'\r', tokenizer);
scanner.RegisterCustomTokenizer((byte)'\n', tokenizer);
using var tokens = new ArrayPoolBufferWriter<IToken>();
var readingLine = false;
var tokens = new List<IToken>();
var count = 0;
while (scanner.MoveNext())
{
@ -69,9 +70,9 @@
readingLine = false;
count = ProcessTokens(tokens, builder, isLenientParsing, count, ref definition);
count = ProcessTokens(tokens.WrittenSpan, builder, isLenientParsing, count, ref definition);
tokens.Clear();
tokens.Reset();
continue;
}
@ -89,12 +90,12 @@
}
readingLine = true;
tokens.Add(scanner.CurrentToken);
tokens.Write(scanner.CurrentToken);
}
if (tokens.Count > 0)
if (tokens.WrittenCount > 0)
{
ProcessTokens(tokens, builder, isLenientParsing, count, ref definition);
ProcessTokens(tokens.WrittenSpan, builder, isLenientParsing, count, ref definition);
}
scanner.DeregisterCustomTokenizer(tokenizer);
@ -105,19 +106,17 @@
return builder.Build();
}
private static int ProcessTokens(List<IToken> tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
private static int ProcessTokens(ReadOnlySpan<IToken> tokens, CrossReferenceTablePartBuilder builder, bool isLenientParsing,
int objectCount, ref TableSubsectionDefinition definition)
{
string GetErrorMessage()
static string GetErrorMessage(ReadOnlySpan<IToken> tokens)
{
var representation = "Invalid line format in xref table: [" + string.Join(", ", tokens.Select(x => x.ToString())) + "]";
return representation;
return "Invalid line format in xref table: [" + string.Join(", ", tokens.ToArray().Select(x => x.ToString())) + "]";
}
if (objectCount == definition.Count)
{
if (tokens.Count == 2)
if (tokens.Length == 2)
{
if (tokens[0] is NumericToken newFirstObjectToken && tokens[1] is NumericToken newObjectCountToken)
{
@ -130,17 +129,17 @@
throw new PdfDocumentFormatException($"Found a line with 2 unexpected entries in the cross reference table: {tokens[0]}, {tokens[1]}.");
}
if (tokens.Count <= 2)
if (tokens.Length <= 2)
{
if (!isLenientParsing)
{
throw new PdfDocumentFormatException(GetErrorMessage());
throw new PdfDocumentFormatException(GetErrorMessage(tokens));
}
return objectCount;
}
var lastToken = tokens[tokens.Count - 1];
var lastToken = tokens[tokens.Length - 1];
if (lastToken is OperatorToken operatorToken)
{
@ -153,7 +152,7 @@
{
if (!isLenientParsing)
{
throw new PdfDocumentFormatException(GetErrorMessage());
throw new PdfDocumentFormatException(GetErrorMessage(tokens));
}
return objectCount;
@ -170,7 +169,7 @@
{
if (!isLenientParsing)
{
throw new PdfDocumentFormatException(GetErrorMessage());
throw new PdfDocumentFormatException(GetErrorMessage(tokens));
}
}

View File

@ -24,7 +24,7 @@
int result = ReadHelper.ReadInt(bytes);
if (result < 0 || result > GenerationNumberThreshold)
{
throw new FormatException("Generation Number '" + result + "' has more than 5 digits");
throw new FormatException($"Generation Number '{result}' has more than 5 digits");
}
return result;

View File

@ -152,7 +152,7 @@
{
var data = new byte[minCodeLength];
bytes.Read(data);
return data.ToInt(minCodeLength);
return ((ReadOnlySpan<byte>)data).Slice(0, minCodeLength).ToInt();
}
byte[] result = new byte[maxCodeLength];

View File

@ -1,13 +1,14 @@
namespace UglyToad.PdfPig.PdfFonts.Cmap
{
using System;
using System.Collections.Generic;
internal static class CMapUtils
{
public static int ToInt(this IReadOnlyList<byte> data, int length)
public static int ToInt(this ReadOnlySpan<byte> data)
{
int code = 0;
for (int i = 0; i < length; ++i)
for (int i = 0; i < data.Length; ++i)
{
code <<= 8;
code |= (data[i] & 0xFF);
@ -15,8 +16,7 @@
return code;
}
public static void PutAll<TKey, TValue>(this Dictionary<TKey, TValue> target,
IReadOnlyDictionary<TKey, TValue> source)
public static void PutAll<TKey, TValue>(this Dictionary<TKey, TValue> target, IReadOnlyDictionary<TKey, TValue> source)
{
foreach (var pair in source)
{
@ -24,4 +24,4 @@
}
}
}
}
}

View File

@ -3,6 +3,7 @@
namespace UglyToad.PdfPig.PdfFonts.Cmap
{
using Core;
using System;
using System.Collections.Generic;
using System.Linq;
using System.Text;
@ -66,14 +67,14 @@ namespace UglyToad.PdfPig.PdfFonts.Cmap
public Dictionary<int, string> BaseFontCharacterMap { get; } = new Dictionary<int, string>();
public void AddBaseFontCharacter(IReadOnlyList<byte> bytes, IReadOnlyList<byte> value)
public void AddBaseFontCharacter(ReadOnlySpan<byte> bytes, ReadOnlySpan<byte> value)
{
AddBaseFontCharacter(bytes, CreateStringFromBytes(value.ToArray()));
AddBaseFontCharacter(bytes, CreateStringFromBytes(value));
}
public void AddBaseFontCharacter(IReadOnlyList<byte> bytes, string value)
public void AddBaseFontCharacter(ReadOnlySpan<byte> bytes, string value)
{
var code = GetCodeFromArray(bytes, bytes.Count);
var code = GetCodeFromArray(bytes);
BaseFontCharacterMap[code] = value;
}
@ -134,17 +135,13 @@ namespace UglyToad.PdfPig.PdfFonts.Cmap
return a;
}
var result = new List<T>(a);
result.AddRange(b);
return result;
return [.. a, .. b];
}
private int GetCodeFromArray(IReadOnlyList<byte> data, int length)
private int GetCodeFromArray(ReadOnlySpan<byte> data)
{
int code = 0;
for (int i = 0; i < length; i++)
for (int i = 0; i < data.Length; i++)
{
code <<= 8;
code |= (data[i] + 256) % 256;
@ -152,7 +149,7 @@ namespace UglyToad.PdfPig.PdfFonts.Cmap
return code;
}
private static string CreateStringFromBytes(byte[] bytes)
private static string CreateStringFromBytes(ReadOnlySpan<byte> bytes)
{
return bytes.Length == 1
? OtherEncodings.BytesAsLatin1String(bytes)

View File

@ -1,7 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Cmap
{
using System;
using System.Collections.Generic;
/// <summary>
/// A codespace range is specified by a pair of codes of some particular length giving the lower and upper bounds of that range.
@ -11,12 +10,12 @@
/// <summary>
/// The lower-bound of this range.
/// </summary>
public IReadOnlyList<byte> Start { get; }
public ReadOnlyMemory<byte> Start { get; }
/// <summary>
/// The upper-bound of this range.
/// </summary>
public IReadOnlyList<byte> End { get; }
public ReadOnlyMemory<byte> End { get; }
/// <summary>
/// The lower-bound of this range as an integer.
@ -36,13 +35,13 @@
/// <summary>
/// Creates a new instance of <see cref="CodespaceRange"/>.
/// </summary>
public CodespaceRange(IReadOnlyList<byte> start, IReadOnlyList<byte> end)
public CodespaceRange(ReadOnlyMemory<byte> start, ReadOnlyMemory<byte> end)
{
Start = start;
End = end;
StartInt = start.ToInt(start.Count);
EndInt = end.ToInt(end.Count);
CodeLength = start.Count;
StartInt = start.Span.ToInt();
EndInt = end.Span.ToInt();
CodeLength = start.Length;
}
/// <summary>
@ -74,7 +73,7 @@
return false;
}
var value = code.ToInt(codeLength);
var value = ((ReadOnlySpan<byte>)code).Slice(0, codeLength).ToInt();
if (value >= StartInt && value <= EndInt)
{
return true;

View File

@ -34,7 +34,7 @@
throw new InvalidFontFormatException("bfrange ended unexpectedly after the high source code.");
}
List<byte>? destinationBytes = null;
byte[]? destinationBytes = null;
ArrayToken? destinationArray = null;
switch (scanner.CurrentToken)
@ -43,7 +43,7 @@
destinationArray = arrayToken;
break;
case HexToken hexToken:
destinationBytes = hexToken.Bytes.ToList();
destinationBytes = [.. hexToken.Bytes];
break;
case NumericToken _:
throw new NotImplementedException("From the spec it seems this possible but the meaning is unclear...");
@ -52,7 +52,7 @@
}
var done = false;
var startCode = new List<byte>(lowSourceCode.Bytes);
var startCode = lowSourceCode.Bytes.ToArray();
var endCode = highSourceCode.Bytes;
if (destinationArray != null)
@ -76,7 +76,7 @@
builder.AddBaseFontCharacter(startCode, hex.Bytes);
}
Increment(startCode, startCode.Count - 1);
Increment(startCode, startCode.Length - 1);
arrayIndex++;
}
@ -93,14 +93,14 @@
builder.AddBaseFontCharacter(startCode, destinationBytes!);
Increment(startCode, startCode.Count - 1);
Increment(startCode, startCode.Length - 1);
Increment(destinationBytes!, destinationBytes!.Count - 1);
Increment(destinationBytes!, destinationBytes!.Length - 1);
}
}
}
private static void Increment(IList<byte> data, int position)
private static void Increment(Span<byte> data, int position)
{
if (position > 0 && (data[position] & 0xFF) == 255)
{
@ -113,9 +113,9 @@
}
}
private static int Compare(IReadOnlyList<byte> first, IReadOnlyList<byte> second)
private static int Compare(ReadOnlySpan<byte> first, ReadOnlySpan<byte> second)
{
for (var i = 0; i < first.Count; i++)
for (var i = 0; i < first.Length; i++)
{
if (first[i] == second[i])
{

View File

@ -24,7 +24,7 @@
throw new InvalidOperationException("The destination token in a line for Cid Character should be an integer, instead it was: " + scanner.CurrentToken);
}
var sourceInteger = sourceCode.Bytes.ToInt(sourceCode.Bytes.Count);
var sourceInteger = sourceCode.Bytes.ToInt();
var mapping = new CidCharacterMapping(sourceInteger, destinationCode.Int);
results.Add(mapping);

View File

@ -44,7 +44,7 @@
throw new InvalidOperationException("Codespace range contains an unexpected token: " + tokenScanner.CurrentToken);
}
ranges.Add(new CodespaceRange(start.Bytes, end.Bytes));
ranges.Add(new CodespaceRange(start.Memory, end.Memory));
}
builder.CodespaceRanges = ranges;

View File

@ -0,0 +1,20 @@
#if NETFRAMEWORK || NETSTANDARD2_0
namespace System.Text;
/// <summary>
/// Adds a span-accepting <c>GetString</c> extension to <see cref="Encoding"/>.
/// The surrounding <c>#if</c> compiles it only for NETFRAMEWORK / NETSTANDARD2_0.
/// </summary>
internal static class EncodingExtensions
{
    /// <summary>
    /// Decodes <paramref name="bytes"/> into a string using <paramref name="encoding"/>.
    /// </summary>
    /// <param name="encoding">The encoding used to decode the bytes.</param>
    /// <param name="bytes">The bytes to decode.</param>
    /// <returns>
    /// <see cref="string.Empty"/> when <paramref name="bytes"/> is empty; otherwise the decoded string.
    /// </returns>
    public static string GetString(this Encoding encoding, ReadOnlySpan<byte> bytes)
    {
        // An empty span is answered directly, so no array is allocated for it.
        // NOTE: the copy made by ToArray() could be avoided by introducing unsafe code.
        return bytes.IsEmpty
            ? string.Empty
            : encoding.GetString(bytes.ToArray());
    }
}
#endif

View File

@ -825,7 +825,7 @@
var scanner = new CoreTokenScanner(bytes, true, useLenientParsing: parsingOptions.UseLenientParsing);
var objects = new List<Tuple<long, long>>();
var objects = new List<(long, long)>();
for (var i = 0; i < numberOfObjects.Int; i++)
{
@ -834,7 +834,7 @@
scanner.MoveNext();
var byteOffset = (NumericToken)scanner.CurrentToken;
objects.Add(Tuple.Create(objectNumber.Long, byteOffset.Long));
objects.Add((objectNumber.Long, byteOffset.Long));
}
var results = new List<ObjectToken>();

View File

@ -55,8 +55,7 @@
var colorSpaceDetails = GetColorSpaceDetails(colorSpace, imageDictionary.Without(NameToken.Filter).Without(NameToken.F), scanner, resourceStore, filterProvider, true);
var decodeRaw = imageDictionary.GetObjectOrDefault(NameToken.Decode, NameToken.D) as ArrayToken
?? new ArrayToken(Array.Empty<IToken>());
var decodeRaw = imageDictionary.GetObjectOrDefault(NameToken.Decode, NameToken.D) as ArrayToken ?? new ArrayToken([]);
var decode = decodeRaw.Data.OfType<NumericToken>().Select(x => x.Double).ToArray();
return IndexedColorSpaceDetails.Stencil(colorSpaceDetails, decode);
@ -341,7 +340,7 @@
if (DirectObjectFinder.TryGet(fourth, scanner, out HexToken? tableHexToken))
{
tableBytes = tableHexToken.Bytes;
tableBytes = [.. tableHexToken.Bytes];
}
else if (DirectObjectFinder.TryGet(fourth, scanner, out StreamToken? tableStreamToken))
{

View File

@ -16,7 +16,7 @@
private static readonly TokenWriter TokenWriter = new TokenWriter();
public static IReadOnlyList<byte> ConvertToCMapStream(IReadOnlyDictionary<char, byte> unicodeToCharacterCode)
public static byte[] ConvertToCMapStream(IReadOnlyDictionary<char, byte> unicodeToCharacterCode)
{
using (var memoryStream = new MemoryStream())
{

View File

@ -702,7 +702,9 @@
var png = Png.Open(pngStream);
if (placementRectangle.Equals(default(PdfRectangle)))
{
placementRectangle = new PdfRectangle(0, 0, png.Width, png.Height);
}
byte[] data;
var pixelBuffer = new byte[3];