removing locking

This commit is contained in:
Plaisted 2021-01-19 18:06:50 -06:00
parent 60f3005544
commit 9bfe69aef1
4 changed files with 466 additions and 465 deletions

View File

@ -1,158 +1,158 @@
namespace UglyToad.PdfPig.Tokenization
{
using System;
using System.Globalization;
using System.Text;
using Core;
using Tokens;
/// <summary>
/// Tokenizer which reads a run of number characters from the input and turns it into a numeric token.
/// </summary>
internal class NumericTokenizer : ITokenizer
{
    private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(10);

    // ASCII codes of the characters '0' and '9'.
    private const byte Zero = 48;
    private const byte Nine = 57;

    public bool ReadsNextByte => true;

    /// <summary>
    /// Attempts to read a number which begins at <paramref name="currentByte"/>.
    /// </summary>
    /// <param name="currentByte">The byte the caller has already read; must be a digit, '-', '+' or '.' for a number to be read.</param>
    /// <param name="inputBytes">The source of the following bytes. It is advanced until a byte which cannot be part of a number is met or the input ends.</param>
    /// <param name="token">The resulting token, or null when false is returned.</param>
    /// <returns>True when a numeric token was produced, otherwise false.</returns>
    public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
    {
        token = null;

        if (!IsNumberStart(currentByte))
        {
            return false;
        }

        var buffer = StringBuilderPool.Borrow();
        buffer.Append((char)currentByte);

        while (inputBytes.MoveNext())
        {
            var next = inputBytes.CurrentByte;

            if (!IsNumberContinuation(next))
            {
                break;
            }

            buffer.Append((char)next);
        }

        try
        {
            var text = buffer.ToString();
            StringBuilderPool.Return(buffer);

            // Frequently occurring values reuse the shared instances exposed by NumericToken.
            if (TryGetCachedToken(text, out token))
            {
                return true;
            }

            if (!decimal.TryParse(text, NumberStyles.Any, CultureInfo.InvariantCulture, out var parsed))
            {
                return false;
            }

            token = new NumericToken(parsed);
            return true;
        }
        catch (FormatException)
        {
            return false;
        }
        catch (OverflowException)
        {
            return false;
        }
    }

    // True for the characters which may open a number: a digit, a sign or a decimal point.
    private static bool IsNumberStart(byte value)
    {
        return (value >= Zero && value <= Nine) || value == '-' || value == '+' || value == '.';
    }

    // True for the characters which may follow the first one: everything allowed at the start plus an exponent marker.
    private static bool IsNumberContinuation(byte value)
    {
        return IsNumberStart(value) || value == 'E' || value == 'e';
    }

    // Maps the exact text of common numbers onto the shared NumericToken instances.
    private static bool TryGetCachedToken(string text, out IToken cached)
    {
        switch (text)
        {
            case "-1":
                cached = NumericToken.MinusOne;
                break;
            case "-":
            case ".":
            case "0":
            case "0000":
                cached = NumericToken.Zero;
                break;
            case "1":
                cached = NumericToken.One;
                break;
            case "2":
                cached = NumericToken.Two;
                break;
            case "3":
                cached = NumericToken.Three;
                break;
            case "4":
                cached = NumericToken.Four;
                break;
            case "5":
                cached = NumericToken.Five;
                break;
            case "6":
                cached = NumericToken.Six;
                break;
            case "7":
                cached = NumericToken.Seven;
                break;
            case "8":
                cached = NumericToken.Eight;
                break;
            case "9":
                cached = NumericToken.Nine;
                break;
            case "10":
                cached = NumericToken.Ten;
                break;
            case "11":
                cached = NumericToken.Eleven;
                break;
            case "12":
                cached = NumericToken.Twelve;
                break;
            case "13":
                cached = NumericToken.Thirteen;
                break;
            case "14":
                cached = NumericToken.Fourteen;
                break;
            case "15":
                cached = NumericToken.Fifteen;
                break;
            case "16":
                cached = NumericToken.Sixteen;
                break;
            case "17":
                cached = NumericToken.Seventeen;
                break;
            case "18":
                cached = NumericToken.Eighteen;
                break;
            case "19":
                cached = NumericToken.Nineteen;
                break;
            case "20":
                cached = NumericToken.Twenty;
                break;
            case "100":
                cached = NumericToken.OneHundred;
                break;
            case "500":
                cached = NumericToken.FiveHundred;
                break;
            case "1000":
                cached = NumericToken.OneThousand;
                break;
            default:
                cached = null;
                return false;
        }

        return true;
    }
}
}
namespace UglyToad.PdfPig.Tokenization
{
using System;
using System.Globalization;
using System.Text;
using Core;
using Tokens;
/// <summary>
/// Tokenizer which reads a run of number characters from the input and turns it into a numeric token.
/// </summary>
/// <remarks>
/// The character buffer is an instance field which is reused by every call,
/// so a single instance must not be used by more than one caller at the same time.
/// </remarks>
internal class NumericTokenizer : ITokenizer
{
    // Scratch buffer reused across calls to avoid allocating a builder per token.
    private readonly StringBuilder stringBuilder = new();

    // ASCII codes of the characters '0' and '9'.
    private const byte Zero = 48;
    private const byte Nine = 57;

    public bool ReadsNextByte { get; } = true;

    /// <summary>
    /// Attempts to read a number which begins at <paramref name="currentByte"/>.
    /// </summary>
    /// <param name="currentByte">The byte the caller has already read; must be a digit, '-', '+' or '.' for a number to be read.</param>
    /// <param name="inputBytes">The source of the following bytes. It is advanced until a byte which cannot be part of a number is met or the input ends.</param>
    /// <param name="token">The resulting token, or null when false is returned.</param>
    /// <returns>True when a numeric token was produced, otherwise false.</returns>
    public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
    {
        token = null;

        StringBuilder characters;

        if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.')
        {
            characters = stringBuilder;

            // The buffer outlives this call. If a previous call left through an exception
            // (for example thrown while reading the input) it still holds that call's characters,
            // so always start from an empty buffer.
            characters.Clear();
            characters.Append((char)currentByte);
        }
        else
        {
            return false;
        }

        while (inputBytes.MoveNext())
        {
            var b = inputBytes.CurrentByte;

            if ((b >= Zero && b <= Nine) ||
                b == '-' ||
                b == '+' ||
                b == '.' ||
                b == 'E' ||
                b == 'e')
            {
                characters.Append((char)b);
            }
            else
            {
                break;
            }
        }

        try
        {
            var str = characters.ToString();
            characters.Clear();

            // Frequently occurring values reuse the shared instances exposed by NumericToken.
            switch (str)
            {
                case "-1":
                    token = NumericToken.MinusOne;
                    return true;
                case "-":
                case ".":
                case "0":
                case "0000":
                    token = NumericToken.Zero;
                    return true;
                case "1":
                    token = NumericToken.One;
                    return true;
                case "2":
                    token = NumericToken.Two;
                    return true;
                case "3":
                    token = NumericToken.Three;
                    return true;
                case "4":
                    token = NumericToken.Four;
                    return true;
                case "5":
                    token = NumericToken.Five;
                    return true;
                case "6":
                    token = NumericToken.Six;
                    return true;
                case "7":
                    token = NumericToken.Seven;
                    return true;
                case "8":
                    token = NumericToken.Eight;
                    return true;
                case "9":
                    token = NumericToken.Nine;
                    return true;
                case "10":
                    token = NumericToken.Ten;
                    return true;
                case "11":
                    token = NumericToken.Eleven;
                    return true;
                case "12":
                    token = NumericToken.Twelve;
                    return true;
                case "13":
                    token = NumericToken.Thirteen;
                    return true;
                case "14":
                    token = NumericToken.Fourteen;
                    return true;
                case "15":
                    token = NumericToken.Fifteen;
                    return true;
                case "16":
                    token = NumericToken.Sixteen;
                    return true;
                case "17":
                    token = NumericToken.Seventeen;
                    return true;
                case "18":
                    token = NumericToken.Eighteen;
                    return true;
                case "19":
                    token = NumericToken.Nineteen;
                    return true;
                case "20":
                    token = NumericToken.Twenty;
                    return true;
                case "100":
                    token = NumericToken.OneHundred;
                    return true;
                case "500":
                    token = NumericToken.FiveHundred;
                    return true;
                case "1000":
                    token = NumericToken.OneThousand;
                    return true;
                default:
                    // Anything else is parsed with the invariant culture; unparseable text (for example "+") is rejected.
                    if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value))
                    {
                        return false;
                    }

                    token = new NumericToken(value);
                    return true;
            }
        }
        catch (FormatException)
        {
            return false;
        }
        catch (OverflowException)
        {
            return false;
        }
    }
}
}

View File

@ -15,9 +15,10 @@
// Tokenizers held in static fields: a single instance is shared by all instances of the enclosing class.
private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer();
private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
private static readonly StringTokenizer StringTokenizer = new StringTokenizer();
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
// Tokenizers held in instance fields: every instance of the enclosing class creates its own.
private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
private readonly StringTokenizer StringTokenizer = new StringTokenizer();
// NOTE(review): presumably the scope and the byte source the enclosing class reads from - confirm against its constructor, which is not visible here.
private readonly ScannerScope scope;
private readonly IInputBytes inputBytes;

View File

@ -1,281 +1,281 @@
namespace UglyToad.PdfPig.Tokenization
{
using System.Text;
using Core;
using Tokens;
internal class StringTokenizer : ITokenizer
{
private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(16);
public bool ReadsNextByte { get; } = false;
/// <summary>
/// Attempts to read a string enclosed in round brackets, starting from the opening bracket in <paramref name="currentByte"/>.
/// </summary>
/// <param name="currentByte">The byte the caller has already read; must be '(' for a string to be read.</param>
/// <param name="inputBytes">The source of the following bytes. It is advanced until the brackets balance, an end-of-string heuristic fires or the input ends.</param>
/// <param name="token">The resulting string token, or null when false is returned.</param>
/// <returns>False when <paramref name="inputBytes"/> is null or <paramref name="currentByte"/> is not '(', otherwise true.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
token = null;
if (inputBytes == null)
{
return false;
}
if (currentByte != '(')
{
return false;
}
var builder = StringBuilderPool.Borrow();
// The opening bracket passed in as currentByte counts as the first open bracket.
var numberOfBrackets = 1;
// True when the previous character was an unescaped backslash.
var isEscapeActive = false;
// True after a backslash followed by a line break; further end-of-line characters are then skipped.
var isLineBreaking = false;
// True while the digits of a backslash octal escape are being collected.
var octalModeActive = false;
// Digits of the octal escape read so far; the most recently read digit is kept at index 0.
short[] octal = { 0, 0, 0 };
var octalsRead = 0;
while (inputBytes.MoveNext())
{
var b = inputBytes.CurrentByte;
var c = (char)b;
if (octalModeActive)
{
var nextCharacterOctal = c >= '0' && c <= '7';
if (nextCharacterOctal)
{
// left shift the octals.
LeftShiftOctal(c, octalsRead, octal);
octalsRead++;
}
// The escape is complete after three digits or at the first character which is not an octal digit.
if (octalsRead == 3 || !nextCharacterOctal)
{
var characterCode = OctalHelpers.FromOctalDigits(octal);
// For now :(
// TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
builder.Append((char)characterCode);
octal[0] = 0;
octal[1] = 0;
octal[2] = 0;
octalsRead = 0;
octalModeActive = false;
}
// An octal digit has been consumed by the escape; any other character is handled by the switch below.
if (nextCharacterOctal)
{
continue;
}
}
switch (c)
{
case ')':
isLineBreaking = false;
// An escaped closing bracket does not change the nesting depth.
if (!isEscapeActive)
{
numberOfBrackets--;
}
isEscapeActive = false;
// The bracket which closes the whole string is not part of its content.
if (numberOfBrackets > 0)
{
builder.Append(c);
}
// TODO: Check for other ends of string where the string is improperly formatted. See commented method
numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);
break;
case '(':
isLineBreaking = false;
// An escaped opening bracket does not change the nesting depth.
if (!isEscapeActive)
{
numberOfBrackets++;
}
isEscapeActive = false;
builder.Append(c);
break;
// Escape
case '\\':
isLineBreaking = false;
// Escaped backslash
if (isEscapeActive)
{
builder.Append(c);
isEscapeActive = false;
}
else
{
isEscapeActive = true;
}
break;
default:
if (isLineBreaking)
{
// Skip the remaining end-of-line characters of an escaped line break.
if (ReadHelper.IsEndOfLine(c))
{
continue;
}
isLineBreaking = false;
builder.Append(c);
}
else if (isEscapeActive)
{
ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
isEscapeActive = false;
}
else
{
builder.Append(c);
}
break;
}
// All brackets are closed, or CheckForEndOfString forced the count to zero.
if (numberOfBrackets <= 0)
{
break;
}
}
// Reached when the string has ended and also when the input ran out first; in both cases a token is produced.
StringToken.Encoding encodedWith;
string tokenStr;
if (builder.Length >= 2)
{
// Content starting with 0xFE 0xFF is decoded as big-endian UTF-16.
if (builder[0] == 0xFE && builder[1] == 0xFF)
{
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
// Substring(1) drops the first decoded character - presumably the byte order mark; TODO confirm.
tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);
encodedWith = StringToken.Encoding.Utf16BE;
}
// Content starting with 0xFF 0xFE is decoded as little-endian UTF-16.
else if (builder[0] == 0xFF && builder[1] == 0xFE)
{
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);
encodedWith = StringToken.Encoding.Utf16;
}
else
{
tokenStr = builder.ToString();
encodedWith = StringToken.Encoding.Iso88591;
}
}
else
{
tokenStr = builder.ToString();
encodedWith = StringToken.Encoding.Iso88591;
}
// Hand the builder back to the pool now that its content has been copied out.
StringBuilderPool.Return(builder);
token = new StringToken(tokenStr, encodedWith);
return true;
}
// Makes room at index 0 for a newly read octal digit by moving the digits read so far one slot up,
// then stores the new digit's value at index 0.
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
{
    var slot = octalsRead;

    while (slot > 0)
    {
        octals[slot] = octals[slot - 1];
        slot--;
    }

    octals[0] = nextOctalChar.CharacterToShort();
}
// Handles the character which directly follows an unescaped backslash:
// an octal digit starts an octal escape, a line break starts line-break skipping,
// a known escape letter appends its control character and anything else is appended as-is.
private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive,
    ref int octalsRead, ref bool isLineBreaking)
{
    if (c >= '0' && c <= '7')
    {
        // First digit of an octal escape; the caller collects the remaining digits.
        octal[0] = c.CharacterToShort();
        isOctalActive = true;
        octalsRead = 1;
        return;
    }

    char unescaped;

    switch (c)
    {
        case 'n':
            unescaped = '\n';
            break;
        case 'r':
            unescaped = '\r';
            break;
        case 't':
            unescaped = '\t';
            break;
        case 'b':
            unescaped = '\b';
            break;
        case 'f':
            unescaped = '\f';
            break;
        default:
            if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed)
            {
                // Backslash followed by a line break: nothing is appended.
                isLineBreaking = true;
                return;
            }

            // Drop the backslash
            unescaped = c;
            break;
    }

    builder.Append(unescaped);
}
// Looks at the bytes which follow a closing bracket to detect the end of an improperly formatted string.
// Returns 0 when one of the end markers below is found, otherwise returns numberOfBrackets unchanged.
// The input is moved back to where it was before the look-ahead whenever any bytes were read.
private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes)
{
    const byte lineFeed = 10;
    const byte carriageReturn = 13;

    var braces = numberOfBrackets;
    var nextThreeBytes = new byte[3];
    var startAt = bytes.CurrentOffset;
    var amountRead = bytes.Read(nextThreeBytes);

    // Check the next 3 bytes if available
    // The following cases are valid indicators for the end of the string
    // 1. Next line contains another COSObject: CR + LF + '/'
    // 2. COSDictionary ends in the next line: CR + LF + '>'
    // 3. Next line contains another COSObject: CR + '/'
    // 4. COSDictionary ends in the next line: CR + '>'
    if (amountRead == 3 && nextThreeBytes[0] == carriageReturn)
    {
        var second = nextThreeBytes[1];
        var third = nextThreeBytes[2];

        // The third byte only counts when the second byte is a line feed (cases 1 and 2).
        // Previously a misplaced bracket let CR + any byte + '>' end the string as well.
        var endsAfterCrLf = second == lineFeed && (third == '/' || third == '>');

        // Cases 3 and 4.
        var endsAfterCr = second == '/' || second == '>';

        if (endsAfterCrLf || endsAfterCr)
        {
            braces = 0;
        }
    }

    if (amountRead > 0)
    {
        bytes.Seek(startAt);
    }

    return braces;
}
}
namespace UglyToad.PdfPig.Tokenization
{
using System.Text;
using Core;
using Tokens;
internal class StringTokenizer : ITokenizer
{
private readonly StringBuilder stringBuilder = new();
public bool ReadsNextByte { get; } = false;
/// <summary>
/// Attempts to read a string enclosed in round brackets, starting from the opening bracket in <paramref name="currentByte"/>.
/// </summary>
/// <remarks>
/// The character buffer is an instance field which is reused by every call,
/// so a single instance must not be used by more than one caller at the same time.
/// </remarks>
/// <param name="currentByte">The byte the caller has already read; must be '(' for a string to be read.</param>
/// <param name="inputBytes">The source of the following bytes. It is advanced until the brackets balance, an end-of-string heuristic fires or the input ends.</param>
/// <param name="token">The resulting string token, or null when false is returned.</param>
/// <returns>False when <paramref name="inputBytes"/> is null or <paramref name="currentByte"/> is not '(', otherwise true.</returns>
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
    token = null;

    if (inputBytes == null)
    {
        return false;
    }

    if (currentByte != '(')
    {
        return false;
    }

    var builder = stringBuilder;

    // The buffer outlives this call. If a previous call left through an exception
    // (for example thrown while reading the input or decoding) it still holds that call's characters,
    // so always start from an empty buffer.
    builder.Clear();

    // The opening bracket passed in as currentByte counts as the first open bracket.
    var numberOfBrackets = 1;
    // True when the previous character was an unescaped backslash.
    var isEscapeActive = false;
    // True after a backslash followed by a line break; further end-of-line characters are then skipped.
    var isLineBreaking = false;
    // True while the digits of a backslash octal escape are being collected.
    var octalModeActive = false;
    // Digits of the octal escape read so far; the most recently read digit is kept at index 0.
    short[] octal = { 0, 0, 0 };
    var octalsRead = 0;

    while (inputBytes.MoveNext())
    {
        var b = inputBytes.CurrentByte;
        var c = (char)b;

        if (octalModeActive)
        {
            var nextCharacterOctal = c >= '0' && c <= '7';

            if (nextCharacterOctal)
            {
                // left shift the octals.
                LeftShiftOctal(c, octalsRead, octal);
                octalsRead++;
            }

            // The escape is complete after three digits or at the first character which is not an octal digit.
            if (octalsRead == 3 || !nextCharacterOctal)
            {
                var characterCode = OctalHelpers.FromOctalDigits(octal);

                // For now :(
                // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
                builder.Append((char)characterCode);

                octal[0] = 0;
                octal[1] = 0;
                octal[2] = 0;
                octalsRead = 0;
                octalModeActive = false;
            }

            // An octal digit has been consumed by the escape; any other character is handled by the switch below.
            if (nextCharacterOctal)
            {
                continue;
            }
        }

        switch (c)
        {
            case ')':
                isLineBreaking = false;

                // An escaped closing bracket does not change the nesting depth.
                if (!isEscapeActive)
                {
                    numberOfBrackets--;
                }

                isEscapeActive = false;

                // The bracket which closes the whole string is not part of its content.
                if (numberOfBrackets > 0)
                {
                    builder.Append(c);
                }

                // TODO: Check for other ends of string where the string is improperly formatted. See commented method
                numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);

                break;
            case '(':
                isLineBreaking = false;

                // An escaped opening bracket does not change the nesting depth.
                if (!isEscapeActive)
                {
                    numberOfBrackets++;
                }

                isEscapeActive = false;
                builder.Append(c);
                break;
            // Escape
            case '\\':
                isLineBreaking = false;

                // Escaped backslash
                if (isEscapeActive)
                {
                    builder.Append(c);
                    isEscapeActive = false;
                }
                else
                {
                    isEscapeActive = true;
                }

                break;
            default:
                if (isLineBreaking)
                {
                    // Skip the remaining end-of-line characters of an escaped line break.
                    if (ReadHelper.IsEndOfLine(c))
                    {
                        continue;
                    }

                    isLineBreaking = false;
                    builder.Append(c);
                }
                else if (isEscapeActive)
                {
                    ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
                    isEscapeActive = false;
                }
                else
                {
                    builder.Append(c);
                }

                break;
        }

        // All brackets are closed, or CheckForEndOfString forced the count to zero.
        if (numberOfBrackets <= 0)
        {
            break;
        }
    }

    // Reached when the string has ended and also when the input ran out first; in both cases a token is produced.
    StringToken.Encoding encodedWith;
    string tokenStr;

    if (builder.Length >= 2)
    {
        if (builder[0] == 0xFE && builder[1] == 0xFF)
        {
            // Content starting with 0xFE 0xFF is decoded as big-endian UTF-16.
            var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());

            // Substring(1) drops the first decoded character - presumably the byte order mark; TODO confirm.
            tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);
            encodedWith = StringToken.Encoding.Utf16BE;
        }
        else if (builder[0] == 0xFF && builder[1] == 0xFE)
        {
            // Content starting with 0xFF 0xFE is decoded as little-endian UTF-16.
            var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
            tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);
            encodedWith = StringToken.Encoding.Utf16;
        }
        else
        {
            tokenStr = builder.ToString();
            encodedWith = StringToken.Encoding.Iso88591;
        }
    }
    else
    {
        tokenStr = builder.ToString();
        encodedWith = StringToken.Encoding.Iso88591;
    }

    // The content has been copied out, so the buffer does not need to keep it until the next call.
    builder.Clear();

    token = new StringToken(tokenStr, encodedWith);

    return true;
}
// Makes room at index 0 for a newly read octal digit by moving the digits read so far one slot up,
// then stores the new digit's value at index 0.
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
{
    var slot = octalsRead;

    while (slot > 0)
    {
        octals[slot] = octals[slot - 1];
        slot--;
    }

    octals[0] = nextOctalChar.CharacterToShort();
}
// Handles the character which directly follows an unescaped backslash:
// an octal digit starts an octal escape, a line break starts line-break skipping,
// a known escape letter appends its control character and anything else is appended as-is.
private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive,
    ref int octalsRead, ref bool isLineBreaking)
{
    if (c >= '0' && c <= '7')
    {
        // First digit of an octal escape; the caller collects the remaining digits.
        octal[0] = c.CharacterToShort();
        isOctalActive = true;
        octalsRead = 1;
        return;
    }

    char unescaped;

    switch (c)
    {
        case 'n':
            unescaped = '\n';
            break;
        case 'r':
            unescaped = '\r';
            break;
        case 't':
            unescaped = '\t';
            break;
        case 'b':
            unescaped = '\b';
            break;
        case 'f':
            unescaped = '\f';
            break;
        default:
            if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed)
            {
                // Backslash followed by a line break: nothing is appended.
                isLineBreaking = true;
                return;
            }

            // Drop the backslash
            unescaped = c;
            break;
    }

    builder.Append(unescaped);
}
// Looks at the bytes which follow a closing bracket to detect the end of an improperly formatted string.
// Returns 0 when one of the end markers below is found, otherwise returns numberOfBrackets unchanged.
// The input is moved back to where it was before the look-ahead whenever any bytes were read.
private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes)
{
    const byte lineFeed = 10;
    const byte carriageReturn = 13;

    var braces = numberOfBrackets;
    var nextThreeBytes = new byte[3];
    var startAt = bytes.CurrentOffset;
    var amountRead = bytes.Read(nextThreeBytes);

    // Check the next 3 bytes if available
    // The following cases are valid indicators for the end of the string
    // 1. Next line contains another COSObject: CR + LF + '/'
    // 2. COSDictionary ends in the next line: CR + LF + '>'
    // 3. Next line contains another COSObject: CR + '/'
    // 4. COSDictionary ends in the next line: CR + '>'
    if (amountRead == 3 && nextThreeBytes[0] == carriageReturn)
    {
        var second = nextThreeBytes[1];
        var third = nextThreeBytes[2];

        // The third byte only counts when the second byte is a line feed (cases 1 and 2).
        // Previously a misplaced bracket let CR + any byte + '>' end the string as well.
        var endsAfterCrLf = second == lineFeed && (third == '/' || third == '>');

        // Cases 3 and 4.
        var endsAfterCr = second == '/' || second == '>';

        if (endsAfterCrLf || endsAfterCr)
        {
            braces = 0;
        }
    }

    if (amountRead > 0)
    {
        bytes.Seek(startAt);
    }

    return braces;
}
}
}

View File

@ -1,25 +1,25 @@
<!-- SDK-style project file; builds for .NET Standard 2.0 and the listed .NET Framework versions. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
<LangVersion>latest</LangVersion>
<Version>0.1.4</Version>
<IsTestProject>False</IsTestProject>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<!-- The assembly is signed with the key file one directory above the project. -->
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>..\pdfpig.snk</AssemblyOriginatorKeyFile>
</PropertyGroup>
<!-- Warnings are treated as errors only for the Debug / netstandard2.0 / AnyCPU combination. -->
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<WarningsAsErrors />
</PropertyGroup>
<!-- The System.ValueTuple package is referenced only when building for one of the .NET Framework targets. -->
<ItemGroup Condition="'$(TargetFramework)'=='net45' OR '$(TargetFramework)'=='net451' OR '$(TargetFramework)'=='net452' OR '$(TargetFramework)'=='net46' OR '$(TargetFramework)'=='net461' OR '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net47'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<!-- Shows the signing key in the project as a linked file. -->
<ItemGroup>
<None Include="..\pdfpig.snk" Link="pdfpig.snk" />
</ItemGroup>
<!-- Sibling projects this project depends on. -->
<ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup>
<!-- SDK-style project file; builds for .NET Standard 2.0 and the listed .NET Framework versions. -->
<Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup>
<TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
<LangVersion>latest</LangVersion>
<Version>0.1.4</Version>
<IsTestProject>False</IsTestProject>
<GenerateDocumentationFile>true</GenerateDocumentationFile>
<!-- The assembly is signed with the key file one directory above the project. -->
<SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>..\pdfpig.snk</AssemblyOriginatorKeyFile>
</PropertyGroup>
<!-- Warnings are treated as errors only for the Debug / netstandard2.0 / AnyCPU combination. -->
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
<TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<WarningsAsErrors />
</PropertyGroup>
<!-- The System.ValueTuple package is referenced only when building for one of the .NET Framework targets. -->
<ItemGroup Condition="'$(TargetFramework)'=='net45' OR '$(TargetFramework)'=='net451' OR '$(TargetFramework)'=='net452' OR '$(TargetFramework)'=='net46' OR '$(TargetFramework)'=='net461' OR '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net47'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup>
<!-- Shows the signing key in the project as a linked file. -->
<ItemGroup>
<None Include="..\pdfpig.snk" Link="pdfpig.snk" />
</ItemGroup>
<!-- Sibling projects this project depends on. -->
<ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup>
</Project>