removing locking

This commit is contained in:
Plaisted 2021-01-19 18:06:50 -06:00
parent 60f3005544
commit 9bfe69aef1
4 changed files with 466 additions and 465 deletions

View File

@ -1,158 +1,158 @@
namespace UglyToad.PdfPig.Tokenization namespace UglyToad.PdfPig.Tokenization
{ {
using System; using System;
using System.Globalization; using System.Globalization;
using System.Text; using System.Text;
using Core; using Core;
using Tokens; using Tokens;
internal class NumericTokenizer : ITokenizer internal class NumericTokenizer : ITokenizer
{ {
private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(10); private readonly StringBuilder stringBuilder = new();
private const byte Zero = 48; private const byte Zero = 48;
private const byte Nine = 57; private const byte Nine = 57;
public bool ReadsNextByte { get; } = true; public bool ReadsNextByte { get; } = true;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{ {
token = null; token = null;
StringBuilder characters; StringBuilder characters;
if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.') if ((currentByte >= Zero && currentByte <= Nine) || currentByte == '-' || currentByte == '+' || currentByte == '.')
{ {
characters = StringBuilderPool.Borrow(); characters = stringBuilder;
characters.Append((char)currentByte); characters.Append((char)currentByte);
} }
else else
{ {
return false; return false;
} }
while (inputBytes.MoveNext()) while (inputBytes.MoveNext())
{ {
var b = inputBytes.CurrentByte; var b = inputBytes.CurrentByte;
if ((b >= Zero && b <= Nine) || if ((b >= Zero && b <= Nine) ||
b == '-' || b == '-' ||
b == '+' || b == '+' ||
b == '.' || b == '.' ||
b == 'E' || b == 'E' ||
b == 'e') b == 'e')
{ {
characters.Append((char)b); characters.Append((char)b);
} }
else else
{ {
break; break;
} }
} }
try try
{ {
var str = characters.ToString(); var str = characters.ToString();
StringBuilderPool.Return(characters); characters.Clear();
switch (str) switch (str)
{ {
case "-1": case "-1":
token = NumericToken.MinusOne; token = NumericToken.MinusOne;
return true; return true;
case "-": case "-":
case ".": case ".":
case "0": case "0":
case "0000": case "0000":
token = NumericToken.Zero; token = NumericToken.Zero;
return true; return true;
case "1": case "1":
token = NumericToken.One; token = NumericToken.One;
return true; return true;
case "2": case "2":
token = NumericToken.Two; token = NumericToken.Two;
return true; return true;
case "3": case "3":
token = NumericToken.Three; token = NumericToken.Three;
return true; return true;
case "4": case "4":
token = NumericToken.Four; token = NumericToken.Four;
return true; return true;
case "5": case "5":
token = NumericToken.Five; token = NumericToken.Five;
return true; return true;
case "6": case "6":
token = NumericToken.Six; token = NumericToken.Six;
return true; return true;
case "7": case "7":
token = NumericToken.Seven; token = NumericToken.Seven;
return true; return true;
case "8": case "8":
token = NumericToken.Eight; token = NumericToken.Eight;
return true; return true;
case "9": case "9":
token = NumericToken.Nine; token = NumericToken.Nine;
return true; return true;
case "10": case "10":
token = NumericToken.Ten; token = NumericToken.Ten;
return true; return true;
case "11": case "11":
token = NumericToken.Eleven; token = NumericToken.Eleven;
return true; return true;
case "12": case "12":
token = NumericToken.Twelve; token = NumericToken.Twelve;
return true; return true;
case "13": case "13":
token = NumericToken.Thirteen; token = NumericToken.Thirteen;
return true; return true;
case "14": case "14":
token = NumericToken.Fourteen; token = NumericToken.Fourteen;
return true; return true;
case "15": case "15":
token = NumericToken.Fifteen; token = NumericToken.Fifteen;
return true; return true;
case "16": case "16":
token = NumericToken.Sixteen; token = NumericToken.Sixteen;
return true; return true;
case "17": case "17":
token = NumericToken.Seventeen; token = NumericToken.Seventeen;
return true; return true;
case "18": case "18":
token = NumericToken.Eighteen; token = NumericToken.Eighteen;
return true; return true;
case "19": case "19":
token = NumericToken.Nineteen; token = NumericToken.Nineteen;
return true; return true;
case "20": case "20":
token = NumericToken.Twenty; token = NumericToken.Twenty;
return true; return true;
case "100": case "100":
token = NumericToken.OneHundred; token = NumericToken.OneHundred;
return true; return true;
case "500": case "500":
token = NumericToken.FiveHundred; token = NumericToken.FiveHundred;
return true; return true;
case "1000": case "1000":
token = NumericToken.OneThousand; token = NumericToken.OneThousand;
return true; return true;
default: default:
if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value)) if (!decimal.TryParse(str, NumberStyles.Any, CultureInfo.InvariantCulture, out var value))
{ {
return false; return false;
} }
token = new NumericToken(value); token = new NumericToken(value);
return true; return true;
} }
} }
catch (FormatException) catch (FormatException)
{ {
return false; return false;
} }
catch (OverflowException) catch (OverflowException)
{ {
return false; return false;
} }
} }
} }
} }

View File

@ -15,9 +15,10 @@
private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer(); private static readonly DictionaryTokenizer DictionaryTokenizer = new DictionaryTokenizer();
private static readonly HexTokenizer HexTokenizer = new HexTokenizer(); private static readonly HexTokenizer HexTokenizer = new HexTokenizer();
private static readonly NameTokenizer NameTokenizer = new NameTokenizer(); private static readonly NameTokenizer NameTokenizer = new NameTokenizer();
private static readonly NumericTokenizer NumericTokenizer = new NumericTokenizer(); private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
private static readonly PlainTokenizer PlainTokenizer = new PlainTokenizer();
private static readonly StringTokenizer StringTokenizer = new StringTokenizer(); private readonly NumericTokenizer NumericTokenizer = new NumericTokenizer();
private readonly StringTokenizer StringTokenizer = new StringTokenizer();
private readonly ScannerScope scope; private readonly ScannerScope scope;
private readonly IInputBytes inputBytes; private readonly IInputBytes inputBytes;

View File

@ -1,281 +1,281 @@
namespace UglyToad.PdfPig.Tokenization namespace UglyToad.PdfPig.Tokenization
{ {
using System.Text; using System.Text;
using Core; using Core;
using Tokens; using Tokens;
internal class StringTokenizer : ITokenizer internal class StringTokenizer : ITokenizer
{ {
private static readonly StringBuilderPool StringBuilderPool = new StringBuilderPool(16); private readonly StringBuilder stringBuilder = new();
public bool ReadsNextByte { get; } = false; public bool ReadsNextByte { get; } = false;
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token) public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{ {
token = null; token = null;
if (inputBytes == null) if (inputBytes == null)
{ {
return false; return false;
} }
if (currentByte != '(') if (currentByte != '(')
{ {
return false; return false;
} }
var builder = StringBuilderPool.Borrow(); var builder = stringBuilder;
var numberOfBrackets = 1; var numberOfBrackets = 1;
var isEscapeActive = false; var isEscapeActive = false;
var isLineBreaking = false; var isLineBreaking = false;
var octalModeActive = false; var octalModeActive = false;
short[] octal = { 0, 0, 0 }; short[] octal = { 0, 0, 0 };
var octalsRead = 0; var octalsRead = 0;
while (inputBytes.MoveNext()) while (inputBytes.MoveNext())
{ {
var b = inputBytes.CurrentByte; var b = inputBytes.CurrentByte;
var c = (char)b; var c = (char)b;
if (octalModeActive) if (octalModeActive)
{ {
var nextCharacterOctal = c >= '0' && c <= '7'; var nextCharacterOctal = c >= '0' && c <= '7';
if (nextCharacterOctal) if (nextCharacterOctal)
{ {
// left shift the octals. // left shift the octals.
LeftShiftOctal(c, octalsRead, octal); LeftShiftOctal(c, octalsRead, octal);
octalsRead++; octalsRead++;
} }
if (octalsRead == 3 || !nextCharacterOctal) if (octalsRead == 3 || !nextCharacterOctal)
{ {
var characterCode = OctalHelpers.FromOctalDigits(octal); var characterCode = OctalHelpers.FromOctalDigits(octal);
// For now :( // For now :(
// TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers // TODO: I have a sneaking suspicion this is wrong, not sure what behaviour is for large octal numbers
builder.Append((char)characterCode); builder.Append((char)characterCode);
octal[0] = 0; octal[0] = 0;
octal[1] = 0; octal[1] = 0;
octal[2] = 0; octal[2] = 0;
octalsRead = 0; octalsRead = 0;
octalModeActive = false; octalModeActive = false;
} }
if (nextCharacterOctal) if (nextCharacterOctal)
{ {
continue; continue;
} }
} }
switch (c) switch (c)
{ {
case ')': case ')':
isLineBreaking = false; isLineBreaking = false;
if (!isEscapeActive) if (!isEscapeActive)
{ {
numberOfBrackets--; numberOfBrackets--;
} }
isEscapeActive = false; isEscapeActive = false;
if (numberOfBrackets > 0) if (numberOfBrackets > 0)
{ {
builder.Append(c); builder.Append(c);
} }
// TODO: Check for other ends of string where the string is improperly formatted. See commented method // TODO: Check for other ends of string where the string is improperly formatted. See commented method
numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes); numberOfBrackets = CheckForEndOfString(numberOfBrackets, inputBytes);
break; break;
case '(': case '(':
isLineBreaking = false; isLineBreaking = false;
if (!isEscapeActive) if (!isEscapeActive)
{ {
numberOfBrackets++; numberOfBrackets++;
} }
isEscapeActive = false; isEscapeActive = false;
builder.Append(c); builder.Append(c);
break; break;
// Escape // Escape
case '\\': case '\\':
isLineBreaking = false; isLineBreaking = false;
// Escaped backslash // Escaped backslash
if (isEscapeActive) if (isEscapeActive)
{ {
builder.Append(c); builder.Append(c);
isEscapeActive = false; isEscapeActive = false;
} }
else else
{ {
isEscapeActive = true; isEscapeActive = true;
} }
break; break;
default: default:
if (isLineBreaking) if (isLineBreaking)
{ {
if (ReadHelper.IsEndOfLine(c)) if (ReadHelper.IsEndOfLine(c))
{ {
continue; continue;
} }
isLineBreaking = false; isLineBreaking = false;
builder.Append(c); builder.Append(c);
} }
else if (isEscapeActive) else if (isEscapeActive)
{ {
ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking); ProcessEscapedCharacter(c, builder, octal, ref octalModeActive, ref octalsRead, ref isLineBreaking);
isEscapeActive = false; isEscapeActive = false;
} }
else else
{ {
builder.Append(c); builder.Append(c);
} }
break; break;
} }
if (numberOfBrackets <= 0) if (numberOfBrackets <= 0)
{ {
break; break;
} }
} }
StringToken.Encoding encodedWith; StringToken.Encoding encodedWith;
string tokenStr; string tokenStr;
if (builder.Length >= 2) if (builder.Length >= 2)
{ {
if (builder[0] == 0xFE && builder[1] == 0xFF) if (builder[0] == 0xFE && builder[1] == 0xFF)
{ {
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1); tokenStr = Encoding.BigEndianUnicode.GetString(rawBytes).Substring(1);
encodedWith = StringToken.Encoding.Utf16BE; encodedWith = StringToken.Encoding.Utf16BE;
} }
else if (builder[0] == 0xFF && builder[1] == 0xFE) else if (builder[0] == 0xFF && builder[1] == 0xFE)
{ {
var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString()); var rawBytes = OtherEncodings.StringAsLatin1Bytes(builder.ToString());
tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1); tokenStr = Encoding.Unicode.GetString(rawBytes).Substring(1);
encodedWith = StringToken.Encoding.Utf16; encodedWith = StringToken.Encoding.Utf16;
} }
else else
{ {
tokenStr = builder.ToString(); tokenStr = builder.ToString();
encodedWith = StringToken.Encoding.Iso88591; encodedWith = StringToken.Encoding.Iso88591;
} }
} }
else else
{ {
tokenStr = builder.ToString(); tokenStr = builder.ToString();
encodedWith = StringToken.Encoding.Iso88591; encodedWith = StringToken.Encoding.Iso88591;
} }
StringBuilderPool.Return(builder); builder.Clear();
token = new StringToken(tokenStr, encodedWith); token = new StringToken(tokenStr, encodedWith);
return true; return true;
} }
private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals) private static void LeftShiftOctal(char nextOctalChar, int octalsRead, short[] octals)
{ {
for (var i = octalsRead; i > 0; i--) for (var i = octalsRead; i > 0; i--)
{ {
octals[i] = octals[i - 1]; octals[i] = octals[i - 1];
} }
var value = nextOctalChar.CharacterToShort(); var value = nextOctalChar.CharacterToShort();
octals[0] = value; octals[0] = value;
} }
private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive, private static void ProcessEscapedCharacter(char c, StringBuilder builder, short[] octal, ref bool isOctalActive,
ref int octalsRead, ref bool isLineBreaking) ref int octalsRead, ref bool isLineBreaking)
{ {
switch (c) switch (c)
{ {
case 'n': case 'n':
builder.Append('\n'); builder.Append('\n');
break; break;
case 'r': case 'r':
builder.Append('\r'); builder.Append('\r');
break; break;
case 't': case 't':
builder.Append('\t'); builder.Append('\t');
break; break;
case 'b': case 'b':
builder.Append('\b'); builder.Append('\b');
break; break;
case 'f': case 'f':
builder.Append('\f'); builder.Append('\f');
break; break;
case '0': case '0':
case '1': case '1':
case '2': case '2':
case '3': case '3':
case '4': case '4':
case '5': case '5':
case '6': case '6':
case '7': case '7':
octal[0] = c.CharacterToShort(); octal[0] = c.CharacterToShort();
isOctalActive = true; isOctalActive = true;
octalsRead = 1; octalsRead = 1;
break; break;
default: default:
if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed) if (c == ReadHelper.AsciiCarriageReturn || c == ReadHelper.AsciiLineFeed)
{ {
isLineBreaking = true; isLineBreaking = true;
} }
else else
{ {
// Drop the backslash // Drop the backslash
builder.Append(c); builder.Append(c);
} }
break; break;
} }
} }
private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes) private static int CheckForEndOfString(int numberOfBrackets, IInputBytes bytes)
{ {
const byte lineFeed = 10; const byte lineFeed = 10;
const byte carriageReturn = 13; const byte carriageReturn = 13;
var braces = numberOfBrackets; var braces = numberOfBrackets;
var nextThreeBytes = new byte[3]; var nextThreeBytes = new byte[3];
var startAt = bytes.CurrentOffset; var startAt = bytes.CurrentOffset;
var amountRead = bytes.Read(nextThreeBytes); var amountRead = bytes.Read(nextThreeBytes);
// Check the next 3 bytes if available // Check the next 3 bytes if available
// The following cases are valid indicators for the end of the string // The following cases are valid indicators for the end of the string
// 1. Next line contains another COSObject: CR + LF + '/' // 1. Next line contains another COSObject: CR + LF + '/'
// 2. COSDictionary ends in the next line: CR + LF + '>' // 2. COSDictionary ends in the next line: CR + LF + '>'
// 3. Next line contains another COSObject: CR + '/' // 3. Next line contains another COSObject: CR + '/'
// 4. COSDictionary ends in the next line: CR + '>' // 4. COSDictionary ends in the next line: CR + '>'
if (amountRead == 3 && nextThreeBytes[0] == carriageReturn) if (amountRead == 3 && nextThreeBytes[0] == carriageReturn)
{ {
if ((nextThreeBytes[1] == lineFeed && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>') if ((nextThreeBytes[1] == lineFeed && (nextThreeBytes[2] == '/') || nextThreeBytes[2] == '>')
|| nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>') || nextThreeBytes[1] == '/' || nextThreeBytes[1] == '>')
{ {
braces = 0; braces = 0;
} }
} }
if (amountRead > 0) if (amountRead > 0)
{ {
bytes.Seek(startAt); bytes.Seek(startAt);
} }
return braces; return braces;
} }
} }
} }

View File

@ -1,25 +1,25 @@
<Project Sdk="Microsoft.NET.Sdk"> <Project Sdk="Microsoft.NET.Sdk">
<PropertyGroup> <PropertyGroup>
<TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks> <TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
<LangVersion>latest</LangVersion> <LangVersion>latest</LangVersion>
<Version>0.1.4</Version> <Version>0.1.4</Version>
<IsTestProject>False</IsTestProject> <IsTestProject>False</IsTestProject>
<GenerateDocumentationFile>true</GenerateDocumentationFile> <GenerateDocumentationFile>true</GenerateDocumentationFile>
<SignAssembly>true</SignAssembly> <SignAssembly>true</SignAssembly>
<AssemblyOriginatorKeyFile>..\pdfpig.snk</AssemblyOriginatorKeyFile> <AssemblyOriginatorKeyFile>..\pdfpig.snk</AssemblyOriginatorKeyFile>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'"> <PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
<TreatWarningsAsErrors>true</TreatWarningsAsErrors> <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
<WarningsAsErrors /> <WarningsAsErrors />
</PropertyGroup> </PropertyGroup>
<ItemGroup Condition="'$(TargetFramework)'=='net45' OR '$(TargetFramework)'=='net451' OR '$(TargetFramework)'=='net452' OR '$(TargetFramework)'=='net46' OR '$(TargetFramework)'=='net461' OR '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net47'"> <ItemGroup Condition="'$(TargetFramework)'=='net45' OR '$(TargetFramework)'=='net451' OR '$(TargetFramework)'=='net452' OR '$(TargetFramework)'=='net46' OR '$(TargetFramework)'=='net461' OR '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net47'">
<PackageReference Include="System.ValueTuple" Version="4.5.0" /> <PackageReference Include="System.ValueTuple" Version="4.5.0" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<None Include="..\pdfpig.snk" Link="pdfpig.snk" /> <None Include="..\pdfpig.snk" Link="pdfpig.snk" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" /> <ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup> </ItemGroup>
</Project> </Project>