move tokenizers to their own project

Since both PDFs and Adobe Type 1 fonts use PostScript-style objects, tokenization is needed by both the main project and the fonts project.
This commit is contained in:
Eliot Jones 2020-01-05 10:40:44 +00:00
parent d09b33af4d
commit bbde38f656
34 changed files with 205 additions and 86 deletions

View File

@ -1,9 +1,15 @@
namespace UglyToad.PdfPig.Util
namespace UglyToad.PdfPig.Core
{
using System;
internal static class OctalHelpers
/// <summary>
/// Interprets numbers in octal format.
/// </summary>
public static class OctalHelpers
{
/// <summary>
/// Read a short.
/// </summary>
public static short CharacterToShort(this char c)
{
switch (c)
@ -33,10 +39,11 @@
}
}
/// <summary>
/// Read an integer from octal digits.
/// </summary>
public static int FromOctalDigits(short[] octal)
{
int sum = 0;
for (int i = octal.Length - 1; i >= 0; i--)
{
@ -47,6 +54,9 @@
return sum;
}
/// <summary>
/// Interpret an int as octal.
/// </summary>
public static int FromOctalInt(int input)
{
var str = input.ToString();

View File

@ -18,6 +18,8 @@
<ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
</ItemGroup>
</Project>

View File

@ -1,4 +1,4 @@
namespace UglyToad.PdfPig.Tokenization
namespace UglyToad.PdfPig.Fonts.Type1
{
using System;
using System.Collections.Generic;
@ -6,11 +6,15 @@
using System.Text;
using Core;
using Tokens;
using Tokenization;
internal class Type1ArrayTokenizer : ITokenizer
/// <inheritdoc />
public class Type1ArrayTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
token = null;

View File

@ -1,14 +1,17 @@
namespace UglyToad.PdfPig.Tokenization
namespace UglyToad.PdfPig.Fonts.Type1
{
using System.Text;
using Core;
using Parser.Parts;
using Tokens;
using Tokenization;
internal class Type1NameTokenizer : ITokenizer
/// <inheritdoc />
public class Type1NameTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = true;
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
token = null;

View File

@ -27,6 +27,7 @@
<ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup>

View File

@ -100,6 +100,7 @@
<ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig\UglyToad.PdfPig.csproj" />
</ItemGroup>

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.Tests.Util
{
using PdfPig.Util;
using PdfPig.Core;
using Xunit;
public class OctalHelpersTests

View File

@ -0,0 +1,3 @@
using System.Runtime.CompilerServices;
// Grants the UglyToad.PdfPig.Tests assembly access to the internal types of the assembly this file is compiled into.
[assembly: InternalsVisibleTo("UglyToad.PdfPig.Tests")]

View File

@ -2,7 +2,6 @@
{
using System.Text;
using Core;
using Parser.Parts;
using Tokens;
internal class CommentTokenizer : ITokenizer

View File

@ -1,12 +1,9 @@
namespace UglyToad.PdfPig.Tokenization
{
using System.Collections.Generic;
using Exceptions;
using Core;
using Parser.Parts;
using Scanner;
using Tokens;
using Util.JetBrains.Annotations;
internal class DictionaryTokenizer : ITokenizer
{
@ -111,7 +108,6 @@
return result;
}
[CanBeNull]
private static IToken PeekNext(IReadOnlyList<IToken> tokens, int currentIndex)
{
if (tokens.Count - 1 < currentIndex + 1)

View File

@ -3,10 +3,15 @@
using Core;
using Tokens;
internal class EndOfLineTokenizer : ITokenizer
/// <summary>
/// Read an <see cref="EndOfLineToken"/>.
/// </summary>
public class EndOfLineTokenizer : ITokenizer
{
/// <inheritdoc />
public bool ReadsNextByte { get; } = false;
/// <inheritdoc />
public bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token)
{
token = null;

View File

@ -2,7 +2,6 @@
{
using System.Collections.Generic;
using Core;
using Parser.Parts;
using Tokens;
internal class HexTokenizer : ITokenizer

View File

@ -0,0 +1,24 @@
namespace UglyToad.PdfPig.Tokenization
{
using Core;
using Tokens;
/// <summary>
/// Reads tokens from input data.
/// </summary>
/// <remarks>
/// Each implementation handles a single kind of token; the caller picks the tokenizer based on the
/// byte it has just read and passes that byte in as <c>currentByte</c>.
/// </remarks>
public interface ITokenizer
{
/// <summary>
/// Whether this tokenizer type reads the byte following the token itself to detect if the token has ended.
/// </summary>
bool ReadsNextByte { get; }
/// <summary>
/// Try and read the token of the corresponding type from the input.
/// </summary>
/// <param name="currentByte">The byte read to detect this is the correct tokenizer to use.</param>
/// <param name="inputBytes">The input data.</param>
/// <param name="token">The token of the corresponding type if read.</param>
/// <returns>
/// The outcome of the attempt. NOTE(review): by the Try-pattern naming this is presumably
/// <see langword="true"/> when <paramref name="token"/> was read and <see langword="false"/> otherwise;
/// confirm against the implementations.
/// </returns>
bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token);
}
}

View File

@ -4,7 +4,6 @@
using System.Collections.Generic;
using System.Text;
using Core;
using Parser.Parts;
using Tokens;
internal class NameTokenizer : ITokenizer

View File

@ -2,7 +2,6 @@
{
using System.Text;
using Core;
using Parser.Parts;
using Tokens;
internal class PlainTokenizer : ITokenizer

View File

@ -3,11 +3,12 @@
using System;
using System.Collections.Generic;
using Core;
using Exceptions;
using Parser.Parts;
using Tokens;
internal class CoreTokenScanner : ISeekableTokenScanner
/// <summary>
/// The default <see cref="ITokenScanner"/> for reading PostScript/PDF style data.
/// </summary>
public class CoreTokenScanner : ISeekableTokenScanner
{
private static readonly ArrayTokenizer ArrayTokenizer = new ArrayTokenizer();
private static readonly CommentTokenizer CommentTokenizer = new CommentTokenizer();
@ -22,10 +23,30 @@
private readonly IInputBytes inputBytes;
private readonly List<(byte firstByte, ITokenizer tokenizer)> customTokenizers = new List<(byte, ITokenizer)>();
internal long CurrentTokenStart { get; private set; }
/// <summary>
/// The offset in the input data at which the <see cref="CurrentToken"/> starts.
/// </summary>
public long CurrentTokenStart { get; private set; }
/// <inheritdoc />
public IToken CurrentToken { get; private set; }
/// <inheritdoc />
public long CurrentPosition => inputBytes.CurrentOffset;
private bool hasBytePreRead;
private bool isInInlineImage;
/// <summary>
/// Create a new <see cref="CoreTokenScanner"/> from the input.
/// </summary>
public CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None)
{
this.scope = scope;
this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes));
}
/// <inheritdoc />
public bool TryReadToken<T>(out T token) where T : class, IToken
{
token = default(T);
@ -44,22 +65,13 @@
return false;
}
/// <inheritdoc />
public void Seek(long position)
{
inputBytes.Seek(position);
}
public long CurrentPosition => inputBytes.CurrentOffset;
private bool hasBytePreRead;
private bool isInInlineImage;
internal CoreTokenScanner(IInputBytes inputBytes, ScannerScope scope = ScannerScope.None)
{
this.scope = scope;
this.inputBytes = inputBytes ?? throw new ArgumentNullException(nameof(inputBytes));
}
/// <inheritdoc />
public bool MoveNext()
{
var endAngleBracesRead = 0;
@ -191,6 +203,7 @@
return false;
}
/// <inheritdoc />
public void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer)
{
if (tokenizer == null)
@ -201,6 +214,7 @@
customTokenizers.Add((firstByte, tokenizer));
}
/// <inheritdoc />
public void DeregisterCustomTokenizer(ITokenizer tokenizer)
{
customTokenizers.RemoveAll(x => ReferenceEquals(x.tokenizer, tokenizer));

View File

@ -0,0 +1,29 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
/// <summary>
/// A <see cref="ITokenScanner" /> that supports seeking in the underlying input data
/// and registering additional tokenizers.
/// </summary>
public interface ISeekableTokenScanner : ITokenScanner
{
/// <summary>
/// Move to the specified position.
/// </summary>
/// <param name="position">
/// The position to move to. NOTE(review): presumably expressed on the same scale as
/// <see cref="CurrentPosition"/>; confirm against the implementation.
/// </param>
void Seek(long position);
/// <summary>
/// The current position in the input.
/// </summary>
long CurrentPosition { get; }
/// <summary>
/// Add support for a custom type of tokenizer.
/// </summary>
/// <param name="firstByte">
/// The byte associated with the tokenizer. NOTE(review): presumably the first byte of a token
/// which selects this tokenizer; confirm against the implementation.
/// </param>
/// <param name="tokenizer">The tokenizer to register.</param>
void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer);
/// <summary>
/// Remove support for a custom type of tokenizer added with <see cref="RegisterCustomTokenizer"/>.
/// </summary>
/// <param name="tokenizer">The previously registered tokenizer to remove.</param>
void DeregisterCustomTokenizer(ITokenizer tokenizer);
}
}

View File

@ -0,0 +1,26 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
using Tokens;
/// <summary>
/// Scan input for PostScript/PDF tokens.
/// </summary>
public interface ITokenScanner
{
/// <summary>
/// Read the next token in the input.
/// </summary>
/// <returns>
/// The outcome of the read. NOTE(review): presumably <see langword="true"/> when a token was read
/// into <see cref="CurrentToken"/> and <see langword="false"/> when no further token is available;
/// confirm against the implementation.
/// </returns>
bool MoveNext();
/// <summary>
/// The currently read token.
/// </summary>
IToken CurrentToken { get; }
/// <summary>
/// Try reading a token of the specific type.
/// </summary>
/// <typeparam name="T">The type of token to read; must be a reference type implementing <see cref="IToken"/>.</typeparam>
/// <param name="token">The token of type <typeparamref name="T"/> if one was read.</param>
/// <returns>
/// The outcome of the attempt. NOTE(review): by the Try-pattern naming this is presumably
/// <see langword="true"/> when a token of type <typeparamref name="T"/> was read; confirm against the implementation.
/// </returns>
bool TryReadToken<T>(out T token) where T : class, IToken;
}
}

View File

@ -0,0 +1,21 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
/// <summary>
/// The current scope of the <see cref="ITokenScanner"/>.
/// </summary>
/// <remarks>
/// <see cref="None"/> has the value zero, so it is also the value of <c>default(ScannerScope)</c>.
/// </remarks>
public enum ScannerScope
{
/// <summary>
/// Reading normally.
/// </summary>
None = 0,
/// <summary>
/// Reading inside an array.
/// </summary>
Array = 1,
/// <summary>
/// Reading inside a dictionary.
/// </summary>
Dictionary = 2
}
}

View File

@ -2,9 +2,7 @@
{
using System.Text;
using Core;
using Parser.Parts;
using Tokens;
using Util;
internal class StringTokenizer : ITokenizer
{

View File

@ -0,0 +1,23 @@
<Project Sdk="Microsoft.NET.Sdk">
  <!-- Project file for the tokenization library: multi-targets .NET Standard 2.0 and .NET Framework 4.5 - 4.7. -->
  <PropertyGroup>
    <TargetFrameworks>netstandard2.0;net45;net451;net452;net46;net461;net462;net47</TargetFrameworks>
    <LangVersion>latest</LangVersion>
  </PropertyGroup>
  <!-- Debug builds of the netstandard2.0 target fail on any warning and emit the XML documentation file. -->
  <PropertyGroup Condition="'$(Configuration)|$(TargetFramework)|$(Platform)'=='Debug|netstandard2.0|AnyCPU'">
    <TreatWarningsAsErrors>true</TreatWarningsAsErrors>
    <WarningsAsErrors />
    <!-- The documentation file is named after this project rather than the Core project it was copied from. -->
    <DocumentationFile>obj\Debug\netstandard2.0\UglyToad.PdfPig.Tokenization.xml</DocumentationFile>
  </PropertyGroup>
  <ItemGroup>
    <ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
    <ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
  </ItemGroup>
  <!-- The .NET Framework targets reference the System.ValueTuple package; the netstandard2.0 target does not. -->
  <ItemGroup Condition="'$(TargetFramework)'=='net45' OR '$(TargetFramework)'=='net451' OR '$(TargetFramework)'=='net452' OR '$(TargetFramework)'=='net46' OR '$(TargetFramework)'=='net461' OR '$(TargetFramework)'=='net462' OR '$(TargetFramework)'=='net47'">
    <PackageReference Include="System.ValueTuple" Version="4.5.0" />
  </ItemGroup>
</Project>

View File

@ -20,6 +20,8 @@ Project("{9A19103F-16F7-4668-BE54-9A1E7A4F7556}") = "UglyToad.PdfPig.DocumentLay
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UglyToad.PdfPig.Tokens", "UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj", "{D840FF69-4250-4B05-9829-5ABEC43EC82C}"
EndProject
Project("{FAE04EC0-301F-11D3-BF4B-00C04F79EFBC}") = "UglyToad.PdfPig.Tokenization", "UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj", "{FD005C50-CD2C-497E-8F7E-6D791091E9B0}"
EndProject
Global
GlobalSection(SolutionConfigurationPlatforms) = preSolution
Debug|Any CPU = Debug|Any CPU
@ -50,6 +52,10 @@ Global
{D840FF69-4250-4B05-9829-5ABEC43EC82C}.Debug|Any CPU.Build.0 = Debug|Any CPU
{D840FF69-4250-4B05-9829-5ABEC43EC82C}.Release|Any CPU.ActiveCfg = Release|Any CPU
{D840FF69-4250-4B05-9829-5ABEC43EC82C}.Release|Any CPU.Build.0 = Release|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Debug|Any CPU.ActiveCfg = Debug|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Debug|Any CPU.Build.0 = Debug|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Release|Any CPU.ActiveCfg = Release|Any CPU
{FD005C50-CD2C-497E-8F7E-6D791091E9B0}.Release|Any CPU.Build.0 = Release|Any CPU
EndGlobalSection
GlobalSection(SolutionProperties) = preSolution
HideSolutionNode = FALSE

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings
{
using Util;
using Core;
internal class MacExpertEncoding : Encoding
{

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings
{
using Util;
using Core;
/// <summary>
/// Similar to the <see cref="MacRomanEncoding"/> with 15 additional entries.

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings
{
using Util;
using Core;
internal class MacRomanEncoding : Encoding
{

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings
{
using Util;
using Core;
internal class StandardEncoding : Encoding
{

View File

@ -1,6 +1,6 @@
namespace UglyToad.PdfPig.PdfFonts.Encodings
{
using Util;
using Core;
internal class WinAnsiEncoding : Encoding
{

View File

@ -5,6 +5,7 @@
using Core;
using Encodings;
using Fonts;
using Fonts.Type1;
using Tokenization;
using Tokenization.Scanner;
using Tokens;

View File

@ -1,12 +0,0 @@
namespace UglyToad.PdfPig.Tokenization
{
using Core;
using Tokens;
/// <summary>
/// Reads tokens from input data.
/// </summary>
internal interface ITokenizer
{
/// <summary>
/// NOTE(review): by its name, presumably whether the tokenizer consumes the byte following the token; confirm against the implementations.
/// </summary>
bool ReadsNextByte { get; }
/// <summary>
/// Try to read a token from the input, given the byte that has already been read.
/// </summary>
/// <param name="currentByte">The byte already read by the caller.</param>
/// <param name="inputBytes">The input data.</param>
/// <param name="token">The token, if one was read.</param>
bool TryTokenize(byte currentByte, IInputBytes inputBytes, out IToken token);
}
}

View File

@ -1,24 +0,0 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
using Tokens;
/// <summary>
/// Scans input for tokens, exposing the most recently read one as <see cref="CurrentToken"/>.
/// </summary>
internal interface ITokenScanner
{
/// <summary>
/// Read the next token in the input.
/// </summary>
bool MoveNext();
/// <summary>
/// The currently read token.
/// </summary>
IToken CurrentToken { get; }
/// <summary>
/// Try reading a token of type <typeparamref name="T"/>.
/// </summary>
bool TryReadToken<T>(out T token) where T : class, IToken;
}
/// <summary>
/// An <see cref="ITokenScanner"/> which can also move within the input and accept custom tokenizers.
/// </summary>
internal interface ISeekableTokenScanner : ITokenScanner
{
/// <summary>
/// Move to the specified position.
/// </summary>
void Seek(long position);
/// <summary>
/// The current position in the input.
/// </summary>
long CurrentPosition { get; }
/// <summary>
/// Register a tokenizer together with the byte it is associated with.
/// </summary>
void RegisterCustomTokenizer(byte firstByte, ITokenizer tokenizer);
/// <summary>
/// Remove a tokenizer previously added with <see cref="RegisterCustomTokenizer"/>.
/// </summary>
void DeregisterCustomTokenizer(ITokenizer tokenizer);
}
}

View File

@ -1,9 +0,0 @@
namespace UglyToad.PdfPig.Tokenization.Scanner
{
/// <summary>
/// The scope a token scanner is reading in.
/// Members have no explicit values, so they take 0, 1 and 2 in declaration order.
/// </summary>
internal enum ScannerScope
{
// No enclosing scope.
None,
// NOTE(review): by its name, presumably reading inside an array; confirm against the scanner.
Array,
// NOTE(review): by its name, presumably reading inside a dictionary; confirm against the scanner.
Dictionary
}
}

View File

@ -52,6 +52,7 @@
<ItemGroup>
<ProjectReference Include="..\UglyToad.PdfPig.Core\UglyToad.PdfPig.Core.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Fonts\UglyToad.PdfPig.Fonts.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokenization\UglyToad.PdfPig.Tokenization.csproj" />
<ProjectReference Include="..\UglyToad.PdfPig.Tokens\UglyToad.PdfPig.Tokens.csproj" />
</ItemGroup>