mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
Introduce ParsingOptions.FilterProvider and BaseFilterProvider and make CcittFaxCompressionType a byte
This commit is contained in:
parent
4b5cb4736f
commit
8cee4f480f
126
src/UglyToad.PdfPig.Tests/Integration/FilterTests.cs
Normal file
126
src/UglyToad.PdfPig.Tests/Integration/FilterTests.cs
Normal file
@ -0,0 +1,126 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using PdfPig.Filters;
|
||||
using PdfPig.Tokens;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
|
||||
/// <summary>
/// Integration tests that open the sample documents with a custom filter provider
/// (<see cref="MyFilterProvider"/>) supplied through <see cref="ParsingOptions.FilterProvider"/>.
/// </summary>
public class FilterTests
{
    // Resolved lazily: three directory levels above the test base directory, then "Integration/Documents".
    private static readonly Lazy<string> DocumentFolder = new Lazy<string>(() => Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Integration", "Documents")));

    // File names that GetAllDocuments leaves out of the theory data.
    private static readonly HashSet<string> _documentsToIgnore =
    [
        "issue_671.pdf",
        "GHOSTSCRIPT-698363-0.pdf",
        "ErcotFacts.pdf"
    ];

    /// <summary>
    /// Opens the document with <see cref="MyFilterProvider"/> and asserts that every image whose
    /// <c>Filter</c> entry is a single name other than Flate/LZW cannot be converted to PNG.
    /// </summary>
    /// <param name="documentName">File name (without directory) of the document to open.</param>
    [Theory]
    [MemberData(nameof(GetAllDocuments))]
    public void NoImageDecoding(string documentName)
    {
        // Add the full path back on, we removed it so we could see it in the test explorer.
        documentName = Path.Combine(DocumentFolder.Value, documentName);

        var parsingOptions = new ParsingOptions
        {
            UseLenientParsing = true,
            FilterProvider = MyFilterProvider.Instance
        };

        using (var document = PdfDocument.Open(documentName, parsingOptions))
        {
            // Page numbers are 1-based.
            for (var pageNumber = 1; pageNumber <= document.NumberOfPages; pageNumber++)
            {
                var page = document.GetPage(pageNumber);

                foreach (var pdfImage in page.GetImages())
                {
                    // Images for which no Filter name token can be read are not checked.
                    if (!pdfImage.ImageDictionary.TryGet(NameToken.Filter, out NameToken filter))
                    {
                        continue;
                    }

                    // Flate and LZW are mapped to real filters in MyFilterProvider, so they are not checked either.
                    if (IsFlateOrLzw(filter))
                    {
                        continue;
                    }

                    Assert.False(pdfImage.TryGetPng(out _));
                }
            }
        }
    }

    /// <summary>
    /// Returns <c>true</c> when the filter name is FlateDecode or LZWDecode (full or abbreviated form).
    /// </summary>
    private static bool IsFlateOrLzw(NameToken filter)
    {
        var name = filter.Data;

        return name.Equals(NameToken.FlateDecode.Data)
               || name.Equals(NameToken.FlateDecodeAbbreviation.Data)
               || name.Equals(NameToken.LzwDecode.Data)
               || name.Equals(NameToken.LzwDecodeAbbreviation.Data);
    }

    /// <summary>
    /// Placeholder filter that reports itself as unsupported and never decodes anything.
    /// </summary>
    public sealed class NoFilter : IFilter
    {
        /// <summary>
        /// Always <c>false</c>.
        /// </summary>
        public bool IsSupported => false;

        /// <summary>
        /// Not implemented: always throws <see cref="NotImplementedException"/>.
        /// </summary>
        public ReadOnlyMemory<byte> Decode(ReadOnlySpan<byte> input, DictionaryToken streamDictionary, int filterIndex)
        {
            throw new NotImplementedException();
        }
    }

    /// <summary>
    /// Filter provider used by the tests: ASCII85, ASCIIHex, Flate, RunLength and LZW map to the real
    /// filters, while CCITTFax, DCT, JBIG2 and JPX map to <see cref="NoFilter"/>.
    /// </summary>
    public class MyFilterProvider : BaseFilterProvider
    {
        /// <summary>
        /// The single instance of this provider.
        /// </summary>
        public static readonly IFilterProvider Instance = new MyFilterProvider();

        /// <summary>
        /// Creates the provider with the filter table built by <see cref="GetDictionary"/>.
        /// </summary>
        protected MyFilterProvider() : base(GetDictionary())
        {
        }

        private static Dictionary<string, IFilter> GetDictionary()
        {
            var ascii85 = new Ascii85Filter();
            var asciiHex = new AsciiHexDecodeFilter();
            var flate = new FlateFilter();
            var runLength = new RunLengthFilter();
            var lzw = new LzwFilter();

            var noFilter = new NoFilter();

            // Every key is the string name (.Data) of the token, including the LZW entries.
            return new Dictionary<string, IFilter>
            {
                { NameToken.Ascii85Decode.Data, ascii85 },
                { NameToken.Ascii85DecodeAbbreviation.Data, ascii85 },
                { NameToken.AsciiHexDecode.Data, asciiHex },
                { NameToken.AsciiHexDecodeAbbreviation.Data, asciiHex },
                { NameToken.CcittfaxDecode.Data, noFilter },
                { NameToken.CcittfaxDecodeAbbreviation.Data, noFilter },
                { NameToken.DctDecode.Data, noFilter },
                { NameToken.DctDecodeAbbreviation.Data, noFilter },
                { NameToken.FlateDecode.Data, flate },
                { NameToken.FlateDecodeAbbreviation.Data, flate },
                { NameToken.Jbig2Decode.Data, noFilter },
                { NameToken.JpxDecode.Data, noFilter },
                { NameToken.RunLengthDecode.Data, runLength },
                { NameToken.RunLengthDecodeAbbreviation.Data, runLength },
                { NameToken.LzwDecode.Data, lzw },
                { NameToken.LzwDecodeAbbreviation.Data, lzw }
            };
        }
    }

    /// <summary>
    /// Theory data: the file name of every <c>*.pdf</c> in the documents folder that is not in the ignore list.
    /// </summary>
    public static IEnumerable<object[]> GetAllDocuments
    {
        get
        {
            var files = Directory.GetFiles(DocumentFolder.Value, "*.pdf");

            // Return the shortname so we can see it in the test explorer.
            // Ordinal comparison: file names are identifiers, not culture-sensitive text.
            return files
                .Where(x => !_documentsToIgnore.Any(i => x.EndsWith(i, StringComparison.Ordinal)))
                .Select(x => new object[] { Path.GetFileName(x) });
        }
    }
}
|
||||
}
|
@ -97,6 +97,7 @@
|
||||
"UglyToad.PdfPig.CrossReference.CrossReferenceType",
|
||||
"UglyToad.PdfPig.CrossReference.TrailerDictionary",
|
||||
"UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException",
|
||||
"UglyToad.PdfPig.Filters.BaseFilterProvider",
|
||||
"UglyToad.PdfPig.Filters.DefaultFilterProvider",
|
||||
"UglyToad.PdfPig.Filters.IFilter",
|
||||
"UglyToad.PdfPig.Filters.IFilterProvider",
|
||||
|
96
src/UglyToad.PdfPig/Filters/BaseFilterProvider.cs
Normal file
96
src/UglyToad.PdfPig/Filters/BaseFilterProvider.cs
Normal file
@ -0,0 +1,96 @@
|
||||
namespace UglyToad.PdfPig.Filters
|
||||
{
|
||||
using Core;
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
/// <summary>
/// Base abstract class for FilterProvider.
/// Resolves filter names to <see cref="IFilter"/> instances using the dictionary supplied by the derived class.
/// </summary>
public abstract class BaseFilterProvider : IFilterProvider
{
    /// <summary>
    /// Dictionary of filters.
    /// </summary>
    protected readonly IReadOnlyDictionary<string, IFilter> FilterInstances;

    /// <summary>
    /// Create a new <see cref="BaseFilterProvider"/> with the given filters.
    /// </summary>
    /// <param name="filterInstances">The filters to expose, keyed by filter name.</param>
    /// <exception cref="ArgumentNullException"><paramref name="filterInstances"/> is <c>null</c>.</exception>
    protected BaseFilterProvider(IReadOnlyDictionary<string, IFilter> filterInstances)
    {
        // Fail at construction rather than with a NullReferenceException on the first lookup.
        FilterInstances = filterInstances ?? throw new ArgumentNullException(nameof(filterInstances));
    }

    /// <inheritdoc />
    public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary)
    {
        if (dictionary is null)
        {
            throw new ArgumentNullException(nameof(dictionary));
        }

        var token = dictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);
        if (token is null)
        {
            return Array.Empty<IFilter>();
        }

        switch (token)
        {
            case ArrayToken filters:
                var result = new IFilter[filters.Data.Count];
                for (var i = 0; i < filters.Data.Count; i++)
                {
                    var filterToken = filters.Data[i];

                    // A non-name entry is a malformed document: report it the same way as the
                    // default branch below instead of letting a raw InvalidCastException escape.
                    if (!(filterToken is NameToken filterName))
                    {
                        throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name, instead got: {filterToken}.");
                    }

                    result[i] = GetFilterStrict(filterName.Data);
                }

                return result;
            case NameToken name:
                return new[] { GetFilterStrict(name.Data) };
            default:
                throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name or array, instead got: {token}.");
        }
    }

    /// <inheritdoc />
    public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names)
    {
        if (names is null)
        {
            throw new ArgumentNullException(nameof(names));
        }

        // The result has exactly one filter per name, so size the list up front.
        var result = new List<IFilter>(names.Count);

        foreach (var name in names)
        {
            // The NameToken is converted implicitly to the string that GetFilterStrict expects.
            result.Add(GetFilterStrict(name));
        }

        return result;
    }

    /// <summary>
    /// Looks up the filter registered under <paramref name="name"/>.
    /// </summary>
    /// <exception cref="NotSupportedException">No filter is registered under that name.</exception>
    private IFilter GetFilterStrict(string name)
    {
        if (!FilterInstances.TryGetValue(name, out var filter))
        {
            throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
        }

        return filter;
    }

    /// <inheritdoc />
    public IReadOnlyList<IFilter> GetAllFilters()
    {
        // Several names can map to the same instance, so duplicates are removed.
        return FilterInstances.Values.Distinct().ToList();
    }
}
|
||||
}
|
@ -3,7 +3,7 @@
|
||||
/// <summary>
|
||||
/// Specifies the compression type to use with <see cref="T:UglyToad.PdfPig.Filters.CcittFaxDecoderStream" />.
|
||||
/// </summary>
|
||||
internal enum CcittFaxCompressionType
|
||||
internal enum CcittFaxCompressionType : byte
|
||||
{
|
||||
/// <summary>
|
||||
/// Modified Huffman (MH) - Group 3 variation (T2)
|
||||
|
@ -1,25 +1,24 @@
|
||||
namespace UglyToad.PdfPig.Filters
|
||||
{
|
||||
using System;
|
||||
using System.Collections.Generic;
|
||||
using System.Linq;
|
||||
using Core;
|
||||
using Tokens;
|
||||
using Util;
|
||||
|
||||
/// <summary>
|
||||
/// The default implementation of the <see cref="T:UglyToad.PdfPig.Filters.IFilterProvider" />.
|
||||
/// </summary>
|
||||
public class DefaultFilterProvider : IFilterProvider
|
||||
public sealed class DefaultFilterProvider : BaseFilterProvider
|
||||
{
|
||||
private readonly IReadOnlyDictionary<string, IFilter> filterInstances;
|
||||
|
||||
/// <summary>
|
||||
/// The single instance of this provider.
|
||||
/// </summary>
|
||||
public static readonly IFilterProvider Instance = new DefaultFilterProvider();
|
||||
|
||||
private DefaultFilterProvider()
|
||||
/// <inheritdoc/>
|
||||
private DefaultFilterProvider() : base(GetDictionary())
|
||||
{
|
||||
}
|
||||
|
||||
private static Dictionary<string, IFilter> GetDictionary()
|
||||
{
|
||||
var ascii85 = new Ascii85Filter();
|
||||
var asciiHex = new AsciiHexDecodeFilter();
|
||||
@ -31,7 +30,7 @@
|
||||
var runLength = new RunLengthFilter();
|
||||
var lzw = new LzwFilter();
|
||||
|
||||
filterInstances = new Dictionary<string, IFilter>
|
||||
return new Dictionary<string, IFilter>
|
||||
{
|
||||
{ NameToken.Ascii85Decode.Data, ascii85 },
|
||||
{ NameToken.Ascii85DecodeAbbreviation.Data, ascii85 },
|
||||
@ -47,77 +46,9 @@
|
||||
{ NameToken.JpxDecode.Data, jpx },
|
||||
{ NameToken.RunLengthDecode.Data, runLength },
|
||||
{ NameToken.RunLengthDecodeAbbreviation.Data, runLength },
|
||||
{NameToken.LzwDecode, lzw },
|
||||
{NameToken.LzwDecodeAbbreviation, lzw }
|
||||
{ NameToken.LzwDecode.Data, lzw },
|
||||
{ NameToken.LzwDecodeAbbreviation.Data, lzw }
|
||||
};
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary)
|
||||
{
|
||||
if (dictionary is null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(dictionary));
|
||||
}
|
||||
|
||||
var token = dictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);
|
||||
if (token is null)
|
||||
{
|
||||
return Array.Empty<IFilter>();
|
||||
}
|
||||
|
||||
switch (token)
|
||||
{
|
||||
case ArrayToken filters:
|
||||
var result = new IFilter[filters.Data.Count];
|
||||
for (var i = 0; i < filters.Data.Count; i++)
|
||||
{
|
||||
var filterToken = filters.Data[i];
|
||||
var filterName = ((NameToken) filterToken).Data;
|
||||
result[i] = GetFilterStrict(filterName);
|
||||
}
|
||||
|
||||
return result;
|
||||
case NameToken name:
|
||||
return new[] { GetFilterStrict(name.Data) };
|
||||
default:
|
||||
throw new PdfDocumentFormatException($"The filter for the stream was not a valid object. Expected name or array, instead got: {token}.");
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names)
|
||||
{
|
||||
if (names is null)
|
||||
{
|
||||
throw new ArgumentNullException(nameof(names));
|
||||
}
|
||||
|
||||
var result = new List<IFilter>();
|
||||
|
||||
foreach (var name in names)
|
||||
{
|
||||
result.Add(GetFilterStrict(name));
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private IFilter GetFilterStrict(string name)
|
||||
{
|
||||
if (!filterInstances.TryGetValue(name, out var factory))
|
||||
{
|
||||
throw new NotSupportedException($"The filter with the name {name} is not supported yet. Please raise an issue.");
|
||||
}
|
||||
|
||||
return factory;
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public IReadOnlyList<IFilter> GetAllFilters()
|
||||
{
|
||||
return filterInstances.Values.Distinct().ToList();
|
||||
}
|
||||
|
||||
}
|
||||
}
|
@ -85,6 +85,7 @@
|
||||
}
|
||||
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public byte[] Encode(Stream input, DictionaryToken streamDictionary, int index)
|
||||
{
|
||||
const int headerLength = 2;
|
||||
|
@ -106,7 +106,7 @@
|
||||
ISeekableTokenScanner scanner,
|
||||
ParsingOptions parsingOptions)
|
||||
{
|
||||
var filterProvider = new FilterProviderWithLookup(DefaultFilterProvider.Instance);
|
||||
var filterProvider = new FilterProviderWithLookup(parsingOptions.FilterProvider ?? DefaultFilterProvider.Instance);
|
||||
|
||||
CrossReferenceTable? crossReferenceTable = null;
|
||||
|
||||
|
@ -1,5 +1,6 @@
|
||||
namespace UglyToad.PdfPig
|
||||
{
|
||||
using Filters;
|
||||
using System.Collections.Generic;
|
||||
using Logging;
|
||||
|
||||
@ -50,5 +51,10 @@
|
||||
/// forms and images when missing.
|
||||
/// </summary>
|
||||
public bool SkipMissingFonts { get; set; } = false;
|
||||
|
||||
/// <summary>
|
||||
/// Filter provider to use while parsing the document. The <see cref="DefaultFilterProvider"/> will be used if set to <c>null</c>.
|
||||
/// </summary>
|
||||
public IFilterProvider? FilterProvider { get; set; } = null;
|
||||
}
|
||||
}
|
Loading…
Reference in New Issue
Block a user