make filter provider and filter public and use tryget for image bytes

This commit is contained in:
Eliot Jones 2020-04-25 09:42:24 +01:00
parent 635c4b4c5e
commit 27e251f921
12 changed files with 108 additions and 35 deletions

View File

@ -1,6 +1,5 @@
namespace UglyToad.PdfPig.Tests.Integration namespace UglyToad.PdfPig.Tests.Integration
{ {
using System;
using System.Linq; using System.Linq;
using Xunit; using Xunit;
@ -74,13 +73,12 @@
var page = document.GetPage(1); var page = document.GetPage(1);
foreach (var image in page.GetImages()) foreach (var image in page.GetImages())
{ {
try if (image.TryGetBytes(out var bytes))
{ {
Assert.NotNull(image.Bytes); Assert.NotNull(bytes);
} }
catch (NotSupportedException ) else
{ {
// Should allow access to raw bytes.
Assert.NotNull(image.RawBytes); Assert.NotNull(image.RawBytes);
} }
} }

View File

@ -85,6 +85,9 @@
"UglyToad.PdfPig.CrossReference.CrossReferenceType", "UglyToad.PdfPig.CrossReference.CrossReferenceType",
"UglyToad.PdfPig.CrossReference.TrailerDictionary", "UglyToad.PdfPig.CrossReference.TrailerDictionary",
"UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException", "UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException",
"UglyToad.PdfPig.Filters.DefaultFilterProvider",
"UglyToad.PdfPig.Filters.IFilter",
"UglyToad.PdfPig.Filters.IFilterProvider",
"UglyToad.PdfPig.PdfFonts.DescriptorFontFile", "UglyToad.PdfPig.PdfFonts.DescriptorFontFile",
"UglyToad.PdfPig.PdfFonts.FontDescriptor", "UglyToad.PdfPig.PdfFonts.FontDescriptor",
"UglyToad.PdfPig.PdfFonts.FontDescriptorFlags", "UglyToad.PdfPig.PdfFonts.FontDescriptorFlags",
@ -192,6 +195,7 @@
"UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationType", "UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationType",
"UglyToad.PdfPig.ParsingOptions", "UglyToad.PdfPig.ParsingOptions",
"UglyToad.PdfPig.PdfDocument", "UglyToad.PdfPig.PdfDocument",
"UglyToad.PdfPig.PdfExtensions",
"UglyToad.PdfPig.Structure", "UglyToad.PdfPig.Structure",
"UglyToad.PdfPig.Util.Adler32Checksum", "UglyToad.PdfPig.Util.Adler32Checksum",
"UglyToad.PdfPig.Util.IWordExtractor", "UglyToad.PdfPig.Util.IWordExtractor",

View File

@ -42,13 +42,6 @@
/// </summary> /// </summary>
int BitsPerComponent { get; } int BitsPerComponent { get; }
/// <summary>
/// The bytes of the image with any filters decoded.
/// If the filter used to encode the bytes is not supported accessing this property will throw, access the <see cref="RawBytes"/>
/// instead.
/// </summary>
IReadOnlyList<byte> Bytes { get; }
/// <summary> /// <summary>
/// The encoded bytes of the image with all filters still applied. /// The encoded bytes of the image with all filters still applied.
/// </summary> /// </summary>
@ -90,5 +83,11 @@
/// Whether this image is an <see cref="InlineImage"/> or a <see cref="XObjectImage"/>. /// Whether this image is an <see cref="InlineImage"/> or a <see cref="XObjectImage"/>.
/// </summary> /// </summary>
bool IsInlineImage { get; } bool IsInlineImage { get; }
/// <summary>
/// Try to get the bytes of the image with any filters decoded. For JPEG images and some other types the
/// decoded bytes are not available and the <see cref="RawBytes"/> should be used directly.
/// </summary>
/// <param name="bytes">
/// The decoded bytes of the image when this method returns <see langword="true"/>,
/// otherwise <see langword="null"/>.
/// </param>
/// <returns>
/// <see langword="true"/> if the decoded bytes could be provided; <see langword="false"/> if they could not,
/// for example because a filter used to encode the image is not supported.
/// </returns>
bool TryGetBytes(out IReadOnlyList<byte> bytes);
} }
} }

View File

@ -47,9 +47,6 @@
/// <inheritdoc /> /// <inheritdoc />
public bool Interpolate { get; } public bool Interpolate { get; }
/// <inheritdoc />
public IReadOnlyList<byte> Bytes => bytesFactory.Value;
/// <inheritdoc /> /// <inheritdoc />
public IReadOnlyList<byte> RawBytes { get; } public IReadOnlyList<byte> RawBytes { get; }
@ -76,7 +73,18 @@
Interpolate = interpolate; Interpolate = interpolate;
RawBytes = bytes; RawBytes = bytes;
bytesFactory = new Lazy<IReadOnlyList<byte>>(() =>
var supportsFilters = true;
foreach (var filter in filters)
{
if (!filter.IsSupported)
{
supportsFilters = false;
break;
}
}
bytesFactory = supportsFilters ? new Lazy<IReadOnlyList<byte>>(() =>
{ {
var b = bytes.ToArray(); var b = bytes.ToArray();
for (var i = 0; i < filters.Count; i++) for (var i = 0; i < filters.Count; i++)
@ -86,7 +94,21 @@
} }
return b; return b;
}); }) : null;
}
/// <inheritdoc />
public bool TryGetBytes(out IReadOnlyList<byte> bytes)
{
bytes = null;
if (bytesFactory == null)
{
return false;
}
bytes = bytesFactory.Value;
return true;
} }
/// <inheritdoc /> /// <inheritdoc />

View File

@ -6,13 +6,20 @@
using Core; using Core;
using Tokens; using Tokens;
internal class MemoryFilterProvider : IFilterProvider /// <inheritdoc />
/// <summary>
/// The default implementation of the <see cref="T:UglyToad.PdfPig.Filters.IFilterProvider" />.
/// </summary>
public class DefaultFilterProvider : IFilterProvider
{ {
private readonly IReadOnlyDictionary<string, IFilter> filterInstances; private readonly IReadOnlyDictionary<string, IFilter> filterInstances;
public static readonly IFilterProvider Instance = new MemoryFilterProvider(); /// <summary>
/// The single instance of this provider.
/// </summary>
public static readonly IFilterProvider Instance = new DefaultFilterProvider();
private MemoryFilterProvider() private DefaultFilterProvider()
{ {
var ascii85 = new Ascii85Filter(); var ascii85 = new Ascii85Filter();
var asciiHex = new AsciiHexDecodeFilter(); var asciiHex = new AsciiHexDecodeFilter();
@ -45,6 +52,7 @@
}; };
} }
/// <inheritdoc />
public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary) public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary)
{ {
if (dictionary == null) if (dictionary == null)
@ -76,6 +84,7 @@
} }
} }
/// <inheritdoc />
public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names) public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names)
{ {
if (names == null) if (names == null)
@ -103,6 +112,7 @@
return factory; return factory;
} }
/// <inheritdoc />
public IReadOnlyList<IFilter> GetAllFilters() public IReadOnlyList<IFilter> GetAllFilters()
{ {
return filterInstances.Values.Distinct().ToList(); return filterInstances.Values.Distinct().ToList();

View File

@ -4,10 +4,9 @@
using Tokens; using Tokens;
/// <summary> /// <summary>
/// A filter is used in a PDF to encode/decode data either to compress it /// A filter is used in a PDF to encode/decode data either to compress it or derive an ASCII representation of the data.
/// or derive an ASCII representation of the data.
/// </summary> /// </summary>
internal interface IFilter public interface IFilter
{ {
/// <summary> /// <summary>
/// Whether this library can decode information encoded using this filter. /// Whether this library can decode information encoded using this filter.

View File

@ -3,12 +3,24 @@
using System.Collections.Generic; using System.Collections.Generic;
using Tokens; using Tokens;
internal interface IFilterProvider /// <summary>
/// Gets filter implementations (<see cref="IFilter"/>) for decoding PDF data.
/// </summary>
public interface IFilterProvider
{ {
/// <summary>
/// Get the filters specified in this dictionary.
/// </summary>
IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary); IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary);
/// <summary>
/// Gets the filters specified by the filter names.
/// </summary>
IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names); IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names);
/// <summary>
/// Get all available filters in this library.
/// </summary>
IReadOnlyList<IFilter> GetAllFilters(); IReadOnlyList<IFilter> GetAllFilters();
} }
} }

View File

@ -83,7 +83,7 @@
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing,
IReadOnlyList<string> passwords, bool clipPaths) IReadOnlyList<string> passwords, bool clipPaths)
{ {
var filterProvider = MemoryFilterProvider.Instance; var filterProvider = DefaultFilterProvider.Instance;
CrossReferenceTable crossReferenceTable = null; CrossReferenceTable crossReferenceTable = null;

View File

@ -7,7 +7,10 @@
using Tokenization.Scanner; using Tokenization.Scanner;
using Tokens; using Tokens;
internal static class PdfExtensions /// <summary>
/// Extensions for PDF types.
/// </summary>
public static class PdfExtensions
{ {
/// <summary> /// <summary>
/// Try and get the entry with a given name and type or look-up the object if it's an indirect reference. /// Try and get the entry with a given name and type or look-up the object if it's an indirect reference.
@ -44,7 +47,10 @@
return typedToken; return typedToken;
} }
internal static IReadOnlyList<byte> Decode(this StreamToken stream, IFilterProvider filterProvider) /// <summary>
/// Get the decoded data from this stream.
/// </summary>
public static IReadOnlyList<byte> Decode(this StreamToken stream, IFilterProvider filterProvider)
{ {
var filters = filterProvider.GetFilters(stream.StreamDictionary); var filters = filterProvider.GetFilters(stream.StreamDictionary);

View File

@ -24,7 +24,7 @@
{ {
private static readonly ILog Log = new NoOpLog(); private static readonly ILog Log = new NoOpLog();
private static readonly IFilterProvider FilterProvider = MemoryFilterProvider.Instance; private static readonly IFilterProvider FilterProvider = DefaultFilterProvider.Instance;
/// <summary> /// <summary>
/// Merge two PDF documents together with the pages from <paramref name="file1"/> followed by <paramref name="file2"/>. /// Merge two PDF documents together with the pages from <paramref name="file1"/> followed by <paramref name="file2"/>.

View File

@ -66,7 +66,19 @@
var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateToken) var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateToken)
&& interpolateToken.Data; && interpolateToken.Data;
var decodedBytes = new Lazy<IReadOnlyList<byte>>(() => xObject.Stream.Decode(filterProvider)); var filters = filterProvider.GetFilters(xObject.Stream.StreamDictionary);
var supportsFilters = true;
foreach (var filter in filters)
{
if (!filter.IsSupported)
{
supportsFilters = false;
break;
}
}
var decodedBytes = supportsFilters ? new Lazy<IReadOnlyList<byte>>(() => xObject.Stream.Decode(filterProvider))
: null;
var decode = EmptyArray<decimal>.Instance; var decode = EmptyArray<decimal>.Instance;

View File

@ -15,7 +15,8 @@
/// </summary> /// </summary>
public class XObjectImage : IPdfImage public class XObjectImage : IPdfImage
{ {
private readonly Lazy<IReadOnlyList<byte>> bytes; [CanBeNull]
private readonly Lazy<IReadOnlyList<byte>> bytesFactory;
/// <inheritdoc /> /// <inheritdoc />
public PdfRectangle Bounds { get; } public PdfRectangle Bounds { get; }
@ -62,10 +63,6 @@
/// <inheritdoc /> /// <inheritdoc />
public IReadOnlyList<byte> RawBytes { get; } public IReadOnlyList<byte> RawBytes { get; }
/// <inheritdoc />
[NotNull]
public IReadOnlyList<byte> Bytes => bytes.Value;
/// <summary> /// <summary>
/// Creates a new <see cref="XObjectImage"/>. /// Creates a new <see cref="XObjectImage"/>.
@ -93,7 +90,21 @@
Decode = decode; Decode = decode;
ImageDictionary = imageDictionary ?? throw new ArgumentNullException(nameof(imageDictionary)); ImageDictionary = imageDictionary ?? throw new ArgumentNullException(nameof(imageDictionary));
RawBytes = rawBytes; RawBytes = rawBytes;
this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes)); bytesFactory = bytes;
}
/// <inheritdoc />
public bool TryGetBytes(out IReadOnlyList<byte> bytes)
{
bytes = null;
if (bytesFactory == null)
{
return false;
}
bytes = bytesFactory.Value;
return true;
} }
/// <inheritdoc /> /// <inheritdoc />