make filter provider and filter public and use tryget for image bytes

This commit is contained in:
Eliot Jones 2020-04-25 09:42:24 +01:00
parent 635c4b4c5e
commit 27e251f921
12 changed files with 108 additions and 35 deletions

View File

@ -1,6 +1,5 @@
namespace UglyToad.PdfPig.Tests.Integration
{
using System;
using System.Linq;
using Xunit;
@ -74,13 +73,12 @@
var page = document.GetPage(1);
foreach (var image in page.GetImages())
{
try
if (image.TryGetBytes(out var bytes))
{
Assert.NotNull(image.Bytes);
Assert.NotNull(bytes);
}
catch (NotSupportedException )
else
{
// Should allow access to raw bytes.
Assert.NotNull(image.RawBytes);
}
}

View File

@ -85,6 +85,9 @@
"UglyToad.PdfPig.CrossReference.CrossReferenceType",
"UglyToad.PdfPig.CrossReference.TrailerDictionary",
"UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException",
"UglyToad.PdfPig.Filters.DefaultFilterProvider",
"UglyToad.PdfPig.Filters.IFilter",
"UglyToad.PdfPig.Filters.IFilterProvider",
"UglyToad.PdfPig.PdfFonts.DescriptorFontFile",
"UglyToad.PdfPig.PdfFonts.FontDescriptor",
"UglyToad.PdfPig.PdfFonts.FontDescriptorFlags",
@ -192,6 +195,7 @@
"UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationType",
"UglyToad.PdfPig.ParsingOptions",
"UglyToad.PdfPig.PdfDocument",
"UglyToad.PdfPig.PdfExtensions",
"UglyToad.PdfPig.Structure",
"UglyToad.PdfPig.Util.Adler32Checksum",
"UglyToad.PdfPig.Util.IWordExtractor",

View File

@ -42,13 +42,6 @@
/// </summary>
int BitsPerComponent { get; }
/// <summary>
/// The bytes of the image with any filters decoded.
/// If the filter used to encode the bytes is not supported accessing this property will throw, access the <see cref="RawBytes"/>
/// instead.
/// </summary>
IReadOnlyList<byte> Bytes { get; }
/// <summary>
/// The encoded bytes of the image with all filters still applied.
/// </summary>
@ -90,5 +83,11 @@
/// Whether this image is an <see cref="InlineImage"/> or a <see cref="XObjectImage"/>.
/// </summary>
bool IsInlineImage { get; }
/// <summary>
/// Get the decoded bytes of the image if applicable. For JPEG images and some other types the
/// <see cref="RawBytes"/> should be used directly.
/// </summary>
/// <param name="bytes">
/// The decoded bytes when this method returns <see langword="true"/>, otherwise <see langword="null"/>.
/// </param>
/// <returns>
/// <see langword="true"/> if the decoded bytes are available, otherwise <see langword="false"/>.
/// </returns>
bool TryGetBytes(out IReadOnlyList<byte> bytes);
}
}

View File

@ -47,9 +47,6 @@
/// <inheritdoc />
public bool Interpolate { get; }
/// <inheritdoc />
public IReadOnlyList<byte> Bytes => bytesFactory.Value;
/// <inheritdoc />
public IReadOnlyList<byte> RawBytes { get; }
@ -76,7 +73,18 @@
Interpolate = interpolate;
RawBytes = bytes;
bytesFactory = new Lazy<IReadOnlyList<byte>>(() =>
var supportsFilters = true;
foreach (var filter in filters)
{
if (!filter.IsSupported)
{
supportsFilters = false;
break;
}
}
bytesFactory = supportsFilters ? new Lazy<IReadOnlyList<byte>>(() =>
{
var b = bytes.ToArray();
for (var i = 0; i < filters.Count; i++)
@ -86,7 +94,21 @@
}
return b;
});
}) : null;
}
/// <inheritdoc />
public bool TryGetBytes(out IReadOnlyList<byte> bytes)
{
    // The factory is only created when every filter on the image is supported.
    var canDecode = bytesFactory != null;

    // Decoding is deferred until the lazy value is first requested.
    bytes = canDecode ? bytesFactory.Value : null;

    return canDecode;
}
/// <inheritdoc />

View File

@ -6,13 +6,20 @@
using Core;
using Tokens;
internal class MemoryFilterProvider : IFilterProvider
/// <inheritdoc />
/// <summary>
/// The default implementation of the <see cref="T:UglyToad.PdfPig.Filters.IFilterProvider" />.
/// </summary>
public class DefaultFilterProvider : IFilterProvider
{
private readonly IReadOnlyDictionary<string, IFilter> filterInstances;
public static readonly IFilterProvider Instance = new MemoryFilterProvider();
/// <summary>
/// The single instance of this provider.
/// </summary>
public static readonly IFilterProvider Instance = new DefaultFilterProvider();
private MemoryFilterProvider()
private DefaultFilterProvider()
{
var ascii85 = new Ascii85Filter();
var asciiHex = new AsciiHexDecodeFilter();
@ -45,6 +52,7 @@
};
}
/// <inheritdoc />
public IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary)
{
if (dictionary == null)
@ -76,6 +84,7 @@
}
}
/// <inheritdoc />
public IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names)
{
if (names == null)
@ -103,6 +112,7 @@
return factory;
}
/// <inheritdoc />
public IReadOnlyList<IFilter> GetAllFilters()
{
return filterInstances.Values.Distinct().ToList();

View File

@ -4,10 +4,9 @@
using Tokens;
/// <summary>
/// A filter is used in a PDF to encode/decode data either to compress it
/// or derive an ASCII representation of the data.
/// A filter is used in a PDF to encode/decode data either to compress it or derive an ASCII representation of the data.
/// </summary>
internal interface IFilter
public interface IFilter
{
/// <summary>
/// Whether this library can decode information encoded using this filter.

View File

@ -3,12 +3,24 @@
using System.Collections.Generic;
using Tokens;
internal interface IFilterProvider
/// <summary>
/// Gets filter implementations (<see cref="IFilter"/>) for decoding PDF data.
/// </summary>
public interface IFilterProvider
{
/// <summary>
/// Gets the filters specified in this dictionary.
/// </summary>
/// <param name="dictionary">The dictionary which specifies the filters.</param>
/// <returns>The filters specified in the dictionary.</returns>
IReadOnlyList<IFilter> GetFilters(DictionaryToken dictionary);
/// <summary>
/// Gets the filters specified by the filter names.
/// </summary>
/// <param name="names">The names of the filters to get.</param>
/// <returns>The filters specified by the names.</returns>
IReadOnlyList<IFilter> GetNamedFilters(IReadOnlyList<NameToken> names);
/// <summary>
/// Gets all available filters in this library.
/// </summary>
/// <returns>All available filters.</returns>
IReadOnlyList<IFilter> GetAllFilters();
}
}

View File

@ -83,7 +83,7 @@
private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing,
IReadOnlyList<string> passwords, bool clipPaths)
{
var filterProvider = MemoryFilterProvider.Instance;
var filterProvider = DefaultFilterProvider.Instance;
CrossReferenceTable crossReferenceTable = null;

View File

@ -7,7 +7,10 @@
using Tokenization.Scanner;
using Tokens;
internal static class PdfExtensions
/// <summary>
/// Extensions for PDF types.
/// </summary>
public static class PdfExtensions
{
/// <summary>
/// Try and get the entry with a given name and type or look-up the object if it's an indirect reference.
@ -44,7 +47,10 @@
return typedToken;
}
internal static IReadOnlyList<byte> Decode(this StreamToken stream, IFilterProvider filterProvider)
/// <summary>
/// Get the decoded data from this stream.
/// </summary>
public static IReadOnlyList<byte> Decode(this StreamToken stream, IFilterProvider filterProvider)
{
var filters = filterProvider.GetFilters(stream.StreamDictionary);

View File

@ -24,7 +24,7 @@
{
private static readonly ILog Log = new NoOpLog();
private static readonly IFilterProvider FilterProvider = MemoryFilterProvider.Instance;
private static readonly IFilterProvider FilterProvider = DefaultFilterProvider.Instance;
/// <summary>
/// Merge two PDF documents together with the pages from <paramref name="file1"/> followed by <paramref name="file2"/>.

View File

@ -66,7 +66,19 @@
var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateToken)
&& interpolateToken.Data;
var decodedBytes = new Lazy<IReadOnlyList<byte>>(() => xObject.Stream.Decode(filterProvider));
var filters = filterProvider.GetFilters(xObject.Stream.StreamDictionary);
var supportsFilters = true;
foreach (var filter in filters)
{
if (!filter.IsSupported)
{
supportsFilters = false;
break;
}
}
var decodedBytes = supportsFilters ? new Lazy<IReadOnlyList<byte>>(() => xObject.Stream.Decode(filterProvider))
: null;
var decode = EmptyArray<decimal>.Instance;

View File

@ -15,7 +15,8 @@
/// </summary>
public class XObjectImage : IPdfImage
{
private readonly Lazy<IReadOnlyList<byte>> bytes;
[CanBeNull]
private readonly Lazy<IReadOnlyList<byte>> bytesFactory;
/// <inheritdoc />
public PdfRectangle Bounds { get; }
@ -62,10 +63,6 @@
/// <inheritdoc />
public IReadOnlyList<byte> RawBytes { get; }
/// <inheritdoc />
[NotNull]
public IReadOnlyList<byte> Bytes => bytes.Value;
/// <summary>
/// Creates a new <see cref="XObjectImage"/>.
@ -93,7 +90,21 @@
Decode = decode;
ImageDictionary = imageDictionary ?? throw new ArgumentNullException(nameof(imageDictionary));
RawBytes = rawBytes;
this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes));
bytesFactory = bytes;
}
/// <inheritdoc />
public bool TryGetBytes(out IReadOnlyList<byte> bytes)
{
    // Without a factory there is nothing to decode; report failure with a null result.
    var canDecode = bytesFactory != null;

    // Decoding is deferred until the lazy value is first requested.
    bytes = canDecode ? bytesFactory.Value : null;

    return canDecode;
}
/// <inheritdoc />