From 27e251f9219e6318da2c4290b74252e62a9f378e Mon Sep 17 00:00:00 2001 From: Eliot Jones Date: Sat, 25 Apr 2020 09:42:24 +0100 Subject: [PATCH] make filter provider and filter public and use tryget for image bytes --- .../SinglePageLibreOfficeImages.cs | 8 ++--- .../PublicApiScannerTests.cs | 4 +++ src/UglyToad.PdfPig/Content/IPdfImage.cs | 13 ++++---- src/UglyToad.PdfPig/Content/InlineImage.cs | 32 ++++++++++++++++--- ...erProvider.cs => DefaultFilterProvider.cs} | 16 ++++++++-- src/UglyToad.PdfPig/Filters/IFilter.cs | 5 ++- .../Filters/IFilterProvider.cs | 14 +++++++- .../Parser/PdfDocumentFactory.cs | 2 +- src/UglyToad.PdfPig/PdfExtensions.cs | 10 ++++-- src/UglyToad.PdfPig/Writer/PdfMerger.cs | 2 +- .../XObjects/XObjectFactory.cs | 14 +++++++- src/UglyToad.PdfPig/XObjects/XObjectImage.cs | 23 +++++++++---- 12 files changed, 108 insertions(+), 35 deletions(-) rename src/UglyToad.PdfPig/Filters/{MemoryFilterProvider.cs => DefaultFilterProvider.cs} (87%) diff --git a/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs b/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs index 5b210f1b..03e8f275 100644 --- a/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs +++ b/src/UglyToad.PdfPig.Tests/Integration/SinglePageLibreOfficeImages.cs @@ -1,6 +1,5 @@ namespace UglyToad.PdfPig.Tests.Integration { - using System; using System.Linq; using Xunit; @@ -74,13 +73,12 @@ var page = document.GetPage(1); foreach (var image in page.GetImages()) { - try + if (image.TryGetBytes(out var bytes)) { - Assert.NotNull(image.Bytes); + Assert.NotNull(bytes); } - catch (NotSupportedException ) + else { - // Should allow access to raw bytes. Assert.NotNull(image.RawBytes); } } diff --git a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs index a853ca40..bcd770ed 100644 --- a/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs +++ b/src/UglyToad.PdfPig.Tests/PublicApiScannerTests.cs @@ -85,6 +85,9 @@ "UglyToad.PdfPig.CrossReference.CrossReferenceType", "UglyToad.PdfPig.CrossReference.TrailerDictionary", "UglyToad.PdfPig.Exceptions.PdfDocumentEncryptedException", + "UglyToad.PdfPig.Filters.DefaultFilterProvider", + "UglyToad.PdfPig.Filters.IFilter", + "UglyToad.PdfPig.Filters.IFilterProvider", "UglyToad.PdfPig.PdfFonts.DescriptorFontFile", "UglyToad.PdfPig.PdfFonts.FontDescriptor", "UglyToad.PdfPig.PdfFonts.FontDescriptorFlags", @@ -192,6 +195,7 @@ "UglyToad.PdfPig.Outline.Destinations.ExplicitDestinationType", "UglyToad.PdfPig.ParsingOptions", "UglyToad.PdfPig.PdfDocument", + "UglyToad.PdfPig.PdfExtensions", "UglyToad.PdfPig.Structure", "UglyToad.PdfPig.Util.Adler32Checksum", "UglyToad.PdfPig.Util.IWordExtractor", diff --git a/src/UglyToad.PdfPig/Content/IPdfImage.cs b/src/UglyToad.PdfPig/Content/IPdfImage.cs index 7f2bfc22..bb6dc9e7 100644 --- a/src/UglyToad.PdfPig/Content/IPdfImage.cs +++ b/src/UglyToad.PdfPig/Content/IPdfImage.cs @@ -42,13 +42,6 @@ /// int BitsPerComponent { get; } - /// - /// The bytes of the image with any filters decoded. - /// If the filter used to encode the bytes is not supported accessing this property will throw, access the - /// instead. - /// - IReadOnlyList Bytes { get; } - /// /// The encoded bytes of the image with all filters still applied. /// @@ -90,5 +83,11 @@ /// Whether this image is an or a . /// bool IsInlineImage { get; } + + /// + /// Get the decoded bytes of the image if applicable. For JPEG images and some other types the + /// should be used directly. + /// + bool TryGetBytes(out IReadOnlyList bytes); } } diff --git a/src/UglyToad.PdfPig/Content/InlineImage.cs b/src/UglyToad.PdfPig/Content/InlineImage.cs index 2188bc48..b92d6041 100644 --- a/src/UglyToad.PdfPig/Content/InlineImage.cs +++ b/src/UglyToad.PdfPig/Content/InlineImage.cs @@ -47,9 +47,6 @@ /// public bool Interpolate { get; } - /// - public IReadOnlyList Bytes => bytesFactory.Value; - /// public IReadOnlyList RawBytes { get; } @@ -76,7 +73,18 @@ Interpolate = interpolate; RawBytes = bytes; - bytesFactory = new Lazy>(() => + + var supportsFilters = true; + foreach (var filter in filters) + { + if (!filter.IsSupported) + { + supportsFilters = false; + break; + } + } + + bytesFactory = supportsFilters ? new Lazy>(() => { var b = bytes.ToArray(); for (var i = 0; i < filters.Count; i++) @@ -86,7 +94,21 @@ } return b; - }); + }) : null; + } + + /// + public bool TryGetBytes(out IReadOnlyList bytes) + { + bytes = null; + if (bytesFactory == null) + { + return false; + } + + bytes = bytesFactory.Value; + + return true; } /// diff --git a/src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs b/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs similarity index 87% rename from src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs rename to src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs index a0cd6512..1ee3dba4 100644 --- a/src/UglyToad.PdfPig/Filters/MemoryFilterProvider.cs +++ b/src/UglyToad.PdfPig/Filters/DefaultFilterProvider.cs @@ -6,13 +6,20 @@ using Core; using Tokens; - internal class MemoryFilterProvider : IFilterProvider + /// + /// + /// The default implementation of the . + /// + public class DefaultFilterProvider : IFilterProvider { private readonly IReadOnlyDictionary filterInstances; - public static readonly IFilterProvider Instance = new MemoryFilterProvider(); + /// + /// The single instance of this provider. + /// + public static readonly IFilterProvider Instance = new DefaultFilterProvider(); - private MemoryFilterProvider() + private DefaultFilterProvider() { var ascii85 = new Ascii85Filter(); var asciiHex = new AsciiHexDecodeFilter(); @@ -45,6 +52,7 @@ }; } + /// public IReadOnlyList GetFilters(DictionaryToken dictionary) { if (dictionary == null) @@ -76,6 +84,7 @@ } } + /// public IReadOnlyList GetNamedFilters(IReadOnlyList names) { if (names == null) @@ -103,6 +112,7 @@ return factory; } + /// public IReadOnlyList GetAllFilters() { return filterInstances.Values.Distinct().ToList(); diff --git a/src/UglyToad.PdfPig/Filters/IFilter.cs b/src/UglyToad.PdfPig/Filters/IFilter.cs index 654ec21a..d6c326ba 100644 --- a/src/UglyToad.PdfPig/Filters/IFilter.cs +++ b/src/UglyToad.PdfPig/Filters/IFilter.cs @@ -4,10 +4,9 @@ using Tokens; /// - /// A filter is used in a PDF to encode/decode data either to compress it - /// or derive an ASCII representation of the data. + /// A filter is used in a PDF to encode/decode data either to compress it or derive an ASCII representation of the data. /// - internal interface IFilter + public interface IFilter { /// /// Whether this library can decode information encoded using this filter. diff --git a/src/UglyToad.PdfPig/Filters/IFilterProvider.cs b/src/UglyToad.PdfPig/Filters/IFilterProvider.cs index 0460174c..dad2422c 100644 --- a/src/UglyToad.PdfPig/Filters/IFilterProvider.cs +++ b/src/UglyToad.PdfPig/Filters/IFilterProvider.cs @@ -3,12 +3,24 @@ using System.Collections.Generic; using Tokens; - internal interface IFilterProvider + /// + /// Gets filter implementations () for decoding PDF data. + /// + public interface IFilterProvider { + /// + /// Get the filters specified in this dictionary. + /// IReadOnlyList GetFilters(DictionaryToken dictionary); + /// + /// Gets the filters specified by the filter names. + /// IReadOnlyList GetNamedFilters(IReadOnlyList names); + /// + /// Get all available filters in this library. + /// IReadOnlyList GetAllFilters(); } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs index e2475c06..7d11dfd6 100644 --- a/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs +++ b/src/UglyToad.PdfPig/Parser/PdfDocumentFactory.cs @@ -83,7 +83,7 @@ private static PdfDocument OpenDocument(IInputBytes inputBytes, ISeekableTokenScanner scanner, ILog log, bool isLenientParsing, IReadOnlyList passwords, bool clipPaths) { - var filterProvider = MemoryFilterProvider.Instance; + var filterProvider = DefaultFilterProvider.Instance; CrossReferenceTable crossReferenceTable = null; diff --git a/src/UglyToad.PdfPig/PdfExtensions.cs b/src/UglyToad.PdfPig/PdfExtensions.cs index 5c6dc7a6..f9ab7682 100644 --- a/src/UglyToad.PdfPig/PdfExtensions.cs +++ b/src/UglyToad.PdfPig/PdfExtensions.cs @@ -7,7 +7,10 @@ using Tokenization.Scanner; using Tokens; - internal static class PdfExtensions + /// + /// Extensions for PDF types. + /// + public static class PdfExtensions { /// /// Try and get the entry with a given name and type or look-up the object if it's an indirect reference. @@ -44,7 +47,10 @@ return typedToken; } - internal static IReadOnlyList Decode(this StreamToken stream, IFilterProvider filterProvider) + /// + /// Get the decoded data from this stream. + /// + public static IReadOnlyList Decode(this StreamToken stream, IFilterProvider filterProvider) { var filters = filterProvider.GetFilters(stream.StreamDictionary); diff --git a/src/UglyToad.PdfPig/Writer/PdfMerger.cs b/src/UglyToad.PdfPig/Writer/PdfMerger.cs index 141c70f6..87a4e9af 100644 --- a/src/UglyToad.PdfPig/Writer/PdfMerger.cs +++ b/src/UglyToad.PdfPig/Writer/PdfMerger.cs @@ -24,7 +24,7 @@ { private static readonly ILog Log = new NoOpLog(); - private static readonly IFilterProvider FilterProvider = MemoryFilterProvider.Instance; + private static readonly IFilterProvider FilterProvider = DefaultFilterProvider.Instance; /// /// Merge two PDF documents together with the pages from followed by . diff --git a/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs b/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs index e380b566..c533cae2 100644 --- a/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs +++ b/src/UglyToad.PdfPig/XObjects/XObjectFactory.cs @@ -66,7 +66,19 @@ var interpolate = dictionary.TryGet(NameToken.Interpolate, pdfScanner, out BooleanToken interpolateToken) && interpolateToken.Data; - var decodedBytes = new Lazy>(() => xObject.Stream.Decode(filterProvider)); + var filters = filterProvider.GetFilters(xObject.Stream.StreamDictionary); + var supportsFilters = true; + foreach (var filter in filters) + { + if (!filter.IsSupported) + { + supportsFilters = false; + break; + } + } + + var decodedBytes = supportsFilters ? new Lazy>(() => xObject.Stream.Decode(filterProvider)) + : null; var decode = EmptyArray.Instance; diff --git a/src/UglyToad.PdfPig/XObjects/XObjectImage.cs b/src/UglyToad.PdfPig/XObjects/XObjectImage.cs index 8a9275f1..f7591c6a 100644 --- a/src/UglyToad.PdfPig/XObjects/XObjectImage.cs +++ b/src/UglyToad.PdfPig/XObjects/XObjectImage.cs @@ -15,7 +15,8 @@ /// public class XObjectImage : IPdfImage { - private readonly Lazy> bytes; + [CanBeNull] + private readonly Lazy> bytesFactory; /// public PdfRectangle Bounds { get; } @@ -62,10 +63,6 @@ /// public IReadOnlyList RawBytes { get; } - - /// - [NotNull] - public IReadOnlyList Bytes => bytes.Value; /// /// Creates a new . @@ -93,7 +90,21 @@ Decode = decode; ImageDictionary = imageDictionary ?? throw new ArgumentNullException(nameof(imageDictionary)); RawBytes = rawBytes; - this.bytes = bytes ?? throw new ArgumentNullException(nameof(bytes)); + bytesFactory = bytes; + } + + /// + public bool TryGetBytes(out IReadOnlyList bytes) + { + bytes = null; + if (bytesFactory == null) + { + return false; + } + + bytes = bytesFactory.Value; + + return true; } ///