Merge pull request #324 from kasperdaff/ccitt-fax-filter

Port of PDFBox's CCITTFaxFilter
This commit is contained in:
Eliot Jones 2021-05-09 11:52:46 -04:00 committed by GitHub
commit a85bcb4ec1
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
19 changed files with 2036 additions and 87 deletions

View File

@ -0,0 +1,40 @@
namespace UglyToad.PdfPig.Tests.Filters
{
using System.Collections.Generic;
using UglyToad.PdfPig.Filters;
using UglyToad.PdfPig.Tests.Images;
using UglyToad.PdfPig.Tokens;
using Xunit;
public class CcittFaxDecodeFilterTests
{
    /// <summary>
    /// Runs the filter over a stored 1800 x 3113 CCITT encoded bitmap (K = -1, BlackIs1 = true)
    /// and checks that the output matches the stored decoded bytes exactly.
    /// </summary>
    [Fact]
    public void CanDecodeCCittFaxCompressedImageData()
    {
        var input = ImageHelpers.LoadFileBytes("ccittfax-encoded.bin");
        var sut = new CcittFaxDecodeFilter();

        // Parameters specific to the CCITT fax filter.
        var decodeParameters = new DictionaryToken(new Dictionary<NameToken, IToken>
        {
            { NameToken.K, new NumericToken(-1) },
            { NameToken.Columns, new NumericToken(1800) },
            { NameToken.Rows, new NumericToken(3113) },
            { NameToken.BlackIs1, BooleanToken.True }
        });

        // The image dictionary uses the abbreviated (inline image style) keys.
        var imageEntries = new Dictionary<NameToken, IToken>
        {
            { NameToken.D, new ArrayToken(new []{ new NumericToken(1), new NumericToken(0) })},
            { NameToken.W, new NumericToken(1800) },
            { NameToken.H, new NumericToken(3113) },
            { NameToken.Bpc, new NumericToken(1) },
            { NameToken.F, NameToken.CcittfaxDecode },
            { NameToken.DecodeParms, decodeParameters }
        };

        var expected = ImageHelpers.LoadFileBytes("ccittfax-decoded.bin");

        var actual = sut.Decode(input, new DictionaryToken(imageEntries), 0);

        Assert.Equal(expected, actual);
    }
}
}

File diff suppressed because one or more lines are too long

Binary file not shown.

After

Width:  |  Height:  |  Size: 522 KiB

View File

@ -0,0 +1,39 @@
namespace UglyToad.PdfPig.Tests.Images
{
using System;
using System.IO;
using UglyToad.PdfPig.Images.Png;
/// <summary>
/// Shared helpers for tests which work with the binary fixtures stored under Images/Files.
/// </summary>
public static class ImageHelpers
{
    private static readonly string FilesFolder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Images", "Files"));

    /// <summary>
    /// Reads the whole fixture file with the given name from the fixtures folder.
    /// </summary>
    public static byte[] LoadFileBytes(string filename)
    {
        var fullPath = Path.Combine(FilesFolder, filename);
        return File.ReadAllBytes(fullPath);
    }

    /// <summary>
    /// Opens both byte arrays as PNG images and compares them pixel by pixel
    /// rather than comparing the encoded bytes.
    /// </summary>
    /// <returns><c>true</c> when the dimensions, the alpha channel flag and every pixel match.</returns>
    public static bool ImagesAreEqual(byte[] first, byte[] second)
    {
        var left = Png.Open(first);
        var right = Png.Open(second);

        var sameShape = left.Width == right.Width
                        && left.Height == right.Height
                        && left.HasAlphaChannel == right.HasAlphaChannel;
        if (!sameShape)
        {
            return false;
        }

        for (var row = 0; row < left.Height; row++)
        {
            for (var column = 0; column < left.Width; column++)
            {
                var leftPixel = left.GetPixel(column, row);
                var rightPixel = right.GetPixel(column, row);
                if (!leftPixel.Equals(rightPixel))
                {
                    return false;
                }
            }
        }

        return true;
    }
}
}

View File

@ -1,7 +1,5 @@
namespace UglyToad.PdfPig.Tests.Images
{
using System;
using System.IO;
using System.Linq;
using UglyToad.PdfPig.Graphics.Colors;
using UglyToad.PdfPig.Images.Png;
@ -38,7 +36,7 @@
};
Assert.True(PngFromPdfImageFactory.TryGenerate(image, out var bytes));
Assert.Equal(LoadImage("3x3.png"), bytes);
Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("3x3.png"), bytes));
}
[Fact]
@ -60,7 +58,7 @@
};
Assert.True(PngFromPdfImageFactory.TryGenerate(image, out var bytes));
Assert.Equal(LoadImage("3x3.png"), bytes);
Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("3x3.png"), bytes));
}
[Fact]
@ -82,7 +80,7 @@
};
Assert.True(PngFromPdfImageFactory.TryGenerate(image, out var bytes));
Assert.Equal(LoadImage("3x3.png"), bytes);
Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("3x3.png"), bytes));
}
[Fact]
@ -104,7 +102,7 @@
};
Assert.True(PngFromPdfImageFactory.TryGenerate(image, out var bytes));
Assert.Equal(LoadImage("3x3.png"), bytes);
Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("3x3.png"), bytes));
}
[Fact]
@ -143,13 +141,29 @@
};
Assert.True(PngFromPdfImageFactory.TryGenerate(image, out var bytes));
Assert.Equal(LoadImage("3x3.png"), bytes);
Assert.True(ImageHelpers.ImagesAreEqual(LoadImage("3x3.png"), bytes));
}
/// <summary>
/// Feeds already decoded 1 bit-per-pixel CCITT fax data through the PNG factory
/// and compares the resulting image with the stored reference PNG.
/// </summary>
[Fact]
public void CanGeneratePngFromCcittFaxDecodedImageData()
{
    var rawBitmap = ImageHelpers.LoadFileBytes("ccittfax-decoded.bin");
    var image = new TestPdfImage
    {
        ColorSpaceDetails = IndexedColorSpaceDetails.StencilBlackIs1,
        DecodedBytes = rawBitmap,
        WidthInSamples = 1800,
        HeightInSamples = 3113,
        BitsPerComponent = 1
    };

    var generated = PngFromPdfImageFactory.TryGenerate(image, out var bytes);

    Assert.True(generated);
    var reference = LoadImage("ccittfax.png");
    Assert.True(ImageHelpers.ImagesAreEqual(reference, bytes));
}
private static byte[] LoadImage(string name)
{
var folder = Path.GetFullPath(Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Images", "Files"));
return File.ReadAllBytes(Path.Combine(folder, name));
return ImageHelpers.LoadFileBytes(name);
}
}
}

View File

@ -0,0 +1,21 @@
namespace UglyToad.PdfPig.Filters
{
/// <summary>
/// Specifies the compression type to use with <see cref="T:UglyToad.PdfPig.Filters.CcittFaxDecoderStream" />.
/// The decoder stream selects its per-row decoding routine from this value.
/// </summary>
internal enum CcittFaxCompressionType
{
/// <summary>
/// Modified Huffman - Group 3 (T4).
/// Every row is decoded one-dimensionally; the decoder does not search for end-of-line codes.
/// </summary>
ModifiedHuffman,
/// <summary>
/// Modified Read - Group 3 (optional T4).
/// The decoder skips to the next end-of-line code and then reads a tag bit which selects
/// one-dimensional or two-dimensional decoding for the row.
/// </summary>
T4,
/// <summary>
/// Modified Modified Read - Group 4 (T6).
/// Every row is decoded two-dimensionally against the previous row.
/// </summary>
T6
}
}

View File

@ -1,19 +1,122 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.Collections.Generic;
using Tokens;
internal class CcittFaxDecodeFilter : IFilter
{
using System.Collections.Generic;
using System.IO;
using System.Linq;
using Tokens;
using UglyToad.PdfPig.Util;
/// <summary>
/// Decodes image data that has been encoded using either Group 3 or Group 4.
///
/// Ported from https://github.com/apache/pdfbox/blob/714156a15ea6fcfe44ac09345b01e192cbd74450/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxFilter.java
/// </summary>
internal class CcittFaxDecodeFilter : IFilter
{
/// <inheritdoc />
public bool IsSupported { get; } = false;
public bool IsSupported { get; } = true;
/// <inheritdoc />
public byte[] Decode(IReadOnlyList<byte> input, DictionaryToken streamDictionary, int filterIndex)
{
throw new NotSupportedException("The CCITT Fax Filter for image data is not currently supported. " +
"Try accessing the raw compressed data directly.");
}
{
var decodeParms = DecodeParameterResolver.GetFilterParameters(streamDictionary, filterIndex);
var cols = decodeParms.GetIntOrDefault(NameToken.Columns, 1728);
var rows = decodeParms.GetIntOrDefault(NameToken.Rows, 0);
var height = streamDictionary.GetIntOrDefault(NameToken.Height, NameToken.H, 0);
if (rows > 0 && height > 0)
{
// PDFBOX-771, PDFBOX-3727: rows in DecodeParms sometimes contains an incorrect value
rows = height;
}
else
{
// at least one of the values has to have a valid value
rows = Math.Max(rows, height);
}
var k = decodeParms.GetIntOrDefault(NameToken.K, 0);
var encodedByteAlign = decodeParms.GetBooleanOrDefault(NameToken.EncodedByteAlign, false);
var compressionType = DetermineCompressionType(input, k);
using (var stream = new CcittFaxDecoderStream(new MemoryStream(input.ToArray()), cols, compressionType, encodedByteAlign))
{
var arraySize = (cols + 7) / 8 * rows;
var decompressed = new byte[arraySize];
ReadFromDecoderStream(stream, decompressed);
// we expect black to be 1, if not invert the bitmap
var blackIsOne = decodeParms.GetBooleanOrDefault(NameToken.BlackIs1, false);
if (!blackIsOne)
{
InvertBitmap(decompressed);
}
return decompressed;
}
}
/// <summary>
/// Chooses the compression type from the K decode parameter and, for K = 0, from the data itself.
/// </summary>
/// <param name="input">The encoded data; at least 20 bytes are required when <paramref name="k"/> is 0.</param>
/// <param name="k">The K entry of the decode parameters.</param>
/// <returns>
/// T6 for negative K and T4 for positive K. For K = 0, T4 when an EOL code (eleven 0 bits followed by a 1)
/// is found within the first 160 bits, otherwise ModifiedHuffman.
/// </returns>
/// <exception cref="InvalidOperationException">K is 0 and fewer than 20 bytes of input are available.</exception>
private static CcittFaxCompressionType DetermineCompressionType(IReadOnlyList<byte> input, int k)
{
    if (k > 0)
    {
        // Group 3 2D
        return CcittFaxCompressionType.T4;
    }

    if (k < 0)
    {
        return CcittFaxCompressionType.T6;
    }

    // k == 0: Group 3 1D, unless no EOL can be found in which case the data is treated as RLE.
    if (input.Count < 20)
    {
        throw new InvalidOperationException("The format is invalid");
    }

    var startsWithEol = input[0] == 0 && (input[1] >> 4 == 1 || input[1] == 1);
    if (startsWithEol)
    {
        return CcittFaxCompressionType.T4;
    }

    // Leading EOL (0b000000000001) not found: slide a 12 bit window over the first 160 bits.
    var window = (short)(((input[0] << 8) + (input[1] & 0xff)) >> 4);
    for (var bit = 12; bit < 160; bit++)
    {
        var nextBit = (input[(bit / 8)] >> (7 - (bit % 8))) & 0x01;
        window = (short)((window << 1) + nextBit);
        if ((window & 0xFFF) == 1)
        {
            return CcittFaxCompressionType.T4;
        }
    }

    return CcittFaxCompressionType.ModifiedHuffman;
}
/// <summary>
/// Reads from the decoder until <paramref name="result"/> is full or the decoder
/// reports a negative count, then closes the decoder.
/// </summary>
private static void ReadFromDecoderStream(CcittFaxDecoderStream decoderStream, byte[] result)
{
    var filled = 0;
    while (true)
    {
        var count = decoderStream.Read(result, filled, result.Length - filled);
        if (count <= -1)
        {
            break;
        }

        filled += count;
        if (filled >= result.Length)
        {
            break;
        }
    }

    decoderStream.Close();
}
/// <summary>
/// Flips every bit of the buffer in place.
/// </summary>
private static void InvertBitmap(byte[] bufferData)
{
    var length = bufferData.Length;
    var position = 0;
    while (position < length)
    {
        // XOR with 0xFF toggles all eight bits of the byte.
        bufferData[position] = (byte)(bufferData[position] ^ 0xFF);
        position++;
    }
}
}
}
}

View File

@ -0,0 +1,788 @@
namespace UglyToad.PdfPig.Filters
{
using System;
using System.IO;
using UglyToad.PdfPig.IO;
using UglyToad.PdfPig.Util;
/// <summary>
/// CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression.
///
/// Ported from https://github.com/apache/pdfbox/blob/e644c29279e276bde14ce7a33bdeef0cb1001b3e/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java
/// </summary>
internal class CcittFaxDecoderStream : StreamWrapper
{
// See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43.
private readonly int columns;
private readonly byte[] decodedRow;
private readonly bool optionG32D;
private readonly bool optionByteAligned;
private readonly CcittFaxCompressionType type;
private int decodedLength;
private int decodedPos;
private int[] changesReferenceRow;
private int[] changesCurrentRow;
private int changesReferenceRowCount;
private int changesCurrentRowCount;
private int lastChangingElement = 0;
private int buffer = -1;
private int bufferPos = -1;
/// <summary>
/// Creates a CCITTFaxDecoderStream.
/// This constructor may be used for CCITT streams embedded in PDF files,
/// which use EncodedByteAlign.
/// </summary>
/// <param name="stream">The stream containing the encoded data.</param>
/// <param name="columns">The width of a row in pixels.</param>
/// <param name="type">The compression type of the encoded data.</param>
/// <param name="byteAligned">Whether the bit buffer is reset before each row is decoded.</param>
/// <exception cref="ArgumentOutOfRangeException"><paramref name="type"/> is not a known compression type.</exception>
public CcittFaxDecoderStream(Stream stream, int columns, CcittFaxCompressionType type, bool byteAligned)
    : base(stream)
{
    this.columns = columns;
    this.type = type;
    optionByteAligned = byteAligned;

    // Only used for b/w data (1 bit per pixel), so a row needs ceil(columns / 8) bytes.
    decodedRow = new byte[(columns + 7) / 8];
    changesReferenceRow = new int[columns + 2];
    changesCurrentRow = new int[columns + 2];

    if (type == CcittFaxCompressionType.T4)
    {
        optionG32D = true;
    }
    else if (type == CcittFaxCompressionType.ModifiedHuffman || type == CcittFaxCompressionType.T6)
    {
        optionG32D = false;
    }
    else
    {
        throw new ArgumentOutOfRangeException(nameof(type), type, "Illegal parameter");
    }
}
/// <summary>
/// Decodes the next row once the current one has been fully consumed.
/// When decoding fails before a row was produced the decoded length is set to -1,
/// which the read methods treat as "past the end of the data".
/// </summary>
private void Fetch()
{
    if (decodedPos < decodedLength)
    {
        return;
    }

    decodedLength = 0;

    try
    {
        DecodeRow();
    }
    catch (IOException) when (decodedLength == 0)
    {
        // No row was produced: let client code read past the end of the stream.
        // A failure with a non-zero decoded length is not handled here and propagates.
        decodedLength = -1;
    }

    decodedPos = 0;
}
/// <summary>
/// Decodes one row as alternating white and black runs, starting with white,
/// and records the position of every colour change in the current row.
/// </summary>
private void Decode1D()
{
    changesCurrentRowCount = 0;

    var position = 0;
    var isWhiteRun = true;
    do
    {
        var runTree = isWhiteRun ? WhiteRunTree : BlackRunTree;
        position += DecodeRun(runTree);
        changesCurrentRow[changesCurrentRowCount] = position;
        changesCurrentRowCount++;

        // The next run has the opposite colour.
        isWhiteRun = !isWhiteRun;
    } while (position < columns);
}
/// <summary>
/// Decodes one row two-dimensionally: each code read from <c>CodeTree</c> describes the next
/// colour change either as explicit run lengths (horizontal mode) or relative to the colour
/// changes of the previously decoded row (pass and vertical modes).
/// The resulting change positions are stored in <c>changesCurrentRow</c>.
/// </summary>
private void Decode2D()
{
// The row decoded last becomes the reference row; its old buffer is reused for the new row.
changesReferenceRowCount = changesCurrentRowCount;
var tmp = changesCurrentRow;
changesCurrentRow = changesReferenceRow;
changesReferenceRow = tmp;
var white = true;
var index = 0;
changesCurrentRowCount = 0;
// Every 'goto mode' re-tests the loop condition and restarts the code lookup at the tree root.
mode: while (index < columns)
{
var node = CodeTree.Root;
while (true)
{
node = node.Walk(ReadBit());
if (node == null)
{
// The bits read so far do not form a known mode code: start a new lookup.
goto mode;
}
else if (node.IsLeaf)
{
switch (node.Value)
{
case VALUE_HMODE:
// Horizontal mode: two explicit runs follow, the first in the current colour
// and the second in the opposite colour, so the colour is unchanged afterwards.
var runLength = DecodeRun(white ? WhiteRunTree : BlackRunTree);
index += runLength;
changesCurrentRow[changesCurrentRowCount++] = index;
runLength = DecodeRun(white ? BlackRunTree : WhiteRunTree);
index += runLength;
changesCurrentRow[changesCurrentRowCount++] = index;
break;
case VALUE_PASSMODE:
// Pass mode: move to the reference row change following the one found by
// GetNextChangingElement (or to the end of the row) without recording a change.
var pChangingElement = GetNextChangingElement(index, white) + 1;
if (pChangingElement >= changesReferenceRowCount)
{
index = columns;
}
else
{
index = changesReferenceRow[pChangingElement];
}
break;
default:
// Vertical mode (-3 to 3)
// The change lies node.Value pixels from the matching reference row change,
// or from the end of the row when no such change exists.
var vChangingElement = GetNextChangingElement(index, white);
if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1)
{
index = columns + node.Value;
}
else
{
index = changesReferenceRow[vChangingElement] + node.Value;
}
changesCurrentRow[changesCurrentRowCount] = index;
changesCurrentRowCount++;
white = !white;
break;
}
// A complete code was handled: continue with the next code.
goto mode;
}
}
}
}
/// <summary>
/// Finds the index of the first entry in the reference row, among the entries of the parity
/// selected by <paramref name="white"/>, whose position is greater than <paramref name="a0"/>.
/// The search resumes near the entry found by the previous call.
/// </summary>
/// <returns>
/// The index into <c>changesReferenceRow</c>, the start index itself when <paramref name="a0"/> is 0,
/// or -1 when no entry qualifies.
/// </returns>
private int GetNextChangingElement(int a0, bool white)
{
    // Round the last hit down to an even index, then select the even or odd entries.
    var first = (int)(lastChangingElement & 0xFFFF_FFFE);
    if (!white)
    {
        first += 1;
    }

    // Step back one pair so that a change just before the previous hit is not missed.
    if (first > 2)
    {
        first -= 2;
    }

    if (a0 == 0)
    {
        return first;
    }

    var candidate = first;
    while (candidate < changesReferenceRowCount)
    {
        if (changesReferenceRow[candidate] > a0)
        {
            lastChangingElement = candidate;
            return candidate;
        }

        candidate += 2;
    }

    return -1;
}
/// <summary>
/// Decodes a Modified Huffman row: an optional realignment to the next byte followed by 1D decoding.
/// </summary>
private void DecodeRowType2()
{
    var realign = optionByteAligned;
    if (realign)
    {
        // Discard the remaining bits of the current byte.
        ResetBuffer();
    }

    Decode1D();
}
/// <summary>
/// Decodes a T4 row: skips forward to the next EOL code and then decodes the row either
/// one-dimensionally or two-dimensionally.
/// </summary>
private void DecodeRowType4()
{
    if (optionByteAligned)
    {
        ResetBuffer();
    }

    // Read until the next EOL code; bits which do not fit the tree restart the search at the root.
    var cursor = EolOnlyTree.Root;
    while (true)
    {
        cursor = cursor.Walk(ReadBit());
        if (cursor == null)
        {
            cursor = EolOnlyTree.Root;
            continue;
        }

        if (cursor.IsLeaf)
        {
            break;
        }
    }

    // With the 2D option the bit after the EOL selects the coding of the row (1 = 1D, 0 = 2D).
    // The bit is only consumed when the option is active.
    var oneDimensional = !optionG32D || ReadBit();
    if (oneDimensional)
    {
        Decode1D();
    }
    else
    {
        Decode2D();
    }
}
/// <summary>
/// Decodes a T6 row: an optional realignment to the next byte followed by 2D decoding.
/// </summary>
private void DecodeRowType6()
{
    var realign = optionByteAligned;
    if (realign)
    {
        // Discard the remaining bits of the current byte.
        ResetBuffer();
    }

    Decode2D();
}
/// <summary>
/// Decodes the next row with the routine matching the compression type and then renders the
/// recorded colour changes into <c>decodedRow</c> as packed bits (most significant bit first,
/// a set bit for every pixel of a non-white run).
/// </summary>
/// <exception cref="InvalidOperationException">The compression type is not supported.</exception>
/// <exception cref="IOException">The decoded runs do not add up to the row width.</exception>
private void DecodeRow()
{
switch (type)
{
case CcittFaxCompressionType.ModifiedHuffman:
DecodeRowType2();
break;
case CcittFaxCompressionType.T4:
DecodeRowType4();
break;
case CcittFaxCompressionType.T6:
DecodeRowType6();
break;
default:
throw new InvalidOperationException(type + " is not a supported compression type.");
}
var index = 0;
var white = true;
// The reference row search of the 2D decoder starts from the beginning for the next row.
lastChangingElement = 0;
// One iteration per run; the extra final iteration covers the pixels between the
// last recorded change and the end of the row.
for (var i = 0; i <= changesCurrentRowCount; i++)
{
var nextChange = columns;
if (i != changesCurrentRowCount)
{
nextChange = changesCurrentRow[i];
}
// Changes beyond the row width are clamped to the end of the row.
if (nextChange > columns)
{
nextChange = columns;
}
var byteIndex = index / 8;
// Finish the partially written byte bit by bit.
// Note: the conditional binds weaker than the shift, so this ORs in 0 for white
// and the single bit (1 << ...) otherwise.
while (index % 8 != 0 && (nextChange - index) > 0)
{
decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << (7 - ((index) % 8)));
index++;
}
// On a byte boundary whole bytes of the run can be written at once.
if (index % 8 == 0)
{
byteIndex = index / 8;
var value = (byte)(white ? 0x00 : 0xff);
while ((nextChange - index) > 7)
{
decodedRow[byteIndex] = value;
index += 8;
++byteIndex;
}
}
// Write the remaining (fewer than 8) pixels of the run.
while ((nextChange - index) > 0)
{
if (index % 8 == 0)
{
// Starting a new byte: clear what is left over from the previous row.
decodedRow[byteIndex] = 0;
}
decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << (7 - ((index) % 8)));
index++;
}
white = !white;
}
if (index != columns)
{
throw new IOException("Sum of run-lengths does not equal scan line width: " + index + " > " + columns);
}
decodedLength = (index + 7) / 8;
}
/// <summary>
/// Reads one complete run length from the given code tree. Codes with a value of 64 or more
/// are added up and followed by another code; the first code with a value from 0 to 63 ends the run.
/// </summary>
/// <returns>
/// The total run length, or the row width when a code with a negative value is read.
/// </returns>
/// <exception cref="IOException">The bits read do not form a code of the tree.</exception>
private int DecodeRun(Tree tree)
{
    var runLength = 0;
    var cursor = tree.Root;

    for (; ; )
    {
        cursor = cursor.Walk(ReadBit());
        if (cursor == null)
        {
            throw new IOException("Unknown code in Huffman RLE stream");
        }

        if (!cursor.IsLeaf)
        {
            continue;
        }

        var codeValue = cursor.Value;
        runLength += codeValue;

        if (codeValue >= 64)
        {
            // Another code follows: start again at the root.
            cursor = tree.Root;
            continue;
        }

        return codeValue >= 0 ? runLength : columns;
    }
}
/// <summary>
/// Marks the bit buffer as empty so that the next bit is taken from a freshly read byte.
/// </summary>
private void ResetBuffer() => bufferPos = -1;
/// <summary>
/// Returns the next bit of the encoded data, most significant bit of each byte first.
/// </summary>
/// <exception cref="IOException">The underlying stream has no more bytes.</exception>
private bool ReadBit()
{
    var bufferIsEmpty = bufferPos < 0 || bufferPos > 7;
    if (bufferIsEmpty)
    {
        buffer = Stream.ReadByte();
        if (buffer == -1)
        {
            throw new IOException("Unexpected end of Huffman RLE stream");
        }

        bufferPos = 0;
    }

    var shift = 7 - bufferPos;
    var bit = (buffer >> shift) & 1;

    // After the last bit of the byte the buffer is marked as empty again.
    bufferPos = bufferPos >= 7 ? -1 : bufferPos + 1;

    return bit == 1;
}
/// <summary>
/// Returns the next decoded byte, decoding a further row when the current one is used up.
/// Once no more rows can be decoded 0 is returned for every call.
/// </summary>
public override int ReadByte()
{
    var rowConsumed = decodedLength >= 0 && decodedPos >= decodedLength;
    if (rowConsumed)
    {
        Fetch();
    }

    if (decodedLength < 0)
    {
        // Past the end of the data.
        return 0x0;
    }

    return decodedRow[decodedPos++] & 0xff;
}
/// <summary>
/// Copies decoded bytes of the current row into <paramref name="b"/>, decoding a further row
/// when the current one is used up. At most the remainder of one row is returned per call.
/// Once no more rows can be decoded the requested range is filled with zeros.
/// </summary>
/// <returns>The number of bytes written; <paramref name="len"/> when the data is exhausted.</returns>
public override int Read(byte[] b, int off, int len)
{
    var rowConsumed = decodedLength >= 0 && decodedPos >= decodedLength;
    if (rowConsumed)
    {
        Fetch();
    }

    if (decodedLength < 0)
    {
        // Past the end of the data: hand out zeros.
        ArrayHelper.Fill(b, off, off + len, (byte)0x0);
        return len;
    }

    var available = decodedLength - decodedPos;
    var count = available < len ? available : len;
    Array.Copy(decodedRow, decodedPos, b, off, count);
    decodedPos += count;

    return count;
}
/// <summary>
/// A node of a binary code tree. The 0 bit leads to <see cref="Left"/>, the 1 bit to <see cref="Right"/>.
/// </summary>
private class Node
{
    public Node Left { get; set; }

    public Node Right { get; set; }

    public int Value { get; set; }

    public bool CanBeFill { get; set; }

    public bool IsLeaf { get; set; }

    /// <summary>
    /// Stores <paramref name="node"/> as the child reached with the bit <paramref name="next"/>.
    /// </summary>
    public void Set(bool next, Node node)
    {
        if (next)
        {
            Right = node;
            return;
        }

        Left = node;
    }

    /// <summary>
    /// Returns the child reached with the bit <paramref name="next"/>, or null when there is none.
    /// </summary>
    public Node Walk(bool next)
    {
        if (next)
        {
            return Right;
        }

        return Left;
    }

    public override string ToString() => $"[{nameof(IsLeaf)}={IsLeaf}, {nameof(Value)}={Value}, {nameof(CanBeFill)}={CanBeFill}]";
}
/// <summary>
/// A binary code tree. Codes are inserted with the most significant of their <c>depth</c> bits first.
/// </summary>
private class Tree
{
    public Node Root { get; } = new Node();

    /// <summary>
    /// Inserts the code <paramref name="path"/> of <paramref name="depth"/> bits; the node created
    /// for the last bit becomes a leaf holding <paramref name="value"/>.
    /// </summary>
    /// <exception cref="IOException">The path runs through an existing leaf.</exception>
    public void Fill(int depth, int path, int value)
    {
        var cursor = Root;
        for (var bitPos = depth - 1; bitPos >= 0; bitPos--)
        {
            var goRight = ((path >> bitPos) & 1) == 1;
            var child = cursor.Walk(goRight);

            if (child != null)
            {
                if (child.IsLeaf)
                {
                    throw new IOException("node is leaf, no other following");
                }
            }
            else
            {
                child = new Node();

                // bitPos 0 is the last bit of the code.
                if (bitPos == 0)
                {
                    child.Value = value;
                    child.IsLeaf = true;
                }

                if (path == 0)
                {
                    child.CanBeFill = true;
                }

                cursor.Set(goRight, child);
            }

            cursor = child;
        }
    }

    /// <summary>
    /// Inserts the code <paramref name="path"/> of <paramref name="depth"/> bits and attaches the
    /// existing <paramref name="node"/> at the position of the last bit.
    /// </summary>
    /// <exception cref="IOException">The path runs through an existing leaf.</exception>
    public void Fill(int depth, int path, Node node)
    {
        var cursor = Root;
        for (var bitPos = depth - 1; bitPos >= 0; bitPos--)
        {
            var goRight = ((path >> bitPos) & 1) == 1;
            var child = cursor.Walk(goRight);

            if (child != null)
            {
                if (child.IsLeaf)
                {
                    throw new IOException("node is leaf, no other following");
                }
            }
            else
            {
                // bitPos 0 is the last bit of the code: attach the given node there.
                child = bitPos == 0 ? node : new Node();

                if (path == 0)
                {
                    child.CanBeFill = true;
                }

                cursor.Set(goRight, child);
            }

            cursor = child;
        }
    }
}
private static readonly short[][] BLACK_CODES = new short[][] {
new short[]{ // 2 bits
0x2, 0x3,
},
new short[]{ // 3 bits
0x2, 0x3,
},
new short[]{ // 4 bits
0x2, 0x3,
},
new short[]{ // 5 bits
0x3,
},
new short[]{ // 6 bits
0x4, 0x5,
},
new short[]{ // 7 bits
0x4, 0x5, 0x7,
},
new short[]{ // 8 bits
0x4, 0x7,
},
new short[]{ // 9 bits
0x18,
},
new short[]{ // 10 bits
0x17, 0x18, 0x37, 0x8, 0xf,
},
new short[]{ // 11 bits
0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd,
},
new short[]{ // 12 bits
0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33,
0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65,
0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3,
0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb,
},
new short[]{ // 13 bits
0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73,
0x74, 0x75, 0x76, 0x77,
}
};
private static readonly short[][] BLACK_RUN_LENGTHS = new short[][]{
new short[]{ // 2 bits
3, 2,
},
new short[]{ // 3 bits
1, 4,
},
new short[]{ // 4 bits
6, 5,
},
new short[]{ // 5 bits
7,
},
new short[]{ // 6 bits
9, 8,
},
new short[]{ // 7 bits
10, 11, 12,
},
new short[]{ // 8 bits
13, 14,
},
new short[]{ // 9 bits
15,
},
new short[]{ // 10 bits
16, 17, 0, 18, 64,
},
new short[]{ // 11 bits
24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920,
},
new short[]{ // 12 bits
1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53,
54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26,
27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43,
},
new short[]{ // 13 bits
640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088,
1152, 1216,
}
};
private static readonly short[][] WHITE_CODES = new short[][]{
new short[]{ // 4 bits
0x7, 0x8, 0xb, 0xc, 0xe, 0xf,
},
new short[]{ // 5 bits
0x12, 0x13, 0x14, 0x1b, 0x7, 0x8,
},
new short[]{ // 6 bits
0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8,
},
new short[]{ // 7 bits
0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc,
},
new short[]{ // 8 bits
0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d,
0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59,
0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb,
},
new short[]{ // 9 bits
0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb,
},
new short[]{ // 10 bits
},
new short[]{ // 11 bits
0x8, 0xc, 0xd,
},
new short[]{ // 12 bits
0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f,
}
};
private static readonly short[][] WHITE_RUN_LENGTHS = new short[][]{
new short[]{ // 4 bits
2, 3, 4, 5, 6, 7,
},
new short[]{ // 5 bits
128, 8, 9, 64, 10, 11,
},
new short[]{ // 6 bits
192, 1664, 16, 17, 13, 14, 15, 1, 12,
},
new short[]{ // 7 bits
26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19,
},
new short[]{ // 8 bits
33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45,
59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48,
},
new short[]{ // 9 bits
1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408,
},
new short[]{ // 10 bits
},
new short[]{ // 11 bits
1792, 1856, 1920,
},
new short[]{ // 12 bits
1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560,
}
};
private static readonly Node EOL;
private static readonly Node FILL;
private static readonly Tree BlackRunTree;
private static readonly Tree WhiteRunTree;
private static readonly Tree EolOnlyTree;
private static readonly Tree CodeTree;
const int VALUE_EOL = -2000;
const int VALUE_FILL = -1000;
const int VALUE_PASSMODE = -3000;
const int VALUE_HMODE = -4000;
/// <summary>
/// Builds the shared code trees: the EOL search tree, the black and white run length trees
/// and the tree of the two-dimensional mode codes.
/// </summary>
static CcittFaxDecoderStream()
{
    EOL = new Node
    {
        IsLeaf = true,
        Value = VALUE_EOL
    };

    // FILL absorbs any number of further 0 bits and reaches EOL on the first 1 bit.
    FILL = new Node
    {
        Value = VALUE_FILL
    };
    FILL.Left = FILL;
    FILL.Right = EOL;

    EolOnlyTree = new Tree();
    EolOnlyTree.Fill(12, 0, FILL);
    EolOnlyTree.Fill(12, 1, EOL);

    // The shortest black code has 2 bits, the shortest white code has 4 bits.
    BlackRunTree = BuildRunTree(BLACK_CODES, BLACK_RUN_LENGTHS, 2);
    WhiteRunTree = BuildRunTree(WHITE_CODES, WHITE_RUN_LENGTHS, 4);

    CodeTree = new Tree();
    CodeTree.Fill(4, 1, VALUE_PASSMODE); // pass mode
    CodeTree.Fill(3, 1, VALUE_HMODE); // H mode
    CodeTree.Fill(1, 1, 0); // V(0)
    CodeTree.Fill(3, 3, 1); // V_R(1)
    CodeTree.Fill(6, 3, 2); // V_R(2)
    CodeTree.Fill(7, 3, 3); // V_R(3)
    CodeTree.Fill(3, 2, -1); // V_L(1)
    CodeTree.Fill(6, 2, -2); // V_L(2)
    CodeTree.Fill(7, 2, -3); // V_L(3)
}

/// <summary>
/// Creates a run length tree from a code table grouped by code length. Group <c>n</c> of the
/// tables holds the codes of <c>shortestCodeLength + n</c> bits and their run lengths.
/// The FILL and EOL entries are added after the run length codes.
/// </summary>
private static Tree BuildRunTree(short[][] codes, short[][] runLengths, int shortestCodeLength)
{
    var tree = new Tree();

    for (var group = 0; group < codes.Length; group++)
    {
        var bitLength = group + shortestCodeLength;
        for (var entry = 0; entry < codes[group].Length; entry++)
        {
            tree.Fill(bitLength, codes[group][entry], runLengths[group][entry]);
        }
    }

    tree.Fill(12, 0, FILL);
    tree.Fill(12, 1, EOL);

    return tree;
}
}
}

View File

@ -2,8 +2,9 @@
{
using System;
using System.Collections.Generic;
using Tokens;
using Tokens;
using UglyToad.PdfPig.Util;
internal static class DecodeParameterResolver
{
public static DictionaryToken GetFilterParameters(DictionaryToken streamDictionary, int index)
@ -18,9 +19,9 @@
throw new ArgumentOutOfRangeException(nameof(index), "Index must be 0 or greater");
}
var filter = GetDictionaryObject(streamDictionary, NameToken.Filter, NameToken.F);
var filter = streamDictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);
var parameters = GetDictionaryObject(streamDictionary, NameToken.DecodeParms, NameToken.Dp);
var parameters = streamDictionary.GetObjectOrDefault(NameToken.DecodeParms, NameToken.Dp);
switch (filter)
{
@ -45,20 +46,5 @@
return new DictionaryToken(new Dictionary<NameToken, IToken>());
}
private static IToken GetDictionaryObject(DictionaryToken dictionary, NameToken first, NameToken second)
{
if (dictionary.TryGet(first, out var token))
{
return token;
}
if (dictionary.TryGet(second, out token))
{
return token;
}
return null;
}
}
}

View File

@ -4,8 +4,9 @@
using System.Collections.Generic;
using System.Linq;
using Core;
using Tokens;
using Tokens;
using UglyToad.PdfPig.Util;
/// <inheritdoc />
/// <summary>
/// The default implementation of the <see cref="T:UglyToad.PdfPig.Filters.IFilterProvider" />.
@ -60,7 +61,8 @@
throw new ArgumentNullException(nameof(dictionary));
}
if (!dictionary.TryGet(NameToken.Filter, out var token))
var token = dictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);
if (token == null)
{
return EmptyArray<IFilter>.Instance;
}

View File

@ -6,8 +6,9 @@
using Core;
using Parser.Parts;
using Tokenization.Scanner;
using Tokens;
using Tokens;
using UglyToad.PdfPig.Util;
internal class FilterProviderWithLookup : ILookupFilterProvider
{
private readonly IFilterProvider inner;
@ -33,7 +34,8 @@
throw new ArgumentNullException(nameof(dictionary));
}
if (!dictionary.TryGet(NameToken.Filter, out var token))
var token = dictionary.GetObjectOrDefault(NameToken.Filter, NameToken.F);
if (token == null)
{
return EmptyArray<IFilter>.Instance;
}

View File

@ -84,6 +84,26 @@
/// </summary>
public class IndexedColorSpaceDetails : ColorSpaceDetails
{
/// <summary>
/// A color space useful for extracting stencil masks as black-and-white images.
/// Index 0 is black and index 1 is white.
/// </summary>
internal static readonly IndexedColorSpaceDetails StencilBlackIs0
= new IndexedColorSpaceDetails(DeviceGrayColorSpaceDetails.Instance, 1, new byte[] { 0, 255 });
/// <summary>
/// A color space useful for extracting stencil masks as black-and-white images.
/// Index 0 is white and index 1 is black.
/// </summary>
internal static readonly IndexedColorSpaceDetails StencilBlackIs1
= new IndexedColorSpaceDetails(DeviceGrayColorSpaceDetails.Instance, 1, new byte[] { 255, 0 });
/// <summary>
/// Selects the stencil color space matching a Decode array: <see cref="StencilBlackIs1"/> when
/// the array starts with 1 0, otherwise <see cref="StencilBlackIs0"/>.
/// </summary>
internal static ColorSpaceDetails Stencil(decimal[] decode)
{
    var isInverted = decode.Length >= 2 && decode[0] == 1 && decode[1] == 0;
    if (isInverted)
    {
        return StencilBlackIs1;
    }

    // default
    return StencilBlackIs0;
}
/// <summary>
/// The base color space in which the values in the color table are to be interpreted.
/// It can be any device or CIE-based color space or(in PDF 1.3) a Separation or DeviceN space,
@ -111,7 +131,7 @@
HiVal = hiVal;
ColorTable = colorTable;
BaseType = baseColorSpaceDetails.BaseType;
}
}
}
/// <summary>

View File

@ -114,30 +114,11 @@
var decode = decodeRaw.Data.OfType<NumericToken>().Select(x => x.Data).ToArray();
var filterDictionaryEntries = new Dictionary<NameToken, IToken>();
var decodeParamsDict = GetByKeys<DictionaryToken>(NameToken.DecodeParms, NameToken.Dp, false);
if (decodeParamsDict == null)
{
var decodeParamsArray = GetByKeys<ArrayToken>(NameToken.DecodeParms, NameToken.Dp, false);
if (decodeParamsArray != null)
{
filterDictionaryEntries[NameToken.DecodeParms] = decodeParamsArray;
}
}
else
{
filterDictionaryEntries[NameToken.DecodeParms] = decodeParamsDict;
}
var streamDictionary = new DictionaryToken(filterDictionaryEntries);
var interpolate = GetByKeys<BooleanToken>(NameToken.Interpolate, NameToken.I, false)?.Data ?? false;
return new InlineImage(bounds, width, height, bitsPerComponent, isMask, renderingIntent, interpolate, colorSpace, decode, Bytes,
filters,
streamDictionary,
imgDic,
details);
}

View File

@ -0,0 +1,74 @@
namespace UglyToad.PdfPig.IO
{
using System.IO;
/// <summary>
/// A <see cref="System.IO.Stream"/> which forwards every operation to a wrapped stream.
/// Intended as a base class for streams which override only some of the operations.
/// Disposing the wrapper also disposes the wrapped stream.
/// </summary>
internal class StreamWrapper : Stream
{
    /// <summary>
    /// The wrapped stream all operations are forwarded to.
    /// </summary>
    protected readonly Stream Stream;

    /// <summary>
    /// Creates a wrapper around <paramref name="stream"/>. The wrapper takes ownership of the stream.
    /// </summary>
    public StreamWrapper(Stream stream)
    {
        Stream = stream;
    }

    /// <inheritdoc />
    public override void Flush()
    {
        Stream.Flush();
    }

    /// <inheritdoc />
    public override long Seek(long offset, SeekOrigin origin)
    {
        return Stream.Seek(offset, origin);
    }

    /// <inheritdoc />
    public override void SetLength(long value)
    {
        Stream.SetLength(value);
    }

    /// <inheritdoc />
    public override int Read(byte[] buffer, int offset, int count)
    {
        return Stream.Read(buffer, offset, count);
    }

    /// <inheritdoc />
    public override void Write(byte[] buffer, int offset, int count)
    {
        Stream.Write(buffer, offset, count);
    }

    /// <inheritdoc />
    public override bool CanRead
    {
        get { return Stream.CanRead; }
    }

    /// <inheritdoc />
    public override bool CanSeek
    {
        get { return Stream.CanSeek; }
    }

    /// <inheritdoc />
    public override bool CanWrite
    {
        get { return Stream.CanWrite; }
    }

    /// <inheritdoc />
    public override long Length
    {
        get { return Stream.Length; }
    }

    /// <inheritdoc />
    public override long Position
    {
        get { return Stream.Position; }
        set { Stream.Position = value; }
    }

    /// <summary>
    /// Disposes the wrapped stream together with the wrapper.
    /// </summary>
    /// <param name="disposing">
    /// <c>true</c> when called from <see cref="System.IDisposable.Dispose"/>; only then is the
    /// wrapped stream (a managed object) disposed, as the dispose pattern requires.
    /// </param>
    protected override void Dispose(bool disposing)
    {
        base.Dispose(disposing);

        if (disposing)
        {
            // A null wrapped stream is tolerated, as it was before.
            Stream?.Dispose();
        }
    }
}
}

View File

@ -45,8 +45,8 @@
}
return typedToken;
}
}
/// <summary>
/// Get the decoded data from this stream.
/// </summary>

View File

@ -0,0 +1,30 @@
namespace UglyToad.PdfPig.Util
{
using System;
/// <summary>
/// Helper methods for arrays.
/// </summary>
internal static class ArrayHelper
{
    /// <summary>
    /// Assigns <paramref name="value"/> to every element of <paramref name="array"/> in the range
    /// from <paramref name="start"/> (inclusive) to <paramref name="end"/> (exclusive).
    /// An empty range (<paramref name="start"/> equal to <paramref name="end"/>) changes nothing.
    /// </summary>
    /// <param name="array">The array to fill.</param>
    /// <param name="start">The index of the first element to assign.</param>
    /// <param name="end">The index after the last element to assign; may equal the array length.</param>
    /// <param name="value">The value to assign.</param>
    /// <exception cref="ArgumentNullException"><paramref name="array"/> is null.</exception>
    /// <exception cref="ArgumentOutOfRangeException">
    /// <paramref name="start"/> is negative or greater than <paramref name="end"/>,
    /// or <paramref name="end"/> is greater than the array length.
    /// </exception>
    public static void Fill<T>(T[] array, int start, int end, T value)
    {
        if (array == null)
        {
            throw new ArgumentNullException(nameof(array));
        }

        // start == end is a valid, empty range.
        if (start < 0 || start > end)
        {
            throw new ArgumentOutOfRangeException(nameof(start));
        }

        // end is exclusive, so the array length itself is a valid end (fill up to the last element).
        if (end > array.Length)
        {
            throw new ArgumentOutOfRangeException(nameof(end));
        }

        for (int i = start; i < end; i++)
        {
            array[i] = value;
        }
    }
}
}

View File

@ -1,6 +1,7 @@
namespace UglyToad.PdfPig.Util
{
using System.Collections.Generic;
using System.Collections.Generic;
using System.Linq;
using Content;
using Core;
using Filters;
@ -74,6 +75,15 @@
ILookupFilterProvider filterProvider,
bool cannotRecurse = false)
{
if (imageDictionary.GetObjectOrDefault(NameToken.ImageMask, NameToken.Im) != null ||
filterProvider.GetFilters(imageDictionary, scanner).OfType<CcittFaxDecodeFilter>().Any())
{
var decodeRaw = imageDictionary.GetObjectOrDefault(NameToken.Decode, NameToken.D) as ArrayToken
?? new ArrayToken(EmptyArray<IToken>.Instance);
var decode = decodeRaw.Data.OfType<NumericToken>().Select(x => x.Data).ToArray();
return IndexedColorSpaceDetails.Stencil(decode);
}
if (!colorSpace.HasValue)
{
return UnsupportedColorSpaceDetails.Instance;

View File

@ -9,6 +9,33 @@
internal static class DictionaryTokenExtensions
{
/// <summary>
/// Returns the entry stored under <paramref name="name"/>, or null when the dictionary has no such entry.
/// </summary>
[CanBeNull]
public static IToken GetObjectOrDefault(this DictionaryToken token, NameToken name)
{
    return token.TryGet(name, out var found) ? found : null;
}
/// <summary>
/// Returns the entry stored under <paramref name="first"/>; when there is none, the entry stored
/// under <paramref name="second"/>; and null when the dictionary has neither.
/// </summary>
[CanBeNull]
public static IToken GetObjectOrDefault(this DictionaryToken token, NameToken first, NameToken second)
{
    // The second key is only consulted when the first one is missing.
    var hasEntry = token.TryGet(first, out var found) || token.TryGet(second, out found);
    if (!hasEntry)
    {
        return null;
    }

    return found;
}
public static int GetInt(this DictionaryToken token, NameToken name)
{
if (token == null)
@ -16,12 +43,9 @@
throw new ArgumentNullException(nameof(token));
}
if (!token.TryGet(name, out var keyedToken) || !(keyedToken is NumericToken numeric))
{
throw new PdfDocumentFormatException($"The dictionary did not contain a number with the key {name}. Dictionary way: {token}.");
}
var numeric = token.GetObjectOrDefault(name) as NumericToken;
return numeric.Int;
return numeric?.Int ?? throw new PdfDocumentFormatException($"The dictionary did not contain a number with the key {name}. Dictionary way: {token}.");
}
public static int GetIntOrDefault(this DictionaryToken token, NameToken name, int defaultValue = 0)
@ -31,12 +55,21 @@
throw new ArgumentNullException(nameof(token));
}
if (!token.TryGet(name, out var keyedToken) || !(keyedToken is NumericToken numeric))
var numeric = token.GetObjectOrDefault(name) as NumericToken;
return numeric?.Int ?? defaultValue;
}
/// <summary>
/// Returns the integer stored under <paramref name="first"/> or, when that key is missing,
/// under <paramref name="second"/>.
/// </summary>
/// <param name="token">The dictionary to read from.</param>
/// <param name="first">The key to try first.</param>
/// <param name="second">The key to try when the first one is missing.</param>
/// <param name="defaultValue">The value returned when neither key holds a number.</param>
/// <exception cref="ArgumentNullException"><paramref name="token"/> is null.</exception>
public static int GetIntOrDefault(this DictionaryToken token, NameToken first, NameToken second, int defaultValue = 0)
{
    if (token == null)
    {
        throw new ArgumentNullException(nameof(token));
    }

    var numeric = token.GetObjectOrDefault(first, second) as NumericToken;

    // Fall back to the caller's default rather than to default(int).
    return numeric?.Int ?? defaultValue;
}
public static long? GetLongOrDefault(this DictionaryToken token, NameToken name)
@ -46,12 +79,21 @@
throw new ArgumentNullException(nameof(token));
}
if (!token.TryGet(name, out var keyedToken) || !(keyedToken is NumericToken numeric))
var numeric = token.GetObjectOrDefault(name) as NumericToken;
return numeric?.Long;
}
public static bool GetBooleanOrDefault(this DictionaryToken token, NameToken name, bool defaultValue)
{
if (token == null)
{
return null;
throw new ArgumentNullException(nameof(token));
}
return numeric.Long;
var boolean = token.GetObjectOrDefault(name) as BooleanToken;
return boolean?.Data ?? defaultValue;
}
[CanBeNull]
@ -62,12 +104,7 @@
throw new ArgumentNullException(nameof(token));
}
if (!token.TryGet(name, out var nameToken) || !(nameToken is NameToken result))
{
return null;
}
return result;
return token.GetObjectOrDefault(name) as NameToken;
}
public static bool TryGetOptionalTokenDirect<T>(this DictionaryToken token, NameToken name, IPdfTokenScanner scanner, out T result) where T : IToken