diff --git a/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs b/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs index 98fd49ef..a7064c2f 100644 --- a/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs +++ b/src/UglyToad.PdfPig.Tests/Filters/BitStreamTests.cs @@ -1,6 +1,6 @@ namespace UglyToad.PdfPig.Tests.Filters { - using PdfPig.Filters; + using PdfPig.Filters.Lzw; public class BitStreamTests { diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs b/src/UglyToad.PdfPig/Filters/CcittFax/CcittFaxCompressionType.cs similarity index 91% rename from src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs rename to src/UglyToad.PdfPig/Filters/CcittFax/CcittFaxCompressionType.cs index f0f372de..85f2ba81 100644 --- a/src/UglyToad.PdfPig/Filters/CcittFaxCompressionType.cs +++ b/src/UglyToad.PdfPig/Filters/CcittFax/CcittFaxCompressionType.cs @@ -1,25 +1,25 @@ -namespace UglyToad.PdfPig.Filters -{ - /// - /// Specifies the compression type to use with . - /// - internal enum CcittFaxCompressionType - { - /// - /// Modified Huffman (MH) - Group 3 variation (T2) - /// - ModifiedHuffman, - /// - /// Modified Huffman (MH) - Group 3 (T4) - /// - Group3_1D, - /// - /// Modified Read (MR) - Group 3 (T4) - /// - Group3_2D, - /// - /// Modified Modified Read (MMR) - Group 4 (T6) - /// - Group4_2D - } -} +namespace UglyToad.PdfPig.Filters.CcittFax +{ + /// + /// Specifies the compression type to use with . + /// + internal enum CcittFaxCompressionType + { + /// + /// Modified Huffman (MH) - Group 3 variation (T2) + /// + ModifiedHuffman, + /// + /// Modified Huffman (MH) - Group 3 (T4) + /// + Group3_1D, + /// + /// Modified Read (MR) - Group 3 (T4) + /// + Group3_2D, + /// + /// Modified Modified Read (MMR) - Group 4 (T6) + /// + Group4_2D + } +} diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs b/src/UglyToad.PdfPig/Filters/CcittFax/CcittFaxDecoderStream.cs similarity index 94% rename from src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs rename to src/UglyToad.PdfPig/Filters/CcittFax/CcittFaxDecoderStream.cs index 12aa8fa9..24a30b6f 100644 --- a/src/UglyToad.PdfPig/Filters/CcittFaxDecoderStream.cs +++ b/src/UglyToad.PdfPig/Filters/CcittFax/CcittFaxDecoderStream.cs @@ -1,773 +1,774 @@ -namespace UglyToad.PdfPig.Filters -{ - using System; - using System.IO; - using IO; - - /// - /// CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression. - /// - /// Ported from https://github.com/apache/pdfbox/blob/e644c29279e276bde14ce7a33bdeef0cb1001b3e/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java - /// - internal sealed class CcittFaxDecoderStream : StreamWrapper - { - // See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43. - - private readonly int columns; - private readonly byte[] decodedRow; - - private readonly bool optionByteAligned; - - private readonly CcittFaxCompressionType type; - - private int decodedLength; - private int decodedPos; - - private int[] changesReferenceRow; - private int[] changesCurrentRow; - private int changesReferenceRowCount; - private int changesCurrentRowCount; - - private int lastChangingElement; - - private int buffer = -1; - private int bufferPos = -1; - - /// - /// Creates a CCITTFaxDecoderStream. - /// This constructor may be used for CCITT streams embedded in PDF files, - /// which use EncodedByteAlign. - /// - public CcittFaxDecoderStream(Stream stream, int columns, CcittFaxCompressionType type, bool byteAligned) - : base(stream) - { - this.columns = columns; - this.type = type; - - // We know this is only used for b/w (1 bit) - decodedRow = new byte[(columns + 7) / 8]; - changesReferenceRow = new int[columns + 2]; - changesCurrentRow = new int[columns + 2]; - - optionByteAligned = byteAligned; - } - - private void Fetch() - { - if (decodedPos >= decodedLength) - { - decodedLength = 0; - - try - { - DecodeRow(); - } - catch (InvalidOperationException) - { - if (decodedLength != 0) - { - throw; - } - - // ..otherwise, just let client code try to read past the - // end of stream - decodedLength = -1; - } - - decodedPos = 0; - } - } - - private void Decode1D() - { - var index = 0; - var white = true; - changesCurrentRowCount = 0; - - do - { - var completeRun = white ? DecodeRun(WhiteRunTree) : DecodeRun(BlackRunTree); - index += completeRun; - changesCurrentRow[changesCurrentRowCount++] = index; - - // Flip color for next run - white = !white; - } while (index < columns); - } - - private void Decode2D() - { - changesReferenceRowCount = changesCurrentRowCount; - var tmp = changesCurrentRow; - changesCurrentRow = changesReferenceRow; - changesReferenceRow = tmp; - - var white = true; - var index = 0; - changesCurrentRowCount = 0; - - mode: while (index < columns) - { - var node = CodeTree.Root; - - while (true) - { - node = node.Walk(ReadBit()); - - if (node is null) - { - goto mode; - } - else if (node.IsLeaf) - { - switch (node.Value) - { - case VALUE_HMODE: - var runLength = DecodeRun(white ? WhiteRunTree : BlackRunTree); - index += runLength; - changesCurrentRow[changesCurrentRowCount++] = index; - - runLength = DecodeRun(white ? BlackRunTree : WhiteRunTree); - index += runLength; - changesCurrentRow[changesCurrentRowCount++] = index; - break; - - case VALUE_PASSMODE: - var pChangingElement = GetNextChangingElement(index, white) + 1; - - if (pChangingElement >= changesReferenceRowCount) - { - index = columns; - } - else - { - index = changesReferenceRow[pChangingElement]; - } - - break; - - default: - // Vertical mode (-3 to 3) - var vChangingElement = GetNextChangingElement(index, white); - - if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1) - { - index = columns + node.Value; - } - else - { - index = changesReferenceRow[vChangingElement] + node.Value; - } - - changesCurrentRow[changesCurrentRowCount] = index; - changesCurrentRowCount++; - white = !white; - - break; - } - - goto mode; - } - } - } - } - - private int GetNextChangingElement(int a0, bool white) - { - var start = (int)(lastChangingElement & 0xFFFF_FFFE) + (white ? 0 : 1); - if (start > 2) - { - start -= 2; - } - - if (a0 == 0) - { - return start; - } - - for (var i = start; i < changesReferenceRowCount; i += 2) - { - if (a0 < changesReferenceRow[i]) - { - lastChangingElement = i; - return i; - } - } - - return -1; - } - - private void DecodeRowType2() - { - if (optionByteAligned) - { - ResetBuffer(); - } - - Decode1D(); - } - - private void DecodeRowType4() - { - if (optionByteAligned) - { - ResetBuffer(); - } - - eof: while (true) - { - // read till next EOL code - var node = EolOnlyTree.Root; - - while (true) - { - node = node.Walk(ReadBit()); - - if (node is null) - { - goto eof; - } - - if (node.IsLeaf) - { - goto done; - } - } - } - - done: - if (type == CcittFaxCompressionType.Group3_1D || ReadBit()) - { - Decode1D(); - } - else - { - Decode2D(); - } - } - - private void DecodeRowType6() - { - if (optionByteAligned) - { - ResetBuffer(); - } - - Decode2D(); - } - - private void DecodeRow() - { - switch (type) - { - case CcittFaxCompressionType.ModifiedHuffman: - DecodeRowType2(); - break; - case CcittFaxCompressionType.Group3_1D: - case CcittFaxCompressionType.Group3_2D: - DecodeRowType4(); - break; - case CcittFaxCompressionType.Group4_2D: - DecodeRowType6(); - break; - default: - throw new InvalidOperationException(type + " is not a supported compression type."); - } - - var index = 0; - var white = true; - - lastChangingElement = 0; - for (var i = 0; i <= changesCurrentRowCount; i++) - { - var nextChange = columns; - - if (i != changesCurrentRowCount) - { - nextChange = changesCurrentRow[i]; - } - - if (nextChange > columns) - { - nextChange = columns; - } - - var byteIndex = index / 8; - - while (index % 8 != 0 && (nextChange - index) > 0) - { - decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << (7 - ((index) % 8))); - index++; - } - - if (index % 8 == 0) - { - byteIndex = index / 8; - var value = (byte)(white ? 0x00 : 0xff); - - while ((nextChange - index) > 7) - { - decodedRow[byteIndex] = value; - index += 8; - ++byteIndex; - } - } - - while ((nextChange - index) > 0) - { - if (index % 8 == 0) - { - decodedRow[byteIndex] = 0; - } - - decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << (7 - ((index) % 8))); - index++; - } - - white = !white; - } - - if (index != columns) - { - throw new InvalidOperationException($"Sum of run-lengths does not equal scan line width: {index} > {columns}"); - } - - decodedLength = (index + 7) / 8; - } - - private int DecodeRun(Tree tree) - { - var total = 0; - - var node = tree.Root; - - while (true) - { - var bit = ReadBit(); - node = node.Walk(bit); - - if (node is null) - { - throw new InvalidOperationException("Unknown code in Huffman RLE stream"); - } - - if (node.IsLeaf) - { - total += node.Value; - if (node.Value >= 64) - { - node = tree.Root; - } - else if (node.Value >= 0) - { - return total; - } - else - { - return columns; - } - } - } - } - - private void ResetBuffer() - { - bufferPos = -1; - } - - private bool ReadBit() - { - if (bufferPos < 0 || bufferPos > 7) - { - buffer = Stream.ReadByte(); - - if (buffer == -1) - { - throw new InvalidOperationException("Unexpected end of Huffman RLE stream"); - } - - bufferPos = 0; - } - - var isSet = ((buffer >> (7 - bufferPos)) & 1) == 1; - - bufferPos++; - - if (bufferPos > 7) - { - bufferPos = -1; - } - - return isSet; - } - - public override int ReadByte() - { - if (decodedLength < 0) - { - return 0x0; - } - - if (decodedPos >= decodedLength) - { - Fetch(); - - if (decodedLength < 0) - { - return 0x0; - } - } - - return decodedRow[decodedPos++] & 0xff; - } - - public override int Read(byte[] b, int off, int len) - { - if (decodedLength < 0) - { - b.AsSpan(off, len).Fill(0x0); - return len; - } - - if (decodedPos >= decodedLength) - { - Fetch(); - - if (decodedLength < 0) - { - b.AsSpan(off, len).Fill(0x0); - return len; - } - } - - var read = Math.Min(decodedLength - decodedPos, len); - Array.Copy(decodedRow, decodedPos, b, off, read); - decodedPos += read; - - return read; - } - - private class Node - { - public Node? Left { get; set; } - public Node? Right { get; set; } - - public int Value { get; set; } - - public bool CanBeFill { get; set; } - public bool IsLeaf { get; set; } - - public void Set(bool next, Node node) - { - if (!next) - { - Left = node; - } - else - { - Right = node; - } - } - - public Node Walk(bool next) - { - return next ? Right! : Left!; - } - - public override string ToString() - { - return $"[{nameof(IsLeaf)}={IsLeaf}, {nameof(Value)}={Value}, {nameof(CanBeFill)}={CanBeFill}]"; - } - } - - private class Tree - { - public Node Root { get; } = new Node(); - - public void Fill(int depth, int path, int value) - { - var current = Root; - - for (var i = 0; i < depth; i++) - { - var bitPos = depth - 1 - i; - var isSet = ((path >> bitPos) & 1) == 1; - var next = current.Walk(isSet); - - if (next is null) - { - next = new Node(); - - if (i == depth - 1) - { - next.Value = value; - next.IsLeaf = true; - } - - if (path == 0) - { - next.CanBeFill = true; - } - - current.Set(isSet, next); - } - else if (next.IsLeaf) - { - throw new InvalidOperationException("node is leaf, no other following"); - } - - current = next; - } - } - - public void Fill(int depth, int path, Node node) - { - var current = Root; - - for (var i = 0; i < depth; i++) - { - var bitPos = depth - 1 - i; - var isSet = ((path >> bitPos) & 1) == 1; - var next = current.Walk(isSet); - - if (next is null) - { - if (i == depth - 1) - { - next = node; - } - else - { - next = new Node(); - } - - if (path == 0) - { - next.CanBeFill = true; - } - - current.Set(isSet, next); - } - else if (next.IsLeaf) - { - throw new InvalidOperationException("node is leaf, no other following"); - } - - current = next; - } - } - } - - private static readonly short[][] BLACK_CODES = new short[][] { - new short[]{ // 2 bits - 0x2, 0x3, - }, - new short[]{ // 3 bits - 0x2, 0x3, - }, - new short[]{ // 4 bits - 0x2, 0x3, - }, - new short[]{ // 5 bits - 0x3, - }, - new short[]{ // 6 bits - 0x4, 0x5, - }, - new short[]{ // 7 bits - 0x4, 0x5, 0x7, - }, - new short[]{ // 8 bits - 0x4, 0x7, - }, - new short[]{ // 9 bits - 0x18, - }, - new short[]{ // 10 bits - 0x17, 0x18, 0x37, 0x8, 0xf, - }, - new short[]{ // 11 bits - 0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd, - }, - new short[]{ // 12 bits - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33, - 0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65, - 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3, - 0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb, - }, - new short[]{ // 13 bits - 0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73, - 0x74, 0x75, 0x76, 0x77, - } - }; - - private static readonly short[][] BLACK_RUN_LENGTHS = new short[][]{ - new short[]{ // 2 bits - 3, 2, - }, - new short[]{ // 3 bits - 1, 4, - }, - new short[]{ // 4 bits - 6, 5, - }, - new short[]{ // 5 bits - 7, - }, - new short[]{ // 6 bits - 9, 8, - }, - new short[]{ // 7 bits - 10, 11, 12, - }, - new short[]{ // 8 bits - 13, 14, - }, - new short[]{ // 9 bits - 15, - }, - new short[]{ // 10 bits - 16, 17, 0, 18, 64, - }, - new short[]{ // 11 bits - 24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920, - }, - new short[]{ // 12 bits - 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53, - 54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26, - 27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43, - }, - new short[]{ // 13 bits - 640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088, - 1152, 1216, - } - }; - - private static readonly short[][] WHITE_CODES = new short[][]{ - new short[]{ // 4 bits - 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, - }, - new short[]{ // 5 bits - 0x12, 0x13, 0x14, 0x1b, 0x7, 0x8, - }, - new short[]{ // 6 bits - 0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8, - }, - new short[]{ // 7 bits - 0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc, - }, - new short[]{ // 8 bits - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, - 0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59, - 0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb, - }, - new short[]{ // 9 bits - 0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, - }, - new short[]{ // 10 bits - }, - new short[]{ // 11 bits - 0x8, 0xc, 0xd, - }, - new short[]{ // 12 bits - 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, - } - }; - - private static readonly short[][] WHITE_RUN_LENGTHS = new short[][]{ - new short[]{ // 4 bits - 2, 3, 4, 5, 6, 7, - }, - new short[]{ // 5 bits - 128, 8, 9, 64, 10, 11, - }, - new short[]{ // 6 bits - 192, 1664, 16, 17, 13, 14, 15, 1, 12, - }, - new short[]{ // 7 bits - 26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19, - }, - new short[]{ // 8 bits - 33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45, - 59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48, - }, - new short[]{ // 9 bits - 1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408, - }, - new short[]{ // 10 bits - }, - new short[]{ // 11 bits - 1792, 1856, 1920, - }, - new short[]{ // 12 bits - 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, - } - }; - - private static readonly Node EOL; - private static readonly Node FILL; - private static readonly Tree BlackRunTree; - private static readonly Tree WhiteRunTree; - private static readonly Tree EolOnlyTree; - private static readonly Tree CodeTree; - - const int VALUE_EOL = -2000; - const int VALUE_FILL = -1000; - const int VALUE_PASSMODE = -3000; - const int VALUE_HMODE = -4000; - - static CcittFaxDecoderStream() - { - EOL = new Node - { - IsLeaf = true, - Value = VALUE_EOL - }; - FILL = new Node - { - Value = VALUE_FILL - }; - FILL.Left = FILL; - FILL.Right = EOL; - - EolOnlyTree = new Tree(); - EolOnlyTree.Fill(12, 0, FILL); - EolOnlyTree.Fill(12, 1, EOL); - - BlackRunTree = new Tree(); - for (var i = 0; i < BLACK_CODES.Length; i++) - { - for (var j = 0; j < BLACK_CODES[i].Length; j++) - { - BlackRunTree.Fill(i + 2, BLACK_CODES[i][j], BLACK_RUN_LENGTHS[i][j]); - } - } - BlackRunTree.Fill(12, 0, FILL); - BlackRunTree.Fill(12, 1, EOL); - - WhiteRunTree = new Tree(); - - for (var i = 0; i < WHITE_CODES.Length; i++) - { - for (var j = 0; j < WHITE_CODES[i].Length; j++) - { - WhiteRunTree.Fill(i + 4, WHITE_CODES[i][j], WHITE_RUN_LENGTHS[i][j]); - } - } - - WhiteRunTree.Fill(12, 0, FILL); - WhiteRunTree.Fill(12, 1, EOL); - - CodeTree = new Tree(); - CodeTree.Fill(4, 1, VALUE_PASSMODE); // pass mode - CodeTree.Fill(3, 1, VALUE_HMODE); // H mode - CodeTree.Fill(1, 1, 0); // V(0) - CodeTree.Fill(3, 3, 1); // V_R(1) - CodeTree.Fill(6, 3, 2); // V_R(2) - CodeTree.Fill(7, 3, 3); // V_R(3) - CodeTree.Fill(3, 2, -1); // V_L(1) - CodeTree.Fill(6, 2, -2); // V_L(2) - CodeTree.Fill(7, 2, -3); // V_L(3) - } - } +namespace UglyToad.PdfPig.Filters.CcittFax +{ + using System; + using System.IO; + using IO; + + /// + /// CCITT Modified Huffman RLE, Group 3 (T4) and Group 4 (T6) fax compression. + /// + /// Ported from https://github.com/apache/pdfbox/blob/e644c29279e276bde14ce7a33bdeef0cb1001b3e/pdfbox/src/main/java/org/apache/pdfbox/filter/CCITTFaxDecoderStream.java + /// + /// + internal sealed class CcittFaxDecoderStream : StreamWrapper + { + // See TIFF 6.0 Specification, Section 10: "Modified Huffman Compression", page 43. + + private readonly int columns; + private readonly byte[] decodedRow; + + private readonly bool optionByteAligned; + + private readonly CcittFaxCompressionType type; + + private int decodedLength; + private int decodedPos; + + private int[] changesReferenceRow; + private int[] changesCurrentRow; + private int changesReferenceRowCount; + private int changesCurrentRowCount; + + private int lastChangingElement; + + private int buffer = -1; + private int bufferPos = -1; + + /// + /// Creates a CCITTFaxDecoderStream. + /// This constructor may be used for CCITT streams embedded in PDF files, + /// which use EncodedByteAlign. + /// + public CcittFaxDecoderStream(Stream stream, int columns, CcittFaxCompressionType type, bool byteAligned) + : base(stream) + { + this.columns = columns; + this.type = type; + + // We know this is only used for b/w (1 bit) + decodedRow = new byte[(columns + 7) / 8]; + changesReferenceRow = new int[columns + 2]; + changesCurrentRow = new int[columns + 2]; + + optionByteAligned = byteAligned; + } + + private void Fetch() + { + if (decodedPos >= decodedLength) + { + decodedLength = 0; + + try + { + DecodeRow(); + } + catch (InvalidOperationException) + { + if (decodedLength != 0) + { + throw; + } + + // ..otherwise, just let client code try to read past the + // end of stream + decodedLength = -1; + } + + decodedPos = 0; + } + } + + private void Decode1D() + { + var index = 0; + var white = true; + changesCurrentRowCount = 0; + + do + { + var completeRun = white ? DecodeRun(WhiteRunTree) : DecodeRun(BlackRunTree); + index += completeRun; + changesCurrentRow[changesCurrentRowCount++] = index; + + // Flip color for next run + white = !white; + } while (index < columns); + } + + private void Decode2D() + { + changesReferenceRowCount = changesCurrentRowCount; + var tmp = changesCurrentRow; + changesCurrentRow = changesReferenceRow; + changesReferenceRow = tmp; + + var white = true; + var index = 0; + changesCurrentRowCount = 0; + + mode: while (index < columns) + { + var node = CodeTree.Root; + + while (true) + { + node = node.Walk(ReadBit()); + + if (node is null) + { + goto mode; + } + else if (node.IsLeaf) + { + switch (node.Value) + { + case VALUE_HMODE: + var runLength = DecodeRun(white ? WhiteRunTree : BlackRunTree); + index += runLength; + changesCurrentRow[changesCurrentRowCount++] = index; + + runLength = DecodeRun(white ? BlackRunTree : WhiteRunTree); + index += runLength; + changesCurrentRow[changesCurrentRowCount++] = index; + break; + + case VALUE_PASSMODE: + var pChangingElement = GetNextChangingElement(index, white) + 1; + + if (pChangingElement >= changesReferenceRowCount) + { + index = columns; + } + else + { + index = changesReferenceRow[pChangingElement]; + } + + break; + + default: + // Vertical mode (-3 to 3) + var vChangingElement = GetNextChangingElement(index, white); + + if (vChangingElement >= changesReferenceRowCount || vChangingElement == -1) + { + index = columns + node.Value; + } + else + { + index = changesReferenceRow[vChangingElement] + node.Value; + } + + changesCurrentRow[changesCurrentRowCount] = index; + changesCurrentRowCount++; + white = !white; + + break; + } + + goto mode; + } + } + } + } + + private int GetNextChangingElement(int a0, bool white) + { + var start = (int)(lastChangingElement & 0xFFFF_FFFE) + (white ? 0 : 1); + if (start > 2) + { + start -= 2; + } + + if (a0 == 0) + { + return start; + } + + for (var i = start; i < changesReferenceRowCount; i += 2) + { + if (a0 < changesReferenceRow[i]) + { + lastChangingElement = i; + return i; + } + } + + return -1; + } + + private void DecodeRowType2() + { + if (optionByteAligned) + { + ResetBuffer(); + } + + Decode1D(); + } + + private void DecodeRowType4() + { + if (optionByteAligned) + { + ResetBuffer(); + } + + eof: while (true) + { + // read till next EOL code + var node = EolOnlyTree.Root; + + while (true) + { + node = node.Walk(ReadBit()); + + if (node is null) + { + goto eof; + } + + if (node.IsLeaf) + { + goto done; + } + } + } + + done: + if (type == CcittFaxCompressionType.Group3_1D || ReadBit()) + { + Decode1D(); + } + else + { + Decode2D(); + } + } + + private void DecodeRowType6() + { + if (optionByteAligned) + { + ResetBuffer(); + } + + Decode2D(); + } + + private void DecodeRow() + { + switch (type) + { + case CcittFaxCompressionType.ModifiedHuffman: + DecodeRowType2(); + break; + case CcittFaxCompressionType.Group3_1D: + case CcittFaxCompressionType.Group3_2D: + DecodeRowType4(); + break; + case CcittFaxCompressionType.Group4_2D: + DecodeRowType6(); + break; + default: + throw new InvalidOperationException(type + " is not a supported compression type."); + } + + var index = 0; + var white = true; + + lastChangingElement = 0; + for (var i = 0; i <= changesCurrentRowCount; i++) + { + var nextChange = columns; + + if (i != changesCurrentRowCount) + { + nextChange = changesCurrentRow[i]; + } + + if (nextChange > columns) + { + nextChange = columns; + } + + var byteIndex = index / 8; + + while (index % 8 != 0 && nextChange - index > 0) + { + decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << 7 - index % 8); + index++; + } + + if (index % 8 == 0) + { + byteIndex = index / 8; + var value = (byte)(white ? 0x00 : 0xff); + + while (nextChange - index > 7) + { + decodedRow[byteIndex] = value; + index += 8; + ++byteIndex; + } + } + + while (nextChange - index > 0) + { + if (index % 8 == 0) + { + decodedRow[byteIndex] = 0; + } + + decodedRow[byteIndex] |= (byte)(white ? 0 : 1 << 7 - index % 8); + index++; + } + + white = !white; + } + + if (index != columns) + { + throw new InvalidOperationException($"Sum of run-lengths does not equal scan line width: {index} > {columns}"); + } + + decodedLength = (index + 7) / 8; + } + + private int DecodeRun(Tree tree) + { + var total = 0; + + var node = tree.Root; + + while (true) + { + var bit = ReadBit(); + node = node.Walk(bit); + + if (node is null) + { + throw new InvalidOperationException("Unknown code in Huffman RLE stream"); + } + + if (node.IsLeaf) + { + total += node.Value; + if (node.Value >= 64) + { + node = tree.Root; + } + else if (node.Value >= 0) + { + return total; + } + else + { + return columns; + } + } + } + } + + private void ResetBuffer() + { + bufferPos = -1; + } + + private bool ReadBit() + { + if (bufferPos < 0 || bufferPos > 7) + { + buffer = Stream.ReadByte(); + + if (buffer == -1) + { + throw new InvalidOperationException("Unexpected end of Huffman RLE stream"); + } + + bufferPos = 0; + } + + var isSet = (buffer >> 7 - bufferPos & 1) == 1; + + bufferPos++; + + if (bufferPos > 7) + { + bufferPos = -1; + } + + return isSet; + } + + public override int ReadByte() + { + if (decodedLength < 0) + { + return 0x0; + } + + if (decodedPos >= decodedLength) + { + Fetch(); + + if (decodedLength < 0) + { + return 0x0; + } + } + + return decodedRow[decodedPos++] & 0xff; + } + + public override int Read(byte[] b, int off, int len) + { + if (decodedLength < 0) + { + b.AsSpan(off, len).Fill(0x0); + return len; + } + + if (decodedPos >= decodedLength) + { + Fetch(); + + if (decodedLength < 0) + { + b.AsSpan(off, len).Fill(0x0); + return len; + } + } + + var read = Math.Min(decodedLength - decodedPos, len); + Array.Copy(decodedRow, decodedPos, b, off, read); + decodedPos += read; + + return read; + } + + private sealed class Node + { + public Node? Left { get; set; } + public Node? Right { get; set; } + + public int Value { get; set; } + + public bool CanBeFill { get; set; } + public bool IsLeaf { get; set; } + + public void Set(bool next, Node node) + { + if (!next) + { + Left = node; + } + else + { + Right = node; + } + } + + public Node Walk(bool next) + { + return next ? Right! : Left!; + } + + public override string ToString() + { + return $"[{nameof(IsLeaf)}={IsLeaf}, {nameof(Value)}={Value}, {nameof(CanBeFill)}={CanBeFill}]"; + } + } + + private sealed class Tree + { + public Node Root { get; } = new Node(); + + public void Fill(int depth, int path, int value) + { + var current = Root; + + for (var i = 0; i < depth; i++) + { + var bitPos = depth - 1 - i; + var isSet = (path >> bitPos & 1) == 1; + var next = current.Walk(isSet); + + if (next is null) + { + next = new Node(); + + if (i == depth - 1) + { + next.Value = value; + next.IsLeaf = true; + } + + if (path == 0) + { + next.CanBeFill = true; + } + + current.Set(isSet, next); + } + else if (next.IsLeaf) + { + throw new InvalidOperationException("node is leaf, no other following"); + } + + current = next; + } + } + + public void Fill(int depth, int path, Node node) + { + var current = Root; + + for (var i = 0; i < depth; i++) + { + var bitPos = depth - 1 - i; + var isSet = (path >> bitPos & 1) == 1; + var next = current.Walk(isSet); + + if (next is null) + { + if (i == depth - 1) + { + next = node; + } + else + { + next = new Node(); + } + + if (path == 0) + { + next.CanBeFill = true; + } + + current.Set(isSet, next); + } + else if (next.IsLeaf) + { + throw new InvalidOperationException("node is leaf, no other following"); + } + + current = next; + } + } + } + + private static readonly short[][] BLACK_CODES = new short[][] { + new short[]{ // 2 bits + 0x2, 0x3, + }, + new short[]{ // 3 bits + 0x2, 0x3, + }, + new short[]{ // 4 bits + 0x2, 0x3, + }, + new short[]{ // 5 bits + 0x3, + }, + new short[]{ // 6 bits + 0x4, 0x5, + }, + new short[]{ // 7 bits + 0x4, 0x5, 0x7, + }, + new short[]{ // 8 bits + 0x4, 0x7, + }, + new short[]{ // 9 bits + 0x18, + }, + new short[]{ // 10 bits + 0x17, 0x18, 0x37, 0x8, 0xf, + }, + new short[]{ // 11 bits + 0x17, 0x18, 0x28, 0x37, 0x67, 0x68, 0x6c, 0x8, 0xc, 0xd, + }, + new short[]{ // 12 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, 0x24, 0x27, 0x28, 0x2b, 0x2c, 0x33, + 0x34, 0x35, 0x37, 0x38, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57, 0x58, 0x59, 0x5a, 0x5b, 0x64, 0x65, + 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c, 0x6d, 0xc8, 0xc9, 0xca, 0xcb, 0xcc, 0xcd, 0xd2, 0xd3, + 0xd4, 0xd5, 0xd6, 0xd7, 0xda, 0xdb, + }, + new short[]{ // 13 bits + 0x4a, 0x4b, 0x4c, 0x4d, 0x52, 0x53, 0x54, 0x55, 0x5a, 0x5b, 0x64, 0x65, 0x6c, 0x6d, 0x72, 0x73, + 0x74, 0x75, 0x76, 0x77, + } + }; + + private static readonly short[][] BLACK_RUN_LENGTHS = new short[][]{ + new short[]{ // 2 bits + 3, 2, + }, + new short[]{ // 3 bits + 1, 4, + }, + new short[]{ // 4 bits + 6, 5, + }, + new short[]{ // 5 bits + 7, + }, + new short[]{ // 6 bits + 9, 8, + }, + new short[]{ // 7 bits + 10, 11, 12, + }, + new short[]{ // 8 bits + 13, 14, + }, + new short[]{ // 9 bits + 15, + }, + new short[]{ // 10 bits + 16, 17, 0, 18, 64, + }, + new short[]{ // 11 bits + 24, 25, 23, 22, 19, 20, 21, 1792, 1856, 1920, + }, + new short[]{ // 12 bits + 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, 52, 55, 56, 59, 60, 320, 384, 448, 53, + 54, 50, 51, 44, 45, 46, 47, 57, 58, 61, 256, 48, 49, 62, 63, 30, 31, 32, 33, 40, 41, 128, 192, 26, + 27, 28, 29, 34, 35, 36, 37, 38, 39, 42, 43, + }, + new short[]{ // 13 bits + 640, 704, 768, 832, 1280, 1344, 1408, 1472, 1536, 1600, 1664, 1728, 512, 576, 896, 960, 1024, 1088, + 1152, 1216, + } + }; + + private static readonly short[][] WHITE_CODES = new short[][]{ + new short[]{ // 4 bits + 0x7, 0x8, 0xb, 0xc, 0xe, 0xf, + }, + new short[]{ // 5 bits + 0x12, 0x13, 0x14, 0x1b, 0x7, 0x8, + }, + new short[]{ // 6 bits + 0x17, 0x18, 0x2a, 0x2b, 0x3, 0x34, 0x35, 0x7, 0x8, + }, + new short[]{ // 7 bits + 0x13, 0x17, 0x18, 0x24, 0x27, 0x28, 0x2b, 0x3, 0x37, 0x4, 0x8, 0xc, + }, + new short[]{ // 8 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1a, 0x1b, 0x2, 0x24, 0x25, 0x28, 0x29, 0x2a, 0x2b, 0x2c, 0x2d, + 0x3, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37, 0x4, 0x4a, 0x4b, 0x5, 0x52, 0x53, 0x54, 0x55, 0x58, 0x59, + 0x5a, 0x5b, 0x64, 0x65, 0x67, 0x68, 0xa, 0xb, + }, + new short[]{ // 9 bits + 0x98, 0x99, 0x9a, 0x9b, 0xcc, 0xcd, 0xd2, 0xd3, 0xd4, 0xd5, 0xd6, 0xd7, 0xd8, 0xd9, 0xda, 0xdb, + }, + new short[]{ // 10 bits + }, + new short[]{ // 11 bits + 0x8, 0xc, 0xd, + }, + new short[]{ // 12 bits + 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x1c, 0x1d, 0x1e, 0x1f, + } + }; + + private static readonly short[][] WHITE_RUN_LENGTHS = new short[][]{ + new short[]{ // 4 bits + 2, 3, 4, 5, 6, 7, + }, + new short[]{ // 5 bits + 128, 8, 9, 64, 10, 11, + }, + new short[]{ // 6 bits + 192, 1664, 16, 17, 13, 14, 15, 1, 12, + }, + new short[]{ // 7 bits + 26, 21, 28, 27, 18, 24, 25, 22, 256, 23, 20, 19, + }, + new short[]{ // 8 bits + 33, 34, 35, 36, 37, 38, 31, 32, 29, 53, 54, 39, 40, 41, 42, 43, 44, 30, 61, 62, 63, 0, 320, 384, 45, + 59, 60, 46, 49, 50, 51, 52, 55, 56, 57, 58, 448, 512, 640, 576, 47, 48, + }, + new short[]{ // 9 bits + 1472, 1536, 1600, 1728, 704, 768, 832, 896, 960, 1024, 1088, 1152, 1216, 1280, 1344, 1408, + }, + new short[]{ // 10 bits + }, + new short[]{ // 11 bits + 1792, 1856, 1920, + }, + new short[]{ // 12 bits + 1984, 2048, 2112, 2176, 2240, 2304, 2368, 2432, 2496, 2560, + } + }; + + private static readonly Node EOL; + private static readonly Node FILL; + private static readonly Tree BlackRunTree; + private static readonly Tree WhiteRunTree; + private static readonly Tree EolOnlyTree; + private static readonly Tree CodeTree; + + const int VALUE_EOL = -2000; + const int VALUE_FILL = -1000; + const int VALUE_PASSMODE = -3000; + const int VALUE_HMODE = -4000; + + static CcittFaxDecoderStream() + { + EOL = new Node + { + IsLeaf = true, + Value = VALUE_EOL + }; + FILL = new Node + { + Value = VALUE_FILL + }; + FILL.Left = FILL; + FILL.Right = EOL; + + EolOnlyTree = new Tree(); + EolOnlyTree.Fill(12, 0, FILL); + EolOnlyTree.Fill(12, 1, EOL); + + BlackRunTree = new Tree(); + for (var i = 0; i < BLACK_CODES.Length; i++) + { + for (var j = 0; j < BLACK_CODES[i].Length; j++) + { + BlackRunTree.Fill(i + 2, BLACK_CODES[i][j], BLACK_RUN_LENGTHS[i][j]); + } + } + BlackRunTree.Fill(12, 0, FILL); + BlackRunTree.Fill(12, 1, EOL); + + WhiteRunTree = new Tree(); + + for (var i = 0; i < WHITE_CODES.Length; i++) + { + for (var j = 0; j < WHITE_CODES[i].Length; j++) + { + WhiteRunTree.Fill(i + 4, WHITE_CODES[i][j], WHITE_RUN_LENGTHS[i][j]); + } + } + + WhiteRunTree.Fill(12, 0, FILL); + WhiteRunTree.Fill(12, 1, EOL); + + CodeTree = new Tree(); + CodeTree.Fill(4, 1, VALUE_PASSMODE); // pass mode + CodeTree.Fill(3, 1, VALUE_HMODE); // H mode + CodeTree.Fill(1, 1, 0); // V(0) + CodeTree.Fill(3, 3, 1); // V_R(1) + CodeTree.Fill(6, 3, 2); // V_R(2) + CodeTree.Fill(7, 3, 3); // V_R(3) + CodeTree.Fill(3, 2, -1); // V_L(1) + CodeTree.Fill(6, 2, -2); // V_L(2) + CodeTree.Fill(7, 2, -3); // V_L(3) + } + } } \ No newline at end of file diff --git a/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs b/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs index f0520ecd..fd19e488 100644 --- a/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs +++ b/src/UglyToad.PdfPig/Filters/CcittFaxDecodeFilter.cs @@ -3,6 +3,7 @@ using System; using System.IO; using Tokens; + using CcittFax; using Util; /// diff --git a/src/UglyToad.PdfPig/Filters/BitStream.cs b/src/UglyToad.PdfPig/Filters/Lzw/BitStream.cs similarity index 92% rename from src/UglyToad.PdfPig/Filters/BitStream.cs rename to src/UglyToad.PdfPig/Filters/Lzw/BitStream.cs index f729e570..239c6bbc 100644 --- a/src/UglyToad.PdfPig/Filters/BitStream.cs +++ b/src/UglyToad.PdfPig/Filters/Lzw/BitStream.cs @@ -1,4 +1,4 @@ -namespace UglyToad.PdfPig.Filters +namespace UglyToad.PdfPig.Filters.Lzw { using System; @@ -53,9 +53,9 @@ } // 'And' out the leading bits. - var firstBitOfDataWithinInt = (sizeof(int) * 8) - numberOfBits; + var firstBitOfDataWithinInt = sizeof(int) * 8 - numberOfBits; result &= (int)(0xffffffff >> firstBitOfDataWithinInt); - + currentWithinByteBitOffset = endWithinByteBitOffset; return result; diff --git a/src/UglyToad.PdfPig/Filters/LzwFilter.cs b/src/UglyToad.PdfPig/Filters/LzwFilter.cs index eaaae251..33e83426 100644 --- a/src/UglyToad.PdfPig/Filters/LzwFilter.cs +++ b/src/UglyToad.PdfPig/Filters/LzwFilter.cs @@ -4,6 +4,7 @@ namespace UglyToad.PdfPig.Filters { using System; using System.Collections.Generic; + using Lzw; using Tokens; using Util; diff --git a/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs b/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs index 6e8845d3..8a95399e 100644 --- a/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs +++ b/src/UglyToad.PdfPig/Filters/RunLengthFilter.cs @@ -6,7 +6,7 @@ /// /// - /// The Run Length filterencodes data in a simple byte-oriented format based on run length. + /// The Run Length filter encodes data in a simple byte-oriented format based on run length. /// The encoded data is a sequence of runs, where each run consists of a length byte followed by 1 to 128 bytes of data. /// internal sealed class RunLengthFilter : IFilter