Add JPX bits per component decoding
Some checks failed
Build and test / build (push) Has been cancelled
Run Integration Tests / build (push) Has been cancelled

This commit is contained in:
BobLd 2025-02-08 10:56:33 +00:00
parent fdb8835b37
commit 1660c734e2
28 changed files with 242 additions and 11 deletions

View File

@ -0,0 +1,38 @@
namespace UglyToad.PdfPig.Tests.Images
{
    using PdfPig.Images;
    using System;

    /// <summary>
    /// Tests for <see cref="Jpeg2000Helper.GetJp2BitsPerComponent"/>, covering malformed input
    /// and every <c>*.jp2</c> sample found in the test project's Jpx folder.
    /// </summary>
    public class Jpeg2000HelperTests
    {
        // Resolved lazily, relative to the test assembly's base directory (three levels up, then Images/Files/Jpx).
        private static readonly Lazy<string> DocumentFolder = new Lazy<string>(ResolveDocumentFolder);

        /// <summary>
        /// One theory row per <c>*.jp2</c> file in the sample folder; each row holds the file name only.
        /// </summary>
        public static IEnumerable<object[]> GetAllDocuments
        {
            get
            {
                return from file in Directory.GetFiles(DocumentFolder.Value, "*.jp2")
                       select new object[] { Path.GetFileName(file) };
            }
        }

        [Fact]
        public void GetJp2BitsPerComponent_ThrowsException_WhenInputIsTooShort()
        {
            // 11 bytes is one byte short of the 12 byte signature box.
            var tooShort = new byte[11];

            Assert.Throws<InvalidOperationException>(() => Jpeg2000Helper.GetJp2BitsPerComponent(tooShort));
        }

        [Fact]
        public void GetJp2BitsPerComponent_ThrowsException_WhenSignatureBoxIsInvalid()
        {
            // Long enough to hold a signature box, but all zeros so the signature cannot match.
            var zeroedSignature = new byte[12];

            Assert.Throws<InvalidOperationException>(() => Jpeg2000Helper.GetJp2BitsPerComponent(zeroedSignature));
        }

        [Theory]
        [MemberData(nameof(GetAllDocuments))]
        public void GetJp2BitsPerComponent_ReturnsCorrectBitsPerComponent_WhenValidInput(string path)
        {
            string fullPath = Path.Combine(DocumentFolder.Value, path);
            byte[] image = File.ReadAllBytes(fullPath);

            byte bitsPerComponent = Jpeg2000Helper.GetJp2BitsPerComponent(image);

            Assert.Equal(8, bitsPerComponent);
        }

        private static string ResolveDocumentFolder()
        {
            string relative = Path.Combine(AppDomain.CurrentDomain.BaseDirectory, "..", "..", "..", "Images", "Files", "Jpx");
            return Path.GetFullPath(relative);
        }
    }
}

View File

@ -132,6 +132,78 @@
<None Update="Dla\Documents\Random 2 Columns Lists Hyph - Justified.pdf">
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\1310ba77-4dbf-4d8d-a8f4-5ba59d1221a7.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\2cee5bb6-f845-4ac1-8156-a899075c0b46.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\33fb977a-e3da-48da-ad51-89af637ab736.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\371028e4-aea3-4e1d-b76b-47b763922e2f.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\804344c3-2c63-4e9c-b7c2-8c64a14d885b.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\82811cfb-9a70-475d-8338-f20df0acd052.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\82828826-f624-4f22-8421-f8c4adac43a3.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\8e95baf6-874e-431c-9cbc-d735ccabac0c.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\91a217c9-79bb-4a4b-934b-1362344f6b89.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\99a9ea0e-c407-4336-96a0-85023f46c231.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\9ac01df9-6623-4d14-89fd-e9934d1a6c7e.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\9d5c783a-c001-40e9-91b9-630c71804a77.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\9df64d7b-4003-4d0d-8f68-b5f88de781b7.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\a6105bfd-3ace-4d6b-b2dc-f9ce4022832b.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\b650c344-bc4d-427a-94af-cfed04136f67.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\c390b9c7-a562-42bf-a592-7a7b29819a6a.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\d6b4b35c-0ceb-47fe-aba8-4360acb49fcb.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\deff000e-a14a-40fd-bf39-88ce11745260.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\e29266a2-201a-4ad6-9725-ca1b7c22224d.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\eae2cabb-f520-4be5-932f-fb19fce5b2f2.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\eb62f062-6567-48b2-b04d-6d90de120f07.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\ed5c585f-590e-4585-9ce7-25976f589ca8.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\ef0af08b-04d1-4a3e-a9d8-7916b9826f5d.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
<None Update="Images\Files\Jpx\fd42e6a0-5c7a-4eb2-b0e3-474cfde067a6.jp2">
<CopyToOutputDirectory>Never</CopyToOutputDirectory>
</None>
</ItemGroup>
<ItemGroup>
<Using Include="Xunit" />

View File

@ -0,0 +1,103 @@
namespace UglyToad.PdfPig.Images
{
    using System;
    using System.Buffers.Binary;

    /// <summary>
    /// Reads header information from JPEG 2000 images wrapped in the JP2 box file format.
    /// </summary>
    internal static class Jpeg2000Helper
    {
        // JP2 signature box: 12 bytes = length (0x0000000C), type 'jP  ', magic <CR><LF><0x87><LF>.
        private const int SignatureBoxLength = 12;
        private const uint SignatureBoxType = 0x6A502020;
        private const uint SignatureBoxMagic = 0x0D0A870A;

        // Contiguous codestream box type, 'jp2c' in ASCII.
        private const uint CodestreamBoxType = 0x6A703263;

        // A box header is LBox (4 bytes) + TBox (4 bytes), optionally followed by XLBox (8 bytes) when LBox == 1.
        private const int BoxHeaderLength = 8;
        private const int ExtendedBoxHeaderLength = 16;

        // SIZ marker and the layout of its segment, measured from the first byte of the marker.
        private const ushort SizMarker = 0xFF51;
        private const int SizComponentCountOffset = 38;
        private const int SizFirstComponentDepthOffset = 40;
        private const int SizMinimumLength = SizFirstComponentDepthOffset + 1;

        /// <summary>
        /// Get bits per component values for Jp2 (Jpx) encoded images (first component).
        /// </summary>
        /// <param name="jp2Bytes">The raw bytes of the image, starting with the JP2 signature box.</param>
        /// <returns>The bit depth of the first component, as declared by the codestream SIZ marker segment.</returns>
        /// <exception cref="InvalidOperationException">
        /// The input is shorter than a signature box, the signature is wrong, the box structure is malformed,
        /// no codestream box is present, or the SIZ marker segment is missing or truncated.
        /// </exception>
        public static byte GetJp2BitsPerComponent(ReadOnlySpan<byte> jp2Bytes)
        {
            // Ensure the input has at least 12 bytes for the signature box
            if (jp2Bytes.Length < SignatureBoxLength)
            {
                throw new InvalidOperationException("Input is too short to be a valid JP2 file.");
            }

            // Verify the JP2 signature box
            uint length = BinaryPrimitives.ReadUInt32BigEndian(jp2Bytes.Slice(0, 4));
            uint type = BinaryPrimitives.ReadUInt32BigEndian(jp2Bytes.Slice(4, 4));
            uint magic = BinaryPrimitives.ReadUInt32BigEndian(jp2Bytes.Slice(8, 4));

            if (length != SignatureBoxLength || type != SignatureBoxType || magic != SignatureBoxMagic)
            {
                throw new InvalidOperationException("Invalid JP2 signature box.");
            }

            // Proceed to parse JP2 boxes
            return ParseBoxes(jp2Bytes.Slice(SignatureBoxLength));
        }

        /// <summary>
        /// Walks the top-level boxes until the contiguous codestream box ('jp2c') is found,
        /// then reads the bit depth from the codestream it contains.
        /// </summary>
        private static byte ParseBoxes(ReadOnlySpan<byte> jp2Bytes)
        {
            // Tracked as long so that adding an attacker-controlled box length can never wrap around.
            long offset = 0;

            while (offset < jp2Bytes.Length)
            {
                if (offset + BoxHeaderLength > jp2Bytes.Length)
                {
                    throw new InvalidOperationException("Invalid JP2 box structure.");
                }

                int boxStart = (int)offset;

                // Read box length and type
                uint lBox = BinaryPrimitives.ReadUInt32BigEndian(jp2Bytes.Slice(boxStart, 4));
                uint boxType = BinaryPrimitives.ReadUInt32BigEndian(jp2Bytes.Slice(boxStart + 4, 4));

                int headerLength = BoxHeaderLength;
                ulong boxLength = lBox;

                if (lBox == 1)
                {
                    // LBox == 1: the real length is the 64-bit XLBox field that follows the type.
                    if (offset + ExtendedBoxHeaderLength > jp2Bytes.Length)
                    {
                        throw new InvalidOperationException("Invalid JP2 box structure.");
                    }

                    boxLength = BinaryPrimitives.ReadUInt64BigEndian(jp2Bytes.Slice(boxStart + BoxHeaderLength, 8));
                    headerLength = ExtendedBoxHeaderLength;
                }

                // Check for the contiguous codestream box ('jp2c')
                if (boxType == CodestreamBoxType)
                {
                    // Parse the codestream to find the SIZ marker
                    return ParseCodestream(jp2Bytes.Slice(boxStart + headerLength));
                }

                if (lBox == 0)
                {
                    // LBox == 0: this box extends to the end of the file, so no further box can follow.
                    break;
                }

                if (boxLength < (ulong)headerLength)
                {
                    // A box cannot be shorter than its own header; advancing by it would misalign the parser.
                    throw new InvalidOperationException("Invalid JP2 box structure.");
                }

                if (boxLength >= (ulong)(jp2Bytes.Length - offset))
                {
                    // The box claims to reach (or pass) the end of the data: nothing left to inspect.
                    break;
                }

                // Move to the next box
                offset += (long)boxLength;
            }

            throw new InvalidOperationException("Codestream box not found in JP2 file.");
        }

        /// <summary>
        /// Scans the codestream for the SIZ marker (0xFF51) and returns the bit depth of the first component.
        /// </summary>
        private static byte ParseCodestream(ReadOnlySpan<byte> codestream)
        {
            int offset = 0;

            while (codestream.Length - offset >= 2)
            {
                // Read marker (2 bytes)
                ushort marker = BinaryPrimitives.ReadUInt16BigEndian(codestream.Slice(offset, 2));

                // Check for SIZ marker (0xFF51)
                if (marker == SizMarker)
                {
                    // The segment must reach at least the first component's Ssiz byte.
                    if (codestream.Length - offset < SizMinimumLength)
                    {
                        throw new InvalidOperationException("Invalid SIZ marker structure.");
                    }

                    // Skipped before the component count (38 bytes in total):
                    // marker (2), Lsiz (2), Rsiz (2), reference grid size (8),
                    // image offset (8), tile size (8) and tile offset (8).
                    ushort numComponents = BinaryPrimitives.ReadUInt16BigEndian(codestream.Slice(offset + SizComponentCountOffset, 2));

                    if (numComponents < 1)
                    {
                        throw new InvalidOperationException("Invalid number of components in SIZ marker.");
                    }

                    // Ssiz of the first component: bit 7 flags signed samples, the low 7 bits hold (bits - 1).
                    byte ssiz = codestream[offset + SizFirstComponentDepthOffset];

                    return (byte)((ssiz & 0x7F) + 1);
                }

                // Move to the next marker
                offset += 2;
            }

            throw new InvalidOperationException("SIZ marker not found in JPEG2000 codestream.");
        }
    }
}

View File

@ -8,6 +8,7 @@
using Graphics;
using Graphics.Colors;
using Graphics.Core;
using Images;
using Tokenization.Scanner;
using Tokens;
using Util;
@ -52,20 +53,37 @@
var isJpxDecode = dictionary.TryGet(NameToken.Filter, pdfScanner, out NameToken filterName)
&& filterName.Equals(NameToken.JpxDecode);
int bitsPerComponent = 0;
if (!isImageMask && !isJpxDecode)
{
if (!dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken))
{
throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}.");
}
bitsPerComponent = bitsPerComponentToken.Int;
}
else if (isImageMask)
int bitsPerComponent;
if (isImageMask)
{
bitsPerComponent = 1;
}
else
{
if (isJpxDecode)
{
// Optional for JPX
if (dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken))
{
bitsPerComponent = bitsPerComponentToken.Int;
System.Diagnostics.Debug.Assert(bitsPerComponent == Jpeg2000Helper.GetJp2BitsPerComponent(xObject.Stream.Data.Span));
}
else
{
bitsPerComponent = Jpeg2000Helper.GetJp2BitsPerComponent(xObject.Stream.Data.Span);
System.Diagnostics.Debug.Assert(new int[] { 1, 2, 4, 8, 16 }.Contains(bitsPerComponent));
}
}
else
{
if (!dictionary.TryGet(NameToken.BitsPerComponent, pdfScanner, out NumericToken? bitsPerComponentToken))
{
throw new PdfDocumentFormatException($"No bits per component defined for image: {dictionary}.");
}
bitsPerComponent = bitsPerComponentToken.Int;
}
}
var intent = xObject.DefaultRenderingIntent;
if (dictionary.TryGet(NameToken.Intent, pdfScanner, out NameToken renderingIntentToken))