From 8ca539942dd87e0b726c647988e5b2bbab20f0ee Mon Sep 17 00:00:00 2001 From: madelson <1269046+madelson@users.noreply.github.com> Date: Wed, 13 Nov 2024 14:47:50 -0500 Subject: [PATCH] Optimize cross reference object offset validation by avoiding nested loop * Optimize cross reference object offset validation by avoiding nested loops * Address https://github.com/UglyToad/PdfPig/pull/935#discussion_r1839585652 --- src/UglyToad.PdfPig.Core/IndirectReference.cs | 16 ++++++++-------- .../CrossReference/CrossReferenceTable.cs | 2 +- .../CrossReferenceObjectOffsetValidator.cs | 13 +++++++------ 3 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/UglyToad.PdfPig.Core/IndirectReference.cs b/src/UglyToad.PdfPig.Core/IndirectReference.cs index c742280d..f7f38b59 100644 --- a/src/UglyToad.PdfPig.Core/IndirectReference.cs +++ b/src/UglyToad.PdfPig.Core/IndirectReference.cs @@ -6,7 +6,7 @@ /// <summary> /// Used to uniquely identify and refer to objects in the PDF file. /// </summary> - public readonly struct IndirectReference + public readonly struct IndirectReference : IEquatable<IndirectReference> { /// <summary> /// A positive integer object number. @@ -30,16 +30,16 @@ Generation = generation; } + /// <inheritdoc /> + public bool Equals(IndirectReference other) + { + return other.ObjectNumber == ObjectNumber && other.Generation == Generation; + } + /// <inheritdoc /> public override bool Equals(object obj) { - if (obj is IndirectReference reference) - { - return reference.ObjectNumber == ObjectNumber - && reference.Generation == Generation; - } - - return false; + return obj is IndirectReference other && Equals(other); } /// <inheritdoc /> diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs index fe404ea2..91be7106 100644 --- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs +++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs @@ -49,7 +49,7 @@ Trailer = trailer ?? 
throw new ArgumentNullException(nameof(trailer)); CrossReferenceOffsets = crossReferenceOffsets ?? throw new ArgumentNullException(nameof(crossReferenceOffsets)); - var result = new Dictionary<IndirectReference, long>(); + var result = new Dictionary<IndirectReference, long>(capacity: objectOffsets.Count); foreach (var objectOffset in objectOffsets) { result[objectOffset.Key] = objectOffset.Value; diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs index 54eea31d..f2449b39 100644 --- a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs +++ b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs @@ -24,11 +24,12 @@ return true; } - var builderOffsets = new Dictionary<IndirectReference, long>(); - var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes); if (bruteForceOffsets.Count > 0) { + // Pre-allocate capacity for at least the bruteForceOffsets, since we'll be adding all of them + var builderOffsets = new Dictionary<IndirectReference, long>(bruteForceOffsets.Count); + // find all object streams foreach (var entry in crossReferenceTable.ObjectOffsets) { @@ -39,11 +40,11 @@ // TODO: more validation of streams. builderOffsets[entry.Key] = entry.Value; } + } - foreach (var item in bruteForceOffsets) - { - builderOffsets[item.Key] = item.Value; - } + foreach (var item in bruteForceOffsets) + { + builderOffsets[item.Key] = item.Value; } actualOffsets = builderOffsets;