From 8ca539942dd87e0b726c647988e5b2bbab20f0ee Mon Sep 17 00:00:00 2001
From: madelson <1269046+madelson@users.noreply.github.com>
Date: Wed, 13 Nov 2024 14:47:50 -0500
Subject: [PATCH] Optimize cross reference object offset validation by avoiding
 nested loops
* Optimize cross reference object offset validation by avoiding nested loops
* Address review feedback from https://github.com/UglyToad/PdfPig/pull/935#discussion_r1839585652
---
src/UglyToad.PdfPig.Core/IndirectReference.cs | 16 ++++++++--------
.../CrossReference/CrossReferenceTable.cs | 2 +-
.../CrossReferenceObjectOffsetValidator.cs | 13 +++++++------
3 files changed, 16 insertions(+), 15 deletions(-)
diff --git a/src/UglyToad.PdfPig.Core/IndirectReference.cs b/src/UglyToad.PdfPig.Core/IndirectReference.cs
index c742280d..f7f38b59 100644
--- a/src/UglyToad.PdfPig.Core/IndirectReference.cs
+++ b/src/UglyToad.PdfPig.Core/IndirectReference.cs
@@ -6,7 +6,7 @@
///
/// Used to uniquely identify and refer to objects in the PDF file.
///
- public readonly struct IndirectReference
+ public readonly struct IndirectReference : IEquatable<IndirectReference>
{
///
/// A positive integer object number.
@@ -30,16 +30,16 @@
Generation = generation;
}
+ ///
+ public bool Equals(IndirectReference other)
+ {
+ return other.ObjectNumber == ObjectNumber && other.Generation == Generation;
+ }
+
///
public override bool Equals(object obj)
{
- if (obj is IndirectReference reference)
- {
- return reference.ObjectNumber == ObjectNumber
- && reference.Generation == Generation;
- }
-
- return false;
+ return obj is IndirectReference other && Equals(other);
}
///
diff --git a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs
index fe404ea2..91be7106 100644
--- a/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs
+++ b/src/UglyToad.PdfPig/CrossReference/CrossReferenceTable.cs
@@ -49,7 +49,7 @@
Trailer = trailer ?? throw new ArgumentNullException(nameof(trailer));
CrossReferenceOffsets = crossReferenceOffsets ?? throw new ArgumentNullException(nameof(crossReferenceOffsets));
- var result = new Dictionary();
+ var result = new Dictionary(capacity: objectOffsets.Count);
foreach (var objectOffset in objectOffsets)
{
result[objectOffset.Key] = objectOffset.Value;
diff --git a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs
index 54eea31d..f2449b39 100644
--- a/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs
+++ b/src/UglyToad.PdfPig/Parser/FileStructure/CrossReferenceObjectOffsetValidator.cs
@@ -24,11 +24,12 @@
return true;
}
- var builderOffsets = new Dictionary();
-
var bruteForceOffsets = BruteForceSearcher.GetObjectLocations(bytes);
if (bruteForceOffsets.Count > 0)
{
+ // Pre-allocate capacity for at least the bruteForceOffsets, since we'll be adding all of them
+ var builderOffsets = new Dictionary(bruteForceOffsets.Count);
+
// find all object streams
foreach (var entry in crossReferenceTable.ObjectOffsets)
{
@@ -39,11 +40,11 @@
// TODO: more validation of streams.
builderOffsets[entry.Key] = entry.Value;
}
+ }
- foreach (var item in bruteForceOffsets)
- {
- builderOffsets[item.Key] = item.Value;
- }
+ foreach (var item in bruteForceOffsets)
+ {
+ builderOffsets[item.Key] = item.Value;
}
actualOffsets = builderOffsets;