mirror of
https://github.com/UglyToad/PdfPig.git
synced 2025-04-05 20:55:01 +08:00
Merge pull request #527 from fnatzke/Issue350-PdfPageBuilder.CopyFrom()_creates_invalid_PDF
Issue350 pdf page builder.copy from() creates invalid pdf
This commit is contained in:
commit
68c00c9b51
Binary file not shown.
48
src/UglyToad.PdfPig.Tests/Integration/ShowTextEscapeText.cs
Normal file
48
src/UglyToad.PdfPig.Tests/Integration/ShowTextEscapeText.cs
Normal file
@ -0,0 +1,48 @@
|
||||
namespace UglyToad.PdfPig.Tests.Integration
|
||||
{
|
||||
using System.Linq;
|
||||
using UglyToad.PdfPig;
|
||||
using UglyToad.PdfPig.Writer;
|
||||
using Xunit;
|
||||
|
||||
/// <summary>
/// Regression test for issue 350: copying a page whose content stream contains ShowText (Tj)
/// operations with unbalanced round brackets must still produce a readable PDF.
/// </summary>
public class ShowTextEscapeText
{
    /// <summary>
    /// Resolves the path of the source PDF used by this test.
    /// </summary>
    private static string GetFilename()
    {
        // The single page of the source PDF has 3 ShowText operations with unbalanced round brackets in the text.
        // Unbalanced means there is an open bracket without a close bracket, or a close bracket without an open one.
        //   1. line 387    (\() Tj
        //   2. line 397    (\)) Tj
        //   3. line 384    ( \(I\222ll try to stay on ) Tj
        // Note: in text 3 the \222 (octal, 0x92 hex) is similar to an apostrophe ', so the text reads
        // like " (I'll try to stay on" (with an open bracket and no matching close bracket).
        return IntegrationHelpers.GetDocumentPath("ShowTextOpWithUnbalancedRoundBrackets.pdf");
    }

    /// <summary>
    /// Copies the first page of the source document into a new document, re-reads the result
    /// from memory and checks that the word "wander" can still be extracted from it.
    /// </summary>
    [Fact]
    public void PdfCopyShowTextOpUsesEscapedText()
    {
        var filePath = GetFilename();

        using (var sourceDocument = PdfDocument.Open(filePath))
        {
            var pdfBuilder = new PdfDocumentBuilder();

            // Only the first page is copied; it holds all three unbalanced-bracket operations.
            var sourcePage = sourceDocument.GetPage(1);
            pdfBuilder.AddPage(sourcePage.Width, sourcePage.Height).CopyFrom(sourcePage);

            var pdfBytes = pdfBuilder.Build();

            // Re-read (in memory) the copied PDF and check that the example text ("wander") exists in the
            // word extract after the ShowText operations with unbalanced brackets.
            using (var document = PdfDocument.Open(pdfBytes))
            {
                var page = document.GetPage(1);
                var words = page.GetWords();

                var isExpectedTextInCopiedPdf = words.Any(w => w.Text.Contains("wander"));

                Assert.True(isExpectedTextInCopiedPdf, "Expected the word 'wander' in the copied page.");
            }
        }
    }
}
|
||||
}
|
@ -67,16 +67,40 @@
|
||||
operationContext.ShowText(input);
|
||||
}
|
||||
|
||||
/// <summary>
/// Escapes the characters that are special inside a PDF literal string so the text can be
/// written between parentheses.
/// </summary>
/// <param name="text">The raw text to escape; may be null.</param>
/// <returns>
/// The text with every backslash, open bracket and close bracket preceded by a backslash,
/// or null when <paramref name="text"/> is null.
/// </returns>
string EscapeText(string text)
{
    if (text is null)
    {
        return null;
    }

    // Fix Issue 350 from PDF Spec 1.7 (page 408) on handling 'special characters' of '(', ')' and '\'.
    //
    // The strings must conform to the syntax for string objects.
    // When a string is written by enclosing the data in parentheses, bytes whose values are the same
    // as those of the ASCII characters left parenthesis (40), right parenthesis (41), and backslash (92)
    // must be preceded by a backslash character.
    // All other byte values between 0 and 255 may be used in a string object.
    // These rules apply to each individual byte in a string object, whether the string is interpreted
    // by the text-showing operators as single-byte or multiple-byte character codes.

    // Note: order of replacing is important. The backslash must be escaped first, otherwise the
    // backslashes introduced by the bracket escapes below would themselves be doubled.
    text = text.Replace(@"\", @"\\");   // Escape any backslash     '\' -> '\\'
    text = text.Replace("(", @"\(");    // Escape any open bracket  '(' -> '\('
    text = text.Replace(")", @"\)");    // Escape any close bracket ')' -> '\)'

    return text;
}
|
||||
|
||||
/// <inheritdoc />
|
||||
public void Write(Stream stream)
|
||||
{
|
||||
|
||||
if (Bytes != null)
|
||||
{
|
||||
stream.WriteHex(Bytes);
|
||||
}
|
||||
else
|
||||
{
|
||||
stream.WriteText($"({Text})");
|
||||
var EscapedText = EscapeText(Text); // escape '(', ')' or '\'
|
||||
stream.WriteText($"({EscapedText})");
|
||||
}
|
||||
|
||||
stream.WriteWhiteSpace();
|
||||
|
Loading…
Reference in New Issue
Block a user