Browse Source

FOP-2920: Surrogate pair edge-case causes Exception by Dave Roxburgh

tags/2_9
Simon Steiner 10 months ago
parent
commit
ae5f6dd6c3

+ 12
- 9
fop-core/src/main/java/org/apache/fop/pdf/PDFFactory.java View File

@@ -46,6 +46,7 @@ import org.apache.commons.logging.LogFactory;
import org.apache.xmlgraphics.java2d.color.NamedColorSpace;
import org.apache.xmlgraphics.xmp.Metadata;

import org.apache.fop.events.EventBroadcaster;
import org.apache.fop.fonts.CIDFont;
import org.apache.fop.fonts.CodePointMapping;
import org.apache.fop.fonts.CustomFont;
@@ -930,11 +931,12 @@ public class PDFFactory {
* @param encoding character encoding scheme used by the font
* @param metrics additional information about the font
* @param descriptor additional information about the font
* @param eventBroadcaster Event broadcaster handed to the ToUnicode CMaps created for the font. May be null.
* @return the created /Font object
*/
public PDFFont makeFont(String fontname, String basefont,
String encoding, FontMetrics metrics,
FontDescriptor descriptor) {
FontDescriptor descriptor, EventBroadcaster eventBroadcaster) {
PDFFont preRegisteredfont = getDocument().findFont(fontname);
if (preRegisteredfont != null) {
return preRegisteredfont;
@@ -955,7 +957,7 @@ public class PDFFactory {
Typeface tf = (Typeface)metrics;
mapping = CodePointMapping.getMapping(tf.getEncodingName());
}
generateToUnicodeCmap(font, mapping);
generateToUnicodeCmap(font, mapping, eventBroadcaster);
}
return font;
} else {
@@ -1005,7 +1007,8 @@ public class PDFFactory {
}
} else {
cmap = new PDFToUnicodeCMap(cidMetrics.getCIDSet().getChars(), "fop-ucs-H",
new PDFCIDSystemInfo("Adobe", "Identity", 0), false);
new PDFCIDSystemInfo("Adobe", "Identity", 0), false,
eventBroadcaster);
}
getDocument().registerObject(cmap);
assert font instanceof PDFFontType0;
@@ -1075,7 +1078,7 @@ public class PDFFactory {
if (singleByteFont.isSymbolicFont()) {
//no encoding, use the font's encoding
if (forceToUnicode) {
generateToUnicodeCmap(nonBase14, mapping);
generateToUnicodeCmap(nonBase14, mapping, eventBroadcaster);
}
} else if (PDFEncoding.isPredefinedEncoding(mapping.getName())) {
font.setEncoding(mapping.getName());
@@ -1105,7 +1108,7 @@ public class PDFFactory {
pdfEncoding.setDifferences(differences);
font.setEncoding(pdfEncoding);
if (mapping.getUnicodeCharMap() != null) {
generateToUnicodeCmap(nonBase14, mapping);
generateToUnicodeCmap(nonBase14, mapping, eventBroadcaster);
}
}
} else {
@@ -1117,7 +1120,7 @@ public class PDFFactory {
font.setEncoding((String)pdfEncoding);
}
if (forceToUnicode) {
generateToUnicodeCmap(nonBase14, mapping);
generateToUnicodeCmap(nonBase14, mapping, eventBroadcaster);
}
}

@@ -1140,7 +1143,7 @@ public class PDFFactory {
getDocument().registerObject(addFont);
getDocument().getResources().addFont(addFont);
if (forceToUnicode) {
generateToUnicodeCmap(addFont, addEncoding);
generateToUnicodeCmap(addFont, addEncoding, eventBroadcaster);
}
}
}
@@ -1228,10 +1231,10 @@ public class PDFFactory {
return additionalEncodings;
}

private void generateToUnicodeCmap(PDFFont font, SingleByteEncoding encoding) {
private void generateToUnicodeCmap(PDFFont font, SingleByteEncoding encoding, EventBroadcaster eventBroadcaster) {
PDFCMap cmap = new PDFToUnicodeCMap(encoding.getUnicodeCharMap(),
"fop-ucs-H",
new PDFCIDSystemInfo("Adobe", "Identity", 0), true);
new PDFCIDSystemInfo("Adobe", "Identity", 0), true, eventBroadcaster);
getDocument().registerObject(cmap);
font.setToUnicode(cmap);
}

+ 4
- 2
fop-core/src/main/java/org/apache/fop/pdf/PDFResources.java View File

@@ -28,6 +28,7 @@ import java.util.Set;

import org.apache.xmlgraphics.java2d.color.profile.ColorProfileUtil;

import org.apache.fop.events.EventBroadcaster;
import org.apache.fop.fonts.FontDescriptor;
import org.apache.fop.fonts.FontInfo;
import org.apache.fop.fonts.Typeface;
@@ -116,8 +117,9 @@ public class PDFResources extends PDFDictionary {
*
* @param doc PDF document to add fonts to
* @param fontInfo font info object to get font information from
* @param eventBroadcaster Event broadcaster passed on to the font factory for each font that is created. May be null.
*/
public void addFonts(PDFDocument doc, FontInfo fontInfo) {
public void addFonts(PDFDocument doc, FontInfo fontInfo, EventBroadcaster eventBroadcaster) {
Map<String, Typeface> usedFonts = fontInfo.getUsedFonts();
for (Map.Entry<String, Typeface> e : usedFonts.entrySet()) {
String f = e.getKey();
@@ -135,7 +137,7 @@ public class PDFResources extends PDFDictionary {
encoding = null; //Symbolic fonts shouldn't specify an encoding value in PDF
}
addFont(doc.getFactory().makeFont(
f, font.getEmbedFontName(), encoding, font, desc));
f, font.getEmbedFontName(), encoding, font, desc, eventBroadcaster));
}
}
}

+ 147
- 41
fop-core/src/main/java/org/apache/fop/pdf/PDFToUnicodeCMap.java View File

@@ -22,6 +22,11 @@ package org.apache.fop.pdf;
import java.io.IOException;
import java.io.Writer;

import static java.lang.Character.isHighSurrogate;

import org.apache.fop.events.EventBroadcaster;
import org.apache.fop.render.pdf.PDFEventProducer;

/**
* Class representing ToUnicode CMaps.
* Here are some documentation resources:
@@ -45,6 +50,8 @@ public class PDFToUnicodeCMap extends PDFCMap {

private boolean singleByte;

private EventBroadcaster eventBroadcaster;

/**
* Constructor.
*
@@ -54,9 +61,10 @@ public class PDFToUnicodeCMap extends PDFCMap {
* Reference, Second Edition.
* @param sysInfo The attributes of the character collection of the CIDFont.
* @param singleByte true for single-byte, false for double-byte
* @param eventBroadcaster Event broadcaster. May be null.
*/
public PDFToUnicodeCMap(char[] unicodeCharMap, String name, PDFCIDSystemInfo sysInfo,
boolean singleByte) {
boolean singleByte, EventBroadcaster eventBroadcaster) {
super(name, sysInfo);
if (singleByte && unicodeCharMap.length > 256) {
throw new IllegalArgumentException("unicodeCharMap may not contain more than"
@@ -64,6 +72,7 @@ public class PDFToUnicodeCMap extends PDFCMap {
}
this.unicodeCharMap = unicodeCharMap;
this.singleByte = singleByte;
this.eventBroadcaster = eventBroadcaster;
}

/** {@inheritDoc} */
@@ -109,39 +118,58 @@ public class PDFToUnicodeCMap extends PDFCMap {
*/
protected void writeBFCharEntries(char[] charArray) throws IOException {
int totalEntries = 0;
for (int i = 0; i < charArray.length; i++) {
if (!partOfRange(charArray, i)) {
totalEntries++;
}
int charIndex = 0;
if (charArray.length > 0) {
do {
if (!partOfRange(charArray, charIndex)) {
totalEntries++;
}
if (isHighSurrogate(charArray[charIndex])) {
charIndex++;
}
} while (++charIndex < charArray.length);
}
if (totalEntries < 1) {
return;
}
int remainingEntries = totalEntries;
int charIndex = 0;
charIndex = 0;
do {
/* Limited to 100 entries in each section */
int entriesThisSection = Math.min(remainingEntries, 100);
writer.write(entriesThisSection + " beginbfchar\n");
for (int i = 0; i < entriesThisSection; i++) {
int sectionEntryCount = 0;
do {
/* Go to the next char not in a range */
while (partOfRange(charArray, charIndex)) {
if (isHighSurrogate(charArray[charIndex])) {
charIndex++;
}
charIndex++;
}

writer.write("<" + padCharIndex(charIndex) + "> ");

if (Character.codePointAt(charArray, charIndex) > 0xFFFF) {
// Handle UTF-16 surrogate pairs
String pairs = Integer.toHexString(charArray[charIndex])
+ Integer.toHexString(charArray[++charIndex]);
writer.write("<" + pairs + ">\n");
i++;
if (isHighSurrogate(charArray[charIndex])) {
char secondChar = 0; // Invalid low surrogate (valid: 0xDC00 - 0xDFFF)
if (charIndex + 1 < charArray.length) {
secondChar = charArray[charIndex + 1];
} else {
if (eventBroadcaster != null) {
PDFEventProducer pdfEventProducer = PDFEventProducer.Provider.get(eventBroadcaster);
pdfEventProducer.unpairedSurrogate(this);
}
}
writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
+ padHexString(Integer.toHexString(secondChar), 4) + ">\n");
charIndex++;
} else {
writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
+ ">\n");
}
charIndex++;
}
} while (++sectionEntryCount < entriesThisSection);

remainingEntries -= entriesThisSection;
writer.write("endbfchar\n");
} while (remainingEntries > 0);
@@ -158,33 +186,58 @@ public class PDFToUnicodeCMap extends PDFCMap {
*/
protected void writeBFRangeEntries(char[] charArray) throws IOException {
int totalEntries = 0;
for (int i = 0; i < charArray.length; i++) {
if (startOfRange(charArray, i)) {
totalEntries++;
}
int charIndex = 0;
if (charArray.length > 0) {
do {
if (startOfRange(charArray, charIndex)) {
totalEntries++;
}
if (isHighSurrogate(charArray[charIndex])) {
charIndex++;
}
} while (++charIndex < charArray.length);
}
if (totalEntries < 1) {
return;
}
int remainingEntries = totalEntries;
int charIndex = 0;
charIndex = 0;
do {
/* Limited to 100 entries in each section */
int entriesThisSection = Math.min(remainingEntries, 100);
writer.write(entriesThisSection + " beginbfrange\n");
for (int i = 0; i < entriesThisSection; i++) {
int sectionEntryCount = 0;
do {
/* Go to the next start of a range */
while (!startOfRange(charArray, charIndex)) {
if (isHighSurrogate(charArray[charIndex])) {
charIndex++;
}
charIndex++;
}
writer.write("<" + padCharIndex(charIndex) + "> ");
writer.write("<"
+ padCharIndex(endOfRange(charArray, charIndex))
+ "> ");
writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
+ ">\n");
if (isHighSurrogate(charArray[charIndex])) {
char secondChar = 0;
if (charIndex + 1 < charArray.length) {
secondChar = charArray[charIndex + 1];
} else {
if (eventBroadcaster != null) {
PDFEventProducer pdfEventProducer = PDFEventProducer.Provider.get(eventBroadcaster);
pdfEventProducer.unpairedSurrogate(this);
}
}
writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
+ padHexString(Integer.toHexString(secondChar), 4)
+ ">\n");
} else {
writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
+ ">\n");
}
charIndex++;
}
} while (++sectionEntryCount < entriesThisSection);
remainingEntries -= entriesThisSection;
writer.write("endbfrange\n");
} while (remainingEntries > 0);
@@ -199,8 +252,14 @@ public class PDFToUnicodeCMap extends PDFCMap {
*/
private int endOfRange(char[] charArray, int startOfRange) {
int i = startOfRange;
while (i < charArray.length - 1 && sameRangeEntryAsNext(charArray, i)) {
i++;
if (isHighSurrogate(charArray[i])) {
while (i < charArray.length - 3 && sameRangeEntryAsNext(charArray, i)) {
i += 2;
}
} else {
while (i < charArray.length - 1 && sameRangeEntryAsNext(charArray, i)) {
i++;
}
}
return i;
}
@@ -213,15 +272,29 @@ public class PDFToUnicodeCMap extends PDFCMap {
* @return True if this array element should be included in a range.
*/
private boolean partOfRange(char[] charArray, int arrayIndex) {
if (charArray.length < 2) {
int minBytesInRange = 2;
if (isHighSurrogate(charArray[arrayIndex])) {
minBytesInRange = 4;
}
if (charArray.length < minBytesInRange) {
return false;
}
if (arrayIndex == 0) {
return sameRangeEntryAsNext(charArray, 0);
}
if (isHighSurrogate(charArray[arrayIndex])) {
if (arrayIndex == charArray.length - 2) {
return sameRangeEntryAsNext(charArray, arrayIndex - 2);
}
}
if (arrayIndex == charArray.length - 1) {
return sameRangeEntryAsNext(charArray, arrayIndex - 1);
}
if (isHighSurrogate(charArray[arrayIndex])) {
if (sameRangeEntryAsNext(charArray, arrayIndex - 2)) {
return true;
}
}
if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) {
return true;
}
@@ -232,22 +305,45 @@ public class PDFToUnicodeCMap extends PDFCMap {
}

/**
* Determine whether two bytes can be written in the same bfrange entry.
* @param charArray The array to be tested.
* @param firstItem The first of the two items in the array to be tested.
* The second item is firstItem + 1.
* @return True if both 1) the next item in the array is sequential with
* this one, and 2) the first byte of the character in the first position
* is equal to the first byte of the character in the second position.
* Determine whether two code points can be included in the same bfrange entry.
* Range sizes are limited to a maximum of 256 (128 for surrogate pairs).
* @param charArray The array holding the code points to be tested.
* @param firstItem The index of the first char of the first code point to be tested.
* The first char of the second code point is at index firstItem + n, where n is the
* number of chars in the first code point.
* @return True if all of the following hold:
* 1) the next code point in the array is sequential with this one, and
* 2) this code point and the next are both NOT surrogate pairs
* or
* this code point and the next are both surrogate pairs and
* the high-surrogates are the same, and
* 3) both code points start within the same block of 256 array positions,
* so the resulting range cannot be greater than 256 in size.
*/
private boolean sameRangeEntryAsNext(char[] charArray, int firstItem) {
if (charArray[firstItem] + 1 != charArray[firstItem + 1]) {
return false;
}
if (firstItem / 256 != (firstItem + 1) / 256) {
return false;
}
return true;
boolean retval = false;
do {
if (firstItem < 0 || firstItem >= charArray.length - 1) {
break;
}
if (isHighSurrogate(charArray[firstItem])) {
if (firstItem < charArray.length - 3) {
if (charArray[firstItem + 2] == charArray[firstItem]) {
if (charArray[firstItem + 3] == charArray[firstItem + 1] + 1) {
if (firstItem / 256 == (firstItem + 2) / 256) {
retval = true;
}
}
}
}
} else {
if (charArray[firstItem] + 1 == charArray[firstItem + 1]) {
if (firstItem / 256 == (firstItem + 1) / 256) {
retval = true;
}
}
}
} while (false);
return retval;
}

/**
@@ -262,11 +358,16 @@ public class PDFToUnicodeCMap extends PDFCMap {
if (!partOfRange(charArray, arrayIndex)) {
return false;
}
// If first element in the array, must be start of a range
// If part of a range and first element in the array, must be start of a range
if (arrayIndex == 0) {
return true;
}
// If last element in the array, cannot be start of a range
if (isHighSurrogate(charArray[arrayIndex])) {
if (arrayIndex == charArray.length - 2) {
return false;
}
}
if (arrayIndex == charArray.length - 1) {
return false;
}
@@ -274,6 +375,11 @@ public class PDFToUnicodeCMap extends PDFCMap {
* If part of same range as the previous element is, cannot be start
* of range.
*/
if (isHighSurrogate(charArray[arrayIndex])) {
if (sameRangeEntryAsNext(charArray, arrayIndex - 2)) {
return false;
}
}
if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) {
return false;
}

+ 1
- 1
fop-core/src/main/java/org/apache/fop/render/pdf/PDFDocumentHandler.java View File

@@ -184,7 +184,7 @@ public class PDFDocumentHandler extends AbstractBinaryWritingIFDocumentHandler {
/** {@inheritDoc} */
public void endDocument() throws IFException {
documentNavigationHandler.registerIncompleteActions();
pdfDoc.getResources().addFonts(pdfDoc, fontInfo);
pdfDoc.getResources().addFonts(pdfDoc, fontInfo, getUserAgent().getEventBroadcaster());
try {
if (pdfDoc.isLinearizationEnabled()) {
generator.flushPDFDoc();

+ 8
- 0
fop-core/src/main/java/org/apache/fop/render/pdf/PDFEventProducer.java View File

@@ -24,6 +24,7 @@ import org.apache.fop.events.EventProducer;

/**
* Event producer interface for events generated by the PDF renderer.
* PDFEventProducer.xml should include a message for all event-raising methods.
*/
public interface PDFEventProducer extends EventProducer {

@@ -82,4 +83,11 @@ public interface PDFEventProducer extends EventProducer {
*/
void unknownLanguage(Object source, String location);

/**
* Unicode char map ended with an unpaired surrogate.
*
* @param source the event source
* @event.severity ERROR
*/
void unpairedSurrogate(Object source);
}

+ 1
- 1
fop-core/src/main/java/org/apache/fop/svg/PDFDocumentGraphics2D.java View File

@@ -380,7 +380,7 @@ public class PDFDocumentGraphics2D extends PDFGraphics2D {

closePage();
if (fontInfo != null) {
pdfDoc.getResources().addFonts(pdfDoc, fontInfo);
pdfDoc.getResources().addFonts(pdfDoc, fontInfo, null);
}
this.pdfDoc.output(outputStream);
pdfDoc.outputTrailer(outputStream);

+ 1
- 1
fop-core/src/main/java/org/apache/fop/svg/PDFGraphics2D.java View File

@@ -960,7 +960,7 @@ public class PDFGraphics2D extends AbstractGraphics2D implements NativeImageHand
/** @todo see if pdfDoc and res can be linked here,
(currently res <> PDFDocument's resources) so addFonts()
can be moved to PDFDocument class */
res.addFonts(pdfDoc, specialFontInfo);
res.addFonts(pdfDoc, specialFontInfo, null);

PDFPattern myPat = pdfDoc.getFactory().makePattern(
resourceContext, 1, res, 1, 1, bbox,

+ 1
- 0
fop-core/src/main/resources/org/apache/fop/render/pdf/PDFEventProducer.xml View File

@@ -4,4 +4,5 @@
<message key="nonStandardStructureType">‘{type}’ is not a standard structure type defined by the PDF Reference. Falling back to ‘{fallback}’.</message>
<message key="incorrectEncryptionLength">Encryption length must be a multiple of 8 between 40 and 128. Setting encryption length to {correctedValue} instead of {originalValue}.</message>
<message key="unknownLanguage">A piece of text or an image’s alternate text is missing language information [(See position {location})|(No context info available)]</message>
<message key="unpairedSurrogate">A unicode char map was found to end with an unpaired surrogate.</message>
</catalogue>

+ 7
- 7
fop-core/src/test/java/org/apache/fop/pdf/PDFFactoryTestCase.java View File

@@ -84,10 +84,10 @@ public class PDFFactoryTestCase {
thisURI, resolver);
MockedFont font = new MockedFont(resourceResolver);

PDFFont pdfDejaVu = pdfFactory.makeFont("DejaVu", "DejaVu", "TTF", font, font);
PDFFont pdfDejaVu = pdfFactory.makeFont("DejaVu", "DejaVu", "TTF", font, font, null);
assertEquals("/EAAAAA+DejaVu", pdfDejaVu.getBaseFont().toString());

PDFFont pdfArial = pdfFactory.makeFont("Arial", "Arial", "TTF", font, font);
PDFFont pdfArial = pdfFactory.makeFont("Arial", "Arial", "TTF", font, font, null);
assertEquals("/EAAAAB+Arial", pdfArial.getBaseFont().toString());
}

@@ -104,7 +104,7 @@ public class PDFFactoryTestCase {
sb.addUnencodedCharacter(new NamedCharacter("xyz", String.valueOf((char) 0x2202)), 0, new Rectangle());
sb.mapChar((char) 0x2202);
sb.setEncoding(new CodePointMapping("FOPPDFEncoding", new int[0]));
PDFFont font = pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb);
PDFFont font = pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb, null);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
font.output(bos);
assertTrue(bos.toString().contains("/BaseFont /EAAAAA+a"));
@@ -134,7 +134,7 @@ public class PDFFactoryTestCase {
return new char[]{1};
}
});
pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb);
pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb, null);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
doc.outputTrailer(bos);
assertTrue(bos.toString().contains("/Differences [1 /a]"));
@@ -151,7 +151,7 @@ public class PDFFactoryTestCase {
sb.setFlags(0);
sb.setEncoding(new CodePointMapping("FOPPDFEncoding", new int[0]));
String enc = "MacRomanEncoding";
PDFFont font = pdfFactory.makeFont("a", "a", enc, sb, sb);
PDFFont font = pdfFactory.makeFont("a", "a", enc, sb, sb, null);
font.output(new ByteArrayOutputStream());
assertEquals(((PDFName)font.entries.get("Encoding")).getName(), enc);
}
@@ -183,7 +183,7 @@ public class PDFFactoryTestCase {
for (char c = 0; c < 512; c++) {
sb.mapChar(c);
}
pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb);
pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb, null);
ByteArrayOutputStream bos = new ByteArrayOutputStream();
doc.outputTrailer(bos);

@@ -217,7 +217,7 @@ public class PDFFactoryTestCase {
for (char c = 0; c < 512; c++) {
sb.mapChar(c);
}
pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb);
pdfFactory.makeFont("a", "a", "WinAnsiEncoding", sb, sb, null);
PDFFont pdfFont = pdfFactory.getDocument().getFontMap().get("a_1");
PDFFontDescriptor fontDescriptor = (PDFFontDescriptor) pdfFont.get("FontDescriptor");
assertNull(fontDescriptor.getCIDSet());

Loading…
Cancel
Save