Browse Source

FOP-2920: Surrogate pair edge-case causes Exception by Dave Roxburgh

tags/2_9
Simon Steiner 9 months ago
parent
commit
76d37582a6
1 changed file with 380 additions and 0 deletions
  1. 380
    0
      fop-core/src/test/java/org/apache/fop/pdf/PDFToUnicodeCMapTestCase.java

+ 380
- 0
fop-core/src/test/java/org/apache/fop/pdf/PDFToUnicodeCMapTestCase.java View File

@@ -0,0 +1,380 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

/* $Id$ */

package org.apache.fop.pdf;

import java.io.CharArrayWriter;
import java.io.IOException;
import java.net.URI;
import java.util.HashMap;
import java.util.Map;
import java.util.Set;

import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;

import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.fail;

import org.apache.fop.apps.FopFactory;
import org.apache.fop.apps.FopFactoryBuilder;
import org.apache.fop.events.Event;
import org.apache.fop.events.EventBroadcaster;
import org.apache.fop.events.EventListener;
import org.apache.fop.events.model.EventSeverity;

public class PDFToUnicodeCMapTestCase {

static final int UNICODE_CHAR_MAP_SIZE = 200;

static final char[] S_UNICODE_CHAR_MAP = new char[UNICODE_CHAR_MAP_SIZE];

EventBroadcaster eventBroadcaster;

@Before
public void initUnicodeChatMap() {
for (int i = 0; i < UNICODE_CHAR_MAP_SIZE; ++i) {
S_UNICODE_CHAR_MAP[i] = (char)(50 + i);
}
}

@Before
public void initEventBroadcaster() {
URI config = URI.create("");
FopFactoryBuilder fopFactoryBuilder = new FopFactoryBuilder(config);
FopFactory fopFactory = fopFactoryBuilder.build();
eventBroadcaster = fopFactory.newFOUserAgent().getEventBroadcaster();
}

private void assertHeader(String cmap) {
Assert.assertTrue(cmap.contains("/CIDInit /ProcSet findresource begin\n"
+ "12 dict begin\n"
+ "begincmap\n"
+ "/CIDSystemInfo 3 dict dup begin\n"
+ " /Registry (Adobe) def\n"
+ " /Ordering (UCS) def\n"
+ " /Supplement 0 def\n"
+ "end def\n"
+ "/CMapName /Adobe-Identity-UCS def\n"
+ "/CMapType 2 def\n"));
}

private void assertFooter(String cmap) {
Assert.assertTrue(cmap.contains("endcmap\n"
+ "CMapName currentdict /CMap defineresource pop\n"
+ "end\n"
+ "end\n"));
}

private void assertHeaderAndFooter(String cmap) {
assertHeader(cmap);
assertFooter(cmap);
}

private void buildAndAssertLine(char[] unicodeCharMap, Boolean singleByte, String expected) throws IOException {
PDFToUnicodeCMap cMap = new PDFToUnicodeCMap(unicodeCharMap,
PDFCMap.ENC_GB_EUC_H,
new PDFCIDSystemInfo("Adobe", "Identity", 0),
singleByte, eventBroadcaster);

CharArrayWriter writer = new CharArrayWriter();
CMapBuilder builder = cMap.createCMapBuilder(writer);
builder.writeCMap();
String cmap = writer.toString();
Assert.assertTrue(cmap.contains(expected));
}

private void buildAndAssert(char[] unicodeCharMap, Map<Boolean, String> configPairs) throws IOException {
Set<Map.Entry<Boolean, String>> configSet = configPairs.entrySet();
for (Map.Entry<Boolean, String> entry : configSet) {
buildAndAssertLine(unicodeCharMap, entry.getKey(), entry.getValue());
}
}

/**
* Checks entire CMap of unmodified unicodeCharMap, including header and footer.
* @throws IOException
*/
@Test
public void simpleTest() throws IOException {
Map<Boolean, String> configPairs = new HashMap<>();
configPairs.put(true, "1 begincodespacerange\n" // Single-byte char map
+ "<00> <FF>\n"
+ "endcodespacerange\n"
+ "1 beginbfrange\n"
+ "<00> <c7> <0032>\n"
+ "endbfrange\n");
configPairs.put(false, "1 begincodespacerange\n" // Double-byte char map
+ "<0000> <FFFF>\n"
+ "endcodespacerange\n"
+ "1 beginbfrange\n"
+ "<0000> <00c7> <0032>\n"
+ "endbfrange\n");

Set<Map.Entry<Boolean, String>> configSet = configPairs.entrySet();
for (Map.Entry<Boolean, String> entry : configSet) {
PDFToUnicodeCMap cMap = new PDFToUnicodeCMap(S_UNICODE_CHAR_MAP,
PDFCMap.ENC_GB_EUC_H,
new PDFCIDSystemInfo("Adobe", "Identity", 0),
entry.getKey(), eventBroadcaster);

CharArrayWriter writer = new CharArrayWriter();
CMapBuilder builder = cMap.createCMapBuilder(writer);
builder.writeCMap();
String cmap = writer.toString();
assertHeaderAndFooter(cmap);
Assert.assertTrue(cmap.contains(entry.getValue()));
}
}

/**
* Checks CMap of unicodeCharMap with one codepoint changed so it is out of sequence.
* @throws IOException
*/
@Test
public void rangeTest() throws IOException {
S_UNICODE_CHAR_MAP[0x32] = 0xfa; // Interrupt the range with an oddity.

Map<Boolean, String> configPairs = new HashMap<>();
configPairs.put(true, "1 begincodespacerange\n"
+ "<00> <FF>\n"
+ "endcodespacerange\n"
+ "1 beginbfchar\n"
+ "<32> <00fa>\n"
+ "endbfchar\n"
+ "2 beginbfrange\n"
+ "<00> <31> <0032>\n"
+ "<33> <c7> <0065>\n"
+ "endbfrange");
configPairs.put(false, "1 begincodespacerange\n"
+ "<0000> <FFFF>\n"
+ "endcodespacerange\n"
+ "1 beginbfchar\n"
+ "<0032> <00fa>\n"
+ "endbfchar\n"
+ "2 beginbfrange\n"
+ "<0000> <0031> <0032>\n"
+ "<0033> <00c7> <0065>\n"
+ "endbfrange");

buildAndAssert(S_UNICODE_CHAR_MAP, configPairs);
}

/**
* Checks that one surrogate pair is correctly handled, even when it crosses a section boundary.
* @throws IOException
*/
@Test
public void surrogatePairTest() throws IOException {
final int charMapSize = 157;

char[] unicodeCharMap = new char[charMapSize];

for (int i = 0; i < charMapSize; ++i) {
unicodeCharMap[i] = (char)(50 + i * 2);
}

unicodeCharMap[99] = '\uD83C'; // High-surrogate code unit, last code unit of section.
unicodeCharMap[100] = '\uDF65';

Map<Boolean, String> configPairs = new HashMap<>();
configPairs.put(true, "<60> <00f2>\n"
+ "<61> <00f4>\n"
+ "<62> <00f6>\n"
+ "<63> <d83cdf65>\n"
+ "endbfchar\n"
+ "56 beginbfchar\n"
+ "<65> <00fc>\n"
+ "<66> <00fe>");
configPairs.put(false, "<0060> <00f2>\n"
+ "<0061> <00f4>\n"
+ "<0062> <00f6>\n"
+ "<0063> <d83cdf65>\n"
+ "endbfchar\n"
+ "56 beginbfchar\n"
+ "<0065> <00fc>\n"
+ "<0066> <00fe>");

buildAndAssert(unicodeCharMap, configPairs);
}

/**
* Checks that a range of surrogate pairs is correctly handled.
* @throws IOException
*/
@Test
public void surrogatePairRangeTest() throws IOException {
final int charMapSize = 20;

char[] unicodeCharMap = new char[charMapSize];

for (int i = 0; i < charMapSize; ++i) {
unicodeCharMap[i] = (char)(50 + i * 2);
}

unicodeCharMap[9] = '\uD83C';
unicodeCharMap[10] = '\uDF65';
unicodeCharMap[11] = '\uD83C';
unicodeCharMap[12] = '\uDF66';

Map<Boolean, String> configPairs = new HashMap<>();
configPairs.put(true, "1 beginbfrange\n"
+ "<09> <0b> <d83cdf65>\n"
+ "endbfrange");
configPairs.put(false, "1 beginbfrange\n"
+ "<0009> <000b> <d83cdf65>\n"
+ "endbfrange");

buildAndAssert(unicodeCharMap, configPairs);
}

/**
* Checks that CMap is correct, even when made up of just one range of surrogate pairs.
* @throws IOException
*/
@Test
public void surrogatePairsRangeTest() throws IOException {
final int charMapSize = 20;

char[] unicodeCharMap = new char[charMapSize];

for (int i = 0; i < charMapSize; i = i + 2) {
unicodeCharMap[i] = '\uD83C';
}
for (int i = 0; i < charMapSize / 2; ++i) {
unicodeCharMap[1 + i * 2] = (char)('\uDF65' + i);
}

Map<Boolean, String> configPairs = new HashMap<>();
configPairs.put(true, "1 beginbfrange\n"
+ "<00> <12> <d83cdf65>\n"
+ "endbfrange");
configPairs.put(false, "1 beginbfrange\n"
+ "<0000> <0012> <d83cdf65>\n"
+ "endbfrange");

buildAndAssert(unicodeCharMap, configPairs);
}

/**
* Checks that an unpaired surrogate (a high-surrogate as the last code unit) is correctly handled.
* @throws IOException
*/
@Test
public void unpairedHighSurrogateTest() throws IOException {
final int charMapSize = 10;

char[] unicodeCharMap = new char[charMapSize];

for (int i = 0; i < charMapSize; ++i) {
unicodeCharMap[i] = (char)(50 + i);
}

unicodeCharMap[9] = '\uD83C'; // High-surrogate code unit.

Map<Boolean, String> configPairs = new HashMap<>();
configPairs.put(true, "1 beginbfchar\n"
+ "<09> <d83c0000>\n"
+ "endbfchar");
configPairs.put(false, "1 beginbfchar\n"
+ "<0009> <d83c0000>\n"
+ "endbfchar");

Set<Map.Entry<Boolean, String>> configSet = configPairs.entrySet();
for (Map.Entry<Boolean, String> entry : configSet) {
MyEventListener listener = new MyEventListener();

eventBroadcaster.addEventListener(listener);

buildAndAssertLine(unicodeCharMap, entry.getKey(), entry.getValue());

Event ev = listener.event;
assertNotNull(ev);
assertEquals("org.apache.fop.render.pdf.PDFEventProducer.unpairedSurrogate", listener.event.getEventID());
assertEquals(EventSeverity.ERROR, listener.event.getSeverity());

eventBroadcaster.removeEventListener(listener);
}
}

private class MyEventListener implements EventListener {

private Event event;

public void processEvent(Event event) {
if (this.event != null) {
fail("Multiple events received");
}
this.event = event;
}
}

/**
* Checks that a range of non-surrogate pairs is limited in size.
* @throws IOException
*/
@Test
public void rangeSizeTest() throws IOException {
final int charMapSize = 300;

char[] unicodeCharMap = new char[charMapSize];

for (int i = 0; i < charMapSize; ++i) {
unicodeCharMap[i] = (char)(50 + i);
}

Map<Boolean, String> configPairs = new HashMap<>();
// PDFToUnicodeCMap CTOR rejects unicodeCharMap with > 256 elements where singleByte is true.
configPairs.put(false, "2 beginbfrange\n"
+ "<0000> <00ff> <0032>\n"
+ "<0100> <012b> <0132>\n"
+ "endbfrange");

buildAndAssert(unicodeCharMap, configPairs);
}

/**
* Checks that a range of surrogate pairs is limited in size.
* @throws IOException
*/
@Test
public void rangeSizeSurrogateTest() throws IOException {
final int charMapSize = 300;

char[] unicodeCharMap = new char[charMapSize];

for (int i = 0; i < charMapSize; i = i + 2) {
unicodeCharMap[i] = '\uD83C';
}
for (int i = 0; i < charMapSize / 2; ++i) {
unicodeCharMap[1 + i * 2] = (char)('\uDF65' + i);
}

Map<Boolean, String> configPairs = new HashMap<>();
// PDFToUnicodeCMap CTOR rejects unicodeCharMap with > 256 elements where singleByte is true.
configPairs.put(false, "2 beginbfrange\n"
+ "<0000> <00fe> <d83cdf65>\n"
+ "<0100> <012a> <d83cdfe5>\n"
+ "endbfrange");

buildAndAssert(unicodeCharMap, configPairs);
}
}

Loading…
Cancel
Save