From: Simon Steiner Date: Tue, 1 Aug 2023 08:43:30 +0000 (+0100) Subject: FOP-2920: Surrogate pair edge-case causes Exception by Dave Roxburgh X-Git-Tag: 2_9~6 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=76d37582a6e3b29697d694af7a85f1b87b7d9dbb;p=xmlgraphics-fop.git FOP-2920: Surrogate pair edge-case causes Exception by Dave Roxburgh --- diff --git a/fop-core/src/test/java/org/apache/fop/pdf/PDFToUnicodeCMapTestCase.java b/fop-core/src/test/java/org/apache/fop/pdf/PDFToUnicodeCMapTestCase.java new file mode 100644 index 000000000..746943119 --- /dev/null +++ b/fop-core/src/test/java/org/apache/fop/pdf/PDFToUnicodeCMapTestCase.java @@ -0,0 +1,380 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.pdf; + +import java.io.CharArrayWriter; +import java.io.IOException; +import java.net.URI; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; + +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.fail; + +import org.apache.fop.apps.FopFactory; +import org.apache.fop.apps.FopFactoryBuilder; +import org.apache.fop.events.Event; +import org.apache.fop.events.EventBroadcaster; +import org.apache.fop.events.EventListener; +import org.apache.fop.events.model.EventSeverity; + +public class PDFToUnicodeCMapTestCase { + + static final int UNICODE_CHAR_MAP_SIZE = 200; + + static final char[] S_UNICODE_CHAR_MAP = new char[UNICODE_CHAR_MAP_SIZE]; + + EventBroadcaster eventBroadcaster; + + @Before + public void initUnicodeChatMap() { + for (int i = 0; i < UNICODE_CHAR_MAP_SIZE; ++i) { + S_UNICODE_CHAR_MAP[i] = (char)(50 + i); + } + } + + @Before + public void initEventBroadcaster() { + URI config = URI.create(""); + FopFactoryBuilder fopFactoryBuilder = new FopFactoryBuilder(config); + FopFactory fopFactory = fopFactoryBuilder.build(); + eventBroadcaster = fopFactory.newFOUserAgent().getEventBroadcaster(); + } + + private void assertHeader(String cmap) { + Assert.assertTrue(cmap.contains("/CIDInit /ProcSet findresource begin\n" + + "12 dict begin\n" + + "begincmap\n" + + "/CIDSystemInfo 3 dict dup begin\n" + + " /Registry (Adobe) def\n" + + " /Ordering (UCS) def\n" + + " /Supplement 0 def\n" + + "end def\n" + + "/CMapName /Adobe-Identity-UCS def\n" + + "/CMapType 2 def\n")); + } + + private void assertFooter(String cmap) { + Assert.assertTrue(cmap.contains("endcmap\n" + + "CMapName currentdict /CMap defineresource pop\n" + + "end\n" + + "end\n")); + } + + private void assertHeaderAndFooter(String cmap) { + assertHeader(cmap); + assertFooter(cmap); + } + + private void buildAndAssertLine(char[] unicodeCharMap, Boolean singleByte, String expected) throws IOException { + PDFToUnicodeCMap cMap = new PDFToUnicodeCMap(unicodeCharMap, + PDFCMap.ENC_GB_EUC_H, + new PDFCIDSystemInfo("Adobe", "Identity", 0), + singleByte, eventBroadcaster); + + CharArrayWriter writer = new CharArrayWriter(); + CMapBuilder builder = cMap.createCMapBuilder(writer); + builder.writeCMap(); + String cmap = writer.toString(); + Assert.assertTrue(cmap.contains(expected)); + } + + private void buildAndAssert(char[] unicodeCharMap, Map configPairs) throws IOException { + Set> configSet = configPairs.entrySet(); + for (Map.Entry entry : configSet) { + buildAndAssertLine(unicodeCharMap, entry.getKey(), entry.getValue()); + } + } + + /** + * Checks entire CMap of unmodified unicodeCharMap, including header and footer. + * @throws IOException + */ + @Test + public void simpleTest() throws IOException { + Map configPairs = new HashMap<>(); + configPairs.put(true, "1 begincodespacerange\n" // Single-byte char map + + "<00> \n" + + "endcodespacerange\n" + + "1 beginbfrange\n" + + "<00> <0032>\n" + + "endbfrange\n"); + configPairs.put(false, "1 begincodespacerange\n" // Double-byte char map + + "<0000> \n" + + "endcodespacerange\n" + + "1 beginbfrange\n" + + "<0000> <00c7> <0032>\n" + + "endbfrange\n"); + + Set> configSet = configPairs.entrySet(); + for (Map.Entry entry : configSet) { + PDFToUnicodeCMap cMap = new PDFToUnicodeCMap(S_UNICODE_CHAR_MAP, + PDFCMap.ENC_GB_EUC_H, + new PDFCIDSystemInfo("Adobe", "Identity", 0), + entry.getKey(), eventBroadcaster); + + CharArrayWriter writer = new CharArrayWriter(); + CMapBuilder builder = cMap.createCMapBuilder(writer); + builder.writeCMap(); + String cmap = writer.toString(); + assertHeaderAndFooter(cmap); + Assert.assertTrue(cmap.contains(entry.getValue())); + } + } + + /** + * Checks CMap of unicodeCharMap with one codepoint changed so it is out of sequence. + * @throws IOException + */ + @Test + public void rangeTest() throws IOException { + S_UNICODE_CHAR_MAP[0x32] = 0xfa; // Interrupt the range with an oddity. + + Map configPairs = new HashMap<>(); + configPairs.put(true, "1 begincodespacerange\n" + + "<00> \n" + + "endcodespacerange\n" + + "1 beginbfchar\n" + + "<32> <00fa>\n" + + "endbfchar\n" + + "2 beginbfrange\n" + + "<00> <31> <0032>\n" + + "<33> <0065>\n" + + "endbfrange"); + configPairs.put(false, "1 begincodespacerange\n" + + "<0000> \n" + + "endcodespacerange\n" + + "1 beginbfchar\n" + + "<0032> <00fa>\n" + + "endbfchar\n" + + "2 beginbfrange\n" + + "<0000> <0031> <0032>\n" + + "<0033> <00c7> <0065>\n" + + "endbfrange"); + + buildAndAssert(S_UNICODE_CHAR_MAP, configPairs); + } + + /** + * Checks that one surrogate pair is correctly handled, even when it crosses a section boundary. + * @throws IOException + */ + @Test + public void surrogatePairTest() throws IOException { + final int charMapSize = 157; + + char[] unicodeCharMap = new char[charMapSize]; + + for (int i = 0; i < charMapSize; ++i) { + unicodeCharMap[i] = (char)(50 + i * 2); + } + + unicodeCharMap[99] = '\uD83C'; // High-surrogate code unit, last code unit of section. + unicodeCharMap[100] = '\uDF65'; + + Map configPairs = new HashMap<>(); + configPairs.put(true, "<60> <00f2>\n" + + "<61> <00f4>\n" + + "<62> <00f6>\n" + + "<63> \n" + + "endbfchar\n" + + "56 beginbfchar\n" + + "<65> <00fc>\n" + + "<66> <00fe>"); + configPairs.put(false, "<0060> <00f2>\n" + + "<0061> <00f4>\n" + + "<0062> <00f6>\n" + + "<0063> \n" + + "endbfchar\n" + + "56 beginbfchar\n" + + "<0065> <00fc>\n" + + "<0066> <00fe>"); + + buildAndAssert(unicodeCharMap, configPairs); + } + + /** + * Checks that a range of surrogate pairs is correctly handled. + * @throws IOException + */ + @Test + public void surrogatePairRangeTest() throws IOException { + final int charMapSize = 20; + + char[] unicodeCharMap = new char[charMapSize]; + + for (int i = 0; i < charMapSize; ++i) { + unicodeCharMap[i] = (char)(50 + i * 2); + } + + unicodeCharMap[9] = '\uD83C'; + unicodeCharMap[10] = '\uDF65'; + unicodeCharMap[11] = '\uD83C'; + unicodeCharMap[12] = '\uDF66'; + + Map configPairs = new HashMap<>(); + configPairs.put(true, "1 beginbfrange\n" + + "<09> <0b> \n" + + "endbfrange"); + configPairs.put(false, "1 beginbfrange\n" + + "<0009> <000b> \n" + + "endbfrange"); + + buildAndAssert(unicodeCharMap, configPairs); + } + + /** + * Checks that CMap is correct, even when made up of just one range of surrogate pairs. + * @throws IOException + */ + @Test + public void surrogatePairsRangeTest() throws IOException { + final int charMapSize = 20; + + char[] unicodeCharMap = new char[charMapSize]; + + for (int i = 0; i < charMapSize; i = i + 2) { + unicodeCharMap[i] = '\uD83C'; + } + for (int i = 0; i < charMapSize / 2; ++i) { + unicodeCharMap[1 + i * 2] = (char)('\uDF65' + i); + } + + Map configPairs = new HashMap<>(); + configPairs.put(true, "1 beginbfrange\n" + + "<00> <12> \n" + + "endbfrange"); + configPairs.put(false, "1 beginbfrange\n" + + "<0000> <0012> \n" + + "endbfrange"); + + buildAndAssert(unicodeCharMap, configPairs); + } + + /** + * Checks that an unpaired surrogate (a high-surrogate as the last code unit) is correctly handled. + * @throws IOException + */ + @Test + public void unpairedHighSurrogateTest() throws IOException { + final int charMapSize = 10; + + char[] unicodeCharMap = new char[charMapSize]; + + for (int i = 0; i < charMapSize; ++i) { + unicodeCharMap[i] = (char)(50 + i); + } + + unicodeCharMap[9] = '\uD83C'; // High-surrogate code unit. + + Map configPairs = new HashMap<>(); + configPairs.put(true, "1 beginbfchar\n" + + "<09> \n" + + "endbfchar"); + configPairs.put(false, "1 beginbfchar\n" + + "<0009> \n" + + "endbfchar"); + + Set> configSet = configPairs.entrySet(); + for (Map.Entry entry : configSet) { + MyEventListener listener = new MyEventListener(); + + eventBroadcaster.addEventListener(listener); + + buildAndAssertLine(unicodeCharMap, entry.getKey(), entry.getValue()); + + Event ev = listener.event; + assertNotNull(ev); + assertEquals("org.apache.fop.render.pdf.PDFEventProducer.unpairedSurrogate", listener.event.getEventID()); + assertEquals(EventSeverity.ERROR, listener.event.getSeverity()); + + eventBroadcaster.removeEventListener(listener); + } + } + + private class MyEventListener implements EventListener { + + private Event event; + + public void processEvent(Event event) { + if (this.event != null) { + fail("Multiple events received"); + } + this.event = event; + } + } + + /** + * Checks that a range of non-surrogate pairs is limited in size. + * @throws IOException + */ + @Test + public void rangeSizeTest() throws IOException { + final int charMapSize = 300; + + char[] unicodeCharMap = new char[charMapSize]; + + for (int i = 0; i < charMapSize; ++i) { + unicodeCharMap[i] = (char)(50 + i); + } + + Map configPairs = new HashMap<>(); + // PDFToUnicodeCMap CTOR rejects unicodeCharMap with > 256 elements where singleByte is true. + configPairs.put(false, "2 beginbfrange\n" + + "<0000> <00ff> <0032>\n" + + "<0100> <012b> <0132>\n" + + "endbfrange"); + + buildAndAssert(unicodeCharMap, configPairs); + } + + /** + * Checks that a range of surrogate pairs is limited in size. + * @throws IOException + */ + @Test + public void rangeSizeSurrogateTest() throws IOException { + final int charMapSize = 300; + + char[] unicodeCharMap = new char[charMapSize]; + + for (int i = 0; i < charMapSize; i = i + 2) { + unicodeCharMap[i] = '\uD83C'; + } + for (int i = 0; i < charMapSize / 2; ++i) { + unicodeCharMap[1 + i * 2] = (char)('\uDF65' + i); + } + + Map configPairs = new HashMap<>(); + // PDFToUnicodeCMap CTOR rejects unicodeCharMap with > 256 elements where singleByte is true. + configPairs.put(false, "2 beginbfrange\n" + + "<0000> <00fe> \n" + + "<0100> <012a> \n" + + "endbfrange"); + + buildAndAssert(unicodeCharMap, configPairs); + } +}