From fb02159c7df3ad79eda435be9ae1f940953d9fd8 Mon Sep 17 00:00:00 2001 From: Vincent Hennebert Date: Mon, 8 Aug 2011 17:34:46 +0000 Subject: [PATCH] Bugzilla #51596: A composite glyph can be remapped more than once in a TTF subset, resulting in garbled character Test and clean-up provided by Mehdi Houshmand git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1155024 13f79535-47bb-0310-9956-ffa450edef68 --- .../fop/fonts/truetype/FontFileReader.java | 7 +- .../apache/fop/fonts/truetype/GlyfTable.java | 231 ++++++++++++++++++ .../fop/fonts/truetype/TTFSubSetFile.java | 196 +-------------- status.xml | 2 +- .../org/apache/fop/StandardTestSuite.java | 2 + .../fop/fonts/truetype/GlyfTableTestCase.java | 190 ++++++++++++++ 6 files changed, 439 insertions(+), 189 deletions(-) create mode 100644 src/java/org/apache/fop/fonts/truetype/GlyfTable.java create mode 100644 test/java/org/apache/fop/fonts/truetype/GlyfTableTestCase.java diff --git a/src/java/org/apache/fop/fonts/truetype/FontFileReader.java b/src/java/org/apache/fop/fonts/truetype/FontFileReader.java index a6db7b6d0..56017d377 100644 --- a/src/java/org/apache/fop/fonts/truetype/FontFileReader.java +++ b/src/java/org/apache/fop/fonts/truetype/FontFileReader.java @@ -198,14 +198,15 @@ public class FontFileReader { * @param val The value to write * @throws IOException If EOF is reached */ - public final void writeTTFUShort(int pos, int val) throws IOException { + public final void writeTTFUShort(long pos, int val) throws IOException { if ((pos + 2) > fsize) { throw new java.io.EOFException("Reached EOF"); } final byte b1 = (byte)((val >> 8) & 0xff); final byte b2 = (byte)(val & 0xff); - file[pos] = b1; - file[pos + 1] = b2; + final int fileIndex = (int) pos; + file[fileIndex] = b1; + file[fileIndex + 1] = b2; } /** diff --git a/src/java/org/apache/fop/fonts/truetype/GlyfTable.java b/src/java/org/apache/fop/fonts/truetype/GlyfTable.java new file mode 100644 index 000000000..f26ac2ffd --- /dev/null +++ b/src/java/org/apache/fop/fonts/truetype/GlyfTable.java @@ -0,0 +1,231 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.fonts.truetype; + +import java.io.IOException; +import java.util.HashSet; +import java.util.Map; +import java.util.Set; +import java.util.TreeSet; + +/** + * This "glyf" table in a TrueType font file contains information that describes the glyphs. This + * class is responsible for creating a subset of the "glyf" table given a set of glyph indices. + */ +public class GlyfTable { + + private final TTFMtxEntry[] mtxTab; + + private final long tableOffset; + + private final Set remappedComposites; + + private final Map subset; + + private final FontFileReader in; + + /** All the composite glyphs that appear in the subset. */ + private Set compositeGlyphs = new TreeSet(); + + /** All the glyphs that are composed, but do not appear in the subset. */ + private Set composedGlyphs = new TreeSet(); + + GlyfTable(FontFileReader in, TTFMtxEntry[] metrics, TTFDirTabEntry dirTableEntry, + Map glyphs) throws IOException { + mtxTab = metrics; + tableOffset = dirTableEntry.getOffset(); + remappedComposites = new HashSet(); + this.subset = glyphs; + this.in = in; + } + + private static enum GlyfFlags { + + ARG_1_AND_2_ARE_WORDS(4, 2), + ARGS_ARE_XY_VALUES, + ROUND_XY_TO_GRID, + WE_HAVE_A_SCALE(2), + RESERVED, + MORE_COMPONENTS, + WE_HAVE_AN_X_AND_Y_SCALE(4), + WE_HAVE_A_TWO_BY_TWO(8), + WE_HAVE_INSTRUCTIONS, + USE_MY_METRICS, + OVERLAP_COMPOUND, + SCALED_COMPONENT_OFFSET, + UNSCALED_COMPONENT_OFFSET; + + private final int bitMask; + private final int argsCountIfSet; + private final int argsCountIfNotSet; + + private GlyfFlags(int argsCountIfSet, int argsCountIfNotSet) { + this.bitMask = 1 << this.ordinal(); + this.argsCountIfSet = argsCountIfSet; + this.argsCountIfNotSet = argsCountIfNotSet; + } + + private GlyfFlags(int argsCountIfSet) { + this(argsCountIfSet, 0); + } + + private GlyfFlags() { + this(0, 0); + } + + /** + * Calculates, from the given flags, the offset to the next glyph index. + * + * @param flags the glyph data flags + * @return offset to the next glyph if any, or 0 + */ + static int getOffsetToNextComposedGlyf(int flags) { + int offset = 0; + for (GlyfFlags flag : GlyfFlags.values()) { + offset += (flags & flag.bitMask) > 0 ? flag.argsCountIfSet : flag.argsCountIfNotSet; + } + return offset; + } + + /** + * Checks the given flags to see if there is another composed glyph. + * + * @param flags the glyph data flags + * @return true if there is another composed glyph, otherwise false. + */ + static boolean hasMoreComposites(int flags) { + return (flags & MORE_COMPONENTS.bitMask) > 0; + } + } + + /** + * Populates the map of subset glyphs with all the glyphs that compose the glyphs in the subset. + * This also re-maps the indices of composed glyphs to their new index in the subset font. + * + * @throws IOException an I/O error + */ + void populateGlyphsWithComposites() throws IOException { + for (int indexInOriginal : subset.keySet()) { + scanGlyphsRecursively(indexInOriginal); + } + + addAllComposedGlyphsToSubset(); + + for (int compositeGlyph : compositeGlyphs) { + long offset = tableOffset + mtxTab[compositeGlyph].getOffset() + 10; + if (!remappedComposites.contains(offset)) { + remapComposite(offset); + } + } + } + + /** + * Scans each glyph for any composed glyphs. This populates compositeGlyphs with + * all the composite glyphs being used in the subset. This also populates newGlyphs + * with any new glyphs that are composed and do not appear in the subset of glyphs. + * + * For example the double quote mark (") is often composed of two apostrophes ('), if an + * apostrophe doesn't appear in the glyphs in the subset, it will be included and will be added + * to newGlyphs. + * + * @param indexInOriginal the index of the glyph to test from the original font + * @throws IOException an I/O error + */ + private void scanGlyphsRecursively(int indexInOriginal) throws IOException { + if (!subset.containsKey(indexInOriginal)) { + composedGlyphs.add(indexInOriginal); + } + + if (isComposite(indexInOriginal)) { + compositeGlyphs.add(indexInOriginal); + Set composedGlyphs = retrieveComposedGlyphs(indexInOriginal); + for (Integer composedGlyph : composedGlyphs) { + scanGlyphsRecursively(composedGlyph); + } + } + } + + /** + * Adds to the subset, all the glyphs that are composed by a glyph, but do not appear themselves + * in the subset. + */ + private void addAllComposedGlyphsToSubset() { + int newIndex = subset.size(); + for (int composedGlyph : composedGlyphs) { + subset.put(composedGlyph, newIndex++); + } + } + + /** + * Re-maps the index of composed glyphs in the original font to the index of the same glyph in + * the subset font. + * + * @param glyphOffset the offset of the composite glyph + * @throws IOException an I/O error + */ + private void remapComposite(long glyphOffset) throws IOException { + long currentGlyphOffset = glyphOffset; + + remappedComposites.add(currentGlyphOffset); + + int flags = 0; + do { + flags = in.readTTFUShort(currentGlyphOffset); + int glyphIndex = in.readTTFUShort(currentGlyphOffset + 2); + Integer indexInSubset = subset.get(glyphIndex); + assert indexInSubset != null; + /* + * TODO: this should not be done here!! We're writing to the stream we're reading from, + * this is asking for trouble! What should happen is when the glyph data is copied from + * subset, the remapping should be done there. So the original stream is left untouched. + */ + in.writeTTFUShort(currentGlyphOffset + 2, indexInSubset); + + currentGlyphOffset += 4 + GlyfFlags.getOffsetToNextComposedGlyf(flags); + } while (GlyfFlags.hasMoreComposites(flags)); + } + + private boolean isComposite(int indexInOriginal) throws IOException { + int numberOfContours = in.readTTFShort(tableOffset + mtxTab[indexInOriginal].getOffset()); + return numberOfContours < 0; + } + + /** + * Reads a composite glyph at a given index and retrieves all the glyph indices of contingent + * composed glyphs. + * + * @param indexInOriginal the glyph index of the composite glyph + * @return the set of glyph indices this glyph composes + * @throws IOException an I/O error + */ + private Set retrieveComposedGlyphs(int indexInOriginal) + throws IOException { + Set composedGlyphs = new HashSet(); + long offset = tableOffset + mtxTab[indexInOriginal].getOffset() + 10; + int flags = 0; + do { + flags = in.readTTFUShort(offset); + composedGlyphs.add(in.readTTFUShort(offset + 2)); + offset += 4 + GlyfFlags.getOffsetToNextComposedGlyf(flags); + } while (GlyfFlags.hasMoreComposites(flags)); + + return composedGlyphs; + } +} diff --git a/src/java/org/apache/fop/fonts/truetype/TTFSubSetFile.java b/src/java/org/apache/fop/fonts/truetype/TTFSubSetFile.java index f0c4da408..665cf289d 100644 --- a/src/java/org/apache/fop/fonts/truetype/TTFSubSetFile.java +++ b/src/java/org/apache/fop/fonts/truetype/TTFSubSetFile.java @@ -21,7 +21,6 @@ package org.apache.fop.fonts.truetype; import java.io.IOException; import java.util.Iterator; -import java.util.List; import java.util.Map; import java.util.Set; @@ -466,190 +465,6 @@ public class TTFSubSetFile extends TTFFile { } } - /** - * Returns a List containing the glyph itself plus all glyphs - * that this composite glyph uses - */ - private List getIncludedGlyphs(FontFileReader in, int glyphOffset, - Integer glyphIdx) throws IOException { - List ret = new java.util.ArrayList(); - ret.add(glyphIdx); - int offset = glyphOffset + (int)mtxTab[glyphIdx.intValue()].getOffset() + 10; - Integer compositeIdx = null; - int flags = 0; - boolean moreComposites = true; - while (moreComposites) { - flags = in.readTTFUShort(offset); - compositeIdx = Integer.valueOf(in.readTTFUShort(offset + 2)); - ret.add(compositeIdx); - - offset += 4; - if ((flags & 1) > 0) { - // ARG_1_AND_ARG_2_ARE_WORDS - offset += 4; - } else { - offset += 2; - } - - if ((flags & 8) > 0) { - offset += 2; // WE_HAVE_A_SCALE - } else if ((flags & 64) > 0) { - offset += 4; // WE_HAVE_AN_X_AND_Y_SCALE - } else if ((flags & 128) > 0) { - offset += 8; // WE_HAVE_A_TWO_BY_TWO - } - - if ((flags & 32) > 0) { - moreComposites = true; - } else { - moreComposites = false; - } - } - - return ret; - } - - - /** - * We need to remember which composites were already remapped because the value to be - * remapped is being read from the TTF file and being replaced right there. Doing this - * twice would create a bad map the second time. - */ - private Set remappedComposites = null; - - /** - * Rewrite all compositepointers in glyphindex glyphIdx - */ - private void remapComposite(FontFileReader in, Map glyphs, - long glyphOffset, - Integer glyphIdx) throws IOException { - if (remappedComposites == null) { - remappedComposites = new java.util.HashSet(); - } - TTFMtxEntry mtxEntry = mtxTab[glyphIdx.intValue()]; - long offset = glyphOffset + mtxEntry.getOffset() + 10; - - //Avoid duplicate remapping (see javadoc for remappedComposites above) - if (!remappedComposites.contains(offset)) { - remappedComposites.add(offset); - innerRemapComposite(in, glyphs, offset); - } - } - - private void innerRemapComposite(FontFileReader in, Map glyphs, long offset) - throws IOException { - Integer compositeIdx = null; - int flags = 0; - boolean moreComposites = true; - - while (moreComposites) { - flags = in.readTTFUShort(offset); - compositeIdx = Integer.valueOf(in.readTTFUShort(offset + 2)); - Integer newIdx = glyphs.get(compositeIdx); - if (newIdx == null) { - // This errormessage would look much better - // if the fontname was printed to - //log.error("An embedded font " - // + "contains bad glyph data. " - // + "Characters might not display " - // + "correctly."); - moreComposites = false; - continue; - } - - in.writeTTFUShort((int)(offset + 2), newIdx.intValue()); - - offset += 4; - - if ((flags & 1) > 0) { - // ARG_1_AND_ARG_2_ARE_WORDS - offset += 4; - } else { - offset += 2; - } - - if ((flags & 8) > 0) { - offset += 2; // WE_HAVE_A_SCALE - } else if ((flags & 64) > 0) { - offset += 4; // WE_HAVE_AN_X_AND_Y_SCALE - } else if ((flags & 128) > 0) { - offset += 8; // WE_HAVE_A_TWO_BY_TWO - } - - if ((flags & 32) > 0) { - moreComposites = true; - } else { - moreComposites = false; - } - } - } - - - /** - * Scan all the original glyphs for composite glyphs and add those glyphs - * to the glyphmapping also rewrite the composite glyph pointers to the new - * mapping - */ - private void scanGlyphs(FontFileReader in, - Map glyphs) throws IOException { - TTFDirTabEntry glyfTable = (TTFDirTabEntry)dirTabs.get("glyf"); - Map newComposites = null; - Set allComposites = new java.util.HashSet(); - - int newIndex = glyphs.size(); - - if (glyfTable != null) { - while (newComposites == null || newComposites.size() > 0) { - // Inefficient to iterate through all glyphs - newComposites = new java.util.HashMap(); - - for (int origIndex : glyphs.keySet()) { - short numberOfContours = in.readTTFShort(glyfTable.getOffset() - + mtxTab[origIndex].getOffset()); - if (numberOfContours < 0) { - // origIndex is a composite glyph - allComposites.add(origIndex); - List composites - = getIncludedGlyphs(in, (int)glyfTable.getOffset(), - origIndex); - - if (log.isTraceEnabled()) { - log.trace("Glyph " + origIndex - + " is a composite glyph using the following glyphs: " - + composites); - } - - // Iterate through all composites pointed to - // by this composite and check if they exists - // in the glyphs map, add them if not. - for (int cIdx : composites) { - if (glyphs.get(cIdx) == null - && newComposites.get(cIdx) == null) { - newComposites.put(cIdx, newIndex); - newIndex++; - } - } - } - } - - // Add composites to glyphs - for (int im : newComposites.keySet()) { - glyphs.put(im, newComposites.get(im)); - } - } - - // Iterate through all composites to remap their composite index - for (int glyphIdx : allComposites) { - remapComposite(in, glyphs, glyfTable.getOffset(), glyphIdx); - } - - } else { - throw new IOException("Can't find glyf table"); - } - } - - - /** * Returns a subset of the original font. * @@ -720,6 +535,17 @@ public class TTFSubSetFile extends TTFFile { return ret; } + private void scanGlyphs(FontFileReader in, Map subsetGlyphs) + throws IOException { + TTFDirTabEntry glyfTableInfo = (TTFDirTabEntry) dirTabs.get("glyf"); + if (glyfTableInfo == null) { + throw new IOException("Glyf table could not be found"); + } + + GlyfTable glyfTable = new GlyfTable(in, mtxTab, glyfTableInfo, subsetGlyphs); + glyfTable.populateGlyphsWithComposites(); + } + /** * writes a ISO-8859-1 string at the currentPosition * updates currentPosition but not realSize diff --git a/status.xml b/status.xml index 37231bfc2..2295a4f60 100644 --- a/status.xml +++ b/status.xml @@ -66,7 +66,7 @@ Fixed a bug in AFP where the object area axes of an Include Object was incorrectly set when rotated by 180. - + Fixed a bug in TTF subsetting where a composite glyph could get remapped more than once resulting in garbled character. diff --git a/test/java/org/apache/fop/StandardTestSuite.java b/test/java/org/apache/fop/StandardTestSuite.java index 01e3a365e..a2e6d7524 100644 --- a/test/java/org/apache/fop/StandardTestSuite.java +++ b/test/java/org/apache/fop/StandardTestSuite.java @@ -25,6 +25,7 @@ import junit.framework.TestSuite; import org.apache.fop.area.ViewportTestSuite; import org.apache.fop.afp.parser.MODCAParserTestCase; import org.apache.fop.fonts.DejaVuLGCSerifTest; +import org.apache.fop.fonts.truetype.GlyfTableTestCase; import org.apache.fop.image.loader.batik.ImageLoaderTestCase; import org.apache.fop.image.loader.batik.ImagePreloaderTestCase; import org.apache.fop.intermediate.IFMimickingTestCase; @@ -61,6 +62,7 @@ public class StandardTestSuite { suite.addTest(new TestSuite(MODCAParserTestCase.class)); suite.addTest(org.apache.fop.render.afp.AFPTestSuite.suite()); suite.addTest(PSTestSuite.suite()); + suite.addTest(new TestSuite(GlyfTableTestCase.class)); suite.addTest(RichTextFormatTestSuite.suite()); suite.addTest(new TestSuite(ImageLoaderTestCase.class)); suite.addTest(new TestSuite(ImagePreloaderTestCase.class)); diff --git a/test/java/org/apache/fop/fonts/truetype/GlyfTableTestCase.java b/test/java/org/apache/fop/fonts/truetype/GlyfTableTestCase.java new file mode 100644 index 000000000..89527a775 --- /dev/null +++ b/test/java/org/apache/fop/fonts/truetype/GlyfTableTestCase.java @@ -0,0 +1,190 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.fonts.truetype; + +import java.io.ByteArrayInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Arrays; +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; + +/** + * Tests {@link GlyfTable}. + */ +public class GlyfTableTestCase extends TestCase { + + private final static class DirData { + + final long offset; + final long length; + + DirData(long offset, long length) { + this.offset = offset; + this.length = length; + } + } + + private FontFileReader subsetReader; + + private long[] glyphOffsets; + + private FontFileReader originalFontReader; + + @Override + public void setUp() throws IOException { + originalFontReader = new FontFileReader("test/resources/fonts/DejaVuLGCSerif.ttf"); + } + + /** + * Tests that composed glyphs are included in the glyph subset if a composite glyph is used. + * + * @throws IOException if an I/O error occurs + */ + public void testPopulateGlyphsWithComposites() throws IOException { + // Glyph 408 -> U+01D8 "uni01D8" this is a composite glyph. + int[] composedIndices = setupTest(408); + + int[] expected = new int[composedIndices.length]; + expected[1] = 6; + expected[5] = 2; + expected[6] = 4; + + assertArrayEquals(expected, composedIndices); + } + + /** + * Tests that no glyphs are added if there are no composite glyphs the subset. + * + * @throws IOException if an I/O error occurs + */ + public void testPopulateNoCompositeGlyphs() throws IOException { + int[] composedIndices = setupTest(36, 37, 38); // "A", "B", "C" + int[] expected = new int[composedIndices.length]; + + // There should be NO composite glyphs + assertArrayEquals(expected, composedIndices); + } + + /** + * Tests that glyphs aren't remapped twice if the glyph before a composite glyph has 0-length. + * + * @throws IOException if an I/O error occurs + */ + public void testGlyphsNotRemappedTwice() throws IOException { + int composedGlyph = 12; + // The order of these glyph indices, must NOT be changed! (see javadoc above) + int[] composedIndices = setupTest(1, 2, 3, 16, 2014, 4, 7, 8, 13, 2015, composedGlyph); + + // There are 2 composed glyphs within the subset + int[] expected = new int[composedIndices.length]; + expected[10] = composedGlyph; + + assertArrayEquals(expected, composedIndices); + } + + /** + * Tests that the correct glyph is included in the subset, when a composite glyph composed of a + * composite glyph is used. + * + * @throws IOException if an I/O error occurs + */ + public void testSingleRecursionStep() throws IOException { + // Glyph 2077 -> U+283F "uni283F" this is composed of a composite glyph (recursive). + int[] composedIndices = setupTest(2077); + + int[] expected = new int[composedIndices.length]; + expected[1] = 2; + + assertArrayEquals(expected, composedIndices); + } + + private int[] setupTest(int... glyphIndices) throws IOException { + Map glyphs = new HashMap(); + int index = 0; + glyphs.put(0, index++); // Glyph 0 (.notdef) must ALWAYS be in the subset + + for (int glyphIndex : glyphIndices) { + glyphs.put(glyphIndex, index++); + } + setupSubsetReader(glyphs); + readLoca(); + + return retrieveIndicesOfComposedGlyphs(); + } + + private void setupSubsetReader(Map glyphs) throws IOException { + TTFSubSetFile fontFile = new TTFSubSetFile(); + byte[] subsetFont = fontFile.readFont(originalFontReader, "Deja", glyphs); + InputStream intputStream = new ByteArrayInputStream(subsetFont); + subsetReader = new FontFileReader(intputStream); + } + + private void readLoca() throws IOException { + DirData loca = getTableData("loca"); + int numberOfGlyphs = (int) (loca.length - 4) / 4; + glyphOffsets = new long[numberOfGlyphs]; + subsetReader.seekSet(loca.offset); + + for (int i = 0; i < numberOfGlyphs; i++) { + glyphOffsets[i] = subsetReader.readTTFULong(); + } + } + + private int[] retrieveIndicesOfComposedGlyphs() throws IOException { + DirData glyf = getTableData("glyf"); + int[] composedGlyphIndices = new int[glyphOffsets.length]; + + for (int i = 0; i < glyphOffsets.length; i++) { + long glyphOffset = glyphOffsets[i]; + if (i != glyphOffsets.length - 1 && glyphOffset == glyphOffsets[i + 1]) { + continue; + } + subsetReader.seekSet(glyf.offset + glyphOffset); + short numberOfContours = subsetReader.readTTFShort(); + if (numberOfContours < 0) { + subsetReader.skip(8); + subsetReader.readTTFUShort(); // flags + int glyphIndex = subsetReader.readTTFUShort(); + composedGlyphIndices[i] = glyphIndex; + } + } + return composedGlyphIndices; + } + + private DirData getTableData(String tableName) throws IOException { + subsetReader.seekSet(0); + subsetReader.skip(12); + String name; + do { + name = subsetReader.readTTFString(4); + subsetReader.skip(4 * 3); + } while (!name.equals(tableName)); + + subsetReader.skip(-8); // We've found the table, go back to get the data we skipped over + return new DirData(subsetReader.readTTFLong(), subsetReader.readTTFLong()); + } + + private void assertArrayEquals(int[] expected, int[] actual) { + assertTrue(Arrays.equals(expected, actual)); + } +} -- 2.39.5