From: Jeremias Maerki Date: Sat, 16 Feb 2008 12:28:48 +0000 (+0000) Subject: Extracted most of the code in CodePointMapping (generated by XSLT) into a base class... X-Git-Tag: fop-0_95beta~63 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=57de10b2fdd6e6f0bbca3aac03bce84d82cfaba3;p=xmlgraphics-fop.git Extracted most of the code in CodePointMapping (generated by XSLT) into a base class for easier maintenance and proper Javadocs. Deprecated FOP's copy of Glyphs.java. Took a different approach to handling mapping alternatives for single-byte fonts. The AFM now only lists the main character. Substitution is done through Glyphs.java later in CodePointMapping. Fixed a problem in Type1FontLoader where the PFM overrides the ascender, descender, cap-height and x-height values even though the AFM provides them. It showed itself because the URW Symbol font has wrong values in the PFM. Added a note to myself in Type1FontLoader to implement the "Flags" value. The whole thing still seems to work without that part. Added a glyph name list to the CodePointMapping so we can work with the original list from the AFM. Otherwise, various mapping operations to and from resulted in unwanted mappings (because the mappings are not necessarily 1:1) and in the end the PDF received an incorrect Encoding map. Now there's no such problem anymore. 
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@628280 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/lib/xmlgraphics-commons-1.3svn.jar b/lib/xmlgraphics-commons-1.3svn.jar index 80927dd7e..55b9d9b2a 100644 Binary files a/lib/xmlgraphics-commons-1.3svn.jar and b/lib/xmlgraphics-commons-1.3svn.jar differ diff --git a/src/codegen/fonts/code-point-mapping.xsl b/src/codegen/fonts/code-point-mapping.xsl index c82ca0104..55b84c742 100644 --- a/src/codegen/fonts/code-point-mapping.xsl +++ b/src/codegen/fonts/code-point-mapping.xsl @@ -40,100 +40,19 @@ package org.apache.fop.fonts; -import java.util.Arrays; import java.util.Map; import java.util.Collections; -import org.apache.fop.util.CharUtilities; - -public class CodePointMapping { +public class CodePointMapping extends AbstractCodePointMapping { - private String name; - private char[] latin1Map; - private char[] characters; - private char[] codepoints; - private char[] unicodeMap; //code point to Unicode char - public CodePointMapping(String name, int[] table) { - this.name = name; - int nonLatin1 = 0; - latin1Map = new char[256]; - unicodeMap = new char[256]; - Arrays.fill(unicodeMap, CharUtilities.NOT_A_CHARACTER); - for (int i = 0; i < table.length; i += 2) { - char unicode = (char)table[i + 1]; - if (unicode < 256) { - if (latin1Map[unicode] == 0) { - latin1Map[unicode] = (char) table[i]; - } - } else { - ++nonLatin1; - } - if (unicodeMap[table[i]] == CharUtilities.NOT_A_CHARACTER) { - unicodeMap[table[i]] = unicode; - } - } - characters = new char[nonLatin1]; - codepoints = new char[nonLatin1]; - int top = 0; - for (int i = 0; i < table.length; i += 2) { - char c = (char) table[i + 1]; - if (c >= 256) { - ++top; - for (int j = top - 1; j >= 0; --j) { - if (j > 0 && characters[j - 1] >= c) { - characters[j] = characters[j - 1]; - codepoints[j] = codepoints[j - 1]; - } else { - characters[j] = c; - codepoints[j] = (char) table[i]; - break; - } - } - } - } - } - - public String getName() { 
- return this.name; - } - - public final char mapChar(char c) { - if (c < 256) { - return latin1Map[c]; - } else { - int bot = 0, top = characters.length - 1; - while (top >= bot) { - int mid = (bot + top) / 2; - char mc = characters[mid]; - - if (c == mc) { - return codepoints[mid]; - } else if (c < mc) { - top = mid - 1; - } else { - bot = mid + 1; - } - } - return 0; - } - } - - public final char getUnicodeForIndex(int idx) { - return this.unicodeMap[idx]; + super(name, table); } - public final char[] getUnicodeCharMap() { - char[] copy = new char[this.unicodeMap.length]; - System.arraycopy(this.unicodeMap, 0, copy, 0, this.unicodeMap.length); - return copy; - } - - /** {@inheritDoc} */ - public String toString() { - return getName(); + public CodePointMapping(String name, int[] table, String[] charNameMap) { + super(name, table, charNameMap); } private static Map mappings; @@ -146,13 +65,10 @@ public class CodePointMapping { if (mapping != null) { return mapping; } - //TODO: Implement support for Expert and ExpertSubset encoding - else if (encoding.startsWith("Expert")) { - throw new UnsupportedOperationException(encoding + " not implemented yet"); - } throw new UnsupportedOperationException("Unknown encoding: " + encoding); } + } @@ -160,7 +76,7 @@ public class CodePointMapping { else if (encoding.equals()) { - mapping = new CodePointMapping(, enc); + mapping = new CodePointMapping(, enc, names); mappings.put(, mapping); return mapping; } @@ -178,4 +94,78 @@ public class CodePointMapping { }; + + + private static final String[] names + = { + + + + }; + + + + + + + + + + + + + + + + + + /**/ + + + + + + + "" + + null + + + + , + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/src/java/org/apache/fop/fonts/AbstractCodePointMapping.java b/src/java/org/apache/fop/fonts/AbstractCodePointMapping.java new file mode 100644 index 000000000..91d13da85 --- /dev/null +++ b/src/java/org/apache/fop/fonts/AbstractCodePointMapping.java @@ -0,0 +1,261 
@@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.fonts; + +import java.util.Arrays; +import java.util.Map; + +import org.apache.xmlgraphics.fonts.Glyphs; + +import org.apache.fop.util.CharUtilities; + +/** + * Abstract base class for code point mapping classes (1-byte character encodings). + */ +public class AbstractCodePointMapping { + + private String name; + private char[] latin1Map; + private char[] characters; + private char[] codepoints; + private char[] unicodeMap; //code point to Unicode char + private String[] charNameMap; //all character names in the encoding + private Map fallbackMap; //Here we accumulate all mappings we have found through substitution + + /** + * Main constructor. + * @param name the name of the encoding + * @param table the table ([code point, unicode scalar value]+) with the mapping + */ + public AbstractCodePointMapping(String name, int[] table) { + this(name, table, null); + } + + /** + * Extended constructor. 
+ * @param name the name of the encoding + * @param table the table ([code point, unicode scalar value]+) with the mapping + * @param charNameMap all character names in the encoding (a value of null will be converted + * to ".notdef") + */ + public AbstractCodePointMapping(String name, int[] table, String[] charNameMap) { + this.name = name; + buildFromTable(table); + if (charNameMap != null) { + this.charNameMap = new String[256]; + for (int i = 0; i < 256; i++) { + String charName = charNameMap[i]; + if (charName == null) { + this.charNameMap[i] = Glyphs.NOTDEF; + } else { + this.charNameMap[i] = charName; + } + } + } + } + + /** + * Builds the internal lookup structures based on a given table. + * @param table the table ([code point, unicode scalar value]+) with the mapping + */ + protected void buildFromTable(int[] table) { + int nonLatin1 = 0; + latin1Map = new char[256]; + unicodeMap = new char[256]; + Arrays.fill(unicodeMap, CharUtilities.NOT_A_CHARACTER); + for (int i = 0; i < table.length; i += 2) { + char unicode = (char)table[i + 1]; + if (unicode < 256) { + if (latin1Map[unicode] == 0) { + latin1Map[unicode] = (char) table[i]; + } + } else { + ++nonLatin1; + } + if (unicodeMap[table[i]] == CharUtilities.NOT_A_CHARACTER) { + unicodeMap[table[i]] = unicode; + } + } + characters = new char[nonLatin1]; + codepoints = new char[nonLatin1]; + int top = 0; + for (int i = 0; i < table.length; i += 2) { + char c = (char) table[i + 1]; + if (c >= 256) { + ++top; + for (int j = top - 1; j >= 0; --j) { + if (j > 0 && characters[j - 1] >= c) { + characters[j] = characters[j - 1]; + codepoints[j] = codepoints[j - 1]; + } else { + characters[j] = c; + codepoints[j] = (char) table[i]; + break; + } + } + } + } + } + + /** + * Returns the encoding's name. + * @return the name of the encoding + */ + public String getName() { + return this.name; + } + + /** + * Maps a Unicode character to a code point in the encoding. 
+ * @param c the Unicode character to map + * @return the code point in the encoding or 0 (=.notdef) if not found + */ + public final char mapChar(char c) { + if (c < 256) { + char latin1 = latin1Map[c]; + if (latin1 > 0) { + return latin1; + } + } + int bot = 0, top = characters.length - 1; + while (top >= bot) { + int mid = (bot + top) / 2; + char mc = characters[mid]; + + if (c == mc) { + return codepoints[mid]; + } else if (c < mc) { + top = mid - 1; + } else { + bot = mid + 1; + } + } + + //Fallback: using cache + synchronized (this) { + if (fallbackMap != null) { + Character fallback = (Character)fallbackMap.get(new Character(c)); + if (fallback != null) { + return fallback.charValue(); + } + } + } + //Fallback: find alternatives (slow!) + String glyphName = Glyphs.charToGlyphName(c); + if (glyphName.length() > 0) { + String[] alternatives = Glyphs.getCharNameAlternativesFor(glyphName); + if (alternatives != null) { + for (int i = 0, ic = alternatives.length; i < ic; i++) { + int idx = getCodePointForGlyph(alternatives[i]); + if (idx >= 0) { + putFallbackCharacter(c, (char)idx); + return (char)idx; + } + } + } + } + + putFallbackCharacter(c, '\0'); + return 0; + } + + private void putFallbackCharacter(char c, char mapTo) { + synchronized (this) { + if (this.fallbackMap == null) { + this.fallbackMap = new java.util.HashMap(); + } + this.fallbackMap.put(new Character(c), new Character(mapTo)); + } + } + + /** + * Returns the main Unicode value that is associated with the given code point in the encoding. + * Note that multiple Unicode values can theoretically be mapped to one code point in the + * encoding. 
+ * @param idx the code point in the encoding + * @return the Unicode value (or \uFFFF (NOT A CHARACTER) if no Unicode value is at that point) + */ + public final char getUnicodeForIndex(int idx) { + return this.unicodeMap[idx]; + } + + /** + * Returns a character array with Unicode scalar values which can be used to map encoding + * code points to Unicode values. Note that this does not return all possible Unicode values + * that the encoding maps. + * @return a character array with Unicode scalar values + */ + public final char[] getUnicodeCharMap() { + char[] copy = new char[this.unicodeMap.length]; + System.arraycopy(this.unicodeMap, 0, copy, 0, this.unicodeMap.length); + return copy; + } + + /** + * Returns the index of a character/glyph with the given name. Note that this + * method is relatively slow and should only be used for fallback operations. + * @param charName the character name + * @return the index of the character in the encoding or -1 if it doesn't exist + */ + public short getCodePointForGlyph(String charName) { + String[] names = this.charNameMap; + if (names == null) { + names = getCharNameMap(); + } + for (short i = 0, c = (short)names.length; i < c; i++) { + if (names[i].equals(charName)) { + return i; + } + } + return -1; + } + + /** + * Returns the array of character names for this encoding. + * @return the array of character names + * (unmapped code points are represented by a ".notdef" value) + */ + public String[] getCharNameMap() { + if (this.charNameMap != null) { + String[] copy = new String[this.charNameMap.length]; + System.arraycopy(this.charNameMap, 0, copy, 0, this.charNameMap.length); + return copy; + } else { + //Note: this is suboptimal but will probably never be used. 
+ String[] derived = new String[256]; + Arrays.fill(derived, Glyphs.NOTDEF); + for (int i = 0; i < 256; i++) { + char c = getUnicodeForIndex(i); + if (c != CharUtilities.NOT_A_CHARACTER) { + String charName = Glyphs.charToGlyphName(c); + if (charName.length() > 0) { + derived[i] = charName; + } + } + } + return derived; + } + } + + /** {@inheritDoc} */ + public String toString() { + return getName(); + } +} diff --git a/src/java/org/apache/fop/fonts/Glyphs.java b/src/java/org/apache/fop/fonts/Glyphs.java index e75a6bf54..4fb6c4e5c 100644 --- a/src/java/org/apache/fop/fonts/Glyphs.java +++ b/src/java/org/apache/fop/fonts/Glyphs.java @@ -21,6 +21,7 @@ package org.apache.fop.fonts; /** * This class provides a number of constants for glyph management. + * @deprecated Use the Glyphs class from XML Graphics Commons instead! */ public class Glyphs { diff --git a/src/java/org/apache/fop/fonts/SingleByteFont.java b/src/java/org/apache/fop/fonts/SingleByteFont.java index b6f65edc8..814d05a2c 100644 --- a/src/java/org/apache/fop/fonts/SingleByteFont.java +++ b/src/java/org/apache/fop/fonts/SingleByteFont.java @@ -24,6 +24,8 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.xmlgraphics.fonts.Glyphs; + /** * Generic SingleByte font */ diff --git a/src/java/org/apache/fop/fonts/truetype/TTFFile.java b/src/java/org/apache/fop/fonts/truetype/TTFFile.java index e46029709..fe40c4b33 100644 --- a/src/java/org/apache/fop/fonts/truetype/TTFFile.java +++ b/src/java/org/apache/fop/fonts/truetype/TTFFile.java @@ -27,8 +27,10 @@ import java.util.Set; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; + +import org.apache.xmlgraphics.fonts.Glyphs; + import org.apache.fop.fonts.FontUtil; -import org.apache.fop.fonts.Glyphs; /** * Reads a TrueType file or a TrueType Collection. 
diff --git a/src/java/org/apache/fop/fonts/type1/AFMCharMetrics.java b/src/java/org/apache/fop/fonts/type1/AFMCharMetrics.java index fbbff567e..1b7f814b8 100644 --- a/src/java/org/apache/fop/fonts/type1/AFMCharMetrics.java +++ b/src/java/org/apache/fop/fonts/type1/AFMCharMetrics.java @@ -28,7 +28,7 @@ import java.awt.geom.RectangularShape; public class AFMCharMetrics { private int charCode = -1; - private String unicodeChars; + private String unicodeSequence; private String charName; private double widthX; private double widthY; @@ -59,21 +59,20 @@ public class AFMCharMetrics { } /** - * Returns the Unicode characters represented by this object. Some character names can be - * mapped to multiple Unicode code points, so expect to find more than one character in the - * String. + * Returns the Unicode sequence for this character. * @return the Unicode characters + * (or null if no such Unicode sequence exists for this character) */ - public String getUnicodeChars() { - return this.unicodeChars; + public String getUnicodeSequence() { + return this.unicodeSequence; } /** - * Sets the Unicode characters represented by this object. - * @param unicodeChars the Unicode characters + * Sets the Unicode sequence for this character. 
+ * @param unicodeSequence the Unicode sequence */ - public void setUnicodeChars(String unicodeChars) { - this.unicodeChars = unicodeChars; + public void setUnicodeSequence(String unicodeSequence) { + this.unicodeSequence = unicodeSequence; } /** @@ -145,9 +144,9 @@ public class AFMCharMetrics { StringBuffer sb = new StringBuffer("AFM Char: "); sb.append(getCharCode()); sb.append(" ("); - if (getUnicodeChars() != null) { - for (int i = 0, c = getUnicodeChars().length(); i < c; i++) { - sb.append("0x").append(Integer.toHexString(getUnicodeChars().charAt(i))); + if (getUnicodeSequence() != null) { + for (int i = 0, c = getUnicodeSequence().length(); i < c; i++) { + sb.append("0x").append(Integer.toHexString(getUnicodeSequence().charAt(i))); sb.append(", "); } } diff --git a/src/java/org/apache/fop/fonts/type1/AFMFile.java b/src/java/org/apache/fop/fonts/type1/AFMFile.java index 0b7b8d3c2..b51485485 100644 --- a/src/java/org/apache/fop/fonts/type1/AFMFile.java +++ b/src/java/org/apache/fop/fonts/type1/AFMFile.java @@ -21,7 +21,6 @@ package org.apache.fop.fonts.type1; import java.awt.geom.Dimension2D; import java.awt.geom.RectangularShape; -import java.util.Arrays; import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -315,18 +314,11 @@ public class AFMFile { */ public void addCharMetrics(AFMCharMetrics metrics) { String name = metrics.getCharName(); - if (metrics.getUnicodeChars() == null) { + if (metrics.getUnicodeSequence() == null) { if (name != null) { - String u = Glyphs.getUnicodeCodePointsForGlyphName(metrics.getCharName()); + String u = Glyphs.getUnicodeSequenceForGlyphName(metrics.getCharName()); if (u != null) { - if (u.length() > 1) { - //Lower values (ex. space) are most probably more interesting than - //higher values (ex. 
non-break-space), so sort just to be sure: - char[] chars = u.toCharArray(); - Arrays.sort(chars); - u = String.valueOf(chars); - } - metrics.setUnicodeChars(u); + metrics.setUnicodeSequence(u); } } else { //Ignore as no Unicode assignment is possible diff --git a/src/java/org/apache/fop/fonts/type1/PFMFile.java b/src/java/org/apache/fop/fonts/type1/PFMFile.java index a1e560746..69f8e836f 100644 --- a/src/java/org/apache/fop/fonts/type1/PFMFile.java +++ b/src/java/org/apache/fop/fonts/type1/PFMFile.java @@ -29,7 +29,7 @@ import org.apache.commons.io.IOUtils; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.fop.fonts.Glyphs; +import org.apache.xmlgraphics.fonts.Glyphs; /** * This class represents a PFM file (or parts of it) as a Java object. diff --git a/src/java/org/apache/fop/fonts/type1/Type1FontLoader.java b/src/java/org/apache/fop/fonts/type1/Type1FontLoader.java index 57383dc76..009a446f9 100644 --- a/src/java/org/apache/fop/fonts/type1/Type1FontLoader.java +++ b/src/java/org/apache/fop/fonts/type1/Type1FontLoader.java @@ -198,10 +198,18 @@ public class Type1FontLoader extends FontLoader { } if (pfm != null) { //Sometimes the PFM has these metrics while the AFM doesn't (ex. 
Symbol) - returnFont.setCapHeight(pfm.getCapHeight()); - returnFont.setXHeight(pfm.getXHeight()); - returnFont.setAscender(pfm.getLowerCaseAscent()); - returnFont.setDescender(pfm.getLowerCaseDescent()); + if (returnFont.getCapHeight() == 0) { + returnFont.setCapHeight(pfm.getCapHeight()); + } + if (returnFont.getXHeight(1) == 0) { + returnFont.setXHeight(pfm.getXHeight()); + } + if (returnFont.getAscender() == 0) { + returnFont.setAscender(pfm.getLowerCaseAscent()); + } + if (returnFont.getDescender() == 0) { + returnFont.setDescender(pfm.getLowerCaseDescent()); + } } //Fallbacks when some crucial font metrics aren't available @@ -253,6 +261,7 @@ public class Type1FontLoader extends FontLoader { } if (afm != null) { + //TODO returnFont.setFlags(flags); returnFont.setFirstChar(afm.getFirstChar()); returnFont.setLastChar(afm.getLastChar()); Iterator iter = afm.getCharMetrics().iterator(); @@ -264,9 +273,9 @@ public class Type1FontLoader extends FontLoader { } returnFont.replaceKerningMap(afm.createXKerningMapEncoded()); } else { + returnFont.setFlags(pfm.getFlags()); returnFont.setFirstChar(pfm.getFirstChar()); returnFont.setLastChar(pfm.getLastChar()); - returnFont.setFlags(pfm.getFlags()); for (short i = pfm.getFirstChar(); i <= pfm.getLastChar(); i++) { singleFont.setWidth(i, pfm.getCharWidth(i)); } @@ -282,28 +291,35 @@ public class Type1FontLoader extends FontLoader { while (iter.hasNext()) { AFMCharMetrics charMetrics = (AFMCharMetrics)iter.next(); if (charMetrics.getCharCode() >= 0) { - String u = charMetrics.getUnicodeChars(); - if (u != null) { - mappingCount += u.length(); + String u = charMetrics.getUnicodeSequence(); + if (u != null && u.length() == 1) { + mappingCount++; } } } //...and now build the table. 
int[] table = new int[mappingCount * 2]; + String[] charNameMap = new String[256]; iter = chars.iterator(); int idx = 0; while (iter.hasNext()) { AFMCharMetrics charMetrics = (AFMCharMetrics)iter.next(); if (charMetrics.getCharCode() >= 0) { - String unicodes = charMetrics.getUnicodeChars(); - for (int i = 0, c = unicodes.length(); i < c; i++) { + charNameMap[charMetrics.getCharCode()] = charMetrics.getCharName(); + String unicodes = charMetrics.getUnicodeSequence(); + if (unicodes == null) { + log.info("No Unicode mapping for glyph: " + charMetrics); + } else if (unicodes.length() == 1) { table[idx] = charMetrics.getCharCode(); idx++; - table[idx] = unicodes.charAt(i); + table[idx] = unicodes.charAt(0); idx++; + } else { + log.warn("Multi-character representation of glyph not currently supported: " + + charMetrics); } } } - return new CodePointMapping(encodingName, table); + return new CodePointMapping(encodingName, table, charNameMap); } } diff --git a/src/java/org/apache/fop/pdf/PDFFactory.java b/src/java/org/apache/fop/pdf/PDFFactory.java index a50ef2545..687b32016 100644 --- a/src/java/org/apache/fop/pdf/PDFFactory.java +++ b/src/java/org/apache/fop/pdf/PDFFactory.java @@ -40,7 +40,6 @@ import org.apache.commons.io.output.ByteArrayOutputStream; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.xmlgraphics.fonts.Glyphs; import org.apache.xmlgraphics.xmp.Metadata; import org.apache.fop.fonts.CIDFont; @@ -1252,15 +1251,17 @@ public class PDFFactory { PDFEncoding.DifferencesBuilder builder = pdfEncoding.createDifferencesBuilder(); int start = -1; + String[] winansiNames = winansi.getCharNameMap(); + String[] charNameMap = mapping.getCharNameMap(); for (int i = 0; i < 256; i++) { - char wac = winansi.getUnicodeForIndex(i); - char c = mapping.getUnicodeForIndex(i); - if (wac != c) { + String wac = winansiNames[i]; + String c = charNameMap[i]; + if (!wac.equals(c)) { if (start != i) { builder.addDifference(i); 
start = i; } - builder.addName(Glyphs.charToGlyphName(c)); + builder.addName(c); start++; } }