diff options
author | Peter Hancock <phancock@apache.org> | 2011-09-05 09:42:00 +0000 |
---|---|---|
committer | Peter Hancock <phancock@apache.org> | 2011-09-05 09:42:00 +0000 |
commit | de056bce77c27b3a93acd62c2fad0c6628bff6b4 (patch) | |
tree | 0b7558b1a7c40964a06b24423fcd53853efb2339 /src/java/org/apache/fop/afp/fonts | |
parent | 2b875243aca4c436a611bdc974f3eaf6c6800bb8 (diff) | |
download | xmlgraphics-fop-de056bce77c27b3a93acd62c2fad0c6628bff6b4.tar.gz xmlgraphics-fop-de056bce77c27b3a93acd62c2fad0c6628bff6b4.zip |
Bugzilla#51530: Improved support for EBCDIC encoded double byte fonts for AFP.
Submitted by: Mehdi Houshmand <med1985.at.gmail.com>
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1165223 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/fop/afp/fonts')
4 files changed, 261 insertions, 78 deletions
diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSet.java b/src/java/org/apache/fop/afp/fonts/CharacterSet.java index 7123d4138..784588762 100644 --- a/src/java/org/apache/fop/afp/fonts/CharacterSet.java +++ b/src/java/org/apache/fop/afp/fonts/CharacterSet.java @@ -21,19 +21,13 @@ package org.apache.fop.afp.fonts; import java.io.File; import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; -import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CodingErrorAction; -import java.nio.charset.UnsupportedCharsetException; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - import org.apache.fop.afp.AFPConstants; +import org.apache.fop.afp.fonts.CharactersetEncoder.EncodedChars; import org.apache.fop.afp.util.ResourceAccessor; import org.apache.fop.afp.util.SimpleResourceAccessor; import org.apache.fop.afp.util.StringUtils; @@ -70,16 +64,16 @@ public class CharacterSet { /** The code page to which the character set relates */ - protected String codePage; + protected final String codePage; /** The encoding used for the code page */ - protected String encoding; + protected final String encoding; - /** The charset encoder corresponding to this encoding */ - private CharsetEncoder encoder; + /** The characterset encoder corresponding to this encoding */ + private final CharactersetEncoder encoder; /** The character set relating to the font */ - protected String name; + protected final String name; /** The path to the installed fonts */ private ResourceAccessor accessor; @@ -105,20 +99,22 @@ public class CharacterSet { * {@link #CharacterSet(String, String, String, ResourceAccessor)} instead. 
*/ public CharacterSet(String codePage, String encoding, String name, String path) { - this(codePage, encoding, name, + this(codePage, encoding, false, name, new SimpleResourceAccessor(path != null ? new File(path) : null)); } /** - * Constructor for the CharacterSetMetric object, the character set is used - * to load the font information from the actual AFP font. + * Constructor for the CharacterSetMetric object, the character set is used to load the font + * information from the actual AFP font. * * @param codePage the code page identifier * @param encoding the encoding of the font + * @param isEBDCS if this is an EBCDIC double byte character set. * @param name the character set name * @param accessor the resource accessor to load resource with */ - CharacterSet(String codePage, String encoding, String name, ResourceAccessor accessor) { + CharacterSet(String codePage, String encoding, boolean isEBDCS, String name, + ResourceAccessor accessor) { if (name.length() > MAX_NAME_LEN) { String msg = "Character set name '" + name + "' must be a maximum of " + MAX_NAME_LEN + " characters"; @@ -133,14 +129,7 @@ public class CharacterSet { } this.codePage = codePage; this.encoding = encoding; - try { - this.encoder = Charset.forName(encoding).newEncoder(); - this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); - } catch (UnsupportedCharsetException uce) { - //No nio-capable encoder available - //This may happen with "Cp500" on Sun Java 1.4.2 - this.encoder = null; - } + this.encoder = CharactersetEncoder.newInstance(encoding, isEBDCS); this.accessor = accessor; this.characterSetOrientations = new java.util.HashMap(4); @@ -357,32 +346,8 @@ public class CharacterSet { * @return the encoded characters * @throws CharacterCodingException if the encoding operation fails */ - public byte[] encodeChars(CharSequence chars) throws CharacterCodingException { - if (encoder != null) { - ByteBuffer bb; - // encode method is not thread safe - synchronized (encoder) { - bb = 
encoder.encode(CharBuffer.wrap(chars)); - } - if (bb.hasArray()) { - return bb.array(); - } else { - bb.rewind(); - byte[] bytes = new byte[bb.remaining()]; - bb.get(bytes); - return bytes; - } - } else { - //Sun Java 1.4.2 compatibility - byte[] bytes; - try { - bytes = chars.toString().getBytes(this.encoding); - return bytes; - } catch (UnsupportedEncodingException uee) { - throw new UnsupportedOperationException( - "Unsupported encoding: " + uee.getMessage()); - } - } + public EncodedChars encodeChars(CharSequence chars) throws CharacterCodingException { + return encoder.encode(chars); } /** diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java index d575e2ae1..16893a152 100644 --- a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java +++ b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java @@ -30,13 +30,11 @@ import java.util.WeakHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - -import org.apache.xmlgraphics.image.loader.util.SoftMapCache; - import org.apache.fop.afp.AFPConstants; import org.apache.fop.afp.util.ResourceAccessor; import org.apache.fop.afp.util.StructuredFieldReader; import org.apache.fop.fonts.Typeface; +import org.apache.xmlgraphics.image.loader.util.SoftMapCache; /** * The CharacterSetBuilder is responsible building the a CharacterSet instance that holds @@ -181,9 +179,9 @@ public class CharacterSetBuilder { } /** - * Load the font details and metrics into the CharacterSetMetric object, - * this will use the actual afp code page and character set files to load - * the object with the necessary metrics. + * Load the font details and metrics into the CharacterSetMetric object, this will use the + * actual afp code page and character set files to load the object with the necessary metrics. 
+ * * @param characterSetName name of the characterset * @param codePageName name of the code page file * @param encoding encoding name @@ -191,9 +189,47 @@ public class CharacterSetBuilder { * @return CharacterSet object * @throws IOException if an I/O error occurs */ - public CharacterSet build(String characterSetName, String codePageName, - String encoding, ResourceAccessor accessor) throws IOException { + public CharacterSet build(String characterSetName, String codePageName, String encoding, + ResourceAccessor accessor) throws IOException { + return processFont(characterSetName, codePageName, encoding, false, accessor); + } + /** + * Load the font details and metrics into the CharacterSetMetric object, this will use the + * actual afp code page and character set files to load the object with the necessary metrics. + * This method is to be used for double byte character sets (DBCS). + * + * @param characterSetName name of the characterset + * @param codePageName name of the code page file + * @param encoding encoding name + * @param isEDBCS if this is an EBCDIC double byte character set (DBCS) + * @param accessor used to load codepage and characterset + * @return CharacterSet object + * @throws IOException if an I/O error occurs + */ + public CharacterSet buildDBCS(String characterSetName, String codePageName, String encoding, + boolean isEDBCS, ResourceAccessor accessor) throws IOException { + return processFont(characterSetName, codePageName, encoding, isEDBCS, accessor); + } + + /** + * Load the font details and metrics into the CharacterSetMetric object, this will use the + * actual afp code page and character set files to load the object with the necessary metrics. 
+ * + * @param characterSetName the CharacterSetMetric object to populate + * @param codePageName the name of the code page to use + * @param encoding name of the encoding in use + * @param typeface base14 font name + * @return CharacterSet object + * @throws IOException if an I/O error occurs + */ + public CharacterSet build(String characterSetName, String codePageName, String encoding, + Typeface typeface) throws IOException { + return new FopCharacterSet(codePageName, encoding, characterSetName, typeface); + } + + private CharacterSet processFont(String characterSetName, String codePageName, String encoding, + boolean isEDBCS, ResourceAccessor accessor) throws IOException { // check for cached version of the characterset String descriptor = characterSetName + "_" + encoding + "_" + codePageName; CharacterSet characterSet = (CharacterSet)characterSetsCache.get(descriptor); @@ -203,8 +239,8 @@ public class CharacterSetBuilder { } // characterset not in the cache, so recreating - characterSet = new CharacterSet( - codePageName, encoding, characterSetName, accessor); + characterSet = new CharacterSet(codePageName, encoding, isEDBCS, characterSetName, + accessor); InputStream inputStream = null; @@ -268,23 +304,6 @@ public class CharacterSetBuilder { } characterSetsCache.put(descriptor, characterSet); return characterSet; - - } - - /** - * Load the font details and metrics into the CharacterSetMetric object, - * this will use the actual afp code page and character set files to load - * the object with the necessary metrics. 
- * - * @param characterSetName the CharacterSetMetric object to populate - * @param codePageName the name of the code page to use - * @param encoding name of the encoding in use - * @param typeface base14 font name - * @return CharacterSet object - */ - public CharacterSet build(String characterSetName, String codePageName, - String encoding, Typeface typeface) { - return new FopCharacterSet(codePageName, encoding, characterSetName, typeface); } /** diff --git a/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java new file mode 100644 index 000000000..d82da80eb --- /dev/null +++ b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id$ */ + +package org.apache.fop.afp.fonts; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; + +/** + * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a + * specified format. + */ +public abstract class CharactersetEncoder { + + private final CharsetEncoder encoder; + + private CharactersetEncoder(String encoding) { + this.encoder = Charset.forName(encoding).newEncoder(); + this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } + + /** + * Tells whether or not this encoder can encode the given character. + * + * @param c the character + * @return true if, and only if, this encoder can encode the given character + * @throws IllegalStateException - If an encoding operation is already in progress + */ + final boolean canEncode(char c) { + return encoder.canEncode(c); + } + + /** + * Encodes a character sequence to a byte array. + * + * @param chars the character sequence + * @return the encoded character sequence + * @throws CharacterCodingException if the encoding operation fails + */ + final EncodedChars encode(CharSequence chars) throws CharacterCodingException { + ByteBuffer bb; + // encode method is not thread safe + synchronized (encoder) { + bb = encoder.encode(CharBuffer.wrap(chars)); + } + if (bb.hasArray()) { + return getEncodedChars(bb.array(), bb.limit()); + } else { + bb.rewind(); + byte[] bytes = new byte[bb.remaining()]; + bb.get(bytes); + return getEncodedChars(bytes, bytes.length); + } + } + + abstract EncodedChars getEncodedChars(byte[] byteArray, int length); + + /** + * Encodes <code>chars</code> into a format specified by <code>encoding</code>. 
+ * + * @param chars the character sequence + * @param encoding the encoding type + * @param isEDBCS if this encoding represents a double-byte character set + * @return encoded data + * @throws CharacterCodingException if encoding fails + */ + public static EncodedChars encodeSBCS(CharSequence chars, String encoding, boolean isEDBCS) + throws CharacterCodingException { + CharactersetEncoder encoder = newInstance(encoding, isEDBCS); + return encoder.encode(chars); + } + + /** + * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character + * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character + * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators + * are removed from the sequence of bytes. These are only used in Line Data. + */ + private final static class EbcdicDoubleByteEncoder extends CharactersetEncoder { + private EbcdicDoubleByteEncoder(String encoding) { + super(encoding); + } + @Override + EncodedChars getEncodedChars(byte[] byteArray, int length) { + if (byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) { + return new EncodedChars(byteArray, 1, length - 2); + } + return new EncodedChars(byteArray); + } + } + + /** + * The default encoder is used for encoding IBM format SBCS (single byte character sets), this + * the primary format for most Latin character sets. This can also be used for Unicode double- + * byte character sets (DBCS). + */ + private final static class DefaultEncoder extends CharactersetEncoder { + private DefaultEncoder(String encoding) { + super(encoding); + } + + @Override + EncodedChars getEncodedChars(byte[] byteArray, int length) { + return new EncodedChars(byteArray); + } + } + + /** + * Returns an new instance of a {@link CharactersetEncoder}. + * + * @param encoding the encoding for the underlying character encoder + * @param isEbcdicDBCS whether or not this wraps a double-byte EBCDIC code page. 
+ * @return the CharactersetEncoder + */ + static CharactersetEncoder newInstance(String encoding, boolean isEbcdicDBCS) { + if (isEbcdicDBCS) { + return new EbcdicDoubleByteEncoder(encoding); + } else { + return new DefaultEncoder(encoding); + } + } + + /** + * A container for encoded character bytes + */ + public static class EncodedChars { + + final private byte[] bytes; + + final private int offset; + + final private int length; + + private EncodedChars(byte[] bytes, int offset, int length) { + if (offset < 0) throw new IllegalArgumentException(); + + if (length < 0) throw new IllegalArgumentException(); + + if (offset + length > bytes.length) throw new IllegalArgumentException(); + + this.bytes = bytes; + + this.offset = offset; + + this.length = length; + } + + private EncodedChars(byte[] bytes) { + this(bytes, 0, bytes.length); + } + + /** + * write <code>length</code> bytes from <code>offset</code> to the output stream + * + * @param out output to write the bytes to + * @throws IOException if an I/O error occurs + */ + public void writeTo(OutputStream out, int offset, int length) throws IOException { + if (offset < 0) throw new IllegalArgumentException(); + + if (length < 0) throw new IllegalArgumentException(); + + if (offset + length > this.length) throw new IllegalArgumentException(); + + out.write(bytes, this.offset + offset, length); + } + + /** + * The number of containing bytes. 
+ * + * @return + */ + public int getLength() { + return length; + } + } +} diff --git a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java index 716ca538f..b1efdc511 100644 --- a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java +++ b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java @@ -44,7 +44,7 @@ public class FopCharacterSet extends CharacterSet { String name, Typeface charSet) { - super(codePage, encoding, name, (ResourceAccessor)null); + super(codePage, encoding, false, name, (ResourceAccessor) null); this.charSet = charSet; } |