From: Peter Hancock Date: Mon, 5 Sep 2011 09:42:00 +0000 (+0000) Subject: Bugzilla#51530: Improved support for EBCDIC encoded double byte fonts for AFP. X-Git-Tag: fop-1_1rc1old~180 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=de056bce77c27b3a93acd62c2fad0c6628bff6b4;p=xmlgraphics-fop.git Bugzilla#51530: Improved support for EBCDIC encoded double byte fonts for AFP. Submitted by: Mehdi Houshmand git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1165223 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/trunk/output.xml b/src/documentation/content/xdocs/trunk/output.xml index c71f6abf8..7b245eb07 100644 --- a/src/documentation/content/xdocs/trunk/output.xml +++ b/src/documentation/content/xdocs/trunk/output.xml @@ -743,6 +743,12 @@ Note that the value of the encoding attribute in the example is the double-byte referenced-fonts section of the configuration file. However, the default fonts shown above will not be embedded.

+

+ For double byte EBCDIC encoded character sets, the optional "ebcdic-dbcs" attribute must be set to "true" to prevent + characters from being miscoded. It defaults to "false" if not specified.

+ ]]> +
Output Resolution diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSet.java b/src/java/org/apache/fop/afp/fonts/CharacterSet.java index 7123d4138..784588762 100644 --- a/src/java/org/apache/fop/afp/fonts/CharacterSet.java +++ b/src/java/org/apache/fop/afp/fonts/CharacterSet.java @@ -21,19 +21,13 @@ package org.apache.fop.afp.fonts; import java.io.File; import java.io.UnsupportedEncodingException; -import java.nio.ByteBuffer; -import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; -import java.nio.charset.Charset; -import java.nio.charset.CharsetEncoder; -import java.nio.charset.CodingErrorAction; -import java.nio.charset.UnsupportedCharsetException; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - import org.apache.fop.afp.AFPConstants; +import org.apache.fop.afp.fonts.CharactersetEncoder.EncodedChars; import org.apache.fop.afp.util.ResourceAccessor; import org.apache.fop.afp.util.SimpleResourceAccessor; import org.apache.fop.afp.util.StringUtils; @@ -70,16 +64,16 @@ public class CharacterSet { /** The code page to which the character set relates */ - protected String codePage; + protected final String codePage; /** The encoding used for the code page */ - protected String encoding; + protected final String encoding; - /** The charset encoder corresponding to this encoding */ - private CharsetEncoder encoder; + /** The characterset encoder corresponding to this encoding */ + private final CharactersetEncoder encoder; /** The character set relating to the font */ - protected String name; + protected final String name; /** The path to the installed fonts */ private ResourceAccessor accessor; @@ -105,20 +99,22 @@ public class CharacterSet { * {@link #CharacterSet(String, String, String, ResourceAccessor)} instead. 
*/ public CharacterSet(String codePage, String encoding, String name, String path) { - this(codePage, encoding, name, + this(codePage, encoding, false, name, new SimpleResourceAccessor(path != null ? new File(path) : null)); } /** - * Constructor for the CharacterSetMetric object, the character set is used - * to load the font information from the actual AFP font. + * Constructor for the CharacterSetMetric object, the character set is used to load the font + * information from the actual AFP font. * * @param codePage the code page identifier * @param encoding the encoding of the font + * @param isEBDCS if this is an EBCDIC double byte character set. * @param name the character set name * @param accessor the resource accessor to load resource with */ - CharacterSet(String codePage, String encoding, String name, ResourceAccessor accessor) { + CharacterSet(String codePage, String encoding, boolean isEBDCS, String name, + ResourceAccessor accessor) { if (name.length() > MAX_NAME_LEN) { String msg = "Character set name '" + name + "' must be a maximum of " + MAX_NAME_LEN + " characters"; @@ -133,14 +129,7 @@ public class CharacterSet { } this.codePage = codePage; this.encoding = encoding; - try { - this.encoder = Charset.forName(encoding).newEncoder(); - this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); - } catch (UnsupportedCharsetException uce) { - //No nio-capable encoder available - //This may happen with "Cp500" on Sun Java 1.4.2 - this.encoder = null; - } + this.encoder = CharactersetEncoder.newInstance(encoding, isEBDCS); this.accessor = accessor; this.characterSetOrientations = new java.util.HashMap(4); @@ -357,32 +346,8 @@ public class CharacterSet { * @return the encoded characters * @throws CharacterCodingException if the encoding operation fails */ - public byte[] encodeChars(CharSequence chars) throws CharacterCodingException { - if (encoder != null) { - ByteBuffer bb; - // encode method is not thread safe - synchronized (encoder) { - bb = 
encoder.encode(CharBuffer.wrap(chars)); - } - if (bb.hasArray()) { - return bb.array(); - } else { - bb.rewind(); - byte[] bytes = new byte[bb.remaining()]; - bb.get(bytes); - return bytes; - } - } else { - //Sun Java 1.4.2 compatibility - byte[] bytes; - try { - bytes = chars.toString().getBytes(this.encoding); - return bytes; - } catch (UnsupportedEncodingException uee) { - throw new UnsupportedOperationException( - "Unsupported encoding: " + uee.getMessage()); - } - } + public EncodedChars encodeChars(CharSequence chars) throws CharacterCodingException { + return encoder.encode(chars); } /** diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java index d575e2ae1..16893a152 100644 --- a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java +++ b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java @@ -30,13 +30,11 @@ import java.util.WeakHashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; - -import org.apache.xmlgraphics.image.loader.util.SoftMapCache; - import org.apache.fop.afp.AFPConstants; import org.apache.fop.afp.util.ResourceAccessor; import org.apache.fop.afp.util.StructuredFieldReader; import org.apache.fop.fonts.Typeface; +import org.apache.xmlgraphics.image.loader.util.SoftMapCache; /** * The CharacterSetBuilder is responsible building the a CharacterSet instance that holds @@ -181,9 +179,9 @@ public class CharacterSetBuilder { } /** - * Load the font details and metrics into the CharacterSetMetric object, - * this will use the actual afp code page and character set files to load - * the object with the necessary metrics. + * Load the font details and metrics into the CharacterSetMetric object, this will use the + * actual afp code page and character set files to load the object with the necessary metrics. 
+ * * @param characterSetName name of the characterset * @param codePageName name of the code page file * @param encoding encoding name @@ -191,9 +189,47 @@ public class CharacterSetBuilder { * @return CharacterSet object * @throws IOException if an I/O error occurs */ - public CharacterSet build(String characterSetName, String codePageName, - String encoding, ResourceAccessor accessor) throws IOException { + public CharacterSet build(String characterSetName, String codePageName, String encoding, + ResourceAccessor accessor) throws IOException { + return processFont(characterSetName, codePageName, encoding, false, accessor); + } + /** + * Load the font details and metrics into the CharacterSetMetric object, this will use the + * actual afp code page and character set files to load the object with the necessary metrics. + * This method is to be used for double byte character sets (DBCS). + * + * @param characterSetName name of the characterset + * @param codePageName name of the code page file + * @param encoding encoding name + * @param isEDBCS if this is an EBCDIC double byte character set (DBCS) + * @param accessor used to load codepage and characterset + * @return CharacterSet object + * @throws IOException if an I/O error occurs + */ + public CharacterSet buildDBCS(String characterSetName, String codePageName, String encoding, + boolean isEDBCS, ResourceAccessor accessor) throws IOException { + return processFont(characterSetName, codePageName, encoding, isEDBCS, accessor); + } + + /** + * Load the font details and metrics into the CharacterSetMetric object, this will use the + * actual afp code page and character set files to load the object with the necessary metrics. 
+ * + * @param characterSetName the CharacterSetMetric object to populate + * @param codePageName the name of the code page to use + * @param encoding name of the encoding in use + * @param typeface base14 font name + * @return CharacterSet object + * @throws IOException if an I/O error occurs + */ + public CharacterSet build(String characterSetName, String codePageName, String encoding, + Typeface typeface) throws IOException { + return new FopCharacterSet(codePageName, encoding, characterSetName, typeface); + } + + private CharacterSet processFont(String characterSetName, String codePageName, String encoding, + boolean isEDBCS, ResourceAccessor accessor) throws IOException { // check for cached version of the characterset String descriptor = characterSetName + "_" + encoding + "_" + codePageName; CharacterSet characterSet = (CharacterSet)characterSetsCache.get(descriptor); @@ -203,8 +239,8 @@ public class CharacterSetBuilder { } // characterset not in the cache, so recreating - characterSet = new CharacterSet( - codePageName, encoding, characterSetName, accessor); + characterSet = new CharacterSet(codePageName, encoding, isEDBCS, characterSetName, + accessor); InputStream inputStream = null; @@ -268,23 +304,6 @@ public class CharacterSetBuilder { } characterSetsCache.put(descriptor, characterSet); return characterSet; - - } - - /** - * Load the font details and metrics into the CharacterSetMetric object, - * this will use the actual afp code page and character set files to load - * the object with the necessary metrics. 
- * - * @param characterSetName the CharacterSetMetric object to populate - * @param codePageName the name of the code page to use - * @param encoding name of the encoding in use - * @param typeface base14 font name - * @return CharacterSet object - */ - public CharacterSet build(String characterSetName, String codePageName, - String encoding, Typeface typeface) { - return new FopCharacterSet(codePageName, encoding, characterSetName, typeface); } /** diff --git a/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java new file mode 100644 index 000000000..d82da80eb --- /dev/null +++ b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +/* $Id$ */ + +package org.apache.fop.afp.fonts; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.ByteBuffer; +import java.nio.CharBuffer; +import java.nio.charset.CharacterCodingException; +import java.nio.charset.Charset; +import java.nio.charset.CharsetEncoder; +import java.nio.charset.CodingErrorAction; + +/** + * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a + * specified format. + */ +public abstract class CharactersetEncoder { + + private final CharsetEncoder encoder; + + private CharactersetEncoder(String encoding) { + this.encoder = Charset.forName(encoding).newEncoder(); + this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); + } + + /** + * Tells whether or not this encoder can encode the given character. + * + * @param c the character + * @return true if, and only if, this encoder can encode the given character + * @throws IllegalStateException - If an encoding operation is already in progress + */ + final boolean canEncode(char c) { + return encoder.canEncode(c); + } + + /** + * Encodes a character sequence to a byte array. + * + * @param chars the character sequence + * @return the encoded character sequence + * @throws CharacterCodingException if the encoding operation fails + */ + final EncodedChars encode(CharSequence chars) throws CharacterCodingException { + ByteBuffer bb; + // encode method is not thread safe + synchronized (encoder) { + bb = encoder.encode(CharBuffer.wrap(chars)); + } + if (bb.hasArray()) { + return getEncodedChars(bb.array(), bb.limit()); + } else { + bb.rewind(); + byte[] bytes = new byte[bb.remaining()]; + bb.get(bytes); + return getEncodedChars(bytes, bytes.length); + } + } + + abstract EncodedChars getEncodedChars(byte[] byteArray, int length); + + /** + * Encodes chars into a format specified by encoding. 
+ * + * @param chars the character sequence + * @param encoding the encoding type + * @param isEDBCS if this encoding represents a double-byte character set + * @return encoded data + * @throws CharacterCodingException if encoding fails + */ + public static EncodedChars encodeSBCS(CharSequence chars, String encoding, boolean isEDBCS) + throws CharacterCodingException { + CharactersetEncoder encoder = newInstance(encoding, isEDBCS); + return encoder.encode(chars); + } + + /** + * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character + * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character + * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators + * are removed from the sequence of bytes. These are only used in Line Data. + */ + private final static class EbcdicDoubleByteEncoder extends CharactersetEncoder { + private EbcdicDoubleByteEncoder(String encoding) { + super(encoding); + } + @Override + EncodedChars getEncodedChars(byte[] byteArray, int length) { + if (byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) { + return new EncodedChars(byteArray, 1, length - 2); + } + return new EncodedChars(byteArray); + } + } + + /** + * The default encoder is used for encoding IBM format SBCS (single byte character sets), this + * the primary format for most Latin character sets. This can also be used for Unicode double- + * byte character sets (DBCS). + */ + private final static class DefaultEncoder extends CharactersetEncoder { + private DefaultEncoder(String encoding) { + super(encoding); + } + + @Override + EncodedChars getEncodedChars(byte[] byteArray, int length) { + return new EncodedChars(byteArray); + } + } + + /** + * Returns an new instance of a {@link CharactersetEncoder}. + * + * @param encoding the encoding for the underlying character encoder + * @param isEbcdicDBCS whether or not this wraps a double-byte EBCDIC code page. 
+ * @return the CharactersetEncoder + */ + static CharactersetEncoder newInstance(String encoding, boolean isEbcdicDBCS) { + if (isEbcdicDBCS) { + return new EbcdicDoubleByteEncoder(encoding); + } else { + return new DefaultEncoder(encoding); + } + } + + /** + * A container for encoded character bytes + */ + public static class EncodedChars { + + final private byte[] bytes; + + final private int offset; + + final private int length; + + private EncodedChars(byte[] bytes, int offset, int length) { + if (offset < 0) throw new IllegalArgumentException(); + + if (length < 0) throw new IllegalArgumentException(); + + if (offset + length > bytes.length) throw new IllegalArgumentException(); + + this.bytes = bytes; + + this.offset = offset; + + this.length = length; + } + + private EncodedChars(byte[] bytes) { + this(bytes, 0, bytes.length); + } + + /** + * write length bytes from offset to the output stream + * + * @param out output to write the bytes to + * @throws IOException if an I/O error occurs + */ + public void writeTo(OutputStream out, int offset, int length) throws IOException { + if (offset < 0) throw new IllegalArgumentException(); + + if (length < 0) throw new IllegalArgumentException(); + + if (offset + length > this.length) throw new IllegalArgumentException(); + + out.write(bytes, this.offset + offset, length); + } + + /** + * The number of containing bytes. 
+ * + * @return + */ + public int getLength() { + return length; + } + } +} diff --git a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java index 716ca538f..b1efdc511 100644 --- a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java +++ b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java @@ -44,7 +44,7 @@ public class FopCharacterSet extends CharacterSet { String name, Typeface charSet) { - super(codePage, encoding, name, (ResourceAccessor)null); + super(codePage, encoding, false, name, (ResourceAccessor) null); this.charSet = charSet; } diff --git a/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java b/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java index 2962dc76c..d1049ca73 100644 --- a/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java +++ b/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java @@ -25,7 +25,7 @@ import java.io.IOException; import java.io.OutputStream; import org.apache.commons.io.output.ByteArrayOutputStream; - +import org.apache.fop.afp.fonts.CharactersetEncoder.EncodedChars; import org.apache.xmlgraphics.java2d.color.CIELabColorSpace; import org.apache.xmlgraphics.java2d.color.ColorUtil; import org.apache.xmlgraphics.java2d.color.ColorWithAlternatives; @@ -190,34 +190,25 @@ public abstract class PtocaBuilder implements PtocaConstants { * @param data The text data to add. 
* @throws IOException if an I/O error occurs */ - public void addTransparentData(byte[] data) throws IOException { - if (data.length <= TRANSPARENT_DATA_MAX_SIZE) { - addTransparentDataChunk(data); - } else { - // data size greater than TRANSPARENT_MAX_SIZE, so slice - int numTransData = data.length / TRANSPARENT_DATA_MAX_SIZE; - int currIndex = 0; - for (int transDataCnt = 0; transDataCnt < numTransData; transDataCnt++) { - addTransparentDataChunk(data, currIndex, TRANSPARENT_DATA_MAX_SIZE); - currIndex += TRANSPARENT_DATA_MAX_SIZE; - } - int left = data.length - currIndex; - addTransparentDataChunk(data, currIndex, left); + public void addTransparentData(EncodedChars encodedChars) throws IOException { + + // data size greater than TRANSPARENT_MAX_SIZE, so slice + int numTransData = encodedChars.getLength() / TRANSPARENT_DATA_MAX_SIZE; + int currIndex = 0; + for (int transDataCnt = 0; transDataCnt < numTransData; transDataCnt++) { + addTransparentDataChunk(encodedChars, currIndex, TRANSPARENT_DATA_MAX_SIZE); + currIndex += TRANSPARENT_DATA_MAX_SIZE; } - } + int left = encodedChars.getLength() - currIndex; + addTransparentDataChunk(encodedChars, currIndex, left); - private void addTransparentDataChunk(byte[] data) throws IOException { - addTransparentDataChunk(data, 0, data.length); } - private void addTransparentDataChunk(byte[] data, int offset, int length) throws IOException { - if (length > TRANSPARENT_MAX_SIZE) { - // Check that we are not exceeding the maximum length - throw new IllegalArgumentException( - "Transparent data is longer than " + TRANSPARENT_MAX_SIZE + " bytes"); - } + + + private void addTransparentDataChunk(EncodedChars encodedChars, int offset, int length) throws IOException { newControlSequence(); - write(data, offset, length); + encodedChars.writeTo(baout, offset, length); commit(chained(TRN)); } diff --git a/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java b/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java index 
7ae3028e8..f7ed5a85c 100644 --- a/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java +++ b/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java @@ -22,6 +22,7 @@ package org.apache.fop.afp.ptoca; import java.io.IOException; import org.apache.fop.afp.AFPTextDataInfo; +import org.apache.fop.afp.fonts.CharactersetEncoder; /** * {@link PtocaProducer} implementation that interprets {@link AFPTextDataInfo} objects. @@ -55,8 +56,7 @@ public class TextDataInfoProducer implements PtocaProducer, PtocaConstants { // Add transparent data String textString = textDataInfo.getString(); String encoding = textDataInfo.getEncoding(); - byte[] data = textString.getBytes(encoding); - builder.addTransparentData(data); + builder.addTransparentData(CharactersetEncoder.encodeSBCS(textString, encoding, false)); } } diff --git a/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java b/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java index 8cc381c18..fc8d10508 100644 --- a/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java +++ b/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java @@ -27,7 +27,6 @@ import java.util.List; import org.apache.avalon.framework.configuration.Configuration; import org.apache.avalon.framework.configuration.ConfigurationException; - import org.apache.fop.afp.AFPResourceLevel; import org.apache.fop.afp.AFPResourceLevelDefaults; import org.apache.fop.afp.fonts.AFPFont; @@ -258,9 +257,11 @@ public class AFPRendererConfigurator extends PrintRendererConfigurator } String name = afpFontCfg.getAttribute("name", characterset); CharacterSet characterSet = null; + boolean ebcdicDBCS = afpFontCfg.getAttributeAsBoolean("ebcdic-dbcs", false); + try { - characterSet = CharacterSetBuilder.getDoubleByteInstance() - .build(characterset, codepage, encoding, accessor); + characterSet = CharacterSetBuilder.getDoubleByteInstance().buildDBCS(characterset, + codepage, encoding, ebcdicDBCS, accessor); } catch (IOException ioe) 
{ toConfigurationException(codepage, characterset, ioe); } diff --git a/status.xml b/status.xml index a80dcd2b2..b843ec401 100644 --- a/status.xml +++ b/status.xml @@ -60,8 +60,12 @@ documents. Example: the fix of marks layering will be such a case when it's done. --> + + Improved support for EBCDIC encoded double byte fonts fo AFP. + - Corrected typographical errors in AFPBase12FontCollection. + Corrected typographical errors in AFPBase12FontCollection. + Improved fix of a bug relating to PCL painter thread safetly. Previous fix in rev 895012 worked by synchronizing methods of a static instance of Java2DFontMetrics. This fix uses a diff --git a/test/java/org/apache/fop/StandardTestSuite.java b/test/java/org/apache/fop/StandardTestSuite.java index a2e6d7524..a49cc7f2e 100644 --- a/test/java/org/apache/fop/StandardTestSuite.java +++ b/test/java/org/apache/fop/StandardTestSuite.java @@ -22,8 +22,9 @@ package org.apache.fop; import junit.framework.Test; import junit.framework.TestSuite; -import org.apache.fop.area.ViewportTestSuite; +import org.apache.fop.afp.fonts.CharactersetEncoderTest; import org.apache.fop.afp.parser.MODCAParserTestCase; +import org.apache.fop.area.ViewportTestSuite; import org.apache.fop.fonts.DejaVuLGCSerifTest; import org.apache.fop.fonts.truetype.GlyfTableTestCase; import org.apache.fop.image.loader.batik.ImageLoaderTestCase; @@ -60,6 +61,7 @@ public class StandardTestSuite { suite.addTest(new TestSuite(PDFsRGBSettingsTestCase.class)); suite.addTest(new TestSuite(DejaVuLGCSerifTest.class)); suite.addTest(new TestSuite(MODCAParserTestCase.class)); + suite.addTest(new TestSuite(CharactersetEncoderTest.class)); suite.addTest(org.apache.fop.render.afp.AFPTestSuite.suite()); suite.addTest(PSTestSuite.suite()); suite.addTest(new TestSuite(GlyfTableTestCase.class)); diff --git a/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java b/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java new file mode 100644 index 
000000000..5999a04d6 --- /dev/null +++ b/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +/* $Id$ */ + +package org.apache.fop.afp.fonts; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.nio.charset.CharacterCodingException; + +import junit.framework.TestCase; + +/** + * Test {@link CharactersetEncoder} + */ +public class CharactersetEncoderTest extends TestCase { + private CharactersetEncoder singlebyteEncoder; + private CharactersetEncoder doublebyteEncoder; + + public void setUp() { + singlebyteEncoder = CharactersetEncoder.newInstance("cp500", false); + doublebyteEncoder = CharactersetEncoder.newInstance("cp937", true); + } + + // This is just an arbitrary CJK string + private final String testCJKText = "\u8ACB\u65BC\u627F\u505A\u65E5\u4E03\u65E5\u5167\u672A\u9054" + + "\u4E03\u65E5\u4E4B\u5B9A\u5B58\u8005\u4EE5\u5BE6\u969B\u5230\u671F\u65E5\u5167\u78BA" + + "\u8A8D\u672C\u4EA4\u6613\u5167\u5BB9\u3002\u5982\u672A\u65BC\u4E0A\u8FF0\u671F\u9593" + + "\u5167\u63D0\u51FA\u7570\u8B70\uFF0C\u8996\u540C\u610F\u627F\u8A8D\u672C\u4EA4\u6613" + + "\u3002"; + + private final byte[] test6CJKChars = { + 
(byte) 0x61, (byte) 0x99, + (byte) 0x50, (byte) 0xf4, + (byte) 0x50, (byte) 0xd4, + (byte) 0x56, (byte) 0x99, + (byte) 0x4c, (byte) 0xc9, + (byte) 0x4c, (byte) 0x44 }; + + private final String testEngText = "Hello World!"; + private final byte[] testEngChars = { + (byte) 0xc8, // H + (byte) 0x85, // e + (byte) 0x93, // l + (byte) 0x93, // l + (byte) 0x96, // o + (byte) 0x40, // " " + (byte) 0xe6, // W + (byte) 0x96, // o + (byte) 0x99, // r + (byte) 0x93, // l + (byte) 0x84, // d + (byte) 0x4f // ! + }; + + /** + * Tests canEncode() - tests that canEncode() responds properly to various input characters. + */ + public void testCanEncode() { + // Both SBCS and DBCS should support Latin characters + for (char c = '!'; c < '~'; c++) { + assertTrue(singlebyteEncoder.canEncode(c)); + assertTrue(doublebyteEncoder.canEncode(c)); + } + // ONLY the double byte characters can handle CJK text + for (char c : testCJKText.toCharArray()) { + assertFalse(singlebyteEncoder.canEncode(c)); + assertTrue(doublebyteEncoder.canEncode(c)); + } + // Ensure that double byte encoder doesn't just return true all the time... + assertFalse(doublebyteEncoder.canEncode('\u00BB')); + } + + public void testEncode() throws CharacterCodingException, IOException { + CharactersetEncoder.EncodedChars encChars;// = doublebyteEncoder.encode(testCJKText); + ByteArrayOutputStream bOut = new ByteArrayOutputStream(); + // JAVA 1.5 has a bug in the JVM in which these err for some reason... JAVA 1.6 no issues + /*encChars.writeTo(bOut, 0, encChars.getLength()); + byte[] bytes = bOut.toByteArray(); + for (int i = 0; i < 12; i++) { + assertEquals(test6CJKChars[i], bytes[i]); + } + bOut.reset();*/ + + encChars = singlebyteEncoder.encode(testEngText); + encChars.writeTo(bOut, 0, encChars.getLength()); + byte[] engBytes = bOut.toByteArray(); + for (int i = 0; i < testEngChars.length; i++) { + assertEquals(testEngChars[i], engBytes[i]); + } + assertEquals(testEngChars.length, engBytes.length); + } +}