From 1e8a2631f0bd5c9e9f1cf889281330249ecd5b00 Mon Sep 17 00:00:00 2001 From: Mehdi Houshmand Date: Tue, 15 May 2012 09:07:02 +0000 Subject: [PATCH] Improved handling of AFP double-byte character sets git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1338605 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/fop/afp/fonts/CharacterSet.java | 8 ++--- .../fop/afp/fonts/CharacterSetBuilder.java | 21 +++++++------ .../fop/afp/fonts/CharactersetEncoder.java | 30 +++++++++++-------- .../apache/fop/afp/fonts/FopCharacterSet.java | 4 +-- .../apache/fop/afp/ptoca/PtocaProducer.java | 4 +-- .../fop/afp/ptoca/TextDataInfoProducer.java | 2 +- .../render/afp/AFPRendererConfigurator.java | 13 ++++---- .../fonts/CharactersetEncoderTestCase.java | 15 +++++----- 8 files changed, 52 insertions(+), 45 deletions(-) diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSet.java b/src/java/org/apache/fop/afp/fonts/CharacterSet.java index 341abde0b..fad5e95e6 100644 --- a/src/java/org/apache/fop/afp/fonts/CharacterSet.java +++ b/src/java/org/apache/fop/afp/fonts/CharacterSet.java @@ -94,12 +94,12 @@ public class CharacterSet { * * @param codePage the code page identifier * @param encoding the encoding of the font - * @param isEBDCS if this is an EBCDIC double byte character set. + * @param charsetType the type of the characterset * @param name the character set name * @param accessor the resource accessor to load resource with * @param eventProducer for handling AFP related events */ - CharacterSet(String codePage, String encoding, boolean isEBDCS, String name, + CharacterSet(String codePage, String encoding, CharacterSetType charsetType, String name, ResourceAccessor accessor, AFPEventProducer eventProducer) { if (name.length() > MAX_NAME_LEN) { String msg = "Character set name '" + name + "' must be a maximum of " @@ -115,7 +115,7 @@ public class CharacterSet { } this.codePage = codePage; this.encoding = encoding; - this.encoder = CharactersetEncoder.newInstance(encoding, isEBDCS); + this.encoder = CharactersetEncoder.newInstance(encoding, charsetType); this.accessor = accessor; this.characterSetOrientations = new HashMap(4); @@ -306,7 +306,7 @@ public class CharacterSet { */ private CharacterSetOrientation getCharacterSetOrientation() { CharacterSetOrientation c - = (CharacterSetOrientation) characterSetOrientations.get(currentOrientation); + = characterSetOrientations.get(currentOrientation); return c; } diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java index 4988bb949..7da2d71ca 100644 --- a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java +++ b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java @@ -190,10 +190,10 @@ public abstract class CharacterSetBuilder { * @return CharacterSet object * @throws IOException if an I/O error occurs */ - public CharacterSet build(String characterSetName, String codePageName, String encoding, + public CharacterSet buildSBCS(String characterSetName, String codePageName, String encoding, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { - return processFont(characterSetName, codePageName, encoding, false, accessor, - eventProducer); + return processFont(characterSetName, codePageName, encoding, CharacterSetType.SINGLE_BYTE, + accessor, eventProducer); } /** @@ -204,16 +204,16 @@ public abstract class CharacterSetBuilder { * @param characterSetName name of the characterset * @param codePageName name of the code page file * @param encoding encoding name - * @param isEDBCS if this is an EBCDIC double byte character set (DBCS) + * @param charsetType the characterset type * @param accessor used to load codepage and characterset * @param eventProducer for handling AFP related events * @return CharacterSet object * @throws IOException if an I/O error occurs */ public CharacterSet buildDBCS(String characterSetName, String codePageName, String encoding, - boolean isEDBCS, ResourceAccessor accessor, AFPEventProducer eventProducer) + CharacterSetType charsetType, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { - return processFont(characterSetName, codePageName, encoding, isEDBCS, accessor, + return processFont(characterSetName, codePageName, encoding, charsetType, accessor, eventProducer); } @@ -236,7 +236,7 @@ public abstract class CharacterSetBuilder { } private CharacterSet processFont(String characterSetName, String codePageName, String encoding, - boolean isEDBCS, ResourceAccessor accessor, AFPEventProducer eventProducer) + CharacterSetType charsetType, ResourceAccessor accessor, AFPEventProducer eventProducer) throws IOException { // check for cached version of the characterset String descriptor = characterSetName + "_" + encoding + "_" + codePageName; @@ -247,7 +247,7 @@ public abstract class CharacterSetBuilder { } // characterset not in the cache, so recreating - characterSet = new CharacterSet(codePageName, encoding, isEDBCS, characterSetName, + characterSet = new CharacterSet(codePageName, encoding, charsetType, characterSetName, accessor, eventProducer); InputStream inputStream = null; @@ -465,8 +465,7 @@ public abstract class CharacterSetBuilder { } } - return (CharacterSetOrientation[]) orientations - .toArray(EMPTY_CSO_ARRAY); + return orientations.toArray(EMPTY_CSO_ARRAY); } /** @@ -570,7 +569,7 @@ public abstract class CharacterSetBuilder { String gcgiString = new String(gcgid, AFPConstants.EBCIDIC_ENCODING); - String idx = (String) codepage.get(gcgiString); + String idx = codepage.get(gcgiString); if (idx != null) { diff --git a/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java index 6d85c0f52..f101bdab4 100644 --- a/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java +++ b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java @@ -82,13 +82,12 @@ public abstract class CharactersetEncoder { * * @param chars the character sequence * @param encoding the encoding type - * @param isEDBCS if this encoding represents a double-byte character set * @return encoded data * @throws CharacterCodingException if encoding fails */ - public static EncodedChars encodeSBCS(CharSequence chars, String encoding, boolean isEDBCS) + public static EncodedChars encodeSBCS(CharSequence chars, String encoding) throws CharacterCodingException { - CharactersetEncoder encoder = newInstance(encoding, isEDBCS); + CharactersetEncoder encoder = newInstance(encoding, CharacterSetType.SINGLE_BYTE); return encoder.encode(chars); } @@ -98,8 +97,8 @@ public abstract class CharactersetEncoder { * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators * are removed from the sequence of bytes. These are only used in Line Data. */ - private static final class EbcdicDoubleByteEncoder extends CharactersetEncoder { - private EbcdicDoubleByteEncoder(String encoding) { + private static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder { + private EbcdicDoubleByteLineDataEncoder(String encoding) { super(encoding); } @Override @@ -117,13 +116,16 @@ public abstract class CharactersetEncoder { * byte character sets (DBCS). */ private static final class DefaultEncoder extends CharactersetEncoder { - private DefaultEncoder(String encoding) { + private final boolean isDBCS; + + private DefaultEncoder(String encoding, boolean isDBCS) { super(encoding); + this.isDBCS = isDBCS; } @Override EncodedChars getEncodedChars(byte[] byteArray, int length) { - return new EncodedChars(byteArray, false); + return new EncodedChars(byteArray, isDBCS); } } @@ -134,17 +136,21 @@ public abstract class CharactersetEncoder { * @param isEbcdicDBCS whether or not this wraps a double-byte EBCDIC code page. * @return the CharactersetEncoder */ - static CharactersetEncoder newInstance(String encoding, boolean isEbcdicDBCS) { - if (isEbcdicDBCS) { - return new EbcdicDoubleByteEncoder(encoding); - } else { - return new DefaultEncoder(encoding); + static CharactersetEncoder newInstance(String encoding, CharacterSetType charsetType) { + switch (charsetType) { + case DOUBLE_BYTE_LINE_DATA: + return new EbcdicDoubleByteLineDataEncoder(encoding); + case DOUBLE_BYTE: + return new DefaultEncoder(encoding, true); + default: + return new DefaultEncoder(encoding, false); } } /** * A container for encoded character bytes */ + // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked public static class EncodedChars { private final byte[] bytes; diff --git a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java index f949976ba..f83c38621 100644 --- a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java +++ b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java @@ -42,7 +42,8 @@ public class FopCharacterSet extends CharacterSet { */ public FopCharacterSet(String codePage, String encoding, String name, Typeface charSet, AFPEventProducer eventProducer) { - super(codePage, encoding, false, name, (ResourceAccessor) null, eventProducer); + super(codePage, encoding, CharacterSetType.SINGLE_BYTE, name, (ResourceAccessor) null, + eventProducer); this.charSet = charSet; } @@ -132,5 +133,4 @@ public class FopCharacterSet extends CharacterSet { public char mapChar(char c) { return charSet.mapChar(c); } - } diff --git a/src/java/org/apache/fop/afp/ptoca/PtocaProducer.java b/src/java/org/apache/fop/afp/ptoca/PtocaProducer.java index 9b6d97dec..5f29bef96 100644 --- a/src/java/org/apache/fop/afp/ptoca/PtocaProducer.java +++ b/src/java/org/apache/fop/afp/ptoca/PtocaProducer.java @@ -22,8 +22,8 @@ package org.apache.fop.afp.ptoca; import java.io.IOException; /** - * Producer interface that is passed to a {@link PresentationTextObject} to produce PTOCA control - * sequences using a {@link PtocaBuilder}. + * Producer interface that is passed to a {@link org.apache.fop.afp.modca.PresentationTextObject} + * to produce PTOCA control sequences using a {@link PtocaBuilder}. */ public interface PtocaProducer { diff --git a/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java b/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java index f7ed5a85c..4af21b12b 100644 --- a/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java +++ b/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java @@ -56,7 +56,7 @@ public class TextDataInfoProducer implements PtocaProducer, PtocaConstants { // Add transparent data String textString = textDataInfo.getString(); String encoding = textDataInfo.getEncoding(); - builder.addTransparentData(CharactersetEncoder.encodeSBCS(textString, encoding, false)); + builder.addTransparentData(CharactersetEncoder.encodeSBCS(textString, encoding)); } } diff --git a/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java b/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java index bf7fbde4a..36cdbd077 100644 --- a/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java +++ b/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java @@ -37,6 +37,7 @@ import org.apache.fop.afp.fonts.AFPFontCollection; import org.apache.fop.afp.fonts.AFPFontInfo; import org.apache.fop.afp.fonts.CharacterSet; import org.apache.fop.afp.fonts.CharacterSetBuilder; +import org.apache.fop.afp.fonts.CharacterSetType; import org.apache.fop.afp.fonts.DoubleByteFont; import org.apache.fop.afp.fonts.OutlineFont; import org.apache.fop.afp.fonts.RasterFont; @@ -220,7 +221,7 @@ public class AFPRendererConfigurator extends PrintRendererConfigurator } } else { font.addCharacterSet(sizeMpt, CharacterSetBuilder.getSingleByteInstance() - .build(characterset, codepage, encoding, accessor, eventProducer)); + .buildSBCS(characterset, codepage, encoding, accessor, eventProducer)); } } return font; @@ -254,7 +255,7 @@ public class AFPRendererConfigurator extends PrintRendererConfigurator log.error(msg); } } else { - characterSet = CharacterSetBuilder.getSingleByteInstance().build( + characterSet = CharacterSetBuilder.getSingleByteInstance().buildSBCS( characterset, codepage, encoding, accessor, eventProducer); } // Return new font object @@ -269,10 +270,10 @@ public class AFPRendererConfigurator extends PrintRendererConfigurator } String name = afpFontCfg.getAttribute("name", characterset); CharacterSet characterSet = null; - boolean ebcdicDBCS = afpFontCfg.getAttributeAsBoolean("ebcdic-dbcs", false); - + CharacterSetType charsetType = afpFontCfg.getAttributeAsBoolean("ebcdic-dbcs", false) + ? CharacterSetType.DOUBLE_BYTE_LINE_DATA : CharacterSetType.DOUBLE_BYTE; characterSet = CharacterSetBuilder.getDoubleByteInstance().buildDBCS(characterset, - codepage, encoding, ebcdicDBCS, accessor, eventProducer); + codepage, encoding, charsetType, accessor, eventProducer); // Create a new font object DoubleByteFont font = new DoubleByteFont(name, characterSet); @@ -322,7 +323,7 @@ public class AFPRendererConfigurator extends PrintRendererConfigurator } List fontTriplets = afi.getFontTriplets(); for (int j = 0; j < fontTriplets.size(); ++j) { - FontTriplet triplet = (FontTriplet) fontTriplets.get(j); + FontTriplet triplet = fontTriplets.get(j); if (log.isDebugEnabled()) { log.debug(" Font triplet " + triplet.getName() + ", " diff --git a/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTestCase.java b/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTestCase.java index dd776e41c..6d0daa2be 100644 --- a/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTestCase.java +++ b/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTestCase.java @@ -19,10 +19,6 @@ package org.apache.fop.afp.fonts; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; - import java.io.ByteArrayOutputStream; import java.io.IOException; import java.nio.charset.CharacterCodingException; @@ -30,6 +26,10 @@ import java.nio.charset.CharacterCodingException; import org.junit.Before; import org.junit.Test; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + /** * Test {@link CharactersetEncoder} */ @@ -39,8 +39,9 @@ public class CharactersetEncoderTestCase { @Before public void setUp() { - singlebyteEncoder = CharactersetEncoder.newInstance("cp500", false); - doublebyteEncoder = CharactersetEncoder.newInstance("cp937", true); + singlebyteEncoder = CharactersetEncoder.newInstance("cp500", CharacterSetType.SINGLE_BYTE); + doublebyteEncoder = CharactersetEncoder.newInstance("cp937", + CharacterSetType.DOUBLE_BYTE_LINE_DATA); } // This is just an arbitrary CJK string @@ -95,7 +96,7 @@ public class CharactersetEncoderTestCase { @Test public void testEncode() throws CharacterCodingException, IOException { - CharactersetEncoder.EncodedChars encChars;// = doublebyteEncoder.encode(testCJKText); + CharactersetEncoder.EncodedChars encChars; // = doublebyteEncoder.encode(testCJKText); ByteArrayOutputStream bOut = new ByteArrayOutputStream(); // JAVA 1.5 has a bug in the JVM in which these err for some reason... JAVA 1.6 no issues /*encChars.writeTo(bOut, 0, encChars.getLength()); -- 2.39.5