Bugzilla #51530: Improved support for EBCDIC encoded double byte fonts for AFP.

author Peter Hancock <phancock@apache.org>

Mon, 5 Sep 2011 09:42:00 +0000 (09:42 +0000)

committer Peter Hancock <phancock@apache.org>

Mon, 5 Sep 2011 09:42:00 +0000 (09:42 +0000)
author Peter Hancock <phancock@apache.org>
Mon, 5 Sep 2011 09:42:00 +0000 (09:42 +0000)
committer Peter Hancock <phancock@apache.org>
Mon, 5 Sep 2011 09:42:00 +0000 (09:42 +0000)
diff --git a/src/documentation/content/xdocs/trunk/output.xml b/src/documentation/content/xdocs/trunk/output.xml

index c71f6abf894de73c042fc15765a859ce0b162ae1..7b245eb07bfec75d393f5825d1ada5b59afe51c6 100644 (file)
--- a/src/documentation/content/xdocs/trunk/output.xml
+++ b/src/documentation/content/xdocs/trunk/output.xml
@@ -743,6 +743,12 @@ Note that the value of the encoding attribute in the example is the double-byte
            <a href="fonts.html#embedding"><code>referenced-fonts</code> section of the configuration file</a>.
            However, the default fonts shown above will not be embedded.
          </p>
+        <p>
+          For double byte EBCDIC encoded character sets, the optional <code>ebcdic-dbcs</code> attribute must be
+          set to "true" to prevent characters from being miscoded. It defaults to "false" if not specified.</p>
+          <source><![CDATA[
+        <afp-font type="CIDKeyed" codepage="T10835  " encoding="Cp937" characterset="CZTKAI" ebcdic-dbcs="true"/>]]>
+          </source>
        </section>
        <section id="afp-renderer-resolution-config">
          <title>Output Resolution</title>
diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSet.java b/src/java/org/apache/fop/afp/fonts/CharacterSet.java

index 7123d4138dbe4a3958a39b8f452721d50f3d0c9f..78458876205523ed7ff1445498145ec69a3e85f2 100644 (file)
--- a/src/java/org/apache/fop/afp/fonts/CharacterSet.java
+++ b/src/java/org/apache/fop/afp/fonts/CharacterSet.java
@@ -21,19 +21,13 @@ package org.apache.fop.afp.fonts;
  
  import java.io.File;
  import java.io.UnsupportedEncodingException;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
  import java.nio.charset.CharacterCodingException;
-import java.nio.charset.Charset;
-import java.nio.charset.CharsetEncoder;
-import java.nio.charset.CodingErrorAction;
-import java.nio.charset.UnsupportedCharsetException;
  import java.util.Map;
  
  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
-
  import org.apache.fop.afp.AFPConstants;
+import org.apache.fop.afp.fonts.CharactersetEncoder.EncodedChars;
  import org.apache.fop.afp.util.ResourceAccessor;
  import org.apache.fop.afp.util.SimpleResourceAccessor;
  import org.apache.fop.afp.util.StringUtils;
@@ -70,16 +64,16 @@ public class CharacterSet {
  
  
      /** The code page to which the character set relates */
-    protected String codePage;
+    protected final String codePage;
  
      /** The encoding used for the code page */
-    protected String encoding;
+    protected final String encoding;
  
-    /** The charset encoder corresponding to this encoding */
-    private CharsetEncoder encoder;
+    /** The characterset encoder corresponding to this encoding */
+    private final CharactersetEncoder encoder;
  
      /** The character set relating to the font */
-    protected String name;
+    protected final String name;
  
      /** The path to the installed fonts */
      private ResourceAccessor accessor;
@@ -105,20 +99,22 @@ public class CharacterSet {
       * {@link #CharacterSet(String, String, String, ResourceAccessor)} instead.
       */
      public CharacterSet(String codePage, String encoding, String name, String path) {
-        this(codePage, encoding, name,
+        this(codePage, encoding, false, name,
                  new SimpleResourceAccessor(path != null ? new File(path) : null));
      }
  
      /**
-     * Constructor for the CharacterSetMetric object, the character set is used
-     * to load the font information from the actual AFP font.
+     * Constructor for the CharacterSetMetric object, the character set is used to load the font
+     * information from the actual AFP font.
       *
       * @param codePage the code page identifier
       * @param encoding the encoding of the font
+     * @param isEBDCS if this is an EBCDIC double byte character set.
       * @param name the character set name
       * @param accessor the resource accessor to load resource with
       */
-     CharacterSet(String codePage, String encoding, String name, ResourceAccessor accessor) {
+    CharacterSet(String codePage, String encoding, boolean isEBDCS, String name,
+            ResourceAccessor accessor) {
          if (name.length() > MAX_NAME_LEN) {
              String msg = "Character set name '" + name + "' must be a maximum of "
                  + MAX_NAME_LEN + " characters";
@@ -133,14 +129,7 @@ public class CharacterSet {
          }
          this.codePage = codePage;
          this.encoding = encoding;
-        try {
-            this.encoder = Charset.forName(encoding).newEncoder();
-            this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
-        } catch (UnsupportedCharsetException uce) {
-            //No nio-capable encoder available
-            //This may happen with "Cp500" on Sun Java 1.4.2
-            this.encoder = null;
-        }
+        this.encoder = CharactersetEncoder.newInstance(encoding, isEBDCS);
          this.accessor = accessor;
  
          this.characterSetOrientations = new java.util.HashMap(4);
@@ -357,32 +346,8 @@ public class CharacterSet {
       * @return the encoded characters
       * @throws CharacterCodingException if the encoding operation fails
       */
-    public byte[] encodeChars(CharSequence chars) throws CharacterCodingException {
-        if (encoder != null) {
-            ByteBuffer bb;
-            // encode method is not thread safe
-            synchronized (encoder) {
-                bb = encoder.encode(CharBuffer.wrap(chars));
-            }
-            if (bb.hasArray()) {
-                return bb.array();
-            } else {
-                bb.rewind();
-                byte[] bytes = new byte[bb.remaining()];
-                bb.get(bytes);
-                return bytes;
-            }
-        } else {
-            //Sun Java 1.4.2 compatibility
-            byte[] bytes;
-            try {
-                bytes = chars.toString().getBytes(this.encoding);
-                return bytes;
-            } catch (UnsupportedEncodingException uee) {
-                throw new UnsupportedOperationException(
-                        "Unsupported encoding: " + uee.getMessage());
-            }
-        }
+    public EncodedChars encodeChars(CharSequence chars) throws CharacterCodingException {
+        return encoder.encode(chars);
      }
  
      /**
diff --git a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java

index d575e2ae1aa364246fa800557114f4e92b670170..16893a152050ec24aa77a66d455a9a9ddec3b8f2 100644 (file)
--- a/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java
+++ b/src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java
@@ -30,13 +30,11 @@ import java.util.WeakHashMap;
  
  import org.apache.commons.logging.Log;
  import org.apache.commons.logging.LogFactory;
-
-import org.apache.xmlgraphics.image.loader.util.SoftMapCache;
-
  import org.apache.fop.afp.AFPConstants;
  import org.apache.fop.afp.util.ResourceAccessor;
  import org.apache.fop.afp.util.StructuredFieldReader;
  import org.apache.fop.fonts.Typeface;
+import org.apache.xmlgraphics.image.loader.util.SoftMapCache;
  
  /**
   * The CharacterSetBuilder is responsible building the a CharacterSet instance that holds
@@ -181,9 +179,9 @@ public class CharacterSetBuilder {
      }
  
      /**
-     * Load the font details and metrics into the CharacterSetMetric object,
-     * this will use the actual afp code page and character set files to load
-     * the object with the necessary metrics.
+     * Load the font details and metrics into the CharacterSetMetric object, this will use the
+     * actual afp code page and character set files to load the object with the necessary metrics.
+     * 
       * @param characterSetName name of the characterset
       * @param codePageName name of the code page file
       * @param encoding encoding name
@@ -191,9 +189,47 @@ public class CharacterSetBuilder {
       * @return CharacterSet object
       * @throws IOException if an I/O error occurs
       */
-    public CharacterSet build(String characterSetName, String codePageName,
-            String encoding, ResourceAccessor accessor) throws IOException {
+    public CharacterSet build(String characterSetName, String codePageName, String encoding,
+            ResourceAccessor accessor) throws IOException {
+        return processFont(characterSetName, codePageName, encoding, false, accessor);
+    }
  
+    /**
+     * Load the font details and metrics into the CharacterSetMetric object, this will use the
+     * actual afp code page and character set files to load the object with the necessary metrics.
+     * This method is to be used for double byte character sets (DBCS).
+     *
+     * @param characterSetName name of the characterset
+     * @param codePageName name of the code page file
+     * @param encoding encoding name
+     * @param isEDBCS if this is an EBCDIC double byte character set (DBCS)
+     * @param accessor used to load codepage and characterset
+     * @return CharacterSet object
+     * @throws IOException if an I/O error occurs
+     */
+    public CharacterSet buildDBCS(String characterSetName, String codePageName, String encoding,
+            boolean isEDBCS, ResourceAccessor accessor) throws IOException {
+        return processFont(characterSetName, codePageName, encoding, isEDBCS, accessor);
+    }
+
+    /**
+     * Load the font details and metrics into the CharacterSetMetric object, this will use the
+     * actual afp code page and character set files to load the object with the necessary metrics.
+     * 
+     * @param characterSetName the CharacterSetMetric object to populate
+     * @param codePageName the name of the code page to use
+     * @param encoding name of the encoding in use
+     * @param typeface base14 font name
+     * @return CharacterSet object
+     * @throws IOException if an I/O error occurs
+     */
+    public CharacterSet build(String characterSetName, String codePageName, String encoding,
+            Typeface typeface) throws IOException {
+        return new FopCharacterSet(codePageName, encoding, characterSetName, typeface);
+    }
+
+    private CharacterSet processFont(String characterSetName, String codePageName, String encoding,
+            boolean isEDBCS, ResourceAccessor accessor) throws IOException {
          // check for cached version of the characterset
          String descriptor = characterSetName + "_" + encoding + "_" + codePageName;
          CharacterSet characterSet = (CharacterSet)characterSetsCache.get(descriptor);
@@ -203,8 +239,8 @@ public class CharacterSetBuilder {
          }
  
          // characterset not in the cache, so recreating
-        characterSet = new CharacterSet(
-                codePageName, encoding, characterSetName, accessor);
+        characterSet = new CharacterSet(codePageName, encoding, isEDBCS, characterSetName,
+                accessor);
  
          InputStream inputStream = null;
  
@@ -268,23 +304,6 @@ public class CharacterSetBuilder {
          }
          characterSetsCache.put(descriptor, characterSet);
          return characterSet;
-
-    }
-
-    /**
-     * Load the font details and metrics into the CharacterSetMetric object,
-     * this will use the actual afp code page and character set files to load
-     * the object with the necessary metrics.
-     *
-     * @param characterSetName the CharacterSetMetric object to populate
-     * @param codePageName the name of the code page to use
-     * @param encoding name of the encoding in use
-     * @param typeface base14 font name
-     * @return CharacterSet object
-     */
-    public CharacterSet build(String characterSetName, String codePageName,
-            String encoding, Typeface typeface) {
-       return new FopCharacterSet(codePageName, encoding, characterSetName, typeface);
      }
  
      /**
diff --git a/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java

new file mode 100644 (file)

index 0000000..d82da80
--- /dev/null
+++ b/src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.afp.fonts;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.CharBuffer;
+import java.nio.charset.CharacterCodingException;
+import java.nio.charset.Charset;
+import java.nio.charset.CharsetEncoder;
+import java.nio.charset.CodingErrorAction;
+
+/**
+ * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a
+ * specified format.
+ */
+public abstract class CharactersetEncoder {
+
+    /** The wrapped NIO encoder; it is not thread safe, so every use synchronizes on it. */
+    private final CharsetEncoder encoder;
+
+    private CharactersetEncoder(String encoding) {
+        this.encoder = Charset.forName(encoding).newEncoder();
+        this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
+    }
+
+    /**
+     * Tells whether or not this encoder can encode the given character.
+     *
+     * @param c the character
+     * @return true if, and only if, this encoder can encode the given character
+     */
+    final boolean canEncode(char c) {
+        // takes the same lock as encode(): both calls drive the shared, stateful encoder
+        synchronized (encoder) {
+            return encoder.canEncode(c);
+        }
+    }
+
+    /**
+     * Encodes a character sequence to a byte array.
+     *
+     * @param chars the character sequence
+     * @return the encoded character sequence
+     * @throws CharacterCodingException if the encoding operation fails
+     */
+    final EncodedChars encode(CharSequence chars) throws CharacterCodingException {
+        ByteBuffer bb;
+        // encode method is not thread safe
+        synchronized (encoder) {
+            bb = encoder.encode(CharBuffer.wrap(chars));
+        }
+        if (bb.hasArray()) {
+            // the backing array may be longer than the encoded data: pass the limit along
+            return getEncodedChars(bb.array(), bb.limit());
+        }
+        bb.rewind();
+        byte[] bytes = new byte[bb.remaining()];
+        bb.get(bytes);
+        return getEncodedChars(bytes, bytes.length);
+    }
+
+    /**
+     * Wraps the raw encoder output in an {@link EncodedChars}.
+     *
+     * @param byteArray the buffer holding the encoded bytes, starting at index 0
+     * @param length the number of valid bytes, which may be less than the array length
+     * @return the encoded characters
+     */
+    abstract EncodedChars getEncodedChars(byte[] byteArray, int length);
+
+    /**
+     * Encodes <code>chars</code> into a format specified by <code>encoding</code>.
+     *
+     * @param chars the character sequence
+     * @param encoding the encoding type
+     * @param isEDBCS if this encoding represents an EBCDIC double-byte character set
+     * @return encoded data
+     * @throws CharacterCodingException if encoding fails
+     */
+    public static EncodedChars encodeSBCS(CharSequence chars, String encoding, boolean isEDBCS)
+            throws CharacterCodingException {
+        return newInstance(encoding, isEDBCS).encode(chars);
+    }
+
+    /**
+     * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character
+     * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character
+     * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators
+     * are removed from the sequence of bytes. These are only used in Line Data.
+     */
+    private static final class EbcdicDoubleByteEncoder extends CharactersetEncoder {
+        private EbcdicDoubleByteEncoder(String encoding) {
+            super(encoding);
+        }
+
+        @Override
+        EncodedChars getEncodedChars(byte[] byteArray, int length) {
+            // strip a leading 0x0E / trailing 0x0F shift pair; the length guard keeps empty
+            // output from being indexed out of bounds
+            if (length >= 2 && byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) {
+                return new EncodedChars(byteArray, 1, length - 2);
+            }
+            return new EncodedChars(byteArray, 0, length);
+        }
+    }
+
+    /**
+     * The default encoder is used for encoding IBM format SBCS (single byte character sets), this
+     * is the primary format for most Latin character sets. This can also be used for Unicode
+     * double-byte character sets (DBCS).
+     */
+    private static final class DefaultEncoder extends CharactersetEncoder {
+        private DefaultEncoder(String encoding) {
+            super(encoding);
+        }
+
+        @Override
+        EncodedChars getEncodedChars(byte[] byteArray, int length) {
+            return new EncodedChars(byteArray, 0, length);
+        }
+    }
+
+    /**
+     * Returns a new instance of a {@link CharactersetEncoder}.
+     *
+     * @param encoding the encoding for the underlying character encoder
+     * @param isEbcdicDBCS whether or not this wraps a double-byte EBCDIC code page.
+     * @return the CharactersetEncoder
+     */
+    static CharactersetEncoder newInstance(String encoding, boolean isEbcdicDBCS) {
+        if (isEbcdicDBCS) {
+            return new EbcdicDoubleByteEncoder(encoding);
+        }
+        return new DefaultEncoder(encoding);
+    }
+
+    /**
+     * A container for encoded character bytes
+     */
+    public static class EncodedChars {
+        private final byte[] bytes;
+        private final int offset;
+        private final int length;
+
+        private EncodedChars(byte[] bytes, int offset, int length) {
+            // written as a subtraction so that offset + length cannot overflow
+            if (offset < 0 || length < 0 || length > bytes.length - offset) {
+                throw new IllegalArgumentException();
+            }
+            this.bytes = bytes;
+            this.offset = offset;
+            this.length = length;
+        }
+
+        /**
+         * write <code>length</code> bytes from <code>offset</code> to the output stream
+         *
+         * @param out output to write the bytes to
+         * @param offset index of the first byte to write, relative to the start of this container
+         * @param length the number of bytes to write
+         * @throws IOException if an I/O error occurs
+         * @throws IllegalArgumentException if the requested range lies outside this container
+         */
+        public void writeTo(OutputStream out, int offset, int length) throws IOException {
+            if (offset < 0 || length < 0 || length > this.length - offset) {
+                throw new IllegalArgumentException();
+            }
+            out.write(bytes, this.offset + offset, length);
+        }
+
+        /**
+         * The number of containing bytes.
+         *
+         * @return the number of encoded bytes held by this container
+         */
+        public int getLength() {
+            return length;
+        }
+    }
+}
diff --git a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java

index 716ca538fbb73020f5b7f13638ce5bdf1cc39d00..b1efdc5110e0c2904d2d5008f31160224ef9fe39 100644 (file)
--- a/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java
+++ b/src/java/org/apache/fop/afp/fonts/FopCharacterSet.java
@@ -44,7 +44,7 @@ public class FopCharacterSet extends CharacterSet {
          String name,
          Typeface charSet) {
  
-        super(codePage, encoding, name, (ResourceAccessor)null);
+        super(codePage, encoding, false, name, (ResourceAccessor) null);
          this.charSet = charSet;
      }
  
diff --git a/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java b/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java

index 2962dc76c65cc81d95e584c2e63f1d1059888dbe..d1049ca734aa268d6140d67adbc18392658c882f 100644 (file)
--- a/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java
+++ b/src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java
@@ -25,7 +25,7 @@ import java.io.IOException;
  import java.io.OutputStream;
  
  import org.apache.commons.io.output.ByteArrayOutputStream;
-
+import org.apache.fop.afp.fonts.CharactersetEncoder.EncodedChars;
  import org.apache.xmlgraphics.java2d.color.CIELabColorSpace;
  import org.apache.xmlgraphics.java2d.color.ColorUtil;
  import org.apache.xmlgraphics.java2d.color.ColorWithAlternatives;
@@ -190,34 +190,25 @@ public abstract class PtocaBuilder implements PtocaConstants {
       * @param data The text data to add.
       * @throws IOException if an I/O error occurs
       */
-    public void addTransparentData(byte[] data) throws IOException {
-        if (data.length <= TRANSPARENT_DATA_MAX_SIZE) {
-            addTransparentDataChunk(data);
-        } else {
-            // data size greater than TRANSPARENT_MAX_SIZE, so slice
-            int numTransData = data.length / TRANSPARENT_DATA_MAX_SIZE;
-            int currIndex = 0;
-            for (int transDataCnt = 0; transDataCnt < numTransData; transDataCnt++) {
-                addTransparentDataChunk(data, currIndex, TRANSPARENT_DATA_MAX_SIZE);
-                currIndex += TRANSPARENT_DATA_MAX_SIZE;
-            }
-            int left = data.length - currIndex;
-            addTransparentDataChunk(data, currIndex, left);
+    public void addTransparentData(EncodedChars encodedChars) throws IOException {
+
+        // data size greater than TRANSPARENT_MAX_SIZE, so slice
+        int numTransData = encodedChars.getLength() / TRANSPARENT_DATA_MAX_SIZE;
+        int currIndex = 0;
+        for (int transDataCnt = 0; transDataCnt < numTransData; transDataCnt++) {
+            addTransparentDataChunk(encodedChars, currIndex, TRANSPARENT_DATA_MAX_SIZE);
+            currIndex += TRANSPARENT_DATA_MAX_SIZE;
          }
-    }
+        int left = encodedChars.getLength() - currIndex;
+        addTransparentDataChunk(encodedChars, currIndex, left);
  
-    private void addTransparentDataChunk(byte[] data) throws IOException {
-        addTransparentDataChunk(data, 0, data.length);
      }
  
-    private void addTransparentDataChunk(byte[] data, int offset, int length) throws IOException {
-        if (length > TRANSPARENT_MAX_SIZE) {
-            // Check that we are not exceeding the maximum length
-            throw new IllegalArgumentException(
-                    "Transparent data is longer than " + TRANSPARENT_MAX_SIZE + " bytes");
-        }
+
+
+    private void addTransparentDataChunk(EncodedChars encodedChars, int offset, int length) throws IOException {
          newControlSequence();
-        write(data, offset, length);
+        encodedChars.writeTo(baout, offset, length);
          commit(chained(TRN));
      }
  
diff --git a/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java b/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java

index 7ae3028e8130741790695abad63e907d74950cdb..f7ed5a85c76ddd6ec51a8cbdeb3cdbb553aa647a 100644 (file)
--- a/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java
+++ b/src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java
@@ -22,6 +22,7 @@ package org.apache.fop.afp.ptoca;
  import java.io.IOException;
  
  import org.apache.fop.afp.AFPTextDataInfo;
+import org.apache.fop.afp.fonts.CharactersetEncoder;
  
  /**
   * {@link PtocaProducer} implementation that interprets {@link AFPTextDataInfo} objects.
@@ -55,8 +56,7 @@ public class TextDataInfoProducer implements PtocaProducer, PtocaConstants {
          // Add transparent data
          String textString = textDataInfo.getString();
          String encoding = textDataInfo.getEncoding();
-        byte[] data = textString.getBytes(encoding);
-        builder.addTransparentData(data);
+        builder.addTransparentData(CharactersetEncoder.encodeSBCS(textString, encoding, false));
      }
  
  }
diff --git a/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java b/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java

index 8cc381c18e9e4fd486a795cbe24f0875e1dbd553..fc8d105083bd36fd12a9389416dc51f27c333227 100644 (file)
--- a/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java
+++ b/src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java
@@ -27,7 +27,6 @@ import java.util.List;
  
  import org.apache.avalon.framework.configuration.Configuration;
  import org.apache.avalon.framework.configuration.ConfigurationException;
-
  import org.apache.fop.afp.AFPResourceLevel;
  import org.apache.fop.afp.AFPResourceLevelDefaults;
  import org.apache.fop.afp.fonts.AFPFont;
@@ -258,9 +257,11 @@ public class AFPRendererConfigurator extends PrintRendererConfigurator
              }
              String name = afpFontCfg.getAttribute("name", characterset);
              CharacterSet characterSet = null;
+            boolean ebcdicDBCS = afpFontCfg.getAttributeAsBoolean("ebcdic-dbcs", false);
+
              try {
-                characterSet = CharacterSetBuilder.getDoubleByteInstance()
-                                .build(characterset, codepage, encoding, accessor);
+                characterSet = CharacterSetBuilder.getDoubleByteInstance().buildDBCS(characterset,
+                        codepage, encoding, ebcdicDBCS, accessor);
              } catch (IOException ioe) {
                  toConfigurationException(codepage, characterset, ioe);
              }
diff --git a/status.xml b/status.xml

index a80dcd2b219dc9150f133dbe41f71e2bebdceec0..b843ec4010720a2abff1c1efb8ed9c865723586b 100644 (file)
--- a/status.xml
+++ b/status.xml
@@ -60,8 +60,12 @@
        documents. Example: the fix of marks layering will be such a case when it's done.
      -->
      <release version="FOP Trunk" date="TBD">
+      <action context="Fonts" dev="PH" type="fix" fixes-bug="51530" due-to="Mehdi Houshmand">
+        Improved support for EBCDIC encoded double byte fonts for AFP.
+      </action>
        <action context="Fonts" dev="PH" type="fix" fixes-bug="51205" due-to="Mehdi Houshmand">
-        Corrected typographical errors in AFPBase12FontCollection.</action>
+        Corrected typographical errors in AFPBase12FontCollection.
+      </action>
        <action context="Renderers" dev="PH" type="fix" fixes-bug="48062">
          Improved fix of a bug relating to PCL painter thread safetly.  Previous fix in rev 895012
          worked by synchronizing methods of a static instance of Java2DFontMetrics.  This fix uses a
diff --git a/test/java/org/apache/fop/StandardTestSuite.java b/test/java/org/apache/fop/StandardTestSuite.java

index a2e6d752465c71b94f2dad7ce0a059f74a335f21..a49cc7f2e7f6e9a948bc56e2081111a492ae5e6d 100644 (file)
--- a/test/java/org/apache/fop/StandardTestSuite.java
+++ b/test/java/org/apache/fop/StandardTestSuite.java
@@ -22,8 +22,9 @@ package org.apache.fop;
  import junit.framework.Test;
  import junit.framework.TestSuite;
  
-import org.apache.fop.area.ViewportTestSuite;
+import org.apache.fop.afp.fonts.CharactersetEncoderTest;
  import org.apache.fop.afp.parser.MODCAParserTestCase;
+import org.apache.fop.area.ViewportTestSuite;
  import org.apache.fop.fonts.DejaVuLGCSerifTest;
  import org.apache.fop.fonts.truetype.GlyfTableTestCase;
  import org.apache.fop.image.loader.batik.ImageLoaderTestCase;
@@ -60,6 +61,7 @@ public class StandardTestSuite {
          suite.addTest(new TestSuite(PDFsRGBSettingsTestCase.class));
          suite.addTest(new TestSuite(DejaVuLGCSerifTest.class));
          suite.addTest(new TestSuite(MODCAParserTestCase.class));
+        suite.addTest(new TestSuite(CharactersetEncoderTest.class));
          suite.addTest(org.apache.fop.render.afp.AFPTestSuite.suite());
          suite.addTest(PSTestSuite.suite());
          suite.addTest(new TestSuite(GlyfTableTestCase.class));
diff --git a/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java b/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java

new file mode 100644 (file)

index 0000000..5999a04
--- /dev/null
+++ b/test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/* $Id$ */
+
+package org.apache.fop.afp.fonts;
+
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.nio.charset.CharacterCodingException;
+
+import junit.framework.TestCase;
+
+/**
+ * Test {@link CharactersetEncoder}
+ */
+public class CharactersetEncoderTest extends TestCase {
+    private CharactersetEncoder singlebyteEncoder;
+    private CharactersetEncoder doublebyteEncoder;
+
+    public void setUp() {
+        singlebyteEncoder = CharactersetEncoder.newInstance("cp500", false);
+        doublebyteEncoder = CharactersetEncoder.newInstance("cp937", true);
+    }
+
+    // An arbitrary CJK string; testCanEncode() expects only the cp937 encoder to accept it
+    private final String testCJKText = "\u8ACB\u65BC\u627F\u505A\u65E5\u4E03\u65E5\u5167\u672A\u9054"
+            + "\u4E03\u65E5\u4E4B\u5B9A\u5B58\u8005\u4EE5\u5BE6\u969B\u5230\u671F\u65E5\u5167\u78BA"
+            + "\u8A8D\u672C\u4EA4\u6613\u5167\u5BB9\u3002\u5982\u672A\u65BC\u4E0A\u8FF0\u671F\u9593"
+            + "\u5167\u63D0\u51FA\u7570\u8B70\uFF0C\u8996\u540C\u610F\u627F\u8A8D\u672C\u4EA4\u6613"
+            + "\u3002";
+
+    private final byte[] test6CJKChars = { // only used by the disabled check in testEncode()
+            (byte) 0x61, (byte) 0x99,
+            (byte) 0x50, (byte) 0xf4,
+            (byte) 0x50, (byte) 0xd4,
+            (byte) 0x56, (byte) 0x99,
+            (byte) 0x4c, (byte) 0xc9,
+            (byte) 0x4c, (byte) 0x44 };
+
+    private final String testEngText = "Hello World!";
+    private final byte[] testEngChars = {
+            (byte) 0xc8, // H
+            (byte) 0x85, // e
+            (byte) 0x93, // l
+            (byte) 0x93, // l
+            (byte) 0x96, // o
+            (byte) 0x40, // " "
+            (byte) 0xe6, // W
+            (byte) 0x96, // o
+            (byte) 0x99, // r
+            (byte) 0x93, // l
+            (byte) 0x84, // d
+            (byte) 0x4f  // !
+    };
+
+    /**
+     * Tests canEncode() - tests that canEncode() responds properly to various input characters.
+     */
+    public void testCanEncode() {
+        // Both SBCS and DBCS should support Latin characters
+        for (char c = '!'; c < '~'; c++) {
+            assertTrue(singlebyteEncoder.canEncode(c));
+            assertTrue(doublebyteEncoder.canEncode(c));
+        }
+        // ONLY the double byte characters can handle CJK text
+        for (char c : testCJKText.toCharArray()) {
+            assertFalse(singlebyteEncoder.canEncode(c));
+            assertTrue(doublebyteEncoder.canEncode(c));
+        }
+        // Ensure that double byte encoder doesn't just return true all the time...
+        assertFalse(doublebyteEncoder.canEncode('\u00BB'));
+    }
+
+    public void testEncode() throws CharacterCodingException, IOException {
+        CharactersetEncoder.EncodedChars encChars;// = doublebyteEncoder.encode(testCJKText);
+        ByteArrayOutputStream bOut = new ByteArrayOutputStream();
+        // Double byte check disabled: reported to err on Java 1.5 (suspected JVM bug), fine on 1.6
+        /*encChars.writeTo(bOut, 0, encChars.getLength());
+        byte[] bytes = bOut.toByteArray();
+        for (int i = 0; i < 12; i++) {
+            assertEquals(test6CJKChars[i], bytes[i]);
+        }
+        bOut.reset();*/
+
+        encChars = singlebyteEncoder.encode(testEngText);
+        encChars.writeTo(bOut, 0, encChars.getLength());
+        byte[] engBytes = bOut.toByteArray();
+        for (int i = 0; i < testEngChars.length; i++) {
+            assertEquals(testEngChars[i], engBytes[i]);
+        }
+        assertEquals(testEngChars.length, engBytes.length);
+    }
+}
author	Peter Hancock <phancock@apache.org>
	Mon, 5 Sep 2011 09:42:00 +0000 (09:42 +0000)
committer	Peter Hancock <phancock@apache.org>
	Mon, 5 Sep 2011 09:42:00 +0000 (09:42 +0000)
src/documentation/content/xdocs/trunk/output.xml		patch \| blob \| history
src/java/org/apache/fop/afp/fonts/CharacterSet.java		patch \| blob \| history
src/java/org/apache/fop/afp/fonts/CharacterSetBuilder.java		patch \| blob \| history
src/java/org/apache/fop/afp/fonts/CharactersetEncoder.java	[new file with mode: 0644]	patch \| blob
src/java/org/apache/fop/afp/fonts/FopCharacterSet.java		patch \| blob \| history
src/java/org/apache/fop/afp/ptoca/PtocaBuilder.java		patch \| blob \| history
src/java/org/apache/fop/afp/ptoca/TextDataInfoProducer.java		patch \| blob \| history
src/java/org/apache/fop/render/afp/AFPRendererConfigurator.java		patch \| blob \| history
status.xml		patch \| blob \| history
test/java/org/apache/fop/StandardTestSuite.java		patch \| blob \| history
test/java/org/apache/fop/afp/fonts/CharactersetEncoderTest.java	[new file with mode: 0644]	patch \| blob