/* * Licensed to the Apache Software Foundation (ASF) under one or more * contributor license agreements. See the NOTICE file distributed with * this work for additional information regarding copyright ownership. * The ASF licenses this file to You under the Apache License, Version 2.0 * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. */ /* $Id$ */ package org.apache.fop.afp.fonts; import java.io.IOException; import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.CharBuffer; import java.nio.charset.CharacterCodingException; import java.nio.charset.Charset; import java.nio.charset.CharsetEncoder; import java.nio.charset.CodingErrorAction; /** * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a * specified format. */ public abstract class CharactersetEncoder { private final CharsetEncoder encoder; private CharactersetEncoder(String encoding) { this.encoder = Charset.forName(encoding).newEncoder(); this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE); } /** * Tells whether or not this encoder can encode the given character. * * @param c the character * @return true if, and only if, this encoder can encode the given character * @throws IllegalStateException - If an encoding operation is already in progress */ final boolean canEncode(char c) { return encoder.canEncode(c); } /** * Encodes a character sequence to a byte array. * * @param chars the character sequence * @return the encoded character sequence * @throws CharacterCodingException if the encoding operation fails */ final EncodedChars encode(CharSequence chars) throws CharacterCodingException { ByteBuffer bb; // encode method is not thread safe synchronized (encoder) { bb = encoder.encode(CharBuffer.wrap(chars)); } if (bb.hasArray()) { return getEncodedChars(bb.array(), bb.limit()); } else { bb.rewind(); byte[] bytes = new byte[bb.remaining()]; bb.get(bytes); return getEncodedChars(bytes, bytes.length); } } abstract EncodedChars getEncodedChars(byte[] byteArray, int length); /** * Encodes chars into a format specified by encoding. * * @param chars the character sequence * @param encoding the encoding type * @return encoded data * @throws CharacterCodingException if encoding fails */ public static EncodedChars encodeSBCS(CharSequence chars, String encoding) throws CharacterCodingException { CharactersetEncoder encoder = CharacterSetType.SINGLE_BYTE.getEncoder(encoding); return encoder.encode(chars); } /** * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators * are removed from the sequence of bytes. These are only used in Line Data. */ static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder { EbcdicDoubleByteLineDataEncoder(String encoding) { super(encoding); } @Override EncodedChars getEncodedChars(byte[] byteArray, int length) { if (byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) { return new EncodedChars(byteArray, 1, length - 2, true); } return new EncodedChars(byteArray, true); } } /** * The default encoder is used for encoding IBM format SBCS (single byte character sets), this * the primary format for most Latin character sets. This can also be used for Unicode double- * byte character sets (DBCS). */ static final class DefaultEncoder extends CharactersetEncoder { private final boolean isDBCS; DefaultEncoder(String encoding, boolean isDBCS) { super(encoding); this.isDBCS = isDBCS; } @Override EncodedChars getEncodedChars(byte[] byteArray, int length) { return new EncodedChars(byteArray, isDBCS); } } /** * A container for encoded character bytes */ // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked public static class EncodedChars { private final byte[] bytes; private final int offset; private final int length; private final boolean isDBCS; private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) { if (offset < 0 || length < 0 || offset + length > bytes.length) { throw new IllegalArgumentException(); } this.bytes = bytes; this.offset = offset; this.length = length; this.isDBCS = isDBCS; } private EncodedChars(byte[] bytes, boolean isDBCS) { this(bytes, 0, bytes.length, isDBCS); } /** * write length bytes from offset to the output stream * * @param out output to write the bytes to * @param offset the offset where to write * @param length the length to write * @throws IOException if an I/O error occurs */ public void writeTo(OutputStream out, int offset, int length) throws IOException { if (offset < 0 || length < 0 || offset + length > bytes.length) { throw new IllegalArgumentException(); } out.write(bytes, this.offset + offset, length); } /** * The number of containing bytes. * * @return the length */ public int getLength() { return length; } /** * Indicates whether or not the EncodedChars object wraps double byte characters. * * @return true if the wrapped characters are double byte (DBCSs) */ public boolean isDBCS() { return isDBCS; } /** * The bytes * * @return the bytes */ public byte[] getBytes() { // return copy just in case byte[] copy = new byte[bytes.length]; System.arraycopy(bytes, 0, copy, 0, bytes.length); return copy; } } }