You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

CharactersetEncoder.java 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.afp.fonts;
  19. import java.io.IOException;
  20. import java.io.OutputStream;
  21. import java.nio.ByteBuffer;
  22. import java.nio.CharBuffer;
  23. import java.nio.charset.CharacterCodingException;
  24. import java.nio.charset.Charset;
  25. import java.nio.charset.CharsetEncoder;
  26. import java.nio.charset.CodingErrorAction;
  27. /**
  28. * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into a
  29. * specified format.
  30. */
  31. public abstract class CharactersetEncoder {
  32. private final CharsetEncoder encoder;
  33. private CharactersetEncoder(String encoding) {
  34. this.encoder = Charset.forName(encoding).newEncoder();
  35. this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  36. }
  37. /**
  38. * Tells whether or not this encoder can encode the given character.
  39. *
  40. * @param c the character
  41. * @return true if, and only if, this encoder can encode the given character
  42. * @throws IllegalStateException - If an encoding operation is already in progress
  43. */
  44. final boolean canEncode(char c) {
  45. return encoder.canEncode(c);
  46. }
  47. /**
  48. * Encodes a character sequence to a byte array.
  49. *
  50. * @param chars the character sequence
  51. * @return the encoded character sequence
  52. * @throws CharacterCodingException if the encoding operation fails
  53. */
  54. final EncodedChars encode(CharSequence chars) throws CharacterCodingException {
  55. ByteBuffer bb;
  56. // encode method is not thread safe
  57. synchronized (encoder) {
  58. bb = encoder.encode(CharBuffer.wrap(chars));
  59. }
  60. if (bb.hasArray()) {
  61. return getEncodedChars(bb.array(), bb.limit());
  62. } else {
  63. bb.rewind();
  64. byte[] bytes = new byte[bb.remaining()];
  65. bb.get(bytes);
  66. return getEncodedChars(bytes, bytes.length);
  67. }
  68. }
  69. abstract EncodedChars getEncodedChars(byte[] byteArray, int length);
  70. /**
  71. * Encodes <code>chars</code> into a format specified by <code>encoding</code>.
  72. *
  73. * @param chars the character sequence
  74. * @param encoding the encoding type
  75. * @return encoded data
  76. * @throws CharacterCodingException if encoding fails
  77. */
  78. public static EncodedChars encodeSBCS(CharSequence chars, String encoding)
  79. throws CharacterCodingException {
  80. CharactersetEncoder encoder = CharacterSetType.SINGLE_BYTE.getEncoder(encoding);
  81. return encoder.encode(chars);
  82. }
  83. /**
  84. * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character
  85. * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character
  86. * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators
  87. * are removed from the sequence of bytes. These are only used in Line Data.
  88. */
  89. static final class EbcdicDoubleByteLineDataEncoder extends CharactersetEncoder {
  90. EbcdicDoubleByteLineDataEncoder(String encoding) {
  91. super(encoding);
  92. }
  93. @Override
  94. EncodedChars getEncodedChars(byte[] byteArray, int length) {
  95. if (byteArray[0] == 0x0E && byteArray[length - 1] == 0x0F) {
  96. return new EncodedChars(byteArray, 1, length - 2, true);
  97. }
  98. return new EncodedChars(byteArray, true);
  99. }
  100. }
  101. /**
  102. * The default encoder is used for encoding IBM format SBCS (single byte character sets), this
  103. * the primary format for most Latin character sets. This can also be used for Unicode double-
  104. * byte character sets (DBCS).
  105. */
  106. static final class DefaultEncoder extends CharactersetEncoder {
  107. private final boolean isDBCS;
  108. DefaultEncoder(String encoding, boolean isDBCS) {
  109. super(encoding);
  110. this.isDBCS = isDBCS;
  111. }
  112. @Override
  113. EncodedChars getEncodedChars(byte[] byteArray, int length) {
  114. return new EncodedChars(byteArray, isDBCS);
  115. }
  116. }
  117. /**
  118. * A container for encoded character bytes
  119. */
  120. // CSOFF: FinalClass - disabling "final" modifier so that this class can be mocked
  121. public static class EncodedChars {
  122. private final byte[] bytes;
  123. private final int offset;
  124. private final int length;
  125. private final boolean isDBCS;
  126. private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) {
  127. if (offset < 0 || length < 0 || offset + length > bytes.length) {
  128. throw new IllegalArgumentException();
  129. }
  130. this.bytes = bytes;
  131. this.offset = offset;
  132. this.length = length;
  133. this.isDBCS = isDBCS;
  134. }
  135. private EncodedChars(byte[] bytes, boolean isDBCS) {
  136. this(bytes, 0, bytes.length, isDBCS);
  137. }
  138. /**
  139. * write <code>length</code> bytes from <code>offset</code> to the output stream
  140. *
  141. * @param out output to write the bytes to
  142. * @param offset the offset where to write
  143. * @param length the length to write
  144. * @throws IOException if an I/O error occurs
  145. */
  146. public void writeTo(OutputStream out, int offset, int length) throws IOException {
  147. if (offset < 0 || length < 0 || offset + length > bytes.length) {
  148. throw new IllegalArgumentException();
  149. }
  150. out.write(bytes, this.offset + offset, length);
  151. }
  152. /**
  153. * The number of containing bytes.
  154. *
  155. * @return the length
  156. */
  157. public int getLength() {
  158. return length;
  159. }
  160. /**
  161. * Indicates whether or not the EncodedChars object wraps double byte characters.
  162. *
  163. * @return true if the wrapped characters are double byte (DBCSs)
  164. */
  165. public boolean isDBCS() {
  166. return isDBCS;
  167. }
  168. /**
  169. * The bytes
  170. *
  171. * @return the bytes
  172. */
  173. public byte[] getBytes() {
  174. // return copy just in case
  175. byte[] copy = new byte[bytes.length];
  176. System.arraycopy(bytes, 0, copy, 0, bytes.length);
  177. return copy;
  178. }
  179. }
  180. }