Você não pode selecionar mais de 25 tópicos Os tópicos devem começar com uma letra ou um número, podem incluir traços ('-') e podem ter até 35 caracteres.

CharactersetEncoder.java 7.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.afp.fonts;
  19. import java.io.IOException;
  20. import java.io.OutputStream;
  21. import java.nio.ByteBuffer;
  22. import java.nio.CharBuffer;
  23. import java.nio.charset.CharacterCodingException;
  24. import java.nio.charset.Charset;
  25. import java.nio.charset.CharsetEncoder;
  26. import java.nio.charset.CodingErrorAction;
  /**
   * An abstraction that wraps the encoding mechanism for encoding a Unicode character sequence into
   * a specified format.
   *
   * <p>The wrapped {@link CharsetEncoder} is stateful; every use of it in this class is serialized
   * by synchronizing on the encoder instance.
   */
  public abstract class CharactersetEncoder {

      private final CharsetEncoder encoder;

      private CharactersetEncoder(String encoding) {
          this.encoder = Charset.forName(encoding).newEncoder();
          // Unmappable characters are substituted with the charset's replacement bytes
          // instead of failing the whole encoding operation.
          this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
      }

      /**
       * Tells whether or not this encoder can encode the given character.
       *
       * @param c the character
       * @return true if, and only if, this encoder can encode the given character
       * @throws IllegalStateException - If an encoding operation is already in progress
       */
      final boolean canEncode(char c) {
          // canEncode may change the encoder's state, so take the same lock as encode().
          synchronized (encoder) {
              return encoder.canEncode(c);
          }
      }

      /**
       * Encodes a character sequence to a byte array.
       *
       * @param chars the character sequence
       * @return the encoded character sequence
       * @throws CharacterCodingException if the encoding operation fails
       */
      final EncodedChars encode(CharSequence chars) throws CharacterCodingException {
          ByteBuffer bb;
          // encode method is not thread safe
          synchronized (encoder) {
              bb = encoder.encode(CharBuffer.wrap(chars));
          }
          int encodedLength = bb.remaining();
          // The backing array can only be handed over as-is when it holds exactly the encoded
          // bytes; the encoder may have over-allocated it, leaving unused trailing bytes.
          if (bb.hasArray() && bb.arrayOffset() == 0 && bb.position() == 0
                  && bb.array().length == encodedLength) {
              return getEncodedChars(bb.array(), encodedLength);
          }
          byte[] bytes = new byte[encodedLength];
          bb.get(bytes);
          return getEncodedChars(bytes, bytes.length);
      }

      /**
       * Wraps raw encoder output into an {@link EncodedChars} container.
       *
       * @param byteArray the array holding the encoded bytes, starting at index 0
       * @param length the number of valid bytes in <code>byteArray</code>
       * @return the container for the encoded bytes
       */
      abstract EncodedChars getEncodedChars(byte[] byteArray, int length);

      /**
       * Encodes <code>chars</code> into a format specified by <code>encoding</code>.
       *
       * @param chars the character sequence
       * @param encoding the encoding type
       * @param isEDBCS if this encoding represents a double-byte character set
       * @return encoded data
       * @throws CharacterCodingException if encoding fails
       */
      public static EncodedChars encodeSBCS(CharSequence chars, String encoding, boolean isEDBCS)
              throws CharacterCodingException {
          CharactersetEncoder encoder = newInstance(encoding, isEDBCS);
          return encoder.encode(chars);
      }

      /**
       * The EBCDIC double byte encoder is used for encoding IBM format DBCS (double byte character
       * sets) with an EBCDIC code-page. Given a double byte EBCDIC code page and a Unicode character
       * sequence it will return its EBCDIC code-point, however, the "Shift In - Shift Out" operators
       * are removed from the sequence of bytes. These are only used in Line Data.
       */
      private static final class EbcdicDoubleByteEncoder extends CharactersetEncoder {

          /** Leading control byte that is stripped from the encoded sequence. */
          private static final byte SHIFT_OUT = 0x0E;

          /** Trailing control byte that is stripped from the encoded sequence. */
          private static final byte SHIFT_IN = 0x0F;

          private EbcdicDoubleByteEncoder(String encoding) {
              super(encoding);
          }

          @Override
          EncodedChars getEncodedChars(byte[] byteArray, int length) {
              // Both shift bytes must be present, which needs at least two bytes; this also
              // keeps empty input from indexing into an empty array.
              if (length >= 2 && byteArray[0] == SHIFT_OUT && byteArray[length - 1] == SHIFT_IN) {
                  return new EncodedChars(byteArray, 1, length - 2, true);
              }
              return new EncodedChars(byteArray, 0, length, true);
          }
      }

      /**
       * The default encoder is used for encoding IBM format SBCS (single byte character sets), this
       * is the primary format for most Latin character sets. This can also be used for Unicode
       * double-byte character sets (DBCS).
       */
      private static final class DefaultEncoder extends CharactersetEncoder {

          private DefaultEncoder(String encoding) {
              super(encoding);
          }

          @Override
          EncodedChars getEncodedChars(byte[] byteArray, int length) {
              // Honour the reported length rather than the capacity of the array.
              return new EncodedChars(byteArray, 0, length, false);
          }
      }

      /**
       * Returns a new instance of a {@link CharactersetEncoder}.
       *
       * @param encoding the encoding for the underlying character encoder
       * @param isEbcdicDBCS whether or not this wraps a double-byte EBCDIC code page.
       * @return the CharactersetEncoder
       */
      static CharactersetEncoder newInstance(String encoding, boolean isEbcdicDBCS) {
          if (isEbcdicDBCS) {
              return new EbcdicDoubleByteEncoder(encoding);
          } else {
              return new DefaultEncoder(encoding);
          }
      }

      /**
       * A container for encoded character bytes. It exposes the window
       * <code>[offset, offset + length)</code> of a backing byte array.
       */
      public static class EncodedChars {

          private final byte[] bytes;
          private final int offset;
          private final int length;
          private final boolean isDBCS;

          private EncodedChars(byte[] bytes, int offset, int length, boolean isDBCS) {
              // Written as a subtraction so that offset + length cannot overflow.
              if (offset < 0 || length < 0 || offset > bytes.length - length) {
                  throw new IllegalArgumentException("Invalid range: offset=" + offset
                          + ", length=" + length + ", array length=" + bytes.length);
              }
              this.bytes = bytes;
              this.offset = offset;
              this.length = length;
              this.isDBCS = isDBCS;
          }

          private EncodedChars(byte[] bytes, boolean isDBCS) {
              this(bytes, 0, bytes.length, isDBCS);
          }

          /**
           * write <code>length</code> bytes from <code>offset</code> to the output stream
           *
           * @param out output to write the bytes to
           * @param offset the offset, relative to the start of the encoded characters, of the
           *        first byte to write
           * @param length the length to write
           * @throws IOException if an I/O error occurs
           * @throws IllegalArgumentException if the requested range is not within
           *         <code>[0, getLength()]</code>
           */
          public void writeTo(OutputStream out, int offset, int length) throws IOException {
              // Validate against the logical length, not the backing array, so that bytes
              // outside the exposed window can never be written.
              if (offset < 0 || length < 0 || offset > this.length - length) {
                  throw new IllegalArgumentException("Invalid range: offset=" + offset
                          + ", length=" + length + ", available=" + this.length);
              }
              out.write(bytes, this.offset + offset, length);
          }

          /**
           * The number of containing bytes.
           *
           * @return the length
           */
          public int getLength() {
              return length;
          }

          /**
           * Indicates whether or not the EncodedChars object wraps double byte characters.
           *
           * @return true if the wrapped characters are double byte (DBCSs)
           */
          public boolean isDBCS() {
              return isDBCS;
          }

          /**
           * The bytes
           *
           * @return a copy of the encoded bytes; its length equals {@link #getLength()}
           */
          public byte[] getBytes() {
              // return copy just in case
              byte[] copy = new byte[length];
              System.arraycopy(bytes, offset, copy, 0, length);
              return copy;
          }
      }
  }