You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

CharacterSet.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.afp.fonts;
  19. import java.io.IOException;
  20. import java.io.UnsupportedEncodingException;
  21. import java.nio.ByteBuffer;
  22. import java.nio.CharBuffer;
  23. import java.nio.charset.CharacterCodingException;
  24. import java.nio.charset.Charset;
  25. import java.nio.charset.CharsetEncoder;
  26. import java.nio.charset.CodingErrorAction;
  27. import java.util.Map;
  28. import org.apache.commons.logging.Log;
  29. import org.apache.commons.logging.LogFactory;
  30. import org.apache.fop.afp.AFPConstants;
  31. import org.apache.fop.afp.util.StringUtils;
  32. /**
  33. * The IBM Font Object Content Architecture (FOCA) supports presentation
  34. * of character shapes by defining their characteristics, which include
  35. * font description information for identifying the characters, font metric
  36. * information for positioning the characters, and character shape information
  37. * for presenting the character images.
  38. * <p/>
  39. * Presenting a graphic character on a presentation surface requires
  40. * information on the rotation and position of character on the physical
  41. * or logical page.
  42. * <p/>
  43. * This class proivdes font metric information for a particular font
  44. * as identified by the character set name. This information is obtained
  45. * directly from the AFP font files which must be installed in the path
  46. * specified in the afp-fonts xml definition file.
  47. * <p/>
  48. */
  49. public class CharacterSet {
  50. /** Static logging instance */
  51. protected static final Log log = LogFactory.getLog(CharacterSet.class.getName());
  52. /** default codepage */
  53. public static final String DEFAULT_CODEPAGE = "T1V10500";
  54. /** default encoding */
  55. public static final String DEFAULT_ENCODING = "Cp500";
  56. private static final int MAX_NAME_LEN = 8;
  57. /** The code page to which the character set relates */
  58. protected String codePage;
  59. /** The encoding used for the code page */
  60. protected String encoding;
  61. /** The charset encoder corresponding to this encoding */
  62. private CharsetEncoder encoder;
  63. /** The character set relating to the font */
  64. protected String name;
  65. /** The path to the installed fonts */
  66. protected String path;
  67. /** Indicator as to whether to metrics have been loaded */
  68. private boolean isMetricsLoaded = false;
  69. /** The current orientation (currently only 0 is supported by FOP) */
  70. private final String currentOrientation = "0";
  71. /** The collection of objects for each orientation */
  72. private Map characterSetOrientations = null;
  73. /**
  74. * Constructor for the CharacterSetMetric object, the character set is used
  75. * to load the font information from the actual AFP font.
  76. *
  77. * @param codePage the code page identifier
  78. * @param encoding the encoding of the font
  79. * @param name the character set name
  80. * @param path the path to the installed afp fonts
  81. */
  82. public CharacterSet(String codePage, String encoding, String name, String path) {
  83. if (name.length() > MAX_NAME_LEN) {
  84. String msg = "Character set name '" + name + "' must be a maximum of "
  85. + MAX_NAME_LEN + " characters";
  86. log.error("Constructor:: " + msg);
  87. throw new IllegalArgumentException(msg);
  88. }
  89. if (name.length() < MAX_NAME_LEN) {
  90. this.name = StringUtils.rpad(name, ' ', MAX_NAME_LEN);
  91. } else {
  92. this.name = name;
  93. }
  94. this.codePage = codePage;
  95. this.encoding = encoding;
  96. this.encoder = Charset.forName(encoding).newEncoder();
  97. this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  98. this.path = path;
  99. this.characterSetOrientations = new java.util.HashMap(4);
  100. }
  101. /**
  102. * Add character set metric information for the different orientations
  103. *
  104. * @param cso the metrics for the orientation
  105. */
  106. public void addCharacterSetOrientation(CharacterSetOrientation cso) {
  107. characterSetOrientations.put(
  108. String.valueOf(cso.getOrientation()),
  109. cso);
  110. }
  111. /**
  112. * Ascender height is the distance from the character baseline to the
  113. * top of the character box. A negative ascender height signifies that
  114. * all of the graphic character is below the character baseline. For
  115. * a character rotation other than 0, ascender height loses its
  116. * meaning when the character is lying on its side or is upside down
  117. * with respect to normal viewing orientation. For the general case,
  118. * Ascender Height is the characters most positive y-axis value.
  119. * For bounded character boxes, for a given character having an
  120. * ascender, ascender height and baseline offset are equal.
  121. *
  122. * @return the ascender value in millipoints
  123. */
  124. public int getAscender() {
  125. load();
  126. return getCharacterSetOrientation().getAscender();
  127. }
  128. /**
  129. * Cap height is the average height of the uppercase characters in
  130. * a font. This value is specified by the designer of a font and is
  131. * usually the height of the uppercase M.
  132. *
  133. * @return the cap height value in millipoints
  134. */
  135. public int getCapHeight() {
  136. load();
  137. return getCharacterSetOrientation().getCapHeight();
  138. }
  139. /**
  140. * Descender depth is the distance from the character baseline to
  141. * the bottom of a character box. A negative descender depth signifies
  142. * that all of the graphic character is above the character baseline.
  143. *
  144. * @return the descender value in millipoints
  145. */
  146. public int getDescender() {
  147. load();
  148. return getCharacterSetOrientation().getDescender();
  149. }
  150. /**
  151. * Returns the first character in the character set
  152. *
  153. * @return the first character in the character set
  154. */
  155. public int getFirstChar() {
  156. load();
  157. return getCharacterSetOrientation().getFirstChar();
  158. }
  159. /**
  160. * Returns the last character in the character set
  161. *
  162. * @return the last character in the character set
  163. */
  164. public int getLastChar() {
  165. load();
  166. return getCharacterSetOrientation().getLastChar();
  167. }
  168. /**
  169. * Returns the path where the font resources are installed
  170. *
  171. * @return the path where the font resources are installed
  172. */
  173. public String getPath() {
  174. return path;
  175. }
  176. /**
  177. * Get the width (in 1/1000ths of a point size) of all characters
  178. *
  179. * @return the widths of all characters
  180. */
  181. public int[] getWidths() {
  182. load();
  183. return getCharacterSetOrientation().getWidths();
  184. }
  185. /**
  186. * XHeight refers to the height of the lower case letters above the baseline.
  187. *
  188. * @return the typical height of characters
  189. */
  190. public int getXHeight() {
  191. load();
  192. return getCharacterSetOrientation().getXHeight();
  193. }
  194. /**
  195. * Get the width (in 1/1000ths of a point size) of the character
  196. * identified by the parameter passed.
  197. *
  198. * @param character the character from which the width will be calculated
  199. * @return the width of the character
  200. */
  201. public int getWidth(int character) {
  202. load();
  203. return getCharacterSetOrientation().getWidth(character);
  204. }
  205. /**
  206. * Lazy creation of the character metrics, the afp font file will only
  207. * be processed on a method call requiring the metric information.
  208. */
  209. private void load() {
  210. if (!isMetricsLoaded) {
  211. AFPFontReader afpFontReader = new AFPFontReader();
  212. try {
  213. afpFontReader.loadCharacterSetMetric(this);
  214. isMetricsLoaded = true;
  215. } catch (IOException e) {
  216. String msg = "Failed to load the character set metrics for code page " + codePage;
  217. log.error(msg);
  218. throw new RuntimeException(e.getMessage());
  219. }
  220. }
  221. }
  222. /**
  223. * Returns the AFP character set identifier
  224. *
  225. * @return the AFP character set identifier
  226. */
  227. public String getName() {
  228. return name;
  229. }
  230. /**
  231. * Returns the AFP character set identifier as a byte array
  232. *
  233. * @return the AFP character set identifier as a byte array
  234. */
  235. public byte[] getNameBytes() {
  236. byte[] nameBytes = null;
  237. try {
  238. nameBytes = name.getBytes(AFPConstants.EBCIDIC_ENCODING);
  239. } catch (UnsupportedEncodingException usee) {
  240. nameBytes = name.getBytes();
  241. log.warn(
  242. "UnsupportedEncodingException translating the name " + name);
  243. }
  244. return nameBytes;
  245. }
  246. /**
  247. * Returns the AFP code page identifier
  248. *
  249. * @return the AFP code page identifier
  250. */
  251. public String getCodePage() {
  252. return codePage;
  253. }
  254. /**
  255. * Returns the AFP code page encoding
  256. *
  257. * @return the AFP code page encoding
  258. */
  259. public String getEncoding() {
  260. return encoding;
  261. }
  262. /**
  263. * Helper method to return the current CharacterSetOrientation, note
  264. * that FOP does not yet implement the "reference-orientation"
  265. * attribute therefore we always use the orientation zero degrees,
  266. * Other orientation information is captured for use by a future
  267. * implementation (whenever FOP implement the mechanism). This is also
  268. * the case for landscape prints which use an orientation of 270 degrees,
  269. * in 99.9% of cases the font metrics will be the same as the 0 degrees
  270. * therefore the implementation currently will always use 0 degrees.
  271. *
  272. * @return characterSetOrentation The current orientation metrics.
  273. */
  274. private CharacterSetOrientation getCharacterSetOrientation() {
  275. CharacterSetOrientation c
  276. = (CharacterSetOrientation) characterSetOrientations.get(currentOrientation);
  277. return c;
  278. }
  279. /**
  280. * Indicates whether the given char in the character set.
  281. * @param c the character to check
  282. * @return true if the character is in the character set
  283. */
  284. public boolean hasChar(char c) {
  285. return encoder.canEncode(c);
  286. }
  287. /**
  288. * Encodes a character sequence to a byte array.
  289. * @param chars the characters
  290. * @return the encoded characters
  291. * @throws CharacterCodingException if the encoding operation fails
  292. */
  293. public byte[] encodeChars(CharSequence chars) throws CharacterCodingException {
  294. ByteBuffer bb = encoder.encode(CharBuffer.wrap(chars));
  295. if (bb.hasArray()) {
  296. return bb.array();
  297. } else {
  298. bb.rewind();
  299. byte[] bytes = new byte[bb.remaining()];
  300. bb.get(bytes);
  301. return bytes;
  302. }
  303. }
  304. /**
  305. * Map a Unicode character to a code point in the font.
  306. * The code tables are already converted to Unicode therefore
  307. * we can use the identity mapping.
  308. *
  309. * @param c character to map
  310. * @return the mapped character
  311. */
  312. public char mapChar(char c) {
  313. //TODO This is not strictly correct but we'll let it be for the moment
  314. return c;
  315. }
  316. }