You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

CharacterSet.java 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.afp.fonts;
  19. import java.io.File;
  20. import java.io.UnsupportedEncodingException;
  21. import java.net.URI;
  22. import java.nio.ByteBuffer;
  23. import java.nio.CharBuffer;
  24. import java.nio.charset.CharacterCodingException;
  25. import java.nio.charset.Charset;
  26. import java.nio.charset.CharsetEncoder;
  27. import java.nio.charset.CodingErrorAction;
  28. import java.nio.charset.UnsupportedCharsetException;
  29. import java.util.Map;
  30. import org.apache.commons.logging.Log;
  31. import org.apache.commons.logging.LogFactory;
  32. import org.apache.fop.afp.AFPConstants;
  33. import org.apache.fop.afp.util.ResourceAccessor;
  34. import org.apache.fop.afp.util.SimpleResourceAccessor;
  35. import org.apache.fop.afp.util.StringUtils;
  36. /**
  37. * The IBM Font Object Content Architecture (FOCA) supports presentation
  38. * of character shapes by defining their characteristics, which include
  39. * font description information for identifying the characters, font metric
  40. * information for positioning the characters, and character shape information
  41. * for presenting the character images.
  42. * <p/>
  43. * Presenting a graphic character on a presentation surface requires
  44. * information on the rotation and position of character on the physical
  45. * or logical page.
  46. * <p/>
  47. * This class proivdes font metric information for a particular font
  48. * as identified by the character set name. This information is obtained
  49. * directly from the AFP font files which must be installed in the path
  50. * specified in the afp-fonts xml definition file.
  51. * <p/>
  52. */
  53. public class CharacterSet {
  54. /** Static logging instance */
  55. protected static final Log LOG = LogFactory.getLog(CharacterSet.class.getName());
  56. /** default codepage */
  57. public static final String DEFAULT_CODEPAGE = "T1V10500";
  58. /** default encoding */
  59. public static final String DEFAULT_ENCODING = "Cp500";
  60. private static final int MAX_NAME_LEN = 8;
  61. /** The code page to which the character set relates */
  62. protected String codePage;
  63. /** The encoding used for the code page */
  64. protected String encoding;
  65. /** The charset encoder corresponding to this encoding */
  66. private CharsetEncoder encoder;
  67. /** The character set relating to the font */
  68. protected String name;
  69. /** The path to the installed fonts */
  70. private ResourceAccessor accessor;
  71. /** The current orientation (currently only 0 is supported by FOP) */
  72. private final String currentOrientation = "0";
  73. /** The collection of objects for each orientation */
  74. private Map characterSetOrientations = null;
  75. /** The nominal vertical size (in millipoints) for bitmap fonts. 0 for outline fonts. */
  76. private int nominalVerticalSize = 0;
  77. /**
  78. * Constructor for the CharacterSetMetric object, the character set is used
  79. * to load the font information from the actual AFP font.
  80. *
  81. * @param codePage the code page identifier
  82. * @param encoding the encoding of the font
  83. * @param name the character set name
  84. * @param path the path to the installed afp fonts
  85. * @deprecated Please use
  86. * {@link #CharacterSet(String, String, String, ResourceAccessor)} instead.
  87. */
  88. public CharacterSet(String codePage, String encoding, String name, String path) {
  89. this(codePage, encoding, name,
  90. new SimpleResourceAccessor(path != null ? new File(path) : null));
  91. }
  92. /**
  93. * Constructor for the CharacterSetMetric object, the character set is used
  94. * to load the font information from the actual AFP font.
  95. *
  96. * @param codePage the code page identifier
  97. * @param encoding the encoding of the font
  98. * @param name the character set name
  99. * @param accessor the resource accessor to load resource with
  100. */
  101. CharacterSet(String codePage, String encoding, String name, ResourceAccessor accessor) {
  102. if (name.length() > MAX_NAME_LEN) {
  103. String msg = "Character set name '" + name + "' must be a maximum of "
  104. + MAX_NAME_LEN + " characters";
  105. LOG.error("Constructor:: " + msg);
  106. throw new IllegalArgumentException(msg);
  107. }
  108. if (name.length() < MAX_NAME_LEN) {
  109. this.name = StringUtils.rpad(name, ' ', MAX_NAME_LEN);
  110. } else {
  111. this.name = name;
  112. }
  113. this.codePage = codePage;
  114. this.encoding = encoding;
  115. try {
  116. this.encoder = Charset.forName(encoding).newEncoder();
  117. this.encoder.onUnmappableCharacter(CodingErrorAction.REPLACE);
  118. } catch (UnsupportedCharsetException uce) {
  119. //No nio-capable encoder available
  120. //This may happen with "Cp500" on Sun Java 1.4.2
  121. this.encoder = null;
  122. }
  123. this.accessor = accessor;
  124. this.characterSetOrientations = new java.util.HashMap(4);
  125. }
  126. /**
  127. * Add character set metric information for the different orientations
  128. *
  129. * @param cso the metrics for the orientation
  130. */
  131. public void addCharacterSetOrientation(CharacterSetOrientation cso) {
  132. characterSetOrientations.put(
  133. String.valueOf(cso.getOrientation()),
  134. cso);
  135. }
  136. /**
  137. * Sets the nominal vertical size of the font in the case of bitmap fonts.
  138. * @param nominalVerticalSize the nominal vertical size (in millipoints)
  139. */
  140. public void setNominalVerticalSize(int nominalVerticalSize) {
  141. this.nominalVerticalSize = nominalVerticalSize;
  142. }
  143. /**
  144. * Returns the nominal vertical size of the font in the case of bitmap fonts. For outline fonts,
  145. * zero is returned, because these are scalable fonts.
  146. * @return the nominal vertical size (in millipoints) for bitmap fonts, or 0 for outline fonts.
  147. */
  148. public int getNominalVerticalSize() {
  149. return this.nominalVerticalSize;
  150. }
  151. /**
  152. * Ascender height is the distance from the character baseline to the
  153. * top of the character box. A negative ascender height signifies that
  154. * all of the graphic character is below the character baseline. For
  155. * a character rotation other than 0, ascender height loses its
  156. * meaning when the character is lying on its side or is upside down
  157. * with respect to normal viewing orientation. For the general case,
  158. * Ascender Height is the characters most positive y-axis value.
  159. * For bounded character boxes, for a given character having an
  160. * ascender, ascender height and baseline offset are equal.
  161. *
  162. * @return the ascender value in millipoints
  163. */
  164. public int getAscender() {
  165. return getCharacterSetOrientation().getAscender();
  166. }
  167. /**
  168. * Cap height is the average height of the uppercase characters in
  169. * a font. This value is specified by the designer of a font and is
  170. * usually the height of the uppercase M.
  171. *
  172. * @return the cap height value in millipoints
  173. */
  174. public int getCapHeight() {
  175. return getCharacterSetOrientation().getCapHeight();
  176. }
  177. /**
  178. * Descender depth is the distance from the character baseline to
  179. * the bottom of a character box. A negative descender depth signifies
  180. * that all of the graphic character is above the character baseline.
  181. *
  182. * @return the descender value in millipoints
  183. */
  184. public int getDescender() {
  185. return getCharacterSetOrientation().getDescender();
  186. }
  187. /**
  188. * Returns the first character in the character set
  189. *
  190. * @return the first character in the character set (Unicode codepoint)
  191. */
  192. public char getFirstChar() {
  193. return getCharacterSetOrientation().getFirstChar();
  194. }
  195. /**
  196. * Returns the last character in the character set
  197. *
  198. * @return the last character in the character set (Unicode codepoint)
  199. */
  200. public char getLastChar() {
  201. return getCharacterSetOrientation().getLastChar();
  202. }
  203. /**
  204. * Returns the resource accessor to load the font resources with.
  205. * @return the resource accessor to load the font resources with
  206. */
  207. public ResourceAccessor getResourceAccessor() {
  208. return this.accessor;
  209. }
  210. /**
  211. * Get the width (in 1/1000ths of a point size) of all characters
  212. *
  213. * @return the widths of all characters
  214. */
  215. public int[] getWidths() {
  216. return getCharacterSetOrientation().getWidths();
  217. }
  218. /**
  219. * XHeight refers to the height of the lower case letters above the baseline.
  220. *
  221. * @return the typical height of characters
  222. */
  223. public int getXHeight() {
  224. return getCharacterSetOrientation().getXHeight();
  225. }
  226. /**
  227. * Get the width (in 1/1000ths of a point size) of the character
  228. * identified by the parameter passed.
  229. *
  230. * @param character the Unicode character from which the width will be calculated
  231. * @return the width of the character
  232. */
  233. public int getWidth(char character) {
  234. return getCharacterSetOrientation().getWidth(character);
  235. }
  236. /**
  237. * Returns the AFP character set identifier
  238. *
  239. * @return the AFP character set identifier
  240. */
  241. public String getName() {
  242. return name;
  243. }
  244. /**
  245. * Returns the AFP character set identifier as a byte array
  246. *
  247. * @return the AFP character set identifier as a byte array
  248. */
  249. public byte[] getNameBytes() {
  250. byte[] nameBytes = null;
  251. try {
  252. nameBytes = name.getBytes(AFPConstants.EBCIDIC_ENCODING);
  253. } catch (UnsupportedEncodingException usee) {
  254. nameBytes = name.getBytes();
  255. LOG.warn(
  256. "UnsupportedEncodingException translating the name " + name);
  257. }
  258. return nameBytes;
  259. }
  260. /**
  261. * Returns the AFP code page identifier
  262. *
  263. * @return the AFP code page identifier
  264. */
  265. public String getCodePage() {
  266. return codePage;
  267. }
  268. /**
  269. * Returns the AFP code page encoding
  270. *
  271. * @return the AFP code page encoding
  272. */
  273. public String getEncoding() {
  274. return encoding;
  275. }
  276. /**
  277. * Helper method to return the current CharacterSetOrientation, note
  278. * that FOP does not yet implement the "reference-orientation"
  279. * attribute therefore we always use the orientation zero degrees,
  280. * Other orientation information is captured for use by a future
  281. * implementation (whenever FOP implement the mechanism). This is also
  282. * the case for landscape prints which use an orientation of 270 degrees,
  283. * in 99.9% of cases the font metrics will be the same as the 0 degrees
  284. * therefore the implementation currently will always use 0 degrees.
  285. *
  286. * @return characterSetOrentation The current orientation metrics.
  287. */
  288. private CharacterSetOrientation getCharacterSetOrientation() {
  289. CharacterSetOrientation c
  290. = (CharacterSetOrientation) characterSetOrientations.get(currentOrientation);
  291. return c;
  292. }
  293. /**
  294. * Indicates whether the given char in the character set.
  295. * @param c the character to check
  296. * @return true if the character is in the character set
  297. */
  298. public boolean hasChar(char c) {
  299. if (encoder != null) {
  300. return encoder.canEncode(c);
  301. } else {
  302. //Sun Java 1.4.2 compatibility
  303. return true;
  304. }
  305. }
  306. /**
  307. * Encodes a character sequence to a byte array.
  308. * @param chars the characters
  309. * @return the encoded characters
  310. * @throws CharacterCodingException if the encoding operation fails
  311. */
  312. public byte[] encodeChars(CharSequence chars) throws CharacterCodingException {
  313. if (encoder != null) {
  314. ByteBuffer bb;
  315. // encode method is not thread safe
  316. synchronized (encoder) {
  317. bb = encoder.encode(CharBuffer.wrap(chars));
  318. }
  319. if (bb.hasArray()) {
  320. return bb.array();
  321. } else {
  322. bb.rewind();
  323. byte[] bytes = new byte[bb.remaining()];
  324. bb.get(bytes);
  325. return bytes;
  326. }
  327. } else {
  328. //Sun Java 1.4.2 compatibility
  329. byte[] bytes;
  330. try {
  331. bytes = chars.toString().getBytes(this.encoding);
  332. return bytes;
  333. } catch (UnsupportedEncodingException uee) {
  334. throw new UnsupportedOperationException(
  335. "Unsupported encoding: " + uee.getMessage());
  336. }
  337. }
  338. }
  339. /**
  340. * Map a Unicode character to a code point in the font.
  341. * The code tables are already converted to Unicode therefore
  342. * we can use the identity mapping.
  343. *
  344. * @param c the Unicode character to map
  345. * @return the mapped character
  346. */
  347. public char mapChar(char c) {
  348. //TODO This is not strictly correct but we'll let it be for the moment
  349. return c;
  350. }
  351. /**
  352. * Returns the increment for an space.
  353. * @return the space increment
  354. */
  355. public int getSpaceIncrement() {
  356. return getCharacterSetOrientation().getSpaceIncrement();
  357. }
  358. /**
  359. * Returns the increment for an em space.
  360. * @return the em space increment
  361. */
  362. public int getEmSpaceIncrement() {
  363. return getCharacterSetOrientation().getEmSpaceIncrement();
  364. }
  365. }