You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PDFToUnicodeCMap.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288
  /*
   * Licensed to the Apache Software Foundation (ASF) under one or more
   * contributor license agreements. See the NOTICE file distributed with
   * this work for additional information regarding copyright ownership.
   * The ASF licenses this file to You under the Apache License, Version 2.0
   * (the "License"); you may not use this file except in compliance with
   * the License. You may obtain a copy of the License at
   *
   * http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  /* $Id$ */
  18. package org.apache.fop.pdf;
  19. import java.io.IOException;
  20. import java.io.Writer;
  21. /**
  22. * Class representing ToUnicode CMaps.
  23. * Here are some documentation resources:
  24. * <ul>
  25. * <li>PDF Reference, Second Edition, Section 5.6.4, for general information
  26. * about CMaps in PDF Files.</li>
  27. * <li>PDF Reference, Second Edition, Section 5.9, for specific information
  28. * about ToUnicodeCMaps in PDF Files.</li>
  29. * <li>
  30. * <a href="http://partners.adobe.com/asn/developer/pdfs/tn/5411.ToUnicode.pdf">
  31. * Adobe Technical Note #5411, "ToUnicode Mapping File Tutorial"</a>.
  32. * </ul>
  33. */
  34. public class PDFToUnicodeCMap extends PDFCMap {
  35. /**
  36. * The array of Unicode characters ordered by character code
  37. * (maps from character code to Unicode code point).
  38. */
  39. protected char[] unicodeCharMap;
  40. /**
  41. * Constructor.
  42. *
  43. * @param unicodeCharMap An array of Unicode characters ordered by character code
  44. * (maps from character code to Unicode code point)
  45. * @param name One of the registered names found in Table 5.14 in PDF
  46. * Reference, Second Edition.
  47. * @param sysInfo The attributes of the character collection of the CIDFont.
  48. */
  49. public PDFToUnicodeCMap(char[] unicodeCharMap, String name, PDFCIDSystemInfo sysInfo) {
  50. super(name, sysInfo);
  51. this.unicodeCharMap = unicodeCharMap;
  52. }
  53. /** {@inheritDoc} */
  54. protected CMapBuilder createCMapBuilder(Writer writer) {
  55. return new ToUnicodeCMapBuilder(writer);
  56. }
  57. class ToUnicodeCMapBuilder extends CMapBuilder {
  58. public ToUnicodeCMapBuilder(Writer writer) {
  59. super(writer, null);
  60. }
  61. /**
  62. * Writes the CMap to a Writer.
  63. * @param writer the writer
  64. * @throws IOException if an I/O error occurs
  65. */
  66. public void writeCMap() throws IOException {
  67. writeCIDInit();
  68. writeCIDSystemInfo("Adobe", "UCS", 0);
  69. writeName("Adobe-Identity-UCS");
  70. writeType("2");
  71. writeCodeSpaceRange();
  72. writeBFEntries();
  73. writeWrapUp();
  74. }
  75. /**
  76. * Writes the character mappings for this font.
  77. * @param p StingBuffer to write to
  78. */
  79. protected void writeBFEntries() throws IOException {
  80. if (unicodeCharMap != null) {
  81. writeBFCharEntries(unicodeCharMap);
  82. writeBFRangeEntries(unicodeCharMap);
  83. }
  84. }
  85. /**
  86. * Writes the entries for single characters of a base font (only characters which cannot be
  87. * expressed as part of a character range).
  88. * @param p StringBuffer to write to
  89. * @param charArray all the characters to map
  90. * @throws IOException
  91. */
  92. protected void writeBFCharEntries(char[] charArray) throws IOException {
  93. int totalEntries = 0;
  94. for (int i = 0; i < charArray.length; i++) {
  95. if (!partOfRange(charArray, i)) {
  96. totalEntries++;
  97. }
  98. }
  99. if (totalEntries < 1) {
  100. return;
  101. }
  102. int remainingEntries = totalEntries;
  103. int charIndex = 0;
  104. do {
  105. /* Limited to 100 entries in each section */
  106. int entriesThisSection = Math.min(remainingEntries, 100);
  107. writer.write(entriesThisSection + " beginbfchar\n");
  108. for (int i = 0; i < entriesThisSection; i++) {
  109. /* Go to the next char not in a range */
  110. while (partOfRange(charArray, charIndex)) {
  111. charIndex++;
  112. }
  113. writer.write("<" + padHexString(Integer.toHexString(charIndex), 4) + "> ");
  114. writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
  115. + ">\n");
  116. charIndex++;
  117. }
  118. remainingEntries -= entriesThisSection;
  119. writer.write("endbfchar\n");
  120. } while (remainingEntries > 0);
  121. }
  122. /**
  123. * Writes the entries for character ranges for a base font.
  124. * @param p StringBuffer to write to
  125. * @param charArray all the characters to map
  126. * @throws IOException
  127. */
  128. protected void writeBFRangeEntries(char[] charArray) throws IOException {
  129. int totalEntries = 0;
  130. for (int i = 0; i < charArray.length; i++) {
  131. if (startOfRange(charArray, i)) {
  132. totalEntries++;
  133. }
  134. }
  135. if (totalEntries < 1) {
  136. return;
  137. }
  138. int remainingEntries = totalEntries;
  139. int charIndex = 0;
  140. do {
  141. /* Limited to 100 entries in each section */
  142. int entriesThisSection = Math.min(remainingEntries, 100);
  143. writer.write(entriesThisSection + " beginbfrange\n");
  144. for (int i = 0; i < entriesThisSection; i++) {
  145. /* Go to the next start of a range */
  146. while (!startOfRange(charArray, charIndex)) {
  147. charIndex++;
  148. }
  149. writer.write("<" + padHexString(Integer.toHexString(charIndex), 4) + "> ");
  150. writer.write("<"
  151. + padHexString(Integer.toHexString(endOfRange(charArray, charIndex)), 4)
  152. + "> ");
  153. writer.write("<" + padHexString(Integer.toHexString(charArray[charIndex]), 4)
  154. + ">\n");
  155. charIndex++;
  156. }
  157. remainingEntries -= entriesThisSection;
  158. writer.write("endbfrange\n");
  159. } while (remainingEntries > 0);
  160. }
  161. /**
  162. * Find the end of the current range.
  163. * @param charArray The array which is being tested.
  164. * @param startOfRange The index to the array element that is the start of
  165. * the range.
  166. * @return The index to the element that is the end of the range.
  167. */
  168. private int endOfRange(char[] charArray, int startOfRange) {
  169. int i = startOfRange;
  170. while (i < charArray.length - 1 && sameRangeEntryAsNext(charArray, i)) {
  171. i++;
  172. }
  173. return i;
  174. }
  175. /**
  176. * Determine whether this array element should be part of a bfchar entry or
  177. * a bfrange entry.
  178. * @param charArray The array to be tested.
  179. * @param arrayIndex The index to the array element to be tested.
  180. * @return True if this array element should be included in a range.
  181. */
  182. private boolean partOfRange(char[] charArray, int arrayIndex) {
  183. if (charArray.length < 2) {
  184. return false;
  185. }
  186. if (arrayIndex == 0) {
  187. return sameRangeEntryAsNext(charArray, 0);
  188. }
  189. if (arrayIndex == charArray.length - 1) {
  190. return sameRangeEntryAsNext(charArray, arrayIndex - 1);
  191. }
  192. if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) {
  193. return true;
  194. }
  195. if (sameRangeEntryAsNext(charArray, arrayIndex)) {
  196. return true;
  197. }
  198. return false;
  199. }
  200. /**
  201. * Determine whether two bytes can be written in the same bfrange entry.
  202. * @param charArray The array to be tested.
  203. * @param firstItem The first of the two items in the array to be tested.
  204. * The second item is firstItem + 1.
  205. * @return True if both 1) the next item in the array is sequential with
  206. * this one, and 2) the first byte of the character in the first position
  207. * is equal to the first byte of the character in the second position.
  208. */
  209. private boolean sameRangeEntryAsNext(char[] charArray, int firstItem) {
  210. if (charArray[firstItem] + 1 != charArray[firstItem + 1]) {
  211. return false;
  212. }
  213. if (firstItem / 256 != (firstItem + 1) / 256) {
  214. return false;
  215. }
  216. return true;
  217. }
  218. /**
  219. * Determine whether this array element should be the start of a bfrange
  220. * entry.
  221. * @param charArray The array to be tested.
  222. * @param arrayIndex The index to the array element to be tested.
  223. * @return True if this array element is the beginning of a range.
  224. */
  225. private boolean startOfRange(char[] charArray, int arrayIndex) {
  226. // Can't be the start of a range if not part of a range.
  227. if (!partOfRange(charArray, arrayIndex)) {
  228. return false;
  229. }
  230. // If first element in the array, must be start of a range
  231. if (arrayIndex == 0) {
  232. return true;
  233. }
  234. // If last element in the array, cannot be start of a range
  235. if (arrayIndex == charArray.length - 1) {
  236. return false;
  237. }
  238. /*
  239. * If part of same range as the previous element is, cannot be start
  240. * of range.
  241. */
  242. if (sameRangeEntryAsNext(charArray, arrayIndex - 1)) {
  243. return false;
  244. }
  245. // Otherwise, this is start of a range.
  246. return true;
  247. }
  248. /**
  249. * Prepends the input string with a sufficient number of "0" characters to
  250. * get the returned string to be numChars length.
  251. * @param input The input string.
  252. * @param numChars The minimum characters in the output string.
  253. * @return The padded string.
  254. */
  255. private String padHexString(String input, int numChars) {
  256. int length = input.length();
  257. if (length >= numChars) {
  258. return input;
  259. }
  260. StringBuffer returnString = new StringBuffer();
  261. for (int i = 1; i <= numChars - length; i++) {
  262. returnString.append("0");
  263. }
  264. returnString.append(input);
  265. return returnString.toString();
  266. }
  267. }
  268. }