You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

PDFText.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.pdf;
  19. import java.io.ByteArrayOutputStream;
  20. import java.util.Locale;
  21. import org.apache.fop.util.CharUtilities;
  22. /**
  23. * This class represents a simple text object. It also contains some
  24. * utility methods for escaping and encoding text for output to PDF.
  25. */
  26. public class PDFText extends PDFObject {
  /** Upper-case hexadecimal digits; the array index is the numeric value of the digit (0-15). */
  27. private static final char[] DIGITS
  28. = {'0', '1', '2', '3', '4', '5', '6', '7',
  29. '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'};
  /** The text held by this object; no initial value is assigned here. */
  30. private String text;
  31. /**
  32. * Returns the text.
  33. * @return the text
  34. */
  35. public String getText() {
  36. return this.text;
  37. }
  38. /**
  39. * Sets the text.
  40. * @param text the text
  41. */
  42. public void setText(String text) {
  43. this.text = text;
  44. }
  45. /**
  46. * {@inheritDoc}
  47. */
  48. protected String toPDFString() {
  49. if (getText() == null) {
  50. throw new IllegalArgumentException(
  51. "The text of this PDFText must not be empty");
  52. }
  53. StringBuffer sb = new StringBuffer(64);
  54. sb.append("(");
  55. sb.append(escapeText(getText()));
  56. sb.append(")");
  57. return sb.toString();
  58. }
  59. /**
  60. * Escape text (see 4.4.1 in PDF 1.3 specs)
  61. * @param text the text to encode
  62. * @return encoded text
  63. */
  64. public static final String escapeText(final String text) {
  65. return escapeText(text, false);
  66. }
  67. /**
  68. * Escape text (see 4.4.1 in PDF 1.3 specs)
  69. * @param text the text to encode
  70. * @param forceHexMode true if the output should follow the hex encoding rules
  71. * @return encoded text
  72. */
  73. public static final String escapeText(final String text, boolean forceHexMode) {
  74. if (text != null && text.length() > 0) {
  75. boolean unicode = false;
  76. boolean hexMode = false;
  77. if (forceHexMode) {
  78. hexMode = true;
  79. } else {
  80. for (int i = 0, c = text.length(); i < c; i++) {
  81. if (text.charAt(i) >= 128) {
  82. unicode = true;
  83. hexMode = true;
  84. break;
  85. }
  86. }
  87. }
  88. if (hexMode) {
  89. final byte[] uniBytes;
  90. try {
  91. uniBytes = text.getBytes("UTF-16");
  92. } catch (java.io.UnsupportedEncodingException uee) {
  93. throw new RuntimeException("Incompatible VM", uee);
  94. }
  95. return toHex(uniBytes);
  96. } else {
  97. final StringBuffer result = new StringBuffer(text.length() * 2);
  98. result.append("(");
  99. final int l = text.length();
  100. if (unicode) {
  101. // byte order marker (0xfeff)
  102. result.append("\\376\\377");
  103. for (int i = 0; i < l; i++) {
  104. final char ch = text.charAt(i);
  105. final int high = (ch & 0xff00) >>> 8;
  106. final int low = ch & 0xff;
  107. result.append("\\");
  108. result.append(Integer.toOctalString(high));
  109. result.append("\\");
  110. result.append(Integer.toOctalString(low));
  111. }
  112. } else {
  113. for (int i = 0; i < l; i++) {
  114. final char ch = text.charAt(i);
  115. if (ch < 256) {
  116. escapeStringChar(ch, result);
  117. } else {
  118. throw new IllegalStateException(
  119. "Can only treat text in 8-bit ASCII/PDFEncoding");
  120. }
  121. }
  122. }
  123. result.append(")");
  124. return result.toString();
  125. }
  126. }
  127. return "()";
  128. }
  129. /**
  130. * Converts a byte array to a Hexadecimal String (3.2.3 in PDF 1.4 specs)
  131. * @param data the data to encode
  132. * @param brackets true if enclosing brackets should be included
  133. * @return String the resulting string
  134. */
  135. public static final String toHex(byte[] data, boolean brackets) {
  136. final StringBuffer sb = new StringBuffer(data.length * 2);
  137. if (brackets) {
  138. sb.append("<");
  139. }
  140. for (byte aData : data) {
  141. sb.append(DIGITS[(aData >>> 4) & 0x0F]);
  142. sb.append(DIGITS[aData & 0x0F]);
  143. }
  144. if (brackets) {
  145. sb.append(">");
  146. }
  147. return sb.toString();
  148. }
  149. /**
  150. * Converts a byte array to a Hexadecimal String (3.2.3 in PDF 1.4 specs)
  151. * @param data the data to encode
  152. * @return String the resulting string
  153. */
  154. public static final String toHex(byte[] data) {
  155. return toHex(data, true);
  156. }
  157. /**
  158. * Converts a String to UTF-16 (big endian).
  159. * @param text text to convert
  160. * @return byte[] UTF-16 stream
  161. */
  162. public static final byte[] toUTF16(String text) {
  163. try {
  164. return text.getBytes("UnicodeBig");
  165. } catch (java.io.UnsupportedEncodingException uee) {
  166. throw new RuntimeException("Incompatible VM", uee);
  167. }
  168. }
  169. /**
  170. * Convert a char to a multibyte hex representation
  171. * @param c character to encode
  172. * @return the encoded character
  173. */
  174. public static final String toUnicodeHex(char c) {
  175. final StringBuffer buf = new StringBuffer(4);
  176. final byte[] uniBytes;
  177. try {
  178. final char[] a = {c};
  179. uniBytes = new String(a).getBytes("UTF-16BE");
  180. } catch (java.io.UnsupportedEncodingException uee) {
  181. throw new RuntimeException("Incompatible VM", uee);
  182. }
  183. for (byte uniByte : uniBytes) {
  184. buf.append(DIGITS[(uniByte >>> 4) & 0x0F]);
  185. buf.append(DIGITS[uniByte & 0x0F]);
  186. }
  187. return buf.toString();
  188. }
  189. /**
  190. * Convert a char to a multibyte hex representation appending to string buffer.
  191. * The created string will be:
  192. * <ul>
  193. * <li>4-character string in case of non-BMP character</li>
  194. * <li>6-character string in case of BMP character</li>
  195. * </ul>
  196. * @param c character to encode
  197. * @param sb the string buffer to append output
  198. */
  199. public static final void toUnicodeHex(int c, StringBuffer sb) {
  200. if (CharUtilities.isBmpCodePoint(c)) {
  201. sb.append(Integer.toHexString(c + 0x10000).substring(1).toUpperCase(Locale.US));
  202. } else {
  203. sb.append(Integer.toHexString(c + 0x1000000).substring(1).toUpperCase(Locale.US));
  204. }
  205. }
  206. /**
  207. * Escaped a String as described in section 4.4 in the PDF 1.3 specs.
  208. * @param s String to escape
  209. * @return String the escaped String
  210. */
  211. public static final String escapeString(final String s) {
  212. if (s == null || s.length() == 0) {
  213. return "()";
  214. } else {
  215. final StringBuffer sb = new StringBuffer(64);
  216. sb.append("(");
  217. for (int i = 0; i < s.length(); i++) {
  218. final char c = s.charAt(i);
  219. escapeStringChar(c, sb);
  220. }
  221. sb.append(")");
  222. return sb.toString();
  223. }
  224. }
  225. /**
  226. * Escapes a character conforming to the rules established in the PostScript
  227. * Language Reference (Search for "Literal Text Strings").
  228. * @param c character to escape
  229. * @param target target StringBuffer to write the escaped character to
  230. */
  231. public static final void escapeStringChar(final char c, final StringBuffer target) {
  232. if (c > 127) {
  233. target.append("\\");
  234. target.append(Integer.toOctalString(c));
  235. } else {
  236. switch (c) {
  237. case '\n':
  238. target.append("\\n");
  239. break;
  240. case '\r':
  241. target.append("\\r");
  242. break;
  243. case '\t':
  244. target.append("\\t");
  245. break;
  246. case '\b':
  247. target.append("\\b");
  248. break;
  249. case '\f':
  250. target.append("\\f");
  251. break;
  252. case '\\':
  253. target.append("\\\\");
  254. break;
  255. case '(':
  256. target.append("\\(");
  257. break;
  258. case ')':
  259. target.append("\\)");
  260. break;
  261. default:
  262. target.append(c);
  263. }
  264. }
  265. }
  266. /**
  267. * Escape a byte array for output to PDF (Used for encrypted strings)
  268. * @param data data to encode
  269. * @return byte[] encoded data
  270. */
  271. public static final byte[] escapeByteArray(byte[] data) {
  272. ByteArrayOutputStream bout = new ByteArrayOutputStream(data.length);
  273. bout.write((int)'(');
  274. for (final byte b : data) {
  275. switch (b) {
  276. case '\n':
  277. bout.write('\\');
  278. bout.write('n');
  279. break;
  280. case '\r':
  281. bout.write('\\');
  282. bout.write('r');
  283. break;
  284. case '\t':
  285. bout.write('\\');
  286. bout.write('t');
  287. break;
  288. case '\b':
  289. bout.write('\\');
  290. bout.write('b');
  291. break;
  292. case '\f':
  293. bout.write('\\');
  294. bout.write('f');
  295. break;
  296. case '\\':
  297. bout.write('\\');
  298. bout.write('\\');
  299. break;
  300. case '(':
  301. bout.write('\\');
  302. bout.write('(');
  303. break;
  304. case ')':
  305. bout.write('\\');
  306. bout.write(')');
  307. break;
  308. default:
  309. bout.write(b);
  310. }
  311. }
  312. bout.write((int)')');
  313. return bout.toByteArray();
  314. }
  315. /**
  316. * Converts a text to PDF's "string" data type. Unsupported characters get converted to '?'
  317. * characters (similar to what the Java "US-ASCII" encoding does).
  318. * @see #toPDFString(CharSequence, char)
  319. * @param text the text to convert
  320. * @return the converted string
  321. */
  322. public static String toPDFString(CharSequence text) {
  323. return toPDFString(text, '?');
  324. }
  325. /**
  326. * Converts a text to PDF's "string" data type. Unsupported characters get converted to the
  327. * given replacement character.
  328. * <p>
  329. * The PDF library currently doesn't properly distinguish between the PDF
  330. * data types "string" and "text string", so we currently restrict "string" to US-ASCII, also
  331. * because "string" seems somewhat under-specified concerning the upper 128 bytes.
  332. * @param text the text to convert
  333. * @param replacement the replacement character used when substituting a character
  334. * @return the converted string
  335. */
  336. public static String toPDFString(CharSequence text, char replacement) {
  337. StringBuffer sb = new StringBuffer();
  338. for (int i = 0, c = text.length(); i < c; i++) {
  339. char ch = text.charAt(i);
  340. if (ch > 127) {
  341. //TODO Revisit the restriction to US-ASCII once "string" and "text string" are
  342. //"disentangled".
  343. sb.append(replacement);
  344. } else {
  345. sb.append(ch);
  346. }
  347. }
  348. return sb.toString();
  349. }
  350. }