You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PDFCMap.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.pdf;
  19. import java.io.IOException;
  20. import java.io.OutputStream;
  21. import java.io.StringWriter;
  22. import java.io.Writer;
  23. /**
  24. * Class representing the CMap encodings.
  25. *
  26. * The predefined CMaps are listed in the "Predefined CJK CMap names" table
  27. * in section 5.6.4 of the PDF Reference 1.4.
  28. */
  29. public class PDFCMap extends PDFStream {
  30. /*
  31. * Chinese (simplified)
  32. */
  33. /**
  34. * GB-EUC-H Microsoft Code Page 936 (lfCharSet 0x86), GB 2312-80
  35. * character set, EUC-CN encoding
  36. */
  37. public static final String ENC_GB_EUC_H = "GB-EUC-H";
  38. /**
  39. * GB-EUC-V Vertical version of GB-EUC-H
  40. */
  41. public static final String ENC_GB_EUC_V = "GB_EUC_V";
  42. /**
  43. * GBpc-EUC-H Mac OS, GB 2312-80 character set, EUC-CN encoding, Script Manager code 19
  44. */
  45. public static final String ENC_GBPC_EUC_H = "GBpc-EUC-H";
  46. /**
  47. * GBpc-EUC-V Vertical version of GBpc-EUC-H
  48. */
  49. public static final String ENC_GBPC_EUC_V = "GBpc-EUC-V";
  50. /**
  51. * GBK-EUC-H Microsoft Code Page 936 (lfCharSet 0x86), GBK character set, GBK encoding
  52. */
  53. public static final String ENC_GBK_EUC_H = "GBK-EUC-H";
  54. /**
  55. * GBK-EUC-V Vertical version of GBK-EUC-H
  56. */
  57. public static final String ENC_GBK_EUC_V = "GBK-EUC-V";
  58. /**
  59. * GBKp-EUC-H Same as GBK-EUC-H, but replaces half-width
  60. * Latin characters with proportional forms and maps character
  61. * code 0x24 to a dollar sign ($) instead of a yuan symbol
  62. */
  63. public static final String ENC_GBKP_EUC_H = "GBKp-EUC-H";
  64. /**
  65. * GBKp-EUC-V Vertical version of GBKp-EUC-H
  66. */
  67. public static final String ENC_GBKP_EUC_V = "GBKp-EUC-V";
  68. /**
  69. * GBK2K-H GB 18030-2000 character set, mixed 1-, 2-, and 4-byte encoding
  70. */
  71. public static final String ENC_GBK2K_H = "GBK2K-H";
  72. /**
  73. * GBK2K-V Vertical version of GBK2K-H
  74. */
  75. public static final String ENC_GBK2K_V = "GBK2K-V";
  76. /**
  77. * UniGB-UCS2-H Unicode (UCS-2) encoding for the Adobe-GB1 character collection
  78. */
  79. public static final String ENC_UNIGB_UCS2_H = "UniGB-UCS2-H";
  80. /**
  81. * UniGB-UCS2-V Vertical version of UniGB-UCS2-H
  82. */
  83. public static final String ENC_UNIGB_UCS2_V = "UniGB-UCS2-V";
  84. /*
  85. * Chinese (Traditional)
  86. */
  87. /**
  88. * B5pc-H Mac OS, Big Five character set, Big Five encoding, Script Manager code 2
  89. */
  90. public static final String ENC_B5PC_H = "B5pc-H";
  91. /**
  92. * B5pc-V Vertical version of B5pc-H
  93. */
  94. public static final String ENC_B5PC_V = "B5pc-V";
  95. /**
  96. * HKscs-B5-H Hong Kong SCS, an extension to the Big Five
  97. * character set and encoding
  98. */
  99. public static final String ENC_HKSCS_B5_H = "HKscs-B5-H";
  100. /**
  101. * HKscs-B5-V Vertical version of HKscs-B5-H
  102. */
  103. public static final String ENC_HKSCS_B5_V = "HKscs-B5-V";
  104. /**
  105. * ETen-B5-H Microsoft Code Page 950 (lfCharSet 0x88), Big Five
  106. * character set with ETen extensions
  107. */
  108. public static final String ENC_ETEN_B5_H = "ETen-B5-H";
  109. /**
  110. * ETen-B5-V Vertical version of ETen-B5-H
  111. */
  112. public static final String ENC_ETEN_B5_V = "ETen-B5-V";
  113. /**
  114. * ETenms-B5-H Same as ETen-B5-H, but replaces half-width
  115. * Latin characters with proportional forms
  116. */
  117. public static final String ENC_ETENMS_B5_H = "ETenms-B5-H";
  118. /**
  119. * ETenms-B5-V Vertical version of ETenms-B5-H
  120. */
  121. public static final String ENC_ETENMS_B5_V = "ETenms-B5-V";
  122. /**
  123. * CNS-EUC-H CNS 11643-1992 character set, EUC-TW encoding
  124. */
  125. public static final String ENC_CNS_EUC_H = "CNS-EUC-H";
  126. /**
  127. * CNS-EUC-V Vertical version of CNS-EUC-H
  128. */
  129. public static final String ENC_CNS_EUC_V = "CNS-EUC-V";
  130. /**
  131. * UniCNS-UCS2-H Unicode (UCS-2) encoding for the
  132. * Adobe-CNS1 character collection
  133. */
  134. public static final String ENC_UNICNS_UCS2_H = "UniCNS-UCS2-H";
  135. /**
  136. * UniCNS-UCS2-V Vertical version of UniCNS-UCS2-H
  137. */
  138. public static final String ENC_UNICNS_UCS2_V = "UniCNS-UCS2-V";
  139. /*
  140. * Japanese
  141. */
  142. /**
  143. * 83pv-RKSJ-H Mac OS, JIS X 0208 character set with KanjiTalk6
  144. * extensions, Shift-JIS encoding, Script Manager code 1
  145. */
  146. public static final String ENC_83PV_RKSJ_H = "83pv-RKSJ-H"; // no V version
  147. /**
  148. * 90ms-RKSJ-H Microsoft Code Page 932 (lfCharSet 0x80), JIS X 0208
  149. * character set with NEC and IBM extensions
  150. */
  151. public static final String ENC_90MS_RKSJ_H = "90ms-RKSJ-H";
  152. /**
  153. * 90ms-RKSJ-V Vertical version of 90ms-RKSJ-H
  154. */
  155. public static final String ENC_90MS_RKSJ_V = "90ms-RKSJ-V";
  156. /**
  157. * 90msp-RKSJ-H Same as 90ms-RKSJ-H, but replaces half-width Latin
  158. * characters with proportional forms
  159. */
  160. public static final String ENC_90MSP_RKSJ_H = "90msp-RKSJ-H";
  161. /**
  162. * 90msp-RKSJ-V Vertical version of 90msp-RKSJ-H
  163. */
  164. public static final String ENC_90MSP_RKSJ_V = "90msp-RKSJ-V";
  165. /**
  166. * 90pv-RKSJ-H Mac OS, JIS X 0208 character set with KanjiTalk7
  167. * extensions, Shift-JIS encoding, Script Manager code 1
  168. */
  169. public static final String ENC_90PV_RKSJ_H = "90pv-RKSJ-H"; // no V version
  170. /**
  171. * Add-RKSJ-H JIS X 0208 character set with Fujitsu FMR
  172. * extensions, Shift-JIS encoding
  173. */
  174. public static final String ENC_ADD_RKSJ_H = "Add-RKSJ-H";
  175. /**
  176. * Add-RKSJ-V Vertical version of Add-RKSJ-H
  177. */
  178. public static final String ENC_ADD_RKSJ_V = "Add-RKSJ-V";
  179. /**
  180. * EUC-H JIS X 0208 character set, EUC-JP encoding
  181. */
  182. public static final String ENC_EUC_H = "EUC-H";
  183. /**
  184. * EUC-V Vertical version of EUC-H
  185. */
  186. public static final String ENC_EUC_V = "EUC-V";
  187. /**
  188. * Ext-RKSJ-H JIS C 6226 (JIS78) character set with
  189. * NEC extensions, Shift-JIS encoding
  190. */
  191. public static final String ENC_EXT_RKSJ_H = "Ext-RKSJ-H";
  192. /**
  193. * Ext-RKSJ-V Vertical version of Ext-RKSJ-H
  194. */
  195. public static final String ENC_EXT_RKSJ_V = "Ext-RKSJ-V";
  196. /**
  197. * H JIS X 0208 character set, ISO-2022-JP encoding
  198. */
  199. public static final String ENC_H = "H";
  200. /**
  201. * V Vertical version of H
  202. */
  203. public static final String ENC_V = "V";
  204. /**
  205. * UniJIS-UCS2-H Unicode (UCS-2) encoding for the
  206. * Adobe-Japan1 character collection
  207. */
  208. public static final String ENC_UNIJIS_UCS2_H = "UniJIS-UCS2-H";
  209. /**
  210. * UniJIS-UCS2-V Vertical version of UniJIS-UCS2-H
  211. */
  212. public static final String ENC_UNIJIS_UCS2_V = "UniJIS-UCS2-V";
  213. /**
  214. * UniJIS-UCS2-HW-H Same as UniJIS-UCS2-H, but replaces proportional
  215. * Latin characters with half-width forms
  216. */
  217. public static final String ENC_UNIJIS_UCS2_HW_H = "UniJIS-UCS2-HW-H";
  218. /**
  219. * UniJIS-UCS2-HW-V Vertical version of UniJIS-UCS2-HW-H
  220. */
  221. public static final String ENC_UNIJIS_UCS2_HW_V = "UniJIS-UCS2-HW-V";
  222. /*
  223. * Korean
  224. */
  225. /**
  226. * KSC-EUC-H KS X 1001:1992 character set, EUC-KR encoding
  227. */
  228. public static final String ENC_KSC_EUC_H = "KSC-EUC-H";
  229. /**
  230. * KSC-EUC-V Vertical version of KSC-EUC-H
  231. */
  232. public static final String ENC_KSC_EUC_V = "KSC-EUC-V";
  233. /**
  234. * KSCms-UHC-H Microsoft Code Page 949 (lfCharSet 0x81), KS X 1001:1992
  235. * character set plus 8822 additional hangul,
  236. * Unified Hangul Code (UHC) encoding
  237. */
  238. public static final String ENC_KSCMS_UHC_H = "KSCms-UHC-H";
  239. /**
  240. * KSCms-UHC-V Vertical version of KSCms-UHC-H
  241. */
  242. public static final String ENC_KSCMS_UHC_V = "KSCms-UHC-V";
  243. /**
  244. * KSCms-UHC-HW-H Same as KSCms-UHC-H, but replaces proportional
  245. * Latin characters with half-width forms
  246. */
  247. public static final String ENC_KSCMS_UHC_HW_H = "KSCms-UHC-HW-H";
  248. /**
  249. * KSCms-UHC-HW-V Vertical version of KSCms-UHC-HW-H
  250. */
  251. public static final String ENC_KSCMS_UHC_HW_V = "KSCms-UHC-HW-V";
  252. /**
  253. * KSCpc-EUC-H Mac OS, KS X 1001:1992 character set with
  254. * Mac OS KH extensions, Script Manager Code 3
  255. */
  256. public static final String ENC_KSCPC_EUC_H = "KSCpc-EUC-H"; // no V version
  257. /**
  258. * UniKS-UCS2-H Unicode (UCS-2) encoding for the
  259. * Adobe-Korea1 character collection
  260. */
  261. public static final String ENC_UNIKSC_UCS2_H = "UniKSC-UCS2-H";
  262. /**
  263. * UniKS-UCS2-V Vertical version of UniKS-UCS2-H
  264. */
  265. public static final String ENC_UNIKSC_UCS2_V = "UniKSC-UCS2-V";
  266. /*
  267. * Generic
  268. */
  269. /**
  270. * Identity-H The horizontal identity mapping for 2-byte CIDs;
  271. * may be used with CIDFonts using any Registry, Ordering, and
  272. * Supplement values. It maps 2-byte character codes ranging from
  273. * 0 to 65,535 to the same 2-byte CID value, interpreted
  274. * high-order byte first.
  275. */
  276. public static final String ENC_IDENTITY_H = "Identity-H";
  277. /**
  278. * Identity-V Vertical version of Identity-H. The mapping
  279. * is the same as for Identity-H.
  280. */
  281. public static final String ENC_IDENTTITY_V = "Identity-V";
  282. /**
  283. * /CMapName attribute, one of the predefined constants
  284. */
  285. protected String name;
  286. /**
  287. * /CIDSystemInfo attribute
  288. */
  289. protected PDFCIDSystemInfo sysInfo;
  290. /**
  291. * horizontal writing direction
  292. */
  293. public static final byte WMODE_HORIZONTAL = 0;
  294. /**
  295. * vertical writing direction
  296. */
  297. public static final byte WMODE_VERTICAL = 1;
  298. /**
  299. * font's writing direction
  300. */
  301. protected byte wMode = WMODE_HORIZONTAL;
  302. /**
  303. * base CMap (String or PDFStream)
  304. */
  305. protected Object base;
  306. /**
  307. * create the /CMap object
  308. *
  309. * @param name one the registered names (see Table 7.20 on p 215)
  310. * @param sysInfo the attributes of the character collection of the CIDFont
  311. */
  312. public PDFCMap(String name, PDFCIDSystemInfo sysInfo) {
  313. super();
  314. this.name = name;
  315. this.sysInfo = sysInfo;
  316. this.base = null;
  317. }
  318. /**
  319. * set the writing direction
  320. *
  321. * @param mode is either <code>WMODE_HORIZONTAL</code>
  322. * or <code>WMODE_VERTICAL</code>
  323. */
  324. public void setWMode(byte mode) {
  325. this.wMode = mode;
  326. }
  327. /**
  328. * set the base CMap
  329. *
  330. * @param base the name of the base CMap
  331. */
  332. public void setUseCMap(String base) {
  333. this.base = base;
  334. }
  335. /**
  336. * set the base CMap
  337. *
  338. * @param base the stream to be used as base CMap
  339. */
  340. public void setUseCMap(PDFStream base) {
  341. this.base = base;
  342. }
  343. /**
  344. * Creates the CMapBuilder that will build the CMap's content.
  345. * @param writer a Writer to write the CMap's contents to
  346. * @return the newly created CMapBuilder
  347. */
  348. protected CMapBuilder createCMapBuilder(Writer writer) {
  349. return new CMapBuilder(writer, this.name);
  350. }
  351. /** {@inheritDoc} */
  352. protected int output(OutputStream stream) throws IOException {
  353. StringWriter writer = new StringWriter();
  354. CMapBuilder builder = createCMapBuilder(writer);
  355. builder.writeCMap();
  356. add(writer.getBuffer().toString()); //TODO Could be optimized by not buffering
  357. return super.output(stream);
  358. }
  359. }