You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PDFLinearization.java 16KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.pdf;
  19. import java.io.ByteArrayOutputStream;
  20. import java.io.IOException;
  21. import java.io.OutputStream;
  22. import java.util.ArrayList;
  23. import java.util.Collections;
  24. import java.util.Comparator;
  25. import java.util.HashMap;
  26. import java.util.LinkedHashSet;
  27. import java.util.List;
  28. import java.util.Map;
  29. import java.util.Set;
  30. import org.apache.commons.io.output.CountingOutputStream;
  31. public class PDFLinearization {
  32. private PDFDocument doc;
  33. private Map<PDFPage, Set<PDFObject>> pageObjsMap = new HashMap<PDFPage, Set<PDFObject>>();
  34. private PDFDictionary linearDict;
  35. private HintTable hintTable;
  36. public PDFLinearization(PDFDocument doc) {
  37. this.doc = doc;
  38. }
  39. static class HintTable extends PDFStream {
  40. private List<PDFPage> pages;
  41. int pageStartPos;
  42. List<Integer> sharedLengths = new ArrayList<Integer>();
  43. List<Integer> pageLengths = new ArrayList<Integer>();
  44. List<Integer> contentStreamLengths = new ArrayList<Integer>();
  45. List<Integer> objCount = new ArrayList<Integer>();
  46. Map<String, int[]> hintGroups = new HashMap<String, int[]>();
  47. public HintTable(PDFDocument doc) {
  48. super(false);
  49. doc.assignObjectNumber(this);
  50. doc.addObject(this);
  51. pages = doc.pageObjs;
  52. for (int i = 0; i < pages.size(); i++) {
  53. pageLengths.add(0);
  54. contentStreamLengths.add(0);
  55. objCount.add(0);
  56. }
  57. hintGroups.put("/C", new int[4]);
  58. hintGroups.put("/L", new int[4]);
  59. hintGroups.put("/I", new int[4]);
  60. hintGroups.put("/E", new int[4]);
  61. hintGroups.put("/O", new int[4]);
  62. hintGroups.put("/V", new int[4]);
  63. }
  64. @Override
  65. public PDFFilterList getFilterList() {
  66. return new PDFFilterList(getDocument().isEncryptionActive());
  67. }
  68. @Override
  69. protected void outputRawStreamData(OutputStream os) throws IOException {
  70. CountingOutputStream bos = new CountingOutputStream(os);
  71. //start header
  72. writeULong(1, bos); //1
  73. writeULong(pageStartPos, bos); //2
  74. writeCard16(32, bos); //3
  75. writeULong(0, bos); //4
  76. writeCard16(32, bos); //5
  77. writeULong(0, bos); //6
  78. writeCard16(0, bos); //7
  79. writeULong(0, bos); //8
  80. writeCard16(32, bos); //9
  81. writeCard16(0, bos); //10
  82. writeCard16(0, bos); //11
  83. writeCard16(0, bos); //12
  84. writeCard16(4, bos); //13
  85. //end header
  86. for (PDFPage page : pages) {
  87. writeULong(objCount.get(page.pageIndex) - 1, bos);
  88. }
  89. for (PDFPage page : pages) {
  90. writeULong(pageLengths.get(page.pageIndex), bos);
  91. }
  92. for (PDFPage page : pages) {
  93. writeULong(contentStreamLengths.get(page.pageIndex), bos);
  94. }
  95. writeSharedTable(bos);
  96. for (Map.Entry<String, int[]> group : hintGroups.entrySet()) {
  97. put(group.getKey(), bos.getCount());
  98. for (int i : group.getValue()) {
  99. writeULong(i, bos);
  100. }
  101. if (group.getKey().equals("/C")) {
  102. writeULong(0, bos);
  103. writeCard16(0, bos);
  104. }
  105. }
  106. }
  107. private void writeSharedTable(CountingOutputStream bos) throws IOException {
  108. put("/S", bos.getCount());
  109. //Shared object hint table, header section
  110. writeULong(0, bos); //1
  111. writeULong(0, bos); //2
  112. writeULong(sharedLengths.size(), bos); //3
  113. writeULong(sharedLengths.size(), bos); //4
  114. writeCard16(0, bos); //5
  115. writeULong(0, bos); //6
  116. writeCard16(32, bos); //7
  117. for (int i : sharedLengths) {
  118. writeULong(i, bos);
  119. }
  120. writeULong(0, bos);
  121. }
  122. private void writeCard16(int s, OutputStream bos) throws IOException {
  123. byte b1 = (byte)((s >> 8) & 0xff);
  124. byte b2 = (byte)(s & 0xff);
  125. bos.write(b1);
  126. bos.write(b2);
  127. }
  128. private void writeULong(int s, OutputStream bos) throws IOException {
  129. byte b1 = (byte)((s >> 24) & 0xff);
  130. byte b2 = (byte)((s >> 16) & 0xff);
  131. byte b3 = (byte)((s >> 8) & 0xff);
  132. byte b4 = (byte)(s & 0xff);
  133. bos.write(b1);
  134. bos.write(b2);
  135. bos.write(b3);
  136. bos.write(b4);
  137. }
  138. }
  139. static class LinearPDFDictionary extends PDFDictionary {
  140. private int lastsize = -1;
  141. public LinearPDFDictionary(PDFDocument doc) {
  142. put("Linearized", 1);
  143. put("/L", 0);
  144. PDFArray larray = new PDFArray();
  145. larray.add(0);
  146. larray.add(0);
  147. put("/H", larray);
  148. doc.assignObjectNumber(this);
  149. getObjectNumber().getNumber();
  150. put("/O", getObjectNumber().getNumber() + 3);
  151. put("/E", 0);
  152. put("/N", doc.pageObjs.size());
  153. put("/T", 0);
  154. }
  155. public int output(OutputStream stream) throws IOException {
  156. int size = super.output(stream);
  157. int padding = lastsize - size + 32;
  158. if (lastsize == -1) {
  159. padding = 32;
  160. lastsize = size;
  161. }
  162. writePadding(padding, stream);
  163. return size + padding;
  164. }
  165. }
  166. private Set<PDFObject> assignNumbers() throws IOException {
  167. Set<PDFObject> page1Children = getPage1Children();
  168. if (!doc.pageObjs.isEmpty()) {
  169. for (int i = 1; i < doc.pageObjs.size(); i++) {
  170. PDFPage page = doc.pageObjs.get(i);
  171. Set<PDFObject> children = pageObjsMap.get(page);
  172. for (PDFObject c : children) {
  173. if (!page1Children.contains(c) && c.hasObjectNumber()) {
  174. c.getObjectNumber().getNumber();
  175. }
  176. }
  177. }
  178. for (PDFObject o : doc.objects) {
  179. if (o instanceof PDFDests || o instanceof PDFOutline) {
  180. for (PDFObject c : getChildren(o)) {
  181. c.getObjectNumber().getNumber();
  182. }
  183. }
  184. if (o instanceof PDFInfo || o instanceof PDFPageLabels) {
  185. o.getObjectNumber().getNumber();
  186. }
  187. }
  188. for (PDFObject o : doc.objects) {
  189. if (!page1Children.contains(o)) {
  190. o.getObjectNumber().getNumber();
  191. }
  192. }
  193. }
  194. linearDict = new LinearPDFDictionary(doc);
  195. for (PDFObject o : page1Children) {
  196. o.getObjectNumber().getNumber();
  197. }
  198. sort(doc.objects);
  199. return page1Children;
  200. }
  201. private void sort(List<PDFObject> objects) {
  202. Collections.sort(objects, new Comparator<PDFObject>() {
  203. public int compare(PDFObject o1, PDFObject o2) {
  204. return Integer.compare(o1.getObjectNumber().getNumber(), o2.getObjectNumber().getNumber());
  205. }
  206. });
  207. }
  208. private Set<PDFObject> getChildren(PDFObject o) {
  209. Set<PDFObject> children = new LinkedHashSet<PDFObject>();
  210. children.add(o);
  211. o.getChildren(children);
  212. return children;
  213. }
  214. public void outputPages(OutputStream stream) throws IOException {
  215. Collections.sort(doc.pageObjs, new Comparator<PDFPage>() {
  216. public int compare(PDFPage o1, PDFPage o2) {
  217. return Integer.compare(o1.pageIndex, o2.pageIndex);
  218. }
  219. });
  220. doc.objects.addAll(doc.trailerObjects);
  221. doc.trailerObjects = null;
  222. if (doc.getStructureTreeElements() != null) {
  223. doc.objects.addAll(doc.getStructureTreeElements());
  224. doc.structureTreeElements = null;
  225. }
  226. for (int i = 0; i < doc.objects.size() * 2; i++) {
  227. doc.indirectObjectOffsets.add(0L);
  228. }
  229. Set<PDFObject> page1Children = assignNumbers();
  230. doc.streamIndirectObject(linearDict, new ByteArrayOutputStream());
  231. for (PDFObject o : page1Children) {
  232. doc.objects.remove(o);
  233. }
  234. int sizeOfRest = doc.objects.size();
  235. ByteArrayOutputStream fakeHeaderTrailerStream = new ByteArrayOutputStream();
  236. long topTrailer = doc.position;
  237. doc.writeTrailer(fakeHeaderTrailerStream, sizeOfRest, page1Children.size() + 1,
  238. page1Children.size() + sizeOfRest + 1, Long.MAX_VALUE, 0);
  239. doc.position += fakeHeaderTrailerStream.size();
  240. ByteArrayOutputStream pageStream = new ByteArrayOutputStream();
  241. writeObjects(page1Children, pageStream, sizeOfRest + 1);
  242. long trailerOffset = doc.position;
  243. ByteArrayOutputStream footerTrailerStream = new ByteArrayOutputStream();
  244. doc.writeTrailer(footerTrailerStream, 0, sizeOfRest, sizeOfRest, 0, topTrailer);
  245. doc.position += footerTrailerStream.size();
  246. linearDict.put("/L", doc.position);
  247. PDFDocument.outputIndirectObject(linearDict, stream);
  248. CountingOutputStream realTrailer = new CountingOutputStream(stream);
  249. doc.writeTrailer(realTrailer, sizeOfRest, page1Children.size() + 1,
  250. page1Children.size() + sizeOfRest + 1, trailerOffset, 0);
  251. writePadding(fakeHeaderTrailerStream.size() - realTrailer.getCount(), stream);
  252. for (PDFObject o : page1Children) {
  253. PDFDocument.outputIndirectObject(o, stream);
  254. if (o instanceof HintTable) {
  255. break;
  256. }
  257. }
  258. stream.write(pageStream.toByteArray());
  259. stream.write(footerTrailerStream.toByteArray());
  260. }
  261. private Set<PDFObject> getPage1Children() throws IOException {
  262. Set<PDFObject> page1Children = new LinkedHashSet<PDFObject>();
  263. if (!doc.pageObjs.isEmpty()) {
  264. PDFPage page1 = doc.pageObjs.get(0);
  265. page1Children.add(doc.getRoot());
  266. hintTable = new HintTable(doc);
  267. page1Children.add(hintTable);
  268. page1Children.add(page1);
  269. page1.getChildren(page1Children);
  270. doc.objects.remove(doc.getPages());
  271. doc.objects.add(0, doc.getPages());
  272. pageObjsMap.put(page1, page1Children);
  273. for (int i = 1; i < doc.pageObjs.size(); i++) {
  274. PDFPage page = doc.pageObjs.get(i);
  275. pageObjsMap.put(page, getChildren(page));
  276. }
  277. }
  278. return page1Children;
  279. }
  280. private static void writePadding(int padding, OutputStream stream) throws IOException {
  281. for (int i = 0; i < padding; i++) {
  282. stream.write(" ".getBytes("UTF-8"));
  283. }
  284. }
  285. private void writeObjects(Set<PDFObject> children1, OutputStream pageStream, int sizeOfRest) throws IOException {
  286. writePage1(children1, pageStream);
  287. linearDict.put("/E", doc.position);
  288. for (PDFPage page : doc.pageObjs) {
  289. if (page.pageIndex != 0) {
  290. writePage(page, pageStream);
  291. }
  292. }
  293. while (!doc.objects.isEmpty()) {
  294. PDFObject o = doc.objects.remove(0);
  295. if (o instanceof PDFOutline) {
  296. writeObjectGroup("/O", getChildren(o), pageStream);
  297. } else if (o instanceof PDFDests) {
  298. writeObjectGroup("/E", getChildren(o), pageStream);
  299. } else if (o instanceof PDFInfo) {
  300. writeObjectGroup("/I", getChildren(o), pageStream);
  301. } else if (o instanceof PDFPageLabels) {
  302. writeObjectGroup("/L", getChildren(o), pageStream);
  303. } else if (o instanceof PDFStructTreeRoot) {
  304. writeObjectGroup("/C", getChildren(o), pageStream);
  305. } else {
  306. doc.streamIndirectObject(o, pageStream);
  307. }
  308. }
  309. linearDict.put("/T", doc.position + 8 + String.valueOf(sizeOfRest).length());
  310. }
  311. private void writeObjectGroup(String name, Set<PDFObject> objects, OutputStream pageStream)
  312. throws IOException {
  313. List<PDFObject> children = new ArrayList<PDFObject>(objects);
  314. sort(children);
  315. int[] values = hintTable.hintGroups.get(name);
  316. values[0] = children.iterator().next().getObjectNumber().getNumber();
  317. values[1] = (int) doc.position;
  318. values[2] = children.size();
  319. for (PDFObject o : children) {
  320. values[3] += doc.streamIndirectObject(o, pageStream);
  321. doc.objects.remove(o);
  322. }
  323. }
  324. private void writePage1(Set<PDFObject> children1, OutputStream pageStream) throws IOException {
  325. hintTable.pageStartPos = (int) doc.position;
  326. OutputStream stream = new ByteArrayOutputStream();
  327. Set<PDFObject> sharedChildren = getSharedObjects();
  328. int page1Len = 0;
  329. int objCount = 0;
  330. int sharedCount = 0;
  331. for (PDFObject o : children1) {
  332. if (o instanceof HintTable) {
  333. PDFArray a = (PDFArray) linearDict.get("/H");
  334. a.set(0, doc.position);
  335. doc.streamIndirectObject(o, stream);
  336. a.set(1, doc.position - (Double)a.get(0));
  337. stream = pageStream;
  338. } else {
  339. int len = doc.streamIndirectObject(o, stream);
  340. if (o instanceof PDFStream && hintTable.contentStreamLengths.get(0) == 0) {
  341. hintTable.contentStreamLengths.set(0, len);
  342. }
  343. if (!(o instanceof PDFRoot)) {
  344. page1Len += len;
  345. objCount++;
  346. }
  347. if (sharedChildren.contains(o)) {
  348. hintTable.sharedLengths.set(sharedCount, len);
  349. sharedCount++;
  350. }
  351. }
  352. }
  353. hintTable.pageLengths.set(0, page1Len);
  354. hintTable.objCount.set(0, objCount);
  355. }
  356. private Set<PDFObject> getSharedObjects() {
  357. Set<PDFObject> pageSharedChildren = getChildren(doc.pageObjs.get(0));
  358. for (int i = 0; i < pageSharedChildren.size(); i++) {
  359. hintTable.sharedLengths.add(0);
  360. }
  361. return pageSharedChildren;
  362. }
  363. private void writePage(PDFPage page, OutputStream pageStream) throws IOException {
  364. Set<PDFObject> children = pageObjsMap.get(page);
  365. int pageLen = 0;
  366. int objCount = 0;
  367. for (PDFObject c : children) {
  368. if (doc.objects.contains(c)) {
  369. int len = doc.streamIndirectObject(c, pageStream);
  370. if (c instanceof PDFStream) {
  371. hintTable.contentStreamLengths.set(page.pageIndex, len);
  372. }
  373. pageLen += len;
  374. doc.objects.remove(c);
  375. objCount++;
  376. }
  377. }
  378. hintTable.pageLengths.set(page.pageIndex, pageLen);
  379. hintTable.objCount.set(page.pageIndex, objCount);
  380. }
  381. }