123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /* $Id$ */
- package org.apache.fop.pdf;
-
- import java.io.ByteArrayOutputStream;
- import java.io.IOException;
- import java.io.OutputStream;
- import java.util.ArrayList;
- import java.util.Collections;
- import java.util.Comparator;
- import java.util.HashMap;
- import java.util.LinkedHashSet;
- import java.util.List;
- import java.util.Map;
- import java.util.Set;
-
- import org.apache.commons.io.output.CountingOutputStream;
-
- public class PDFLinearization {
- private PDFDocument doc;
- private Map<PDFPage, Set<PDFObject>> pageObjsMap = new HashMap<PDFPage, Set<PDFObject>>();
- private PDFDictionary linearDict;
- private HintTable hintTable;
-
- public PDFLinearization(PDFDocument doc) {
- this.doc = doc;
- }
-
- static class HintTable extends PDFStream {
- private List<PDFPage> pages;
- int pageStartPos;
- List<Integer> sharedLengths = new ArrayList<Integer>();
- List<Integer> pageLengths = new ArrayList<Integer>();
- List<Integer> contentStreamLengths = new ArrayList<Integer>();
- List<Integer> objCount = new ArrayList<Integer>();
- Map<String, int[]> hintGroups = new HashMap<String, int[]>();
-
- public HintTable(PDFDocument doc) {
- super(false);
- doc.assignObjectNumber(this);
- doc.addObject(this);
- pages = doc.pageObjs;
- for (int i = 0; i < pages.size(); i++) {
- pageLengths.add(0);
- contentStreamLengths.add(0);
- objCount.add(0);
- }
- hintGroups.put("/C", new int[4]);
- hintGroups.put("/L", new int[4]);
- hintGroups.put("/I", new int[4]);
- hintGroups.put("/E", new int[4]);
- hintGroups.put("/O", new int[4]);
- hintGroups.put("/V", new int[4]);
- }
-
- @Override
- public PDFFilterList getFilterList() {
- return new PDFFilterList(getDocument().isEncryptionActive());
- }
-
- @Override
- protected void outputRawStreamData(OutputStream os) throws IOException {
- CountingOutputStream bos = new CountingOutputStream(os);
-
- //start header
- writeULong(1, bos); //1
- writeULong(pageStartPos, bos); //2
- writeCard16(32, bos); //3
- writeULong(0, bos); //4
- writeCard16(32, bos); //5
- writeULong(0, bos); //6
- writeCard16(0, bos); //7
- writeULong(0, bos); //8
- writeCard16(32, bos); //9
- writeCard16(0, bos); //10
- writeCard16(0, bos); //11
- writeCard16(0, bos); //12
- writeCard16(4, bos); //13
- //end header
-
- for (PDFPage page : pages) {
- writeULong(objCount.get(page.pageIndex) - 1, bos);
- }
- for (PDFPage page : pages) {
- writeULong(pageLengths.get(page.pageIndex), bos);
- }
- for (PDFPage page : pages) {
- writeULong(contentStreamLengths.get(page.pageIndex), bos);
- }
-
- writeSharedTable(bos);
-
- for (Map.Entry<String, int[]> group : hintGroups.entrySet()) {
- put(group.getKey(), bos.getCount());
- for (int i : group.getValue()) {
- writeULong(i, bos);
- }
- if (group.getKey().equals("/C")) {
- writeULong(0, bos);
- writeCard16(0, bos);
- }
- }
- }
-
- private void writeSharedTable(CountingOutputStream bos) throws IOException {
- put("/S", bos.getCount());
-
- //Shared object hint table, header section
- writeULong(0, bos); //1
- writeULong(0, bos); //2
- writeULong(sharedLengths.size(), bos); //3
- writeULong(sharedLengths.size(), bos); //4
- writeCard16(0, bos); //5
- writeULong(0, bos); //6
- writeCard16(32, bos); //7
-
- for (int i : sharedLengths) {
- writeULong(i, bos);
- }
- writeULong(0, bos);
- }
-
- private void writeCard16(int s, OutputStream bos) throws IOException {
- byte b1 = (byte)((s >> 8) & 0xff);
- byte b2 = (byte)(s & 0xff);
- bos.write(b1);
- bos.write(b2);
- }
-
- private void writeULong(int s, OutputStream bos) throws IOException {
- byte b1 = (byte)((s >> 24) & 0xff);
- byte b2 = (byte)((s >> 16) & 0xff);
- byte b3 = (byte)((s >> 8) & 0xff);
- byte b4 = (byte)(s & 0xff);
- bos.write(b1);
- bos.write(b2);
- bos.write(b3);
- bos.write(b4);
- }
- }
-
- static class LinearPDFDictionary extends PDFDictionary {
- private int lastsize = -1;
-
- public LinearPDFDictionary(PDFDocument doc) {
- put("Linearized", 1);
- put("/L", 0);
- PDFArray larray = new PDFArray();
- larray.add(0);
- larray.add(0);
- put("/H", larray);
- doc.assignObjectNumber(this);
- getObjectNumber().getNumber();
- put("/O", getObjectNumber().getNumber() + 3);
- put("/E", 0);
- put("/N", doc.pageObjs.size());
- put("/T", 0);
- }
-
- public int output(OutputStream stream) throws IOException {
- int size = super.output(stream);
- int padding = lastsize - size + 32;
- if (lastsize == -1) {
- padding = 32;
- lastsize = size;
- }
- writePadding(padding, stream);
- return size + padding;
- }
- }
-
-
- private Set<PDFObject> assignNumbers() throws IOException {
- Set<PDFObject> page1Children = getPage1Children();
- if (!doc.pageObjs.isEmpty()) {
- for (int i = 1; i < doc.pageObjs.size(); i++) {
- PDFPage page = doc.pageObjs.get(i);
- Set<PDFObject> children = pageObjsMap.get(page);
- for (PDFObject c : children) {
- if (!page1Children.contains(c) && c.hasObjectNumber()) {
- c.getObjectNumber().getNumber();
- }
- }
- }
- for (PDFObject o : doc.objects) {
- if (o instanceof PDFDests || o instanceof PDFOutline) {
- for (PDFObject c : getChildren(o)) {
- c.getObjectNumber().getNumber();
- }
- }
- if (o instanceof PDFInfo || o instanceof PDFPageLabels) {
- o.getObjectNumber().getNumber();
- }
- }
- for (PDFObject o : doc.objects) {
- if (!page1Children.contains(o)) {
- o.getObjectNumber().getNumber();
- }
- }
- }
- linearDict = new LinearPDFDictionary(doc);
- for (PDFObject o : page1Children) {
- o.getObjectNumber().getNumber();
- }
- sort(doc.objects);
- return page1Children;
- }
-
- private void sort(List<PDFObject> objects) {
- Collections.sort(objects, new Comparator<PDFObject>() {
- public int compare(PDFObject o1, PDFObject o2) {
- return Integer.compare(o1.getObjectNumber().getNumber(), o2.getObjectNumber().getNumber());
- }
- });
- }
-
- private Set<PDFObject> getChildren(PDFObject o) {
- Set<PDFObject> children = new LinkedHashSet<PDFObject>();
- children.add(o);
- o.getChildren(children);
- return children;
- }
-
- public void outputPages(OutputStream stream) throws IOException {
- Collections.sort(doc.pageObjs, new Comparator<PDFPage>() {
- public int compare(PDFPage o1, PDFPage o2) {
- return Integer.compare(o1.pageIndex, o2.pageIndex);
- }
- });
- doc.objects.addAll(doc.trailerObjects);
- doc.trailerObjects = null;
- if (doc.getStructureTreeElements() != null) {
- doc.objects.addAll(doc.getStructureTreeElements());
- doc.structureTreeElements = null;
- }
- for (int i = 0; i < doc.objects.size() * 2; i++) {
- doc.indirectObjectOffsets.add(0L);
- }
- Set<PDFObject> page1Children = assignNumbers();
- doc.streamIndirectObject(linearDict, new ByteArrayOutputStream());
- for (PDFObject o : page1Children) {
- doc.objects.remove(o);
- }
- int sizeOfRest = doc.objects.size();
-
- ByteArrayOutputStream fakeHeaderTrailerStream = new ByteArrayOutputStream();
- long topTrailer = doc.position;
- doc.writeTrailer(fakeHeaderTrailerStream, sizeOfRest, page1Children.size() + 1,
- page1Children.size() + sizeOfRest + 1, Long.MAX_VALUE, 0);
- doc.position += fakeHeaderTrailerStream.size();
-
- ByteArrayOutputStream pageStream = new ByteArrayOutputStream();
- writeObjects(page1Children, pageStream, sizeOfRest + 1);
- long trailerOffset = doc.position;
- ByteArrayOutputStream footerTrailerStream = new ByteArrayOutputStream();
- doc.writeTrailer(footerTrailerStream, 0, sizeOfRest, sizeOfRest, 0, topTrailer);
- doc.position += footerTrailerStream.size();
-
- linearDict.put("/L", doc.position);
-
- PDFDocument.outputIndirectObject(linearDict, stream);
- CountingOutputStream realTrailer = new CountingOutputStream(stream);
- doc.writeTrailer(realTrailer, sizeOfRest, page1Children.size() + 1,
- page1Children.size() + sizeOfRest + 1, trailerOffset, 0);
- writePadding(fakeHeaderTrailerStream.size() - realTrailer.getCount(), stream);
- for (PDFObject o : page1Children) {
- PDFDocument.outputIndirectObject(o, stream);
- if (o instanceof HintTable) {
- break;
- }
- }
- stream.write(pageStream.toByteArray());
- stream.write(footerTrailerStream.toByteArray());
- }
-
- private Set<PDFObject> getPage1Children() throws IOException {
- Set<PDFObject> page1Children = new LinkedHashSet<PDFObject>();
- if (!doc.pageObjs.isEmpty()) {
- PDFPage page1 = doc.pageObjs.get(0);
- page1Children.add(doc.getRoot());
- hintTable = new HintTable(doc);
- page1Children.add(hintTable);
- page1Children.add(page1);
- page1.getChildren(page1Children);
- doc.objects.remove(doc.getPages());
- doc.objects.add(0, doc.getPages());
- pageObjsMap.put(page1, page1Children);
-
- for (int i = 1; i < doc.pageObjs.size(); i++) {
- PDFPage page = doc.pageObjs.get(i);
- pageObjsMap.put(page, getChildren(page));
- }
- }
- return page1Children;
- }
-
- private static void writePadding(int padding, OutputStream stream) throws IOException {
- for (int i = 0; i < padding; i++) {
- stream.write(" ".getBytes("UTF-8"));
- }
- }
-
- private void writeObjects(Set<PDFObject> children1, OutputStream pageStream, int sizeOfRest) throws IOException {
- writePage1(children1, pageStream);
- linearDict.put("/E", doc.position);
- for (PDFPage page : doc.pageObjs) {
- if (page.pageIndex != 0) {
- writePage(page, pageStream);
- }
- }
- while (!doc.objects.isEmpty()) {
- PDFObject o = doc.objects.remove(0);
- if (o instanceof PDFOutline) {
- writeObjectGroup("/O", getChildren(o), pageStream);
- } else if (o instanceof PDFDests) {
- writeObjectGroup("/E", getChildren(o), pageStream);
- } else if (o instanceof PDFInfo) {
- writeObjectGroup("/I", getChildren(o), pageStream);
- } else if (o instanceof PDFPageLabels) {
- writeObjectGroup("/L", getChildren(o), pageStream);
- } else if (o instanceof PDFStructTreeRoot) {
- writeObjectGroup("/C", getChildren(o), pageStream);
- } else {
- doc.streamIndirectObject(o, pageStream);
- }
- }
- linearDict.put("/T", doc.position + 8 + String.valueOf(sizeOfRest).length());
- }
-
- private void writeObjectGroup(String name, Set<PDFObject> objects, OutputStream pageStream)
- throws IOException {
- List<PDFObject> children = new ArrayList<PDFObject>(objects);
- sort(children);
-
- int[] values = hintTable.hintGroups.get(name);
- values[0] = children.iterator().next().getObjectNumber().getNumber();
- values[1] = (int) doc.position;
- values[2] = children.size();
- for (PDFObject o : children) {
- values[3] += doc.streamIndirectObject(o, pageStream);
- doc.objects.remove(o);
- }
- }
-
- private void writePage1(Set<PDFObject> children1, OutputStream pageStream) throws IOException {
- hintTable.pageStartPos = (int) doc.position;
- OutputStream stream = new ByteArrayOutputStream();
-
- Set<PDFObject> sharedChildren = getSharedObjects();
-
- int page1Len = 0;
- int objCount = 0;
- int sharedCount = 0;
- for (PDFObject o : children1) {
- if (o instanceof HintTable) {
- PDFArray a = (PDFArray) linearDict.get("/H");
- a.set(0, doc.position);
- doc.streamIndirectObject(o, stream);
- a.set(1, doc.position - (Double)a.get(0));
- stream = pageStream;
- } else {
- int len = doc.streamIndirectObject(o, stream);
- if (o instanceof PDFStream && hintTable.contentStreamLengths.get(0) == 0) {
- hintTable.contentStreamLengths.set(0, len);
- }
- if (!(o instanceof PDFRoot)) {
- page1Len += len;
- objCount++;
- }
- if (sharedChildren.contains(o)) {
- hintTable.sharedLengths.set(sharedCount, len);
- sharedCount++;
- }
- }
- }
- hintTable.pageLengths.set(0, page1Len);
- hintTable.objCount.set(0, objCount);
- }
-
- private Set<PDFObject> getSharedObjects() {
- Set<PDFObject> pageSharedChildren = getChildren(doc.pageObjs.get(0));
- for (int i = 0; i < pageSharedChildren.size(); i++) {
- hintTable.sharedLengths.add(0);
- }
- return pageSharedChildren;
- }
-
- private void writePage(PDFPage page, OutputStream pageStream) throws IOException {
- Set<PDFObject> children = pageObjsMap.get(page);
- int pageLen = 0;
- int objCount = 0;
- for (PDFObject c : children) {
- if (doc.objects.contains(c)) {
- int len = doc.streamIndirectObject(c, pageStream);
- if (c instanceof PDFStream) {
- hintTable.contentStreamLengths.set(page.pageIndex, len);
- }
- pageLen += len;
- doc.objects.remove(c);
- objCount++;
- }
- }
- hintTable.pageLengths.set(page.pageIndex, pageLen);
- hintTable.objCount.set(page.pageIndex, objCount);
- }
- }
|