123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304 |
- /*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
- /* $Id$ */
-
- package org.apache.fop.render.pdf;
-
- import java.util.HashMap;
- import java.util.Locale;
- import java.util.Map;
-
- import org.w3c.dom.Node;
- import org.w3c.dom.NodeList;
-
- import org.apache.fop.events.EventBroadcaster;
- import org.apache.fop.fo.extensions.ExtensionElementMapping;
- import org.apache.fop.fo.extensions.InternalElementMapping;
- import org.apache.fop.pdf.PDFArray;
- import org.apache.fop.pdf.PDFDictionary;
- import org.apache.fop.pdf.PDFDocument;
- import org.apache.fop.pdf.PDFLink;
- import org.apache.fop.pdf.PDFName;
- import org.apache.fop.pdf.PDFPage;
- import org.apache.fop.pdf.PDFParentTree;
- import org.apache.fop.pdf.PDFStructElem;
- import org.apache.fop.pdf.PDFStructTreeRoot;
-
-
- /**
- * Handles the creation of the logical structure in the PDF document.
- */
- class PDFLogicalStructureHandler {
-
- private static final PDFName MCR = new PDFName("MCR");
-
- private static final PDFName OBJR = new PDFName("OBJR");
-
- private static final MarkedContentInfo ARTIFACT = new MarkedContentInfo(null, -1, null);
-
- private final PDFDocument pdfDoc;
-
- private final EventBroadcaster eventBroadcaster;
-
- /**
- * Map of references to the corresponding structure elements.
- */
- private final Map structTreeMap = new HashMap();
-
- private final PDFParentTree parentTree = new PDFParentTree();
-
- private int parentTreeKey;
-
- private PDFPage currentPage;
-
- /**
- * The array of references, from marked-content sequences in the current
- * page, to their parent structure elements. This will be a value in the
- * structure parent tree, whose corresponding key will be the page's
- * StructParents entry.
- */
- private PDFArray pageParentTreeArray;
-
- private PDFStructElem rootStructureElement;
-
- /**
- * Class providing the necessary information for bracketing content
- * associated to a structure element as a marked-content sequence.
- */
- static final class MarkedContentInfo {
-
- /**
- * A value that can be used for the tag operand of a marked-content
- * operator. This is the structure type of the corresponding structure
- * element.
- */
- final String tag; // CSOK: VisibilityModifier
-
- /**
- * The value for the MCID entry of the marked-content sequence's property list.
- */
- final int mcid; // CSOK: VisibilityModifier
-
- private final PDFStructElem parent;
-
- private MarkedContentInfo(String tag, int mcid, PDFStructElem parent) {
- this.tag = tag;
- this.mcid = mcid;
- this.parent = parent;
- }
- }
-
- /**
- * Creates a new instance for handling the logical structure of the given document.
- *
- * @param pdfDoc a document
- */
- PDFLogicalStructureHandler(PDFDocument pdfDoc, EventBroadcaster eventBroadcaster) {
- this.pdfDoc = pdfDoc;
- this.eventBroadcaster = eventBroadcaster;
- PDFStructTreeRoot structTreeRoot = pdfDoc.getFactory().makeStructTreeRoot(parentTree);
- rootStructureElement = pdfDoc.getFactory().makeStructureElement(
- FOToPDFRoleMap.mapFormattingObject("root", structTreeRoot), structTreeRoot);
- structTreeRoot.addKid(rootStructureElement);
- }
-
- /**
- * Converts the given structure tree into PDF.
- *
- * @param structureTree the structure tree of the current page sequence
- * @param language language set on the page sequence
- */
- void processStructureTree(NodeList structureTree, Locale language) {
- pdfDoc.enforceLanguageOnRoot();
- PDFStructElem structElemPart = pdfDoc.getFactory().makeStructureElement(
- FOToPDFRoleMap.mapFormattingObject("page-sequence", rootStructureElement),
- rootStructureElement);
- rootStructureElement.addKid(structElemPart);
- if (language != null) {
- structElemPart.setLanguage(language);
- }
-
- for (int i = 0, n = structureTree.getLength(); i < n; i++) {
- Node node = structureTree.item(i);
- assert node.getLocalName().equals("flow")
- || node.getLocalName().equals("static-content");
- PDFStructElem structElemSect = pdfDoc.getFactory().makeStructureElement(
- FOToPDFRoleMap.mapFormattingObject(node.getLocalName(), structElemPart),
- structElemPart);
- structElemPart.addKid(structElemSect);
- NodeList childNodes = node.getChildNodes();
- for (int j = 0, m = childNodes.getLength(); j < m; j++) {
- processNode(childNodes.item(j), structElemSect, true);
- }
- }
- }
-
- private void processNode(Node node, PDFStructElem parent, boolean addKid) {
- PDFStructElem structElem = pdfDoc.getFactory().makeStructureElement(
- FOToPDFRoleMap.mapFormattingObject(node, parent, eventBroadcaster), parent);
- // TODO necessary? If a page-sequence is empty (e.g., contains a single
- // empty fo:block), should the block still be added to the structure
- // tree? This is not being done for descendant empty elements...
- if (addKid) {
- parent.addKid(structElem);
- }
- String nodeName = node.getLocalName();
- if (nodeName.equals("external-graphic") || nodeName.equals("instream-foreign-object")) {
- Node altTextNode = node.getAttributes().getNamedItemNS(
- ExtensionElementMapping.URI, "alt-text");
- if (altTextNode != null) {
- structElem.put("Alt", altTextNode.getNodeValue());
- } else {
- structElem.put("Alt", "No alternate text specified");
- }
- }
- Node attr = node.getAttributes().getNamedItemNS(InternalElementMapping.URI, "ptr");
- if (attr != null) {
- String ptr = attr.getNodeValue();
- structTreeMap.put(ptr, structElem);
- }
- NodeList nodes = node.getChildNodes();
- for (int i = 0, n = nodes.getLength(); i < n; i++) {
- processNode(nodes.item(i), structElem, false);
- }
- }
-
- private int getNextParentTreeKey() {
- return parentTreeKey++;
- }
-
- /**
- * Receive notification of the beginning of a new page.
- *
- * @param page the page that will be rendered in PDF
- */
- void startPage(PDFPage page) {
- currentPage = page;
- currentPage.setStructParents(getNextParentTreeKey());
- pageParentTreeArray = new PDFArray();
- }
-
- /**
- * Receive notification of the end of the current page.
- */
- void endPage() {
- // TODO
- // Values in a number tree must be indirect references to the PDF
- // objects associated to the keys. To enforce that the array is
- // registered to the PDF document. Unfortunately that can't be done
- // earlier since a call to PDFContentGenerator.flushPDFDoc can be made
- // before the array is complete, which would result in only part of it
- // being output to the PDF.
- // This should really be handled by PDFNumsArray
- pdfDoc.registerObject(pageParentTreeArray);
- parentTree.getNums().put(currentPage.getStructParents(), pageParentTreeArray);
- }
-
- private MarkedContentInfo addToParentTree(String structurePointer) {
- PDFStructElem parent = (PDFStructElem) structTreeMap.get(structurePointer);
- if (parent == null) {
- return ARTIFACT;
- } else {
- pageParentTreeArray.add(parent);
- String type = parent.getStructureType().toString();
- int mcid = pageParentTreeArray.length() - 1;
- return new MarkedContentInfo(type, mcid, parent);
- }
- }
-
- /**
- * Adds a content item corresponding to text into the structure tree, if
- * there is a structure element associated to it.
- *
- * @param structurePointer reference to the parent structure element of the
- * piece of text
- * @return the necessary information for bracketing the content as a
- * marked-content sequence. If there is no element in the structure tree
- * associated to that content, returns an instance whose
- * {@link MarkedContentInfo#tag} value is <code>null</code>. The content
- * must then be treated as an artifact.
- */
- MarkedContentInfo addTextContentItem(String structurePointer) {
- MarkedContentInfo mci = addToParentTree(structurePointer);
- if (mci != ARTIFACT) {
- PDFDictionary contentItem = new PDFDictionary();
- contentItem.put("Type", MCR);
- contentItem.put("Pg", this.currentPage);
- contentItem.put("MCID", mci.mcid);
- mci.parent.addKid(contentItem);
- }
- return mci;
- }
-
- /**
- * Adds a content item corresponding to an image into the structure tree, if
- * there is a structure element associated to it.
- *
- * @param structurePointer reference to the parent structure element of the
- * image
- * @return the necessary information for bracketing the content as a
- * marked-content sequence. If there is no element in the structure tree
- * associated to that image, returns an instance whose
- * {@link MarkedContentInfo#tag} value is <code>null</code>. The image
- * must then be treated as an artifact.
- */
- MarkedContentInfo addImageContentItem(String structurePointer) {
- MarkedContentInfo mci = addToParentTree(structurePointer);
- if (mci != ARTIFACT) {
- mci.parent.setMCIDKid(mci.mcid);
- mci.parent.setPage(this.currentPage);
- }
- return mci;
- }
-
- // While the PDF spec allows images to be referred as PDF objects, this
- // makes the Acrobat Pro checker complain that the image is not accessible.
- // Its alt-text is still read aloud though. Using marked-content sequences
- // like for text works.
- // MarkedContentInfo addImageObject(String parentReference) {
- // MarkedContentInfo mci = addToParentTree(parentReference);
- // if (mci != ARTIFACT) {
- // PDFDictionary contentItem = new PDFDictionary();
- // contentItem.put("Type", OBJR);
- // contentItem.put("Pg", this.currentPage);
- // contentItem.put("Obj", null);
- // mci.parent.addKid(contentItem);
- // }
- // return mci;
- // }
-
- /**
- * Adds a content item corresponding to the given link into the structure
- * tree.
- *
- * @param link a link
- * @param structurePointer reference to the corresponding parent structure element
- */
- void addLinkContentItem(PDFLink link, String structurePointer) {
- int structParent = getNextParentTreeKey();
- link.setStructParent(structParent);
- PDFDictionary contentItem = new PDFDictionary();
- contentItem.put("Type", OBJR);
- contentItem.put("Pg", this.currentPage);
- contentItem.put("Obj", link);
- PDFStructElem parent = (PDFStructElem) structTreeMap.get(structurePointer);
- parentTree.getNums().put(structParent, parent);
- parent.addKid(contentItem);
- }
-
- }
|