You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PDFLogicalStructureHandler.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.render.pdf;
  19. import java.util.HashMap;
  20. import java.util.Locale;
  21. import java.util.Map;
  22. import org.w3c.dom.Node;
  23. import org.w3c.dom.NodeList;
  24. import org.apache.fop.events.EventBroadcaster;
  25. import org.apache.fop.fo.extensions.ExtensionElementMapping;
  26. import org.apache.fop.fo.extensions.InternalElementMapping;
  27. import org.apache.fop.pdf.PDFArray;
  28. import org.apache.fop.pdf.PDFDictionary;
  29. import org.apache.fop.pdf.PDFDocument;
  30. import org.apache.fop.pdf.PDFLink;
  31. import org.apache.fop.pdf.PDFName;
  32. import org.apache.fop.pdf.PDFPage;
  33. import org.apache.fop.pdf.PDFParentTree;
  34. import org.apache.fop.pdf.PDFStructElem;
  35. import org.apache.fop.pdf.PDFStructTreeRoot;
  36. /**
  37. * Handles the creation of the logical structure in the PDF document.
  38. */
  39. class PDFLogicalStructureHandler {
  40. private static final PDFName MCR = new PDFName("MCR");
  41. private static final PDFName OBJR = new PDFName("OBJR");
  42. private static final MarkedContentInfo ARTIFACT = new MarkedContentInfo(null, -1, null);
  43. private final PDFDocument pdfDoc;
  44. private final EventBroadcaster eventBroadcaster;
  45. /**
  46. * Map of references to the corresponding structure elements.
  47. */
  48. private final Map structTreeMap = new HashMap();
  49. private final PDFParentTree parentTree = new PDFParentTree();
  50. private int parentTreeKey;
  51. private PDFPage currentPage;
  52. /**
  53. * The array of references, from marked-content sequences in the current
  54. * page, to their parent structure elements. This will be a value in the
  55. * structure parent tree, whose corresponding key will be the page's
  56. * StructParents entry.
  57. */
  58. private PDFArray pageParentTreeArray;
  59. private PDFStructElem rootStructureElement;
  60. /**
  61. * Class providing the necessary information for bracketing content
  62. * associated to a structure element as a marked-content sequence.
  63. */
  64. static final class MarkedContentInfo {
  65. /**
  66. * A value that can be used for the tag operand of a marked-content
  67. * operator. This is the structure type of the corresponding structure
  68. * element.
  69. */
  70. final String tag; // CSOK: VisibilityModifier
  71. /**
  72. * The value for the MCID entry of the marked-content sequence's property list.
  73. */
  74. final int mcid; // CSOK: VisibilityModifier
  75. private final PDFStructElem parent;
  76. private MarkedContentInfo(String tag, int mcid, PDFStructElem parent) {
  77. this.tag = tag;
  78. this.mcid = mcid;
  79. this.parent = parent;
  80. }
  81. }
  82. /**
  83. * Creates a new instance for handling the logical structure of the given document.
  84. *
  85. * @param pdfDoc a document
  86. */
  87. PDFLogicalStructureHandler(PDFDocument pdfDoc, EventBroadcaster eventBroadcaster) {
  88. this.pdfDoc = pdfDoc;
  89. this.eventBroadcaster = eventBroadcaster;
  90. PDFStructTreeRoot structTreeRoot = pdfDoc.getFactory().makeStructTreeRoot(parentTree);
  91. rootStructureElement = pdfDoc.getFactory().makeStructureElement(
  92. FOToPDFRoleMap.mapFormattingObject("root", structTreeRoot), structTreeRoot);
  93. structTreeRoot.addKid(rootStructureElement);
  94. }
  95. /**
  96. * Converts the given structure tree into PDF.
  97. *
  98. * @param structureTree the structure tree of the current page sequence
  99. * @param language language set on the page sequence
  100. */
  101. void processStructureTree(NodeList structureTree, Locale language) {
  102. pdfDoc.enforceLanguageOnRoot();
  103. PDFStructElem structElemPart = pdfDoc.getFactory().makeStructureElement(
  104. FOToPDFRoleMap.mapFormattingObject("page-sequence", rootStructureElement),
  105. rootStructureElement);
  106. rootStructureElement.addKid(structElemPart);
  107. if (language != null) {
  108. structElemPart.setLanguage(language);
  109. }
  110. for (int i = 0, n = structureTree.getLength(); i < n; i++) {
  111. Node node = structureTree.item(i);
  112. assert node.getLocalName().equals("flow")
  113. || node.getLocalName().equals("static-content");
  114. PDFStructElem structElemSect = pdfDoc.getFactory().makeStructureElement(
  115. FOToPDFRoleMap.mapFormattingObject(node.getLocalName(), structElemPart),
  116. structElemPart);
  117. structElemPart.addKid(structElemSect);
  118. NodeList childNodes = node.getChildNodes();
  119. for (int j = 0, m = childNodes.getLength(); j < m; j++) {
  120. processNode(childNodes.item(j), structElemSect, true);
  121. }
  122. }
  123. }
  124. private void processNode(Node node, PDFStructElem parent, boolean addKid) {
  125. PDFStructElem structElem = pdfDoc.getFactory().makeStructureElement(
  126. FOToPDFRoleMap.mapFormattingObject(node, parent, eventBroadcaster), parent);
  127. // TODO necessary? If a page-sequence is empty (e.g., contains a single
  128. // empty fo:block), should the block still be added to the structure
  129. // tree? This is not being done for descendant empty elements...
  130. if (addKid) {
  131. parent.addKid(structElem);
  132. }
  133. String nodeName = node.getLocalName();
  134. if (nodeName.equals("external-graphic") || nodeName.equals("instream-foreign-object")) {
  135. Node altTextNode = node.getAttributes().getNamedItemNS(
  136. ExtensionElementMapping.URI, "alt-text");
  137. if (altTextNode != null) {
  138. structElem.put("Alt", altTextNode.getNodeValue());
  139. } else {
  140. structElem.put("Alt", "No alternate text specified");
  141. }
  142. }
  143. Node attr = node.getAttributes().getNamedItemNS(InternalElementMapping.URI, "ptr");
  144. if (attr != null) {
  145. String ptr = attr.getNodeValue();
  146. structTreeMap.put(ptr, structElem);
  147. }
  148. NodeList nodes = node.getChildNodes();
  149. for (int i = 0, n = nodes.getLength(); i < n; i++) {
  150. processNode(nodes.item(i), structElem, false);
  151. }
  152. }
  153. private int getNextParentTreeKey() {
  154. return parentTreeKey++;
  155. }
  156. /**
  157. * Receive notification of the beginning of a new page.
  158. *
  159. * @param page the page that will be rendered in PDF
  160. */
  161. void startPage(PDFPage page) {
  162. currentPage = page;
  163. currentPage.setStructParents(getNextParentTreeKey());
  164. pageParentTreeArray = new PDFArray();
  165. }
  166. /**
  167. * Receive notification of the end of the current page.
  168. */
  169. void endPage() {
  170. // TODO
  171. // Values in a number tree must be indirect references to the PDF
  172. // objects associated to the keys. To enforce that the array is
  173. // registered to the PDF document. Unfortunately that can't be done
  174. // earlier since a call to PDFContentGenerator.flushPDFDoc can be made
  175. // before the array is complete, which would result in only part of it
  176. // being output to the PDF.
  177. // This should really be handled by PDFNumsArray
  178. pdfDoc.registerObject(pageParentTreeArray);
  179. parentTree.getNums().put(currentPage.getStructParents(), pageParentTreeArray);
  180. }
  181. private MarkedContentInfo addToParentTree(String structurePointer) {
  182. PDFStructElem parent = (PDFStructElem) structTreeMap.get(structurePointer);
  183. if (parent == null) {
  184. return ARTIFACT;
  185. } else {
  186. pageParentTreeArray.add(parent);
  187. String type = parent.getStructureType().toString();
  188. int mcid = pageParentTreeArray.length() - 1;
  189. return new MarkedContentInfo(type, mcid, parent);
  190. }
  191. }
  192. /**
  193. * Adds a content item corresponding to text into the structure tree, if
  194. * there is a structure element associated to it.
  195. *
  196. * @param structurePointer reference to the parent structure element of the
  197. * piece of text
  198. * @return the necessary information for bracketing the content as a
  199. * marked-content sequence. If there is no element in the structure tree
  200. * associated to that content, returns an instance whose
  201. * {@link MarkedContentInfo#tag} value is <code>null</code>. The content
  202. * must then be treated as an artifact.
  203. */
  204. MarkedContentInfo addTextContentItem(String structurePointer) {
  205. MarkedContentInfo mci = addToParentTree(structurePointer);
  206. if (mci != ARTIFACT) {
  207. PDFDictionary contentItem = new PDFDictionary();
  208. contentItem.put("Type", MCR);
  209. contentItem.put("Pg", this.currentPage);
  210. contentItem.put("MCID", mci.mcid);
  211. mci.parent.addKid(contentItem);
  212. }
  213. return mci;
  214. }
  215. /**
  216. * Adds a content item corresponding to an image into the structure tree, if
  217. * there is a structure element associated to it.
  218. *
  219. * @param structurePointer reference to the parent structure element of the
  220. * image
  221. * @return the necessary information for bracketing the content as a
  222. * marked-content sequence. If there is no element in the structure tree
  223. * associated to that image, returns an instance whose
  224. * {@link MarkedContentInfo#tag} value is <code>null</code>. The image
  225. * must then be treated as an artifact.
  226. */
  227. MarkedContentInfo addImageContentItem(String structurePointer) {
  228. MarkedContentInfo mci = addToParentTree(structurePointer);
  229. if (mci != ARTIFACT) {
  230. mci.parent.setMCIDKid(mci.mcid);
  231. mci.parent.setPage(this.currentPage);
  232. }
  233. return mci;
  234. }
  235. // While the PDF spec allows images to be referred as PDF objects, this
  236. // makes the Acrobat Pro checker complain that the image is not accessible.
  237. // Its alt-text is still read aloud though. Using marked-content sequences
  238. // like for text works.
  239. // MarkedContentInfo addImageObject(String parentReference) {
  240. // MarkedContentInfo mci = addToParentTree(parentReference);
  241. // if (mci != ARTIFACT) {
  242. // PDFDictionary contentItem = new PDFDictionary();
  243. // contentItem.put("Type", OBJR);
  244. // contentItem.put("Pg", this.currentPage);
  245. // contentItem.put("Obj", null);
  246. // mci.parent.addKid(contentItem);
  247. // }
  248. // return mci;
  249. // }
  250. /**
  251. * Adds a content item corresponding to the given link into the structure
  252. * tree.
  253. *
  254. * @param link a link
  255. * @param structurePointer reference to the corresponding parent structure element
  256. */
  257. void addLinkContentItem(PDFLink link, String structurePointer) {
  258. int structParent = getNextParentTreeKey();
  259. link.setStructParent(structParent);
  260. PDFDictionary contentItem = new PDFDictionary();
  261. contentItem.put("Type", OBJR);
  262. contentItem.put("Pg", this.currentPage);
  263. contentItem.put("Obj", link);
  264. PDFStructElem parent = (PDFStructElem) structTreeMap.get(structurePointer);
  265. parentTree.getNums().put(structParent, parent);
  266. parent.addKid(contentItem);
  267. }
  268. }