You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

PDFLogicalStructureHandler.java 7.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.render.pdf;
  19. import org.apache.fop.pdf.PDFArray;
  20. import org.apache.fop.pdf.PDFDictionary;
  21. import org.apache.fop.pdf.PDFDocument;
  22. import org.apache.fop.pdf.PDFLink;
  23. import org.apache.fop.pdf.PDFName;
  24. import org.apache.fop.pdf.PDFPage;
  25. import org.apache.fop.pdf.PDFParentTree;
  26. import org.apache.fop.pdf.PDFStructElem;
  27. /**
  28. * Handles the creation of the logical structure in the PDF document.
  29. */
  30. public class PDFLogicalStructureHandler {
  31. private static final PDFName MCR = new PDFName("MCR");
  32. private static final PDFName OBJR = new PDFName("OBJR");
  33. private static final MarkedContentInfo ARTIFACT = new MarkedContentInfo(null, -1, null);
  34. private final PDFDocument pdfDoc;
  35. private final PDFParentTree parentTree = new PDFParentTree();
  36. private int parentTreeKey;
  37. private PDFPage currentPage;
  38. /**
  39. * The array of references, from marked-content sequences in the current
  40. * page, to their parent structure elements. This will be a value in the
  41. * structure parent tree, whose corresponding key will be the page's
  42. * StructParents entry.
  43. */
  44. private PDFArray pageParentTreeArray;
  45. /**
  46. * Class providing the necessary information for bracketing content
  47. * associated to a structure element as a marked-content sequence.
  48. */
  49. static final class MarkedContentInfo {
  50. /**
  51. * A value that can be used for the tag operand of a marked-content
  52. * operator. This is the structure type of the corresponding structure
  53. * element.
  54. */
  55. final String tag;
  56. /**
  57. * The value for the MCID entry of the marked-content sequence's property list.
  58. */
  59. final int mcid;
  60. private final PDFStructElem parent;
  61. private MarkedContentInfo(String tag, int mcid, PDFStructElem parent) {
  62. this.tag = tag;
  63. this.mcid = mcid;
  64. this.parent = parent;
  65. }
  66. }
  67. /**
  68. * Creates a new instance for handling the logical structure of the given document.
  69. *
  70. * @param pdfDoc a document
  71. */
  72. PDFLogicalStructureHandler(PDFDocument pdfDoc) {
  73. this.pdfDoc = pdfDoc;
  74. }
  75. public PDFArray getPageParentTree() {
  76. return this.pageParentTreeArray;
  77. }
  78. public PDFParentTree getParentTree() {
  79. return parentTree;
  80. }
  81. public int getNextParentTreeKey() {
  82. return parentTreeKey++;
  83. }
  84. /**
  85. * Receive notification of the beginning of a new page.
  86. *
  87. * @param page the page that will be rendered in PDF
  88. */
  89. void startPage(PDFPage page) {
  90. currentPage = page;
  91. currentPage.setStructParents(getNextParentTreeKey());
  92. pageParentTreeArray = new PDFArray();
  93. }
  94. /**
  95. * Receive notification of the end of the current page.
  96. */
  97. void endPage() {
  98. // TODO
  99. // Values in a number tree must be indirect references to the PDF
  100. // objects associated to the keys. To enforce that the array is
  101. // registered to the PDF document. Unfortunately that can't be done
  102. // earlier since a call to PDFContentGenerator.flushPDFDoc can be made
  103. // before the array is complete, which would result in only part of it
  104. // being output to the PDF.
  105. // This should really be handled by PDFNumsArray
  106. pdfDoc.registerObject(pageParentTreeArray);
  107. parentTree.addToNums(currentPage.getStructParents(), pageParentTreeArray);
  108. }
  109. private MarkedContentInfo addToParentTree(PDFStructElem structureTreeElement) {
  110. PDFStructElem parent = structureTreeElement;
  111. while (parent instanceof PDFStructElem.Placeholder) {
  112. parent = parent.getParentStructElem();
  113. }
  114. pageParentTreeArray.add(parent);
  115. String type = parent.getStructureType().getName().toString();
  116. int mcid = pageParentTreeArray.length() - 1;
  117. return new MarkedContentInfo(type, mcid, structureTreeElement);
  118. }
  119. /**
  120. * Adds a content item corresponding to text into the structure tree, if
  121. * there is a structure element associated to it.
  122. *
  123. * @param structElem the parent structure element of the piece of text
  124. * @return the necessary information for bracketing the content as a
  125. * marked-content sequence. If there is no element in the structure tree
  126. * associated to that content, returns an instance whose
  127. * {@link MarkedContentInfo#tag} value is <code>null</code>. The content
  128. * must then be treated as an artifact.
  129. */
  130. MarkedContentInfo addTextContentItem(PDFStructElem structElem) {
  131. if (structElem == null) {
  132. return ARTIFACT;
  133. } else {
  134. MarkedContentInfo mci = addToParentTree(structElem);
  135. PDFDictionary contentItem = new PDFDictionary();
  136. contentItem.put("Type", MCR);
  137. contentItem.put("Pg", this.currentPage);
  138. contentItem.put("MCID", mci.mcid);
  139. mci.parent.addKid(contentItem);
  140. return mci;
  141. }
  142. }
  143. /**
  144. * Adds a content item corresponding to an image into the structure tree, if
  145. * there is a structure element associated to it.
  146. *
  147. * @param structElem the parent structure element of the image
  148. * @return the necessary information for bracketing the content as a
  149. * marked-content sequence. If there is no element in the structure tree
  150. * associated to that image, returns an instance whose
  151. * {@link MarkedContentInfo#tag} value is <code>null</code>. The image must
  152. * then be treated as an artifact.
  153. */
  154. MarkedContentInfo addImageContentItem(PDFStructElem structElem) {
  155. if (structElem == null) {
  156. return ARTIFACT;
  157. } else {
  158. MarkedContentInfo mci = addToParentTree(structElem);
  159. PDFDictionary contentItem = new PDFDictionary();
  160. contentItem.put("Type", MCR);
  161. contentItem.put("Pg", this.currentPage);
  162. contentItem.put("MCID", mci.mcid);
  163. mci.parent.addKid(contentItem);
  164. return mci;
  165. }
  166. }
  167. /**
  168. * Adds a content item corresponding to the given link into the structure
  169. * tree.
  170. *
  171. * @param link a link
  172. * @param structureTreeElement its parent structure element
  173. */
  174. void addLinkContentItem(PDFLink link, PDFStructElem structureTreeElement) {
  175. int structParent = getNextParentTreeKey();
  176. link.setStructParent(structParent);
  177. PDFDictionary contentItem = new PDFDictionary();
  178. contentItem.put("Type", OBJR);
  179. contentItem.put("Pg", this.currentPage);
  180. contentItem.put("Obj", link);
  181. parentTree.addToNums(structParent, structureTreeElement);
  182. structureTreeElement.addKid(contentItem);
  183. }
  184. }