aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/fop/render/pdf/PDFLogicalStructureHandler.java
blob: ba95845979d817cfb1a5e9c61b98b62503acf5d3 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/* $Id$ */

package org.apache.fop.render.pdf;

import java.util.HashMap;
import java.util.Locale;
import java.util.Map;

import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import org.apache.fop.events.EventBroadcaster;
import org.apache.fop.fo.extensions.ExtensionElementMapping;
import org.apache.fop.fo.extensions.InternalElementMapping;
import org.apache.fop.pdf.PDFArray;
import org.apache.fop.pdf.PDFDictionary;
import org.apache.fop.pdf.PDFDocument;
import org.apache.fop.pdf.PDFLink;
import org.apache.fop.pdf.PDFName;
import org.apache.fop.pdf.PDFPage;
import org.apache.fop.pdf.PDFParentTree;
import org.apache.fop.pdf.PDFStructElem;
import org.apache.fop.pdf.PDFStructTreeRoot;


/**
 * Handles the creation of the logical structure in the PDF document.
 */
class PDFLogicalStructureHandler {

    private static final PDFName MCR = new PDFName("MCR");

    private static final PDFName OBJR = new PDFName("OBJR");

    private static final MarkedContentInfo ARTIFACT = new MarkedContentInfo(null, -1, null);

    private final PDFDocument pdfDoc;

    private final EventBroadcaster eventBroadcaster;

    /**
     * Map of references to the corresponding structure elements.
     */
    private final Map structTreeMap = new HashMap();

    private final PDFParentTree parentTree = new PDFParentTree();

    private int parentTreeKey;

    private PDFPage currentPage;

    /**
     * The array of references, from marked-content sequences in the current
     * page, to their parent structure elements. This will be a value in the
     * structure parent tree, whose corresponding key will be the page's
     * StructParents entry.
     */
    private PDFArray pageParentTreeArray;

    private PDFStructElem rootStructureElement;

    /**
     * Class providing the necessary information for bracketing content
     * associated to a structure element as a marked-content sequence.
     */
    static final class MarkedContentInfo {

        /**
         * A value that can be used for the tag operand of a marked-content
         * operator. This is the structure type of the corresponding structure
         * element.
         */
        final String tag;                                       // CSOK: VisibilityModifier

        /**
         * The value for the MCID entry of the marked-content sequence's property list.
         */
        final int mcid;                                         // CSOK: VisibilityModifier

        private final PDFStructElem parent;

        private MarkedContentInfo(String tag, int mcid, PDFStructElem parent) {
            this.tag = tag;
            this.mcid = mcid;
            this.parent = parent;
        }
    }

    /**
     * Creates a new instance for handling the logical structure of the given document.
     *
     * @param pdfDoc a document
     */
    PDFLogicalStructureHandler(PDFDocument pdfDoc, EventBroadcaster eventBroadcaster) {
        this.pdfDoc = pdfDoc;
        this.eventBroadcaster = eventBroadcaster;
        PDFStructTreeRoot structTreeRoot = pdfDoc.getFactory().makeStructTreeRoot(parentTree);
        rootStructureElement = pdfDoc.getFactory().makeStructureElement(
                FOToPDFRoleMap.mapFormattingObject("root", structTreeRoot), structTreeRoot);
        structTreeRoot.addKid(rootStructureElement);
    }

    /**
     * Converts the given structure tree into PDF.
     *
     * @param structureTree the structure tree of the current page sequence
     * @param language language set on the page sequence
     */
    void processStructureTree(NodeList structureTree, Locale language) {
        pdfDoc.enforceLanguageOnRoot();
        PDFStructElem structElemPart = pdfDoc.getFactory().makeStructureElement(
                FOToPDFRoleMap.mapFormattingObject("page-sequence", rootStructureElement),
                rootStructureElement);
        rootStructureElement.addKid(structElemPart);
        if (language != null) {
            structElemPart.setLanguage(language);
        }

        for (int i = 0, n = structureTree.getLength(); i < n; i++) {
            Node node = structureTree.item(i);
            assert node.getLocalName().equals("flow")
                    || node.getLocalName().equals("static-content");
            PDFStructElem structElemSect = pdfDoc.getFactory().makeStructureElement(
                    FOToPDFRoleMap.mapFormattingObject(node.getLocalName(), structElemPart),
                    structElemPart);
            structElemPart.addKid(structElemSect);
            NodeList childNodes = node.getChildNodes();
            for (int j = 0, m = childNodes.getLength(); j < m; j++) {
                processNode(childNodes.item(j), structElemSect, true);
            }
        }
    }

    private void processNode(Node node, PDFStructElem parent, boolean addKid) {
        Node attr = node.getAttributes().getNamedItemNS(InternalElementMapping.URI, "ptr");
        assert attr != null;
        String ptr = attr.getNodeValue();
        PDFStructElem structElem = pdfDoc.getFactory().makeStructureElement(
                FOToPDFRoleMap.mapFormattingObject(node, parent, eventBroadcaster), parent);
        // TODO necessary? If a page-sequence is empty (e.g., contains a single
        // empty fo:block), should the block still be added to the structure
        // tree? This is not being done for descendant empty elements...
        if (addKid) {
            parent.addKid(structElem);
        }
        String nodeName = node.getLocalName();
        if (nodeName.equals("external-graphic") || nodeName.equals("instream-foreign-object")) {
            Node altTextNode = node.getAttributes().getNamedItemNS(
                    ExtensionElementMapping.URI, "alt-text");
            if (altTextNode != null) {
                structElem.put("Alt", altTextNode.getNodeValue());
            } else {
                structElem.put("Alt", "No alternate text specified");
            }
        }
        structTreeMap.put(ptr, structElem);
        NodeList nodes = node.getChildNodes();
        for (int i = 0, n = nodes.getLength(); i < n; i++) {
            processNode(nodes.item(i), structElem, false);
        }
    }

    private int getNextParentTreeKey() {
        return parentTreeKey++;
    }

    /**
     * Receive notification of the beginning of a new page.
     *
     * @param page the page that will be rendered in PDF
     */
    void startPage(PDFPage page) {
        currentPage = page;
        currentPage.setStructParents(getNextParentTreeKey());
        pageParentTreeArray = new PDFArray();
    }

    /**
     * Receive notification of the end of the current page.
     */
    void endPage() {
        // TODO
        // Values in a number tree must be indirect references to the PDF
        // objects associated to the keys. To enforce that the array is
        // registered to the PDF document. Unfortunately that can't be done
        // earlier since a call to PDFContentGenerator.flushPDFDoc can be made
        // before the array is complete, which would result in only part of it
        // being output to the PDF.
        // This should really be handled by PDFNumsArray
        pdfDoc.registerObject(pageParentTreeArray);
        parentTree.getNums().put(currentPage.getStructParents(), pageParentTreeArray);
    }

    private MarkedContentInfo addToParentTree(String structurePointer) {
        PDFStructElem parent = (PDFStructElem) structTreeMap.get(structurePointer);
        if (parent == null) {
            return ARTIFACT;
        } else {
            pageParentTreeArray.add(parent);
            String type = parent.getStructureType().toString();
            int mcid = pageParentTreeArray.length() - 1;
            return new MarkedContentInfo(type, mcid, parent);
        }
    }

    /**
     * Adds a content item corresponding to text into the structure tree, if
     * there is a structure element associated to it.
     *
     * @param structurePointer reference to the parent structure element of the
     * piece of text
     * @return the necessary information for bracketing the content as a
     * marked-content sequence. If there is no element in the structure tree
     * associated to that content, returns an instance whose
     * {@link MarkedContentInfo#tag} value is <code>null</code>. The content
     * must then be treated as an artifact.
     */
    MarkedContentInfo addTextContentItem(String structurePointer) {
        MarkedContentInfo mci = addToParentTree(structurePointer);
        if (mci != ARTIFACT) {
            PDFDictionary contentItem = new PDFDictionary();
            contentItem.put("Type", MCR);
            contentItem.put("Pg", this.currentPage);
            contentItem.put("MCID", mci.mcid);
            mci.parent.addKid(contentItem);
        }
        return mci;
    }

    /**
     * Adds a content item corresponding to an image into the structure tree, if
     * there is a structure element associated to it.
     *
     * @param structurePointer reference to the parent structure element of the
     * image
     * @return the necessary information for bracketing the content as a
     * marked-content sequence. If there is no element in the structure tree
     * associated to that image, returns an instance whose
     * {@link MarkedContentInfo#tag} value is <code>null</code>. The image
     * must then be treated as an artifact.
     */
    MarkedContentInfo addImageContentItem(String structurePointer) {
        MarkedContentInfo mci = addToParentTree(structurePointer);
        if (mci != ARTIFACT) {
            mci.parent.setMCIDKid(mci.mcid);
            mci.parent.setPage(this.currentPage);
        }
        return mci;
    }

    // While the PDF spec allows images to be referred as PDF objects, this
    // makes the Acrobat Pro checker complain that the image is not accessible.
    // Its alt-text is still read aloud though. Using marked-content sequences
    // like for text works.
//    MarkedContentInfo addImageObject(String parentReference) {
//        MarkedContentInfo mci = addToParentTree(parentReference);
//        if (mci != ARTIFACT) {
//            PDFDictionary contentItem = new PDFDictionary();
//            contentItem.put("Type", OBJR);
//            contentItem.put("Pg", this.currentPage);
//            contentItem.put("Obj", null);
//            mci.parent.addKid(contentItem);
//        }
//        return mci;
//    }

    /**
     * Adds a content item corresponding to the given link into the structure
     * tree.
     *
     * @param link a link
     * @param structurePointer reference to the corresponding parent structure element
     */
    void addLinkContentItem(PDFLink link, String structurePointer) {
        int structParent = getNextParentTreeKey();
        link.setStructParent(structParent);
        parentTree.getNums().put(structParent, link);
        PDFDictionary contentItem = new PDFDictionary();
        contentItem.put("Type", OBJR);
        contentItem.put("Pg", this.currentPage);
        contentItem.put("Obj", link);
        PDFStructElem parent = (PDFStructElem) structTreeMap.get(structurePointer);
        parent.addKid(contentItem);
    }

}