From 91cdef3c3337a3e2537be2136a98eb7a7b46cebe Mon Sep 17 00:00:00 2001 From: Vincent Hennebert Date: Wed, 13 Jan 2010 17:05:59 +0000 Subject: Added possibility to customize PDF tagging via the role property git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@898840 13f79535-47bb-0310-9956-ffa450edef68 --- .../content/xdocs/trunk/accessibility.xml | 27 +++- .../org/apache/fop/accessibility/reduceFOTree.xsl | 4 +- .../org/apache/fop/render/pdf/FOToPDFRoleMap.java | 180 +++++++++++++++------ .../apache/fop/render/pdf/PDFDocumentHandler.java | 3 +- .../apache/fop/render/pdf/PDFEventProducer.java | 11 ++ .../org/apache/fop/render/pdf/PDFEventProducer.xml | 1 + .../fop/render/pdf/PDFLogicalStructureHandler.java | 10 +- .../org/apache/fop/render/pdf/PDFRenderer.java | 3 +- status.xml | 5 +- test/accessibility/README | 4 +- test/accessibility/complete.fo | 10 +- test/accessibility/pdf/complete_painter_orig.pdf | Bin 96863 -> 96966 bytes test/accessibility/pdf/complete_renderer_orig.pdf | Bin 96726 -> 96880 bytes .../pdf/role_non-standard_painter_orig.pdf | Bin 0 -> 18938 bytes .../pdf/role_non-standard_renderer_orig.pdf | Bin 0 -> 18849 bytes test/accessibility/pdf/role_painter_orig.pdf | Bin 0 -> 18924 bytes test/accessibility/pdf/role_renderer_orig.pdf | Bin 0 -> 18835 bytes test/accessibility/role.fo | 125 ++++++++++++++ test/accessibility/role_non-standard.fo | 125 ++++++++++++++ 19 files changed, 443 insertions(+), 65 deletions(-) create mode 100644 test/accessibility/pdf/role_non-standard_painter_orig.pdf create mode 100644 test/accessibility/pdf/role_non-standard_renderer_orig.pdf create mode 100644 test/accessibility/pdf/role_painter_orig.pdf create mode 100644 test/accessibility/pdf/role_renderer_orig.pdf create mode 100644 test/accessibility/role.fo create mode 100644 test/accessibility/role_non-standard.fo diff --git a/src/documentation/content/xdocs/trunk/accessibility.xml b/src/documentation/content/xdocs/trunk/accessibility.xml index 
1eb78264b..f3c66e06a 100644 --- a/src/documentation/content/xdocs/trunk/accessibility.xml +++ b/src/documentation/content/xdocs/trunk/accessibility.xml @@ -91,6 +91,26 @@ +
+ Customized Tagging +

The PDF Reference defines a set of standard Structure Types to + tag content. For example, ‘P’ is used for identifying paragraphs, ‘H1’ to ‘H6’ for headings, + ‘L’ for lists, ‘Div’ for block-level groups of elements, etc. This standard set is aimed at + improving interoperability between applications producing or consuming PDF.

+

FOP provides a default mapping of Formatting Objects to elements from that standard set. + For example, fo:page-sequence is mapped to ‘Part’, fo:block is + mapped to ‘P’, fo:list-block to ‘L’, etc.

+

You may want to customize that mapping to improve the accuracy of the tagging or deal with + particular FO constructs. For example, you may want to make use of the ‘H1’ to ‘H6’ tags to + make the hierarchical structure of the document appear in the PDF. This is achieved by using + the role XSL-FO property:

+ ... +<fo:block role="H1" font-weight="bold">I. A Level 1 Heading</fo:block> +<fo:block>This is the first paragraph of the first section...</fo:block> +... +

If a non-standard structure type is specified, FOP will issue a warning and fall back to + the default tag associated with the Formatting Object.

+
Testing

@@ -119,11 +139,6 @@ SVG graphics (or images in general) are treated as a single figure. Text contained in SVGs is not accessible. It's only possible to work with fox:alt-text. -

  • - XSL-FO's role property is currently not supported. It could theoretically be used to - differentiate between headings and normal text. At the moment, the two are simply - identified as paragraphs. -
  • The side regions (region-before, region-after etc.) are currently not specially identified. Screen readers may read their content at page changes. @@ -140,7 +155,7 @@
  • US Government - Website on Section 508
  • Wikipedia on Accessibility in general
  • Wikipedia on Accessibility in PDF
  • -
  • +
  • PDF Reference 1.4 (look up chapters 9.7 "Tagged PDF" and 9.8 "Accessibility Support")
  • diff --git a/src/java/org/apache/fop/accessibility/reduceFOTree.xsl b/src/java/org/apache/fop/accessibility/reduceFOTree.xsl index 8e9bcfc13..0d93a2506 100644 --- a/src/java/org/apache/fop/accessibility/reduceFOTree.xsl +++ b/src/java/org/apache/fop/accessibility/reduceFOTree.xsl @@ -86,8 +86,8 @@ - - + + diff --git a/src/java/org/apache/fop/render/pdf/FOToPDFRoleMap.java b/src/java/org/apache/fop/render/pdf/FOToPDFRoleMap.java index 2c13edca5..26595bca1 100644 --- a/src/java/org/apache/fop/render/pdf/FOToPDFRoleMap.java +++ b/src/java/org/apache/fop/render/pdf/FOToPDFRoleMap.java @@ -19,8 +19,12 @@ package org.apache.fop.render.pdf; +import java.util.HashMap; import java.util.Map; +import org.w3c.dom.Node; + +import org.apache.fop.events.EventBroadcaster; import org.apache.fop.pdf.PDFName; import org.apache.fop.pdf.PDFObject; import org.apache.fop.pdf.PDFStructElem; @@ -30,71 +34,137 @@ import org.apache.fop.pdf.PDFStructElem; */ final class FOToPDFRoleMap { - private static final Map STANDARD_MAPPINGS = new java.util.HashMap(); - - private static final PDFName TFOOT = new PDFName("TFoot"); - private static final PDFName THEAD = new PDFName("THead"); - private static final PDFName NON_STRUCT = new PDFName("NonStruct"); - - static { - addMapping("block", "P"); + /** + * Standard structure types defined by the PDF Reference, Fourth Edition (PDF 1.5). 
+ */ + private static final Map STANDARD_STRUCTURE_TYPES = new HashMap(); - PDFName st = new PDFName("Div"); - addMapping("block-container", st); - addMapping("inline-container", st); - addMapping("table-and-caption", st); - addMapping("float", st); + private static final Map DEFAULT_MAPPINGS = new java.util.HashMap(); - st = new PDFName("Span"); - addMapping("inline", st); - addMapping("wrapper", st); - addMapping("character", st); + private static final PDFName THEAD; + private static final PDFName NON_STRUCT; + static { + // Create PDFNames for the standard structure types + // Table 10.18: Grouping elements + addStructureType("Document"); + addStructureType("Part"); + addStructureType("Art"); + addStructureType("Sect"); + addStructureType("Div"); + addStructureType("BlockQuote"); + addStructureType("Caption"); + addStructureType("TOC"); + addStructureType("TOCI"); + addStructureType("Index"); + addStructureType("NonStruct"); + addStructureType("Private"); + // Table 10.20: Paragraphlike elements + addStructureType("H"); + addStructureType("H1"); + addStructureType("H2"); + addStructureType("H3"); + addStructureType("H4"); + addStructureType("H5"); + addStructureType("H6"); + addStructureType("P"); + // Table 10.21: List elements + addStructureType("L"); + addStructureType("LI"); + addStructureType("Lbl"); + addStructureType("LBody"); + // Table 10.22: Table elements + addStructureType("Table"); + addStructureType("TR"); + addStructureType("TH"); + addStructureType("TD"); + addStructureType("THead"); + addStructureType("TBody"); + addStructureType("TFoot"); + // Table 10.23: Inline-level structure elements + addStructureType("Span"); + addStructureType("Quote"); + addStructureType("Note"); + addStructureType("Reference"); + addStructureType("BibEntry"); + addStructureType("Code"); + addStructureType("Link"); + addStructureType("Annot"); + // Table 10.24: Ruby and Warichu elements + addStructureType("Ruby"); + addStructureType("RB"); + addStructureType("RT"); + 
addStructureType("RP"); + addStructureType("Warichu"); + addStructureType("WT"); + addStructureType("WP"); + // Table 10.25: Illustration elements + addStructureType("Figure"); + addStructureType("Formula"); + addStructureType("Form"); + + NON_STRUCT = (PDFName) STANDARD_STRUCTURE_TYPES.get("NonStruct"); + assert NON_STRUCT != null; + THEAD = (PDFName) STANDARD_STRUCTURE_TYPES.get("THead"); + assert THEAD != null; + + // Create the standard mappings + // Declarations and Pagination and Layout Formatting Objects addMapping("root", "Document"); addMapping("page-sequence", "Part"); addMapping("flow", "Sect"); addMapping("static-content", "Sect"); - - st = new PDFName("Quote"); - addMapping("page-number", st); - addMapping("page-number-citation", st); - addMapping("page-number-citation-last", st); - - st = new PDFName("Figure"); - addMapping("external-graphic", st); - addMapping("instream-foreign-object", st); - - addMapping("table-caption", "Caption"); + // Block-level Formatting Objects + addMapping("block", "P"); + addMapping("block-container", "Div"); + // Inline-level Formatting Objects + addMapping("character", "Span"); + addMapping("external-graphic", "Figure"); + addMapping("instream-foreign-object", "Figure"); + addMapping("inline", "Span"); + addMapping("inline-container", "Div"); + addMapping("page-number", "Quote"); + addMapping("page-number-citation", "Quote"); + addMapping("page-number-citation-last", "Quote"); + // Formatting Objects for Tables + addMapping("table-and-caption", "Div"); addMapping("table", "Table"); + addMapping("table-caption", "Caption"); + addMapping("table-header", "THead"); + addMapping("table-footer", "TFoot"); addMapping("table-body", "TBody"); - addMapping("table-header", THEAD); - addMapping("table-footer", TFOOT); addMapping("table-row", "TR"); addMapping("table-cell", new TableCellMapper()); - + // Formatting Objects for Lists addMapping("list-block", "L"); addMapping("list-item", "LI"); - addMapping("list-item-label", "Lbl"); 
addMapping("list-item-body", "LBody"); - + addMapping("list-item-label", "Lbl"); + // Dynamic Effects: Link and Multi Formatting Objects addMapping("basic-link", "Link"); + // Out-of-Line Formatting Objects + addMapping("float", "Div"); addMapping("footnote", "Note"); addMapping("footnote-body", "Sect"); + addMapping("wrapper", "Span"); addMapping("marker", "Private"); } - private static void addMapping(String fo, String pdfName) { - addMapping(fo, new PDFName(pdfName)); + private static void addStructureType(String structureType) { + STANDARD_STRUCTURE_TYPES.put(structureType, new PDFName(structureType)); } - private static void addMapping(String fo, PDFName pdfName) { - addMapping(fo, new SimpleMapper(pdfName)); + private static void addMapping(String fo, String structureType) { + PDFName type = (PDFName) STANDARD_STRUCTURE_TYPES.get(structureType); + assert type != null; + addMapping(fo, new SimpleMapper(type)); } private static void addMapping(String fo, Mapper mapper) { - STANDARD_MAPPINGS.put(fo, mapper); + DEFAULT_MAPPINGS.put(fo, mapper); } + /** * Maps a Formatting Object to a PDFName representing the associated structure type. 
* @param fo the formatting object's local name @@ -102,7 +172,7 @@ final class FOToPDFRoleMap { * @return the structure type or null if no match could be found */ public static PDFName mapFormattingObject(String fo, PDFObject parent) { - Mapper mapper = (Mapper)STANDARD_MAPPINGS.get(fo); + Mapper mapper = (Mapper)DEFAULT_MAPPINGS.get(fo); if (mapper != null) { return mapper.getStructureType(parent); } else { @@ -110,7 +180,27 @@ final class FOToPDFRoleMap { } } - private interface Mapper { + public static PDFName mapFormattingObject(Node fo, PDFObject parent, + EventBroadcaster eventBroadcaster) { + PDFName type = null; + Node role = fo.getAttributes().getNamedItemNS(null, "role"); + if (role == null) { + type = mapFormattingObject(fo.getLocalName(), parent); + } else { + String customType = role.getNodeValue(); + type = (PDFName) STANDARD_STRUCTURE_TYPES.get(customType); + if (type == null) { + String foName = fo.getLocalName(); + type = mapFormattingObject(foName, parent); + PDFEventProducer.Provider.get(eventBroadcaster).nonStandardStructureType(fo, + foName, customType, type.toString().substring(1)); + } + } + assert type != null; + return type; + } + + private static interface Mapper { PDFName getStructureType(PDFObject parent); } @@ -130,18 +220,18 @@ final class FOToPDFRoleMap { private static class TableCellMapper implements Mapper { - private static final PDFName TD = new PDFName("TD"); - private static final PDFName TH = new PDFName("TH"); - public PDFName getStructureType(PDFObject parent) { PDFStructElem grandParent = (PDFStructElem) ((PDFStructElem)parent).getParentStructElem(); //TODO What to do with cells from table-footer? Currently they are mapped on TD. 
+ PDFName type; if (THEAD.equals(grandParent.getStructureType())) { - return TH; + type = (PDFName) STANDARD_STRUCTURE_TYPES.get("TH"); } else { - return TD; + type = (PDFName) STANDARD_STRUCTURE_TYPES.get("TD"); } + assert type != null; + return type; } } diff --git a/src/java/org/apache/fop/render/pdf/PDFDocumentHandler.java b/src/java/org/apache/fop/render/pdf/PDFDocumentHandler.java index a54bbb4e7..6025fb486 100644 --- a/src/java/org/apache/fop/render/pdf/PDFDocumentHandler.java +++ b/src/java/org/apache/fop/render/pdf/PDFDocumentHandler.java @@ -145,7 +145,8 @@ public class PDFDocumentHandler extends AbstractBinaryWritingIFDocumentHandler { this.accessEnabled = getUserAgent().isAccessibilityEnabled(); if (accessEnabled) { pdfDoc.getRoot().makeTagged(); - logicalStructureHandler = new PDFLogicalStructureHandler(pdfDoc); + logicalStructureHandler = new PDFLogicalStructureHandler(pdfDoc, + getUserAgent().getEventBroadcaster()); } } catch (IOException e) { throw new IFException("I/O error in startDocument()", e); diff --git a/src/java/org/apache/fop/render/pdf/PDFEventProducer.java b/src/java/org/apache/fop/render/pdf/PDFEventProducer.java index d70409870..4d1b3a42b 100644 --- a/src/java/org/apache/fop/render/pdf/PDFEventProducer.java +++ b/src/java/org/apache/fop/render/pdf/PDFEventProducer.java @@ -66,4 +66,15 @@ public interface PDFEventProducer extends EventProducer { */ void nonFullyResolvedLinkTargets(Object source, int count); + + /** + * Custom structure type is not standard as per the PDF reference. 
+ * + * @param source the event source + * @param fo the local name of the formatting object having the custom type + * @param type custom structure type + * @param fallback default structure type used as a fallback + * @event.severity WARN + */ + void nonStandardStructureType(Object source, String fo, String type, String fallback); } diff --git a/src/java/org/apache/fop/render/pdf/PDFEventProducer.xml b/src/java/org/apache/fop/render/pdf/PDFEventProducer.xml index fd57d5099..f6425753d 100644 --- a/src/java/org/apache/fop/render/pdf/PDFEventProducer.xml +++ b/src/java/org/apache/fop/render/pdf/PDFEventProducer.xml @@ -1,4 +1,5 @@ {count} link target{count,equals,1,,s} could not be fully resolved and now point{count,equals,1,,s} to the top of the page or {count,equals,1,is,are} dysfunctional. + ‘{type}’ is not a standard structure type defined by the PDF Reference. Falling back to ‘{fallback}’. diff --git a/src/java/org/apache/fop/render/pdf/PDFLogicalStructureHandler.java b/src/java/org/apache/fop/render/pdf/PDFLogicalStructureHandler.java index d55094d48..42b873270 100644 --- a/src/java/org/apache/fop/render/pdf/PDFLogicalStructureHandler.java +++ b/src/java/org/apache/fop/render/pdf/PDFLogicalStructureHandler.java @@ -26,6 +26,7 @@ import java.util.Map; import org.w3c.dom.Node; import org.w3c.dom.NodeList; +import org.apache.fop.events.EventBroadcaster; import org.apache.fop.fo.extensions.ExtensionElementMapping; import org.apache.fop.fo.extensions.InternalElementMapping; import org.apache.fop.pdf.PDFArray; @@ -52,6 +53,8 @@ class PDFLogicalStructureHandler { private final PDFDocument pdfDoc; + private final EventBroadcaster eventBroadcaster; + /** * Map of references to the corresponding structure elements. 
*/ @@ -105,8 +108,9 @@ class PDFLogicalStructureHandler { * * @param pdfDoc a document */ - PDFLogicalStructureHandler(PDFDocument pdfDoc) { + PDFLogicalStructureHandler(PDFDocument pdfDoc, EventBroadcaster eventBroadcaster) { this.pdfDoc = pdfDoc; + this.eventBroadcaster = eventBroadcaster; PDFStructTreeRoot structTreeRoot = pdfDoc.getFactory().makeStructTreeRoot(parentTree); rootStructureElement = pdfDoc.getFactory().makeStructureElement( FOToPDFRoleMap.mapFormattingObject("root", structTreeRoot), structTreeRoot); @@ -148,15 +152,15 @@ class PDFLogicalStructureHandler { Node attr = node.getAttributes().getNamedItemNS(InternalElementMapping.URI, "ptr"); assert attr != null; String ptr = attr.getNodeValue(); - String nodeName = node.getLocalName(); PDFStructElem structElem = pdfDoc.getFactory().makeStructureElement( - FOToPDFRoleMap.mapFormattingObject(nodeName, parent), parent); + FOToPDFRoleMap.mapFormattingObject(node, parent, eventBroadcaster), parent); // TODO necessary? If a page-sequence is empty (e.g., contains a single // empty fo:block), should the block still be added to the structure // tree? This is not being done for descendant empty elements... 
if (addKid) { parent.addKid(structElem); } + String nodeName = node.getLocalName(); if (nodeName.equals("external-graphic") || nodeName.equals("instream-foreign-object")) { Node altTextNode = node.getAttributes().getNamedItemNS( ExtensionElementMapping.URI, "alt-text"); diff --git a/src/java/org/apache/fop/render/pdf/PDFRenderer.java b/src/java/org/apache/fop/render/pdf/PDFRenderer.java index 5d20bbd9f..a4ba08911 100644 --- a/src/java/org/apache/fop/render/pdf/PDFRenderer.java +++ b/src/java/org/apache/fop/render/pdf/PDFRenderer.java @@ -243,7 +243,8 @@ public class PDFRenderer extends AbstractPathOrientedRenderer implements PDFConf this.pdfDoc = pdfUtil.setupPDFDocument(stream); if (accessEnabled) { pdfDoc.getRoot().makeTagged(); - logicalStructureHandler = new PDFLogicalStructureHandler(pdfDoc); + logicalStructureHandler = new PDFLogicalStructureHandler(pdfDoc, + userAgent.getEventBroadcaster()); } } diff --git a/status.xml b/status.xml index f7c7fc9b2..853fbdf16 100644 --- a/status.xml +++ b/status.xml @@ -58,13 +58,16 @@ documents. Example: the fix of marks layering will be such a case when it's done. --> + + Added possibility to customize PDF tagging via the ‘role’ property. + Bugfix: AFP Renderer: Respect image color settings for svg Bugfix: AFP Renderer: Page Overlays not generated when using Intermediate Format - + Bugfix: AFP Renderer: Underline is incorrectly placed when reference-orientation != 0 diff --git a/test/accessibility/README b/test/accessibility/README index cbe2879e1..391d39055 100644 --- a/test/accessibility/README +++ b/test/accessibility/README @@ -1,5 +1,5 @@ -his directory contains sample FO files for testing the accessibility features of -FOP. +This directory contains sample FO files for testing the accessibility features +of FOP. To every FO file in this directory correspond two PDF files in the pdf/ sub-directory: one generated by the painter, one by the renderer. 
For example, diff --git a/test/accessibility/complete.fo b/test/accessibility/complete.fo index 03c57b212..81df372b5 100644 --- a/test/accessibility/complete.fo +++ b/test/accessibility/complete.fo @@ -36,10 +36,12 @@ (There’s another page sequence below.) - Apache FOP (Formatting Objects Processor) is a print formatter driven by XSL - formatting objects (XSL-FO) and an output independent formatter1See the - About Apache FOP + It is a print formatter driven by XSL formatting objects (XSL-FO) and an output + independent formatter1See the FOP website for more information. FOP has a nice logo: diff --git a/test/accessibility/pdf/complete_painter_orig.pdf b/test/accessibility/pdf/complete_painter_orig.pdf index 856e8ca92..caf7a41f8 100644 Binary files a/test/accessibility/pdf/complete_painter_orig.pdf and b/test/accessibility/pdf/complete_painter_orig.pdf differ diff --git a/test/accessibility/pdf/complete_renderer_orig.pdf b/test/accessibility/pdf/complete_renderer_orig.pdf index bcca16076..e7e4ed6c1 100644 Binary files a/test/accessibility/pdf/complete_renderer_orig.pdf and b/test/accessibility/pdf/complete_renderer_orig.pdf differ diff --git a/test/accessibility/pdf/role_non-standard_painter_orig.pdf b/test/accessibility/pdf/role_non-standard_painter_orig.pdf new file mode 100644 index 000000000..f8b1c9d2c Binary files /dev/null and b/test/accessibility/pdf/role_non-standard_painter_orig.pdf differ diff --git a/test/accessibility/pdf/role_non-standard_renderer_orig.pdf b/test/accessibility/pdf/role_non-standard_renderer_orig.pdf new file mode 100644 index 000000000..1700d7c03 Binary files /dev/null and b/test/accessibility/pdf/role_non-standard_renderer_orig.pdf differ diff --git a/test/accessibility/pdf/role_painter_orig.pdf b/test/accessibility/pdf/role_painter_orig.pdf new file mode 100644 index 000000000..1eaaf1a9d Binary files /dev/null and b/test/accessibility/pdf/role_painter_orig.pdf differ diff --git a/test/accessibility/pdf/role_renderer_orig.pdf 
b/test/accessibility/pdf/role_renderer_orig.pdf new file mode 100644 index 000000000..fc8e002a8 Binary files /dev/null and b/test/accessibility/pdf/role_renderer_orig.pdf differ diff --git a/test/accessibility/role.fo b/test/accessibility/role.fo new file mode 100644 index 000000000..e284cde9c --- /dev/null +++ b/test/accessibility/role.fo @@ -0,0 +1,125 @@ + + + + + + + + + + + + Title 1: To Start With + Title 2: A Sub-Title + Apache FOP (Formatting Objects Processor) is a print formatter + driven by XSL formatting objects (XSL-FO) and an output independent formatter. It is a Java + application that reads a formatting object (FO) tree and renders the resulting pages to a + specified output. + Title 2: Another Sub-Title + Apache FOP (Formatting Objects Processor) is a print formatter + driven by XSL formatting objects (XSL-FO) and an output independent formatter. It is a Java + application that reads a formatting object (FO) tree and renders the resulting pages to a + specified output. + Title 1: Second Title + Title 2: A Sample Table + See data below: + + + + + Header 1 + + + Header 2 + + + + + + + Footer 1 + + + Footer 2 + + + + + + + Cell 1.1 + + + Cell 1.2 + + + + + Cell 2.1 + + + Cell 2.2 + + + + + That’s all folks. + + + diff --git a/test/accessibility/role_non-standard.fo b/test/accessibility/role_non-standard.fo new file mode 100644 index 000000000..b6641aa2b --- /dev/null +++ b/test/accessibility/role_non-standard.fo @@ -0,0 +1,125 @@ + + + + + + + + + + + + Title 1: To Start With + A Sub-Title With a Non-Standard Role + Apache FOP (Formatting Objects Processor) is a print formatter + driven by XSL formatting objects (XSL-FO) and an output independent formatter. It is a Java + application that reads a formatting object (FO) tree and renders the resulting pages to a + specified output. + Title 2: Another Sub-Title + Apache FOP (Formatting Objects Processor) is a print formatter + driven by XSL formatting objects (XSL-FO) and an output independent formatter. 
It is a Java + application that reads a formatting object (FO) tree and renders the resulting pages to a + specified output. + Title 1: Second Title + Title 2: A Sample Table + See data below: + + + + + Header 1 + + + Header 2 + + + + + + + Footer 1 + + + Footer 2 + + + + + + + Cell 1.1 + + + Cell 1.2 + + + + + Cell 2.1 + + + Cell 2.2 + + + + + That’s all folks. + + + -- cgit v1.2.3