source.dussan.org Git - xmlgraphics-fop.git/commitdiff
Bugzilla #54081: properly tag hyphenated words in PDF output when accessibility is...
author: Vincent Hennebert <vhennebert@apache.org>
Fri, 2 Nov 2012 20:47:32 +0000 (20:47 +0000)
committer: Vincent Hennebert <vhennebert@apache.org>
Fri, 2 Nov 2012 20:47:32 +0000 (20:47 +0000)
git-svn-id: https://svn.apache.org/repos/asf/xmlgraphics/fop/trunk@1405158 13f79535-47bb-0310-9956-ffa450edef68

35 files changed:
src/documentation/intermediate-format-ng/fop-intermediate-format-ng-content.xsd
src/java/org/apache/fop/area/inline/TextArea.java
src/java/org/apache/fop/layoutmgr/inline/TextLayoutManager.java
src/java/org/apache/fop/render/intermediate/IFContext.java
src/java/org/apache/fop/render/intermediate/IFParser.java
src/java/org/apache/fop/render/intermediate/IFRenderer.java
src/java/org/apache/fop/render/intermediate/IFSerializer.java
src/java/org/apache/fop/render/pdf/PDFContentGenerator.java
src/java/org/apache/fop/render/pdf/PDFPainter.java
status.xml
test/intermediate/hyphenation.xml [new file with mode: 0644]
test/pdf/accessibility/hyphenation.fo [new file with mode: 0644]
test/pdf/accessibility/pdf/background-image_jpg_repeat.pdf
test/pdf/accessibility/pdf/background-image_jpg_single.pdf
test/pdf/accessibility/pdf/background-image_png_repeat.pdf
test/pdf/accessibility/pdf/background-image_png_single.pdf
test/pdf/accessibility/pdf/background-image_svg_repeat.pdf
test/pdf/accessibility/pdf/background-image_svg_single.pdf
test/pdf/accessibility/pdf/complete.pdf
test/pdf/accessibility/pdf/hyphenation.pdf [new file with mode: 0644]
test/pdf/accessibility/pdf/image_jpg.pdf
test/pdf/accessibility/pdf/image_png.pdf
test/pdf/accessibility/pdf/image_svg.pdf
test/pdf/accessibility/pdf/image_wmf.pdf
test/pdf/accessibility/pdf/language.pdf
test/pdf/accessibility/pdf/leader.pdf
test/pdf/accessibility/pdf/links.pdf
test/pdf/accessibility/pdf/role.pdf
test/pdf/accessibility/pdf/role_non-standard.pdf
test/pdf/accessibility/pdf/side-regions.pdf
test/pdf/accessibility/pdf/table_row-col-span.pdf
test/pdf/accessibility/pdf/text_1.pdf
test/pdf/accessibility/pdf/text_2.pdf
test/pdf/accessibility/pdf/text_font-embedding.pdf
test/pdf/accessibility/pdf/th_scope.pdf

index 5e58c820837212f45eca66cb0a5468f674761152..d6f0c694c46649acd6c89fba4dec3ae0e07fd8ab 100644 (file)
@@ -68,6 +68,7 @@
               <xs:attribute name="word-spacing" type="mf:lengthType"/>
               <xs:attribute name="dx" type="mf:lengthListType"/>
               <xs:attribute name="dp" type="mf:dpListType"/>
+              <xs:attribute name="hyphenated" type="xs:boolean"/>
             </xs:extension>
           </xs:simpleContent>
         </xs:complexType>
index 1d27827f7eed4236885a07c90a1adb163e86193a..66854f7f204c55df6b5ee888c80b886dd5b4d935 100644 (file)
@@ -30,6 +30,8 @@ public class TextArea extends AbstractTextArea {
 
     private static final long serialVersionUID = 7315900267242540809L;
 
+    private boolean isHyphenated;
+
     /**
      * Create a text inline area
      */
@@ -116,6 +118,20 @@ public class TextArea extends AbstractTextArea {
         updateLevel(level);
     }
 
+    /**
+     * Records that the last word in this text area is hyphenated.
+     */
+    public void setHyphenated() {
+        this.isHyphenated = true;
+    }
+
+    /**
+     * Returns {@code true} if the last word in this area is hyphenated.
+     */
+    public boolean isHyphenated() {
+        return isHyphenated;
+    }
+
     /**
      * Get the whole text string.
      * Renderers whose space adjustment handling is not affected
index c2f69cf98eb3ae0f56a483911bdf7b9d8a630bf6..573019bf614a858e99e9922d96d28cb4b5a580a3 100644 (file)
@@ -624,6 +624,7 @@ public class TextLayoutManager extends LeafNodeLayoutManager {
             wordChars.append(foText.getCommonHyphenation().getHyphChar(font));
             // [TBD] expand bidi word levels, letter space adjusts, gpos adjusts
             // [TBD] [GA] problematic in bidi context... what is level of hyphen?
+            textArea.setHyphenated();
         }
 
         /**
index 62bbbe9c5c8b5cca8a4ffec078f0ca8cbb58c6f4..fda0cff3b01867c2b0b004c04e2e2e9029105869 100644 (file)
@@ -53,6 +53,8 @@ public class IFContext {
 
     private String location;
 
+    private boolean hyphenated;
+
     /**
      * Main constructor.
      * @param ua the user agent
@@ -200,4 +202,18 @@ public class IFContext {
         return location;
     }
 
+    /**
+     * Records whether the last text in the currently processed text area is hyphenated.
+     */
+    public void setHyphenated(boolean hyphenated) {
+        this.hyphenated = hyphenated;
+    }
+
+    /**
+     * Returns {@code true} if the last text in the currently processed text area is hyphenated.
+     */
+    public boolean isHyphenated() {
+        return hyphenated;
+    }
+
 }
index 24b7a924cb397b76e733fc0d668b1858edcd5b7a..5753dad014d7b1d2ab55075e438c1e8858ca94e8 100644 (file)
@@ -633,7 +633,12 @@ public class IFParser implements IFConstants {
                     dp = IFUtil.convertDXToDP ( dx );
                 }
                 establishStructureTreeElement(lastAttributes);
+                boolean isHyphenated = Boolean.valueOf(lastAttributes.getValue("hyphenated"));
+                if (isHyphenated) {
+                    documentHandler.getContext().setHyphenated(isHyphenated);
+                }
                 painter.drawText(x, y, letterSpacing, wordSpacing, dp, content.toString());
+                documentHandler.getContext().setHyphenated(false);
                 resetStructureTreeElement();
             }
 
index 592a57ebeed917538d8e58d7db7d3055a570605e..d1e2217c77f2c01de5f86772b4ed2e616f63e796 100644 (file)
@@ -1032,10 +1032,12 @@ public class IFRenderer extends AbstractPathOrientedRenderer {
         textUtil.flush();
         textUtil.setStartPosition(rx, bl);
         textUtil.setSpacing(text.getTextLetterSpaceAdjust(), text.getTextWordSpaceAdjust());
+        documentHandler.getContext().setHyphenated(text.isHyphenated());
         super.renderText(text);
 
         textUtil.flush();
         renderTextDecoration(tf, size, text, bl, rx);
+        documentHandler.getContext().setHyphenated(false);
         resetStructurePointer();
     }
 
index 24239e5f4bb4913920620065476a8b6aac198816..f8882f1e0eb5f3ea608bc8fabec263e6d4f7ea81 100644 (file)
@@ -678,6 +678,9 @@ implements IFConstants, IFPainter, IFDocumentNavigationHandler {
                 }
             }
             addStructureReference(atts);
+            if (getContext().isHyphenated()) {
+                addAttribute(atts, "hyphenated", "true");
+            }
             handler.startElement(EL_TEXT, atts);
             char[] chars = text.toCharArray();
             handler.characters(chars, 0, chars.length);
index 7a6e4e3d20c0e0f8792904a036e955682bc2ae5d..f2033907894e7f0fc59ba8ff597be3f8d345e058 100644 (file)
@@ -32,6 +32,7 @@ import org.apache.fop.pdf.PDFNumber;
 import org.apache.fop.pdf.PDFPaintingState;
 import org.apache.fop.pdf.PDFResourceContext;
 import org.apache.fop.pdf.PDFStream;
+import org.apache.fop.pdf.PDFText;
 import org.apache.fop.pdf.PDFTextUtil;
 import org.apache.fop.pdf.PDFXObject;
 
@@ -171,17 +172,36 @@ public class PDFContentGenerator {
     }
 
     /**
-     * Begins a new marked content sequence (BDC or BMC). If the parameter structElemType is null,
-     * the sequenceNum is ignored and instead of a BDC with the MCID as parameter, an "Artifact"
-     * and a BMC command is generated.
-     * @param structElemType Structure Element Type
-     * @param mcid    Sequence number
+     * Begins a new marked content sequence (BDC or BMC). If {@code structElemType} is
+     * null, a BMC operator with an "Artifact" tag is generated. Otherwise, a BDC operator
+     * with {@code structElemType} as a tag is generated, and the given mcid stored in its
+     * property list.
+     *
+     * @param structElemType the type of the associated structure element
+     * @param mcid the marked content identifier
      */
     protected void beginMarkedContentSequence(String structElemType, int mcid) {
+        beginMarkedContentSequence(structElemType, mcid, null);
+    }
+
+    /**
+     * Begins a new marked content sequence (BDC or BMC). If {@code structElemType} is
+     * null, a BMC operator with an "Artifact" tag is generated. Otherwise, a BDC operator
+     * with {@code structElemType} as a tag is generated, and the given mcid and actual
+     * text are stored in its property list.
+     *
+     * @param structElemType the type of the associated structure element
+     * @param mcid the marked content identifier
+     * @param actualText the replacement text for the marked content
+     */
+    protected void beginMarkedContentSequence(String structElemType, int mcid, String actualText) {
         assert !this.inMarkedContentSequence;
         assert !this.inArtifactMode;
         if (structElemType != null) {
-            currentStream.add(structElemType + " <</MCID " + String.valueOf(mcid) + ">>\n"
+            String actualTextProperty = actualText == null ? ""
+                    : " /ActualText " + PDFText.escapeText(actualText);
+            currentStream.add(structElemType + " <</MCID " + String.valueOf(mcid)
+                    + actualTextProperty + ">>\n"
                     + "BDC\n");
         } else {
             currentStream.add("/Artifact\nBMC\n");
@@ -230,21 +250,6 @@ public class PDFContentGenerator {
         currentState.restore();
     }
 
-    /**
-     * Separates 2 text elements, ending the current marked content sequence and
-     * starting a new one.
-     *
-     * @param structElemType structure element type
-     * @param mcid sequence number
-     * @see #beginMarkedContentSequence(String, int)
-     */
-    protected void separateTextElements(String structElemType, int mcid) {
-        textutil.endTextObject();
-        endMarkedContentSequence();
-        beginMarkedContentSequence(structElemType, mcid);
-        textutil.beginTextObject();
-    }
-
     /** Indicates the beginning of a text object. */
     protected void beginTextObject() {
         if (!textutil.isInTextObject()) {
@@ -261,8 +266,21 @@ public class PDFContentGenerator {
      * @see #beginMarkedContentSequence(String, int)
      */
     protected void beginTextObject(String structElemType, int mcid) {
+        beginTextObject(structElemType, mcid, null);
+    }
+
+    /**
+     * Indicates the beginning of a marked-content text object.
+     *
+     * @param structElemType structure element type
+     * @param mcid sequence number
+     * @param actualText the replacement text for the marked content
+     * @see #beginTextObject()
+     * @see #beginMarkedContentSequence(String, int, String)
+     */
+    protected void beginTextObject(String structElemType, int mcid, String actualText) {
         if (!textutil.isInTextObject()) {
-            beginMarkedContentSequence(structElemType, mcid);
+            beginMarkedContentSequence(structElemType, mcid, actualText);
             textutil.beginTextObject();
         }
     }
index 7b636fddd9f6e95bd63346b4cb4521807c45039b..d71c6aff48f36bbaf4dfca162050535ca7b10dee 100644 (file)
@@ -343,11 +343,10 @@ public class PDFPainter extends AbstractIFPainter<PDFDocumentHandler> {
             PDFStructElem structElem = (PDFStructElem) getContext().getStructureTreeElement();
             languageAvailabilityChecker.checkLanguageAvailability(text);
             MarkedContentInfo mci = logicalStructureHandler.addTextContentItem(structElem);
-            if (generator.getTextUtil().isInTextObject()) {
-                generator.separateTextElements(mci.tag, mci.mcid);
-            }
+            String actualText = getContext().isHyphenated() ? text.substring(0, text.length() - 1) : null;
+            generator.endTextObject();
             generator.updateColor(state.getTextColor(), true, null);
-            generator.beginTextObject(mci.tag, mci.mcid);
+            generator.beginTextObject(mci.tag, mci.mcid, actualText);
         } else {
             generator.updateColor(state.getTextColor(), true, null);
             generator.beginTextObject();
index d4a2e63bd94d4ed672b6560c381698c8988cfc74..bcf8c2475610f73a09a7b06b4b17d0aaecbfb2de 100644 (file)
@@ -59,6 +59,9 @@
       documents. Example: the fix of marks layering will be such a case when it's done.
     -->
     <release version="FOP Trunk" date="TBD">
+      <action context="Renderers" dev="VH" type="add" fixes-bug="54081">
+        PDF accessibility: properly tag hyphenated words.
+      </action>
       <action context="Code" dev="CB" type="fix" fixes-bug="48955" due-to="Peter Hancock">
         Allow AFP font codepage names to be less than 8 chars
       </action>
diff --git a/test/intermediate/hyphenation.xml b/test/intermediate/hyphenation.xml
new file mode 100644 (file)
index 0000000..307f456
--- /dev/null
@@ -0,0 +1,56 @@
+<?xml version="1.0" standalone="no"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<testcase>
+  <info>
+    <p>
+      This test checks that lines ending with a hyphenated word are properly marked as such.
+    </p>
+  </info>
+  <fo>
+    <fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xml:lang="en-US">
+      <fo:layout-master-set>
+        <fo:simple-page-master master-name="page"
+          page-height="170pt" page-width="220pt" margin="10pt">
+          <fo:region-body display-align="center"/>
+        </fo:simple-page-master>
+      </fo:layout-master-set>
+      <fo:page-sequence master-reference="page" hyphenate="true">
+        <fo:flow flow-name="xsl-region-body" text-align="justify">
+          <fo:block-container width="140pt" start-indent="30pt">
+            <fo:block start-indent="0">Hyphenated text. Hyphenated text. Hyphenated text. Hyphenated 
+              text. Hyphenated text.</fo:block>
+          </fo:block-container>
+          <fo:block-container width="140pt" space-before="10pt" start-indent="30pt">
+            <fo:block start-indent="0">Hyphenated text with ‘special’ character. Hyphenated text 
+              with ‘special’ character.</fo:block>
+          </fo:block-container>
+        </fo:flow>
+      </fo:page-sequence>
+    </fo:root>
+  </fo>
+  <if-checks xmlns:if="http://xmlgraphics.apache.org/fop/intermediate">
+    <eval expected="true" xpath="/descendant::if:text[1]/@hyphenated"/>
+    <eval expected=""     xpath="/descendant::if:text[2]/@hyphenated"/>
+    <eval expected="true" xpath="/descendant::if:text[3]/@hyphenated"/>
+    <eval expected=""     xpath="/descendant::if:text[4]/@hyphenated"/>
+    <eval expected="true" xpath="/descendant::if:text[5]/@hyphenated"/>
+    <eval expected="true" xpath="/descendant::if:text[6]/@hyphenated"/>
+    <eval expected="true" xpath="/descendant::if:text[7]/@hyphenated"/>
+    <eval expected=""     xpath="/descendant::if:text[8]/@hyphenated"/>
+  </if-checks>
+</testcase>
diff --git a/test/pdf/accessibility/hyphenation.fo b/test/pdf/accessibility/hyphenation.fo
new file mode 100644 (file)
index 0000000..28a3b76
--- /dev/null
@@ -0,0 +1,21 @@
+<?xml version="1.0"?>
+<fo:root xmlns:fo="http://www.w3.org/1999/XSL/Format" xml:lang="en-US">
+  <fo:layout-master-set>
+    <fo:simple-page-master master-name="page"
+      page-height="170pt" page-width="220pt" margin="10pt">
+      <fo:region-body display-align="center"/>
+    </fo:simple-page-master>
+  </fo:layout-master-set>
+  <fo:page-sequence master-reference="page" hyphenate="true">
+    <fo:flow flow-name="xsl-region-body" text-align="justify">
+      <fo:block-container width="140pt" start-indent="30pt">
+        <fo:block start-indent="0">Hyphenated text. Hyphenated text. Hyphenated text. Hyphenated 
+          text. Hyphenated text.</fo:block>
+      </fo:block-container>
+      <fo:block-container width="140pt" space-before="10pt" start-indent="30pt">
+        <fo:block start-indent="0">Hyphenated text with ‘special’ character. Hyphenated text with 
+          ‘special’ character.</fo:block>
+      </fo:block-container>
+    </fo:flow>
+  </fo:page-sequence>
+</fo:root>
index bf3cdccadbeb9a06a9e7d0bcc5ab8bfeee25afb7..65f04cdef6dc79ca4b12c60c7db7bb3323155dd5 100644 (file)
Binary files a/test/pdf/accessibility/pdf/background-image_jpg_repeat.pdf and b/test/pdf/accessibility/pdf/background-image_jpg_repeat.pdf differ
index 4d40ebc7adfc8a804fb9d352ca648d60952d5796..6803ba999f339884d61a81c5b120db5ce25d0851 100644 (file)
Binary files a/test/pdf/accessibility/pdf/background-image_jpg_single.pdf and b/test/pdf/accessibility/pdf/background-image_jpg_single.pdf differ
index 7847bcedde7e5374df56d8e10b7fb8407f2b805a..d42d935aa1165ba42966e62c8cf4c3059334374d 100644 (file)
Binary files a/test/pdf/accessibility/pdf/background-image_png_repeat.pdf and b/test/pdf/accessibility/pdf/background-image_png_repeat.pdf differ
index 9458b54f8875e36786427f6ce19d54b53a22fa47..ba39a2b31f7322877d196b3b81b00fecfca48fcf 100644 (file)
Binary files a/test/pdf/accessibility/pdf/background-image_png_single.pdf and b/test/pdf/accessibility/pdf/background-image_png_single.pdf differ
index 0921d734f6f18a4f99f9361b4a48119cdb24b6f7..1de4c98a147cc2d44497463a42c376a7879db78e 100644 (file)
Binary files a/test/pdf/accessibility/pdf/background-image_svg_repeat.pdf and b/test/pdf/accessibility/pdf/background-image_svg_repeat.pdf differ
index 9c8af4fb6a7c5c513b5e286a90d68040e1797403..b89ea182ae1e95d94159ce5ed59cea90b692e2a4 100644 (file)
Binary files a/test/pdf/accessibility/pdf/background-image_svg_single.pdf and b/test/pdf/accessibility/pdf/background-image_svg_single.pdf differ
index a9bb3df778058759fb011eff424d053dbd2a3386..9b4df8c400b712a05b624133baa0c09546acf2cc 100644 (file)
Binary files a/test/pdf/accessibility/pdf/complete.pdf and b/test/pdf/accessibility/pdf/complete.pdf differ
diff --git a/test/pdf/accessibility/pdf/hyphenation.pdf b/test/pdf/accessibility/pdf/hyphenation.pdf
new file mode 100644 (file)
index 0000000..d8552f6
Binary files /dev/null and b/test/pdf/accessibility/pdf/hyphenation.pdf differ
index 21bae38267eca10ce1eb1e5f4ca19ca28463b652..e35768d8ae6a3d9b93c35c08986a187cfe56486a 100644 (file)
Binary files a/test/pdf/accessibility/pdf/image_jpg.pdf and b/test/pdf/accessibility/pdf/image_jpg.pdf differ
index 5bd99a6234ab66af4a042b6227358c88ad0bb9f6..a33d2ed3f4746be7e2ba8b05012d81779335a8f7 100644 (file)
Binary files a/test/pdf/accessibility/pdf/image_png.pdf and b/test/pdf/accessibility/pdf/image_png.pdf differ
index cc0a3ebba3d874f64e59fd2895ead4a4610b3dfd..1184ddef9954858234c43282422152fb4652642c 100644 (file)
Binary files a/test/pdf/accessibility/pdf/image_svg.pdf and b/test/pdf/accessibility/pdf/image_svg.pdf differ
index 368afe60d1b72d456cebe486d4e687e7c8aa4240..43c15d9ea6e1f6c3911619aa56ea965dfb7d562b 100644 (file)
Binary files a/test/pdf/accessibility/pdf/image_wmf.pdf and b/test/pdf/accessibility/pdf/image_wmf.pdf differ
index ee4cd0b2d78badae2a57e6f24440e613d6828ab6..48a5f9f52b16f2a3347e9142f8b57e54bf57145a 100644 (file)
Binary files a/test/pdf/accessibility/pdf/language.pdf and b/test/pdf/accessibility/pdf/language.pdf differ
index 4b91dfe8e974d44d5949a9e50db24377ab78e4cc..e277699e5c50d2eeae1a0abe1d2a879b37084c37 100644 (file)
Binary files a/test/pdf/accessibility/pdf/leader.pdf and b/test/pdf/accessibility/pdf/leader.pdf differ
index d2ff84ea4478cbf4c7dc546efd39eeea1b678306..896620bfb393b63c30412a361e2db884db46fea2 100644 (file)
Binary files a/test/pdf/accessibility/pdf/links.pdf and b/test/pdf/accessibility/pdf/links.pdf differ
index 8fb665b796ed86597862f5fa584a36b64bf36138..6e26032ff1920a6cf31106b559ce97e30209c47f 100644 (file)
Binary files a/test/pdf/accessibility/pdf/role.pdf and b/test/pdf/accessibility/pdf/role.pdf differ
index 9effef7935ed8f61e84311a4ad5ff6c85a09deb6..edf7541df069d1b873f61c01f57b1876bf3c8be9 100644 (file)
Binary files a/test/pdf/accessibility/pdf/role_non-standard.pdf and b/test/pdf/accessibility/pdf/role_non-standard.pdf differ
index 1bd64e115bca1eb00949dcfca60194d4fe68a3bd..22364c2c827e854fa6cebc37e51df63799fae94a 100644 (file)
Binary files a/test/pdf/accessibility/pdf/side-regions.pdf and b/test/pdf/accessibility/pdf/side-regions.pdf differ
index 603717d2ef3d32d7d5c2bf443c7be6a0aa52bc72..eec14fa3d8d7fa6166b4a2d9af831d707b564f6e 100644 (file)
Binary files a/test/pdf/accessibility/pdf/table_row-col-span.pdf and b/test/pdf/accessibility/pdf/table_row-col-span.pdf differ
index bacd7a9d06e23e240b035823c436f6a06afda44f..f3daa44b0baeaff71e6ae03f5b2263ad63dec91d 100644 (file)
Binary files a/test/pdf/accessibility/pdf/text_1.pdf and b/test/pdf/accessibility/pdf/text_1.pdf differ
index 4411b302f6c738c2323357418b0ef380c6667ca6..5a2abcf2f8a8926801efa9cdedd1409d680109c6 100644 (file)
Binary files a/test/pdf/accessibility/pdf/text_2.pdf and b/test/pdf/accessibility/pdf/text_2.pdf differ
index 47ca60bdbd0ca1a32901532325e9c30c76d3d407..e7f75b5a3da0f3aa17298be245048176db6475a6 100644 (file)
Binary files a/test/pdf/accessibility/pdf/text_font-embedding.pdf and b/test/pdf/accessibility/pdf/text_font-embedding.pdf differ
index 27159520b60fb7f3f831d274ab48da7a329c6c11..9d19548d7ce6ff42262d00f445b766a72cd2d8b5 100644 (file)
Binary files a/test/pdf/accessibility/pdf/th_scope.pdf and b/test/pdf/accessibility/pdf/th_scope.pdf differ