source.dussan.org Git - poi.git/commitdiff
Fix bug #49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text
author Nick Burch <nick@apache.org>
Tue, 29 Jun 2010 13:38:24 +0000 (13:38 +0000)
committer Nick Burch <nick@apache.org>
Tue, 29 Jun 2010 13:38:24 +0000 (13:38 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@958965 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
test-data/document/FieldCodes.docx [new file with mode: 0644]

index 12b8a3830e9761e2ba34ae6d3b0bec22f6a80e8d..89dc67248e0ba14a1279f71f33ae5211f162e6e9 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.7-beta2" date="2010-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text</action>
            <action dev="POI-DEVELOPERS" type="fix">XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though</action>
         </release>
         <release version="3.7-beta1" date="2010-06-20">
index 6aabadef95f5308f9dd9ba122a56d193cf83d909..e0741e782a5ec00cf99c28562c253a24484c040f 100644 (file)
@@ -133,7 +133,13 @@ public class XWPFParagraph implements IBodyElement{
           while (c.toNextSelection()) {
               XmlObject o = c.getObject();
               if (o instanceof CTText) {
-                  text.append(((CTText) o).getStringValue());
+                  String tagName = o.getDomNode().getNodeName();
+                  // Field Codes (w:instrText, defined in spec sec. 17.16.23)
+                  //  come up as instances of CTText, but we don't want them
+                  //  in the normal text output
+                  if (!"w:instrText".equals(tagName)) {
+                     text.append(((CTText) o).getStringValue());
+                  }
               }
               if (o instanceof CTPTab) {
                   text.append("\t");
index f98d5e69ca029529a4e05a038e00f56e70b64021..2277ecec9d8cd4a797f8a1365f8251eb3bcbfbf3 100644 (file)
@@ -237,4 +237,17 @@ public class TestXWPFWordExtractor extends TestCase {
        // Now check the first paragraph in total
        assertTrue(extractor.getText().contains("a\tb\n"));
     }
+    
+    /**
+     * The output should not contain field codes, e.g. those specified in the
+     * w:instrText tag (spec sec. 17.16.23)
+     */
+    public void testNoFieldCodes() {
+        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FieldCodes.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+        assertFalse(text.contains("AUTHOR"));
+        assertFalse(text.contains("CREATEDATE"));
+    }
 }
diff --git a/test-data/document/FieldCodes.docx b/test-data/document/FieldCodes.docx
new file mode 100644 (file)
index 0000000..3db97f5
Binary files /dev/null and b/test-data/document/FieldCodes.docx differ