From: Nick Burch Date: Tue, 29 Jun 2010 13:38:24 +0000 (+0000) Subject: Fix bug #49446 - Don't consider 17.16.23 field codes as properly part of the paragrap... X-Git-Tag: REL_3_7_BETA2~49 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=f69404de8d0b8d974b9eefe8411f1ce7620bb15c;p=poi.git Fix bug #49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@958965 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 12b8a3830e..89dc67248e 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 49446 - Don't consider 17.16.23 field codes as properly part of the paragraph's text XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index 6aabadef95..e0741e782a 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -133,7 +133,13 @@ public class XWPFParagraph implements IBodyElement{ while (c.toNextSelection()) { XmlObject o = c.getObject(); if (o instanceof CTText) { - text.append(((CTText) o).getStringValue()); + String tagName = o.getDomNode().getNodeName(); + // Field Codes (w:instrText, defined in spec sec. 17.16.23) + // come up as instances of CTText, but we don't want them + // in the normal text output + if (!"w:instrText".equals(tagName)) { + text.append(((CTText) o).getStringValue()); + } } if (o instanceof CTPTab) { text.append("\t"); diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index f98d5e69ca..2277ecec9d 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -237,4 +237,17 @@ public class TestXWPFWordExtractor extends TestCase { // Now check the first paragraph in total assertTrue(extractor.getText().contains("a\tb\n")); } + + /** + * The output should not contain field codes, e.g. those specified in the + * w:instrText tag (spec sec. 17.16.23) + */ + public void testNoFieldCodes() { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FieldCodes.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + String text = extractor.getText(); + assertTrue(text.length() > 0); + assertFalse(text.contains("AUTHOR")); + assertFalse(text.contains("CREATEDATE")); + } } diff --git a/test-data/document/FieldCodes.docx b/test-data/document/FieldCodes.docx new file mode 100644 index 0000000000..3db97f5ce7 Binary files /dev/null and b/test-data/document/FieldCodes.docx differ