<changes>
<release version="3.7-beta2" date="2010-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">49446 - Don't treat field codes (w:instrText, spec section 17.16.23) as part of the paragraph's text</action>
<action dev="POI-DEVELOPERS" type="fix">XSLFSlideShow shouldn't break on .thmx (theme) files. Support for them is still very limited though</action>
</release>
<release version="3.7-beta1" date="2010-06-20">
while (c.toNextSelection()) {
XmlObject o = c.getObject();
if (o instanceof CTText) {
- text.append(((CTText) o).getStringValue());
+ String tagName = o.getDomNode().getNodeName();
+ // Field Codes (w:instrText, defined in spec sec. 17.16.23)
+ // come up as instances of CTText, but we don't want them
+ // in the normal text output
+ if (!"w:instrText".equals(tagName)) {
+ text.append(((CTText) o).getStringValue());
+ }
}
if (o instanceof CTPTab) {
text.append("\t");
// Now check the first paragraph in total
assertTrue(extractor.getText().contains("a\tb\n"));
}
+
+ /**
+ * The output should not contain field codes, e.g. those specified in the
+ * w:instrText tag (spec sec. 17.16.23).
+ * <p>
+ * Extracts the text of the FieldCodes.docx sample document and checks that
+ * it is non-empty and contains neither "AUTHOR" nor "CREATEDATE".
+ * NOTE(review): presumably those two strings are the field instructions
+ * stored in the sample document - confirm against the file.
+ */
+ public void testNoFieldCodes() {
+ XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("FieldCodes.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+ String text = extractor.getText();
+ assertTrue(text.length() > 0); // guard: an empty result would make the checks below pass trivially
+ assertFalse(text.contains("AUTHOR"));
+ assertFalse(text.contains("CREATEDATE"));
+ }
}