Browse Source

[Bug-61354] fix issue with extracting text from Word docs. This closes #66

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1803250 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_17_FINAL
PJ Fanning 6 years ago
parent
commit
36d940ce39

+ 1
- 0
.gitignore View File

@@ -45,3 +45,4 @@ sonar/*/target
.ant-targets-build.xml
build
dist
lib/

+ 26
- 18
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java View File

@@ -156,26 +156,34 @@ public class XWPFDocument extends POIXMLDocument implements Document, IBody {

// parse the document with cursor and add
// the XmlObject to its lists
XmlCursor cursor = ctDocument.getBody().newCursor();
cursor.selectPath("./*");
while (cursor.toNextSelection()) {
XmlObject o = cursor.getObject();
if (o instanceof CTP) {
XWPFParagraph p = new XWPFParagraph((CTP) o, this);
bodyElements.add(p);
paragraphs.add(p);
} else if (o instanceof CTTbl) {
XWPFTable t = new XWPFTable((CTTbl) o, this);
bodyElements.add(t);
tables.add(t);
} else if (o instanceof CTSdtBlock) {
XWPFSDT c = new XWPFSDT((CTSdtBlock) o, this);
bodyElements.add(c);
contentControls.add(c);
XmlCursor docCursor = ctDocument.newCursor();
docCursor.selectPath("./*");
while (docCursor.toNextSelection()) {
XmlObject o = docCursor.getObject();
if (o instanceof CTBody) {
XmlCursor bodyCursor = o.newCursor();
bodyCursor.selectPath("./*");
while (bodyCursor.toNextSelection()) {
XmlObject bodyObj = bodyCursor.getObject();
if (bodyObj instanceof CTP) {
XWPFParagraph p = new XWPFParagraph((CTP) bodyObj,
this);
bodyElements.add(p);
paragraphs.add(p);
} else if (bodyObj instanceof CTTbl) {
XWPFTable t = new XWPFTable((CTTbl) bodyObj, this);
bodyElements.add(t);
tables.add(t);
} else if (bodyObj instanceof CTSdtBlock) {
XWPFSDT c = new XWPFSDT((CTSdtBlock) bodyObj, this);
bodyElements.add(c);
contentControls.add(c);
}
}
bodyCursor.dispose();
}
}
cursor.dispose();

docCursor.dispose();
// Sort out headers and footers
if (doc.getDocument().getBody().getSectPr() != null)
headerFooterPolicy = new XWPFHeaderFooterPolicy(this);

+ 10
- 0
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java View File

@@ -411,4 +411,14 @@ public class TestXWPFWordExtractor extends TestCase {
"In Sequence:\n|X||_||X|\n", extractor.getText());
extractor.close();
}
/**
 * Regression test for Bug 61354: extracts the text of the
 * {@code MultipleBodyBug.docx} sample and expects the "BODY 1", "BODY 2"
 * and "BODY 3" lines to all be present, in that order.
 *
 * @throws IOException propagated from loading the sample or closing the extractor
 */
public void testMultipleBodyBug() throws IOException {
    XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("MultipleBodyBug.docx");
    XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
    try {
        assertEquals("START BODY 1 The quick, brown fox jumps over a lazy dog. END BODY 1.\n"
                + "START BODY 2 The quick, brown fox jumps over a lazy dog. END BODY 2.\n"
                + "START BODY 3 The quick, brown fox jumps over a lazy dog. END BODY 3.\n",
                extractor.getText());
    } finally {
        // Close the extractor even when the assertion (or getText) fails,
        // so a failing run does not leave it open.
        extractor.close();
    }
}
}

BIN
test-data/document/MultipleBodyBug.docx View File


Loading…
Cancel
Save