about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/documentation/content/xdocs/status.xml 1
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java 24
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java 7
-rwxr-xr-x src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx bin 0 -> 17720 bytes
4 files changed, 18 insertions, 14 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index e935ae62c6..91b9cdede0 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -33,6 +33,7 @@
<changes>
<release version="3.5-beta7" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">47571 - Fixed XWPFWordExtractor to extract inserted/deleted text</action>
<action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action>
<action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action>
<action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action>
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
index 81a4add75e..2d0caf1b5f 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
import java.math.BigInteger;
import java.util.ArrayList;
+import java.util.Arrays;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
@@ -56,23 +57,18 @@ public class XWPFParagraph {
// TODO - replace this with some sort of XPath expression
// to directly find all the CTRs, in the right order
ArrayList<CTR> rs = new ArrayList<CTR>();
- CTR[] tmp;
+ rs.addAll(Arrays.asList(paragraph.getRArray()));
- // Get the main text runs
- tmp = paragraph.getRArray();
- for (int i = 0; i < tmp.length; i++) {
- rs.add(tmp[i]);
+ for (CTSdtRun sdt : paragraph.getSdtArray()) {
+ CTSdtContentRun run = sdt.getSdtContent();
+ rs.addAll(Arrays.asList(run.getRArray()));
+ }
+ for (CTRunTrackChange c : paragraph.getDelArray()) {
+ rs.addAll(Arrays.asList(c.getRArray()));
}
- // Not sure quite what these are, but they hold
- // more text runs
- CTSdtRun[] sdts = paragraph.getSdtArray();
- for (int i = 0; i < sdts.length; i++) {
- CTSdtContentRun run = sdts[i].getSdtContent();
- tmp = run.getRArray();
- for (int j = 0; j < tmp.length; j++) {
- rs.add(tmp[j]);
- }
+ for (CTRunTrackChange c : paragraph.getInsArray()) {
+ rs.addAll(Arrays.asList(c.getRArray()));
}
// Get text of the paragraph
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index e923c40fcb..fe80baff10 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -192,6 +192,13 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(extractor.getText().contains("XXX"));
}
+ public void testInsertedDeletedText() throws Exception {
+ XWPFDocument doc = open("delins.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+ assertTrue(extractor.getText().contains("pendant worn"));
+ assertTrue(extractor.getText().contains("extremely well"));
+ }
//TODO use the same logic for opening test files as in HSSFTestDataSamples
private XWPFDocument open(String sampleFileName) throws IOException {
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx
new file mode 100755
index 0000000000..b53069135b
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx
Binary files differ