source.dussan.org Git - poi.git/commitdiff
fixed XWPFWordExtractor to extract inserted/deleted text, see Bugzilla 47571
author: Yegor Kozlov <yegor@apache.org>
Sat, 25 Jul 2009 10:41:53 +0000 (10:41 +0000)
committer: Yegor Kozlov <yegor@apache.org>
Sat, 25 Jul 2009 10:41:53 +0000 (10:41 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@797740 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx [new file with mode: 0755]

index e935ae62c6a36781b43e2b05c3626925c3f76f45..91b9cdede07c5705a7aafd495564469bc155d6bb 100644 (file)
@@ -33,6 +33,7 @@
 
     <changes>
         <release version="3.5-beta7" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">47571 - Fixed XWPFWordExtractor to extract inserted/deleted text</action>
            <action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action>
            <action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action>
            <action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action>
index 81a4add75e45eb3ea0bb31edf95336620284658b..2d0caf1b5f1e79450194bc463db101a40d4383fe 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel;
 
 import java.math.BigInteger;
 import java.util.ArrayList;
+import java.util.Arrays;
 
 import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlObject;
@@ -56,23 +57,18 @@ public class XWPFParagraph {
             // TODO - replace this with some sort of XPath expression
             // to directly find all the CTRs, in the right order
             ArrayList<CTR> rs = new ArrayList<CTR>();
-            CTR[] tmp;
+            rs.addAll(Arrays.asList(paragraph.getRArray()));
 
-            // Get the main text runs
-            tmp = paragraph.getRArray();
-            for (int i = 0; i < tmp.length; i++) {
-                rs.add(tmp[i]);
+            for (CTSdtRun sdt : paragraph.getSdtArray()) {
+                CTSdtContentRun run = sdt.getSdtContent();
+                rs.addAll(Arrays.asList(run.getRArray()));
+            }
+            for (CTRunTrackChange c : paragraph.getDelArray()) {
+                rs.addAll(Arrays.asList(c.getRArray()));
             }
 
-            // Not sure quite what these are, but they hold
-            // more text runs
-            CTSdtRun[] sdts = paragraph.getSdtArray();
-            for (int i = 0; i < sdts.length; i++) {
-                CTSdtContentRun run = sdts[i].getSdtContent();
-                tmp = run.getRArray();
-                for (int j = 0; j < tmp.length; j++) {
-                    rs.add(tmp[j]);
-                }
+            for (CTRunTrackChange c : paragraph.getInsArray()) {
+                rs.addAll(Arrays.asList(c.getRArray()));
             }
 
             // Get text of the paragraph
index e923c40fcb37c8b3b73d0f46de2656a87b38c861..fe80baff10ec402947d9d9550665de2c3cee1b96 100644 (file)
@@ -192,6 +192,13 @@ public class TestXWPFWordExtractor extends TestCase {
         assertTrue(extractor.getText().contains("XXX"));
     }
 
+    public void testInsertedDeletedText() throws Exception {
+        XWPFDocument doc = open("delins.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        assertTrue(extractor.getText().contains("pendant worn"));
+        assertTrue(extractor.getText().contains("extremely well"));
+    }
 
     //TODO use the same logic for opening test files as in HSSFTestDataSamples
     private XWPFDocument open(String sampleFileName) throws IOException {
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx
new file mode 100755 (executable)
index 0000000..b530691
Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx differ