diff options
-rw-r--r-- | src/documentation/content/xdocs/status.xml | 1 | ||||
-rw-r--r-- | src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java | 24 | ||||
-rw-r--r-- | src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java | 7 | ||||
-rwxr-xr-x | src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx | bin | 0 -> 17720 bytes |
4 files changed, 18 insertions, 14 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index e935ae62c6..91b9cdede0 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,7 @@ <changes> <release version="3.5-beta7" date="2009-??-??"> + <action dev="POI-DEVELOPERS" type="fix">47571 - Fixed XWPFWordExtractor to extract inserted/deleted text</action> <action dev="POI-DEVELOPERS" type="fix">47548 - Fixed RecordFactoryInputStream to properly read continued DrawingRecords</action> <action dev="POI-DEVELOPERS" type="fix">46419 - Fixed compatibility issue with OpenOffice 3.0</action> <action dev="POI-DEVELOPERS" type="fix">47559 - Fixed compatibility issue with Excel 2008 Mac sp2</action> diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index 81a4add75e..2d0caf1b5f 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -18,6 +18,7 @@ package org.apache.poi.xwpf.usermodel; import java.math.BigInteger; import java.util.ArrayList; +import java.util.Arrays; import org.apache.xmlbeans.XmlCursor; import org.apache.xmlbeans.XmlObject; @@ -56,23 +57,18 @@ public class XWPFParagraph { // TODO - replace this with some sort of XPath expression // to directly find all the CTRs, in the right order ArrayList<CTR> rs = new ArrayList<CTR>(); - CTR[] tmp; + rs.addAll(Arrays.asList(paragraph.getRArray())); - // Get the main text runs - tmp = paragraph.getRArray(); - for (int i = 0; i < tmp.length; i++) { - rs.add(tmp[i]); + for (CTSdtRun sdt : paragraph.getSdtArray()) { + CTSdtContentRun run = sdt.getSdtContent(); + rs.addAll(Arrays.asList(run.getRArray())); + } + for (CTRunTrackChange c : paragraph.getDelArray()) { + rs.addAll(Arrays.asList(c.getRArray())); } - // Not sure quite what these are, but they hold - // more text runs 
- CTSdtRun[] sdts = paragraph.getSdtArray(); - for (int i = 0; i < sdts.length; i++) { - CTSdtContentRun run = sdts[i].getSdtContent(); - tmp = run.getRArray(); - for (int j = 0; j < tmp.length; j++) { - rs.add(tmp[j]); - } + for (CTRunTrackChange c : paragraph.getInsArray()) { + rs.addAll(Arrays.asList(c.getRArray())); } // Get text of the paragraph diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index e923c40fcb..fe80baff10 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -192,6 +192,13 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("XXX")); } + public void testInsertedDeletedText() throws Exception { + XWPFDocument doc = open("delins.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + assertTrue(extractor.getText().contains("pendant worn")); + assertTrue(extractor.getText().contains("extremely well")); + } //TODO use the same logic for opening test files as in HSSFTestDataSamples private XWPFDocument open(String sampleFileName) throws IOException { diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx Binary files differ new file mode 100755 index 0000000000..b53069135b --- /dev/null +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/data/delins.docx |