about summary refs log tree commit diff stats
path: root/src
diff options
context:
space:
mode:
author Dominik Stadler <centic@apache.org> 2020-03-28 09:24:38 +0000
committer Dominik Stadler <centic@apache.org> 2020-03-28 09:24:38 +0000
commit da2afc19e240ba057ead3f925b087b7f3e5dbd17 (patch)
tree 788a11dad155ad2daf92cdf88d500f05319794bd /src
parent 456dc4d368bf65c38c8da94b55acba0eb19f49dc (diff)
download poi-da2afc19e240ba057ead3f925b087b7f3e5dbd17.tar.gz
poi-da2afc19e240ba057ead3f925b087b7f3e5dbd17.zip
Bug 55966: Include content control text in word extraction also if it is part of a paragraph
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875802 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java 9
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java 37
2 files changed, 32 insertions, 14 deletions
diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
index cacac24323..4b61c09d19 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
@@ -90,7 +90,7 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
/**
* Should we concatenate phonetic runs in extraction. Default is <code>true</code>
- * @param concatenatePhoneticRuns
+ * @param concatenatePhoneticRuns If phonetic runs should be concatenated
*/
public void setConcatenatePhoneticRuns(boolean concatenatePhoneticRuns) {
this.concatenatePhoneticRuns = concatenatePhoneticRuns;
@@ -138,9 +138,10 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
extractHeaders(text, headerFooterPolicy);
}
-
- for (IRunElement run : paragraph.getRuns()) {
- if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
+ for (IRunElement run : paragraph.getIRuns()) {
+ if (run instanceof XWPFSDT) {
+ text.append(((XWPFSDT) run).getContent().getText());
+ } else if (! concatenatePhoneticRuns && run instanceof XWPFRun) {
text.append(((XWPFRun)run).text());
} else {
text.append(run);
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index ecab432c10..8a14b25ba0 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -17,6 +17,16 @@
package org.apache.poi.xwpf.extractor;
+import org.apache.poi.util.StringUtil;
+import org.apache.poi.xwpf.XWPFTestDataSamples;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.junit.Test;
+
+import java.io.IOException;
+import java.util.Locale;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
import static org.apache.poi.POITestCase.assertContains;
import static org.apache.poi.POITestCase.assertEndsWith;
import static org.apache.poi.POITestCase.assertNotContained;
@@ -25,16 +35,6 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
-import java.io.IOException;
-import java.util.Locale;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import org.apache.poi.util.StringUtil;
-import org.apache.poi.xwpf.XWPFTestDataSamples;
-import org.apache.poi.xwpf.usermodel.XWPFDocument;
-import org.junit.Test;
-
/**
* Tests for HXFWordExtractor
*/
@@ -460,4 +460,21 @@ public class TestXWPFWordExtractor {
assertContains(txt, "footer 1");
}
}
+
+ @Test
+ public void bug55966() throws IOException {
+ try (XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55966.docx")) {
+ String expected = "Content control within a paragraph is here text content from within a paragraph second control with a new\n" +
+ "line\n" +
+ "\n" +
+ "Content control that is the entire paragraph\n";
+
+ XWPFWordExtractor extractedDoc = new XWPFWordExtractor(doc);
+
+ String actual = extractedDoc.getText();
+
+ extractedDoc.close();
+ assertEquals(expected, actual);
+ }
+ }
}