source.dussan.org Git - poi.git/commitdiff
Fix bug #49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty
author: Nick Burch <nick@apache.org>
Tue, 25 May 2010 20:31:42 +0000 (20:31 +0000)
committer: Nick Burch <nick@apache.org>
Tue, 25 May 2010 20:31:42 +0000 (20:31 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@948199 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
test-data/document/WithTabs.docx [new file with mode: 0644]

index 826937bee5e7250420506b238fd6bde029c82bbe..be43882899c2de5a2caa97640633d1d50c4f0f77 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.7-SNAPSHOT" date="2010-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">49189 - Detect w:tab and w:cr entries in XWPF paragraphs, even when the XSD is silly and maps them to CTEmpty</action>
            <action dev="POI-DEVELOPERS" type="fix">49273 - Correct handling for Font Character Sets with indicies greater than 127</action>
            <action dev="POI-DEVELOPERS" type="add">49334 - Track the ValueRangeRecords of charts in HSSFChart, to allow the basic axis operations</action>
            <action dev="POI-DEVELOPERS" type="add">49242 - Track the LinkDataRecords of charts in HSSFChart</action>
index 3bdf4c365ebe3e82002bf10473684e63c1e614c5..b09cc1edd2ebf9decc511dcdfdae6fd3f0cac80a 100644 (file)
@@ -24,6 +24,7 @@ import org.apache.xmlbeans.XmlCursor;
 import org.apache.xmlbeans.XmlObject;
 import org.apache.poi.util.Internal;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTEmpty;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdnRef;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
@@ -108,6 +109,18 @@ public class XWPFParagraph {
                     if (o instanceof CTPTab) {
                         text.append("\t");
                     }
+                    if (o instanceof CTEmpty) {
+                       // Some inline text elements get returned not as
+                       //  themselves, but as CTEmpty, owing to some odd
+                       //  definitions around line 5642 of the XSDs
+                       String tagName = o.getDomNode().getNodeName();
+                       if ("w:tab".equals(tagName)) {
+                          text.append("\t");
+                       }
+                       if ("w:cr".equals(tagName)) {
+                          text.append("\n");
+                       }
+                    }
                     //got a reference to a footnote
                     if (o instanceof CTFtnEdnRef) {
                         CTFtnEdnRef ftn = (CTFtnEdnRef) o;
index ddc0957aa0c39e624a5591d3539999e13f2c4d16..f98d5e69ca029529a4e05a038e00f56e70b64021 100644 (file)
@@ -219,4 +219,22 @@ public class TestXWPFWordExtractor extends TestCase {
         assertTrue(extractor.getText().contains("2008"));
         assertTrue(extractor.getText().contains("(120 "));
     }
+    
+    /**
+     * Regression test for bug #49189: checks that tab characters
+     *  (and the line break that follows them) survive text
+     *  extraction from a .docx file.
+     *
+     * Opens the sample document "WithTabs.docx", wraps it in an
+     *  XWPFWordExtractor, and asserts on the extracted text:
+     *  first that "a", a tab and "b" each occur somewhere in it,
+     *  then that they occur contiguously as "a\tb\n".
+     *
+     * NOTE(review): whether the trailing "\n" comes from a w:cr
+     *  element or simply from the end of the paragraph cannot be
+     *  told from this test alone - confirm against the sample file.
+     */
+    public void testDocTabs() {
+       XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("WithTabs.docx");
+       XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+       // Check each piece is present somewhere in the extracted text
+       assertTrue(extractor.getText().contains("a"));
+       assertTrue(extractor.getText().contains("\t"));
+       assertTrue(extractor.getText().contains("b"));
+       
+       // Now check the pieces appear together and in order (presumably the sample's first paragraph)
+       assertTrue(extractor.getText().contains("a\tb\n"));
+    }
 }
diff --git a/test-data/document/WithTabs.docx b/test-data/document/WithTabs.docx
new file mode 100644 (file)
index 0000000..e9b3a7a
Binary files /dev/null and b/test-data/document/WithTabs.docx differ