source.dussan.org Git - poi.git/commitdiff
Fix bug #55733 - XWPFWordExtractor needs to handle .docx files with neither headers nor footers
author Nick Burch <nick@apache.org>
Fri, 1 Nov 2013 19:43:46 +0000 (19:43 +0000)
committer Nick Burch <nick@apache.org>
Fri, 1 Nov 2013 19:43:46 +0000 (19:43 +0000)

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1538044 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java
test-data/document/55733.docx [new file with mode: 0644]

index 0f4a52e6640482c40c20584960c2cf101d3bbb89..56ba0e7e8d430477af5d0bf37a586b8e19d811c5 100644 (file)
@@ -17,7 +17,6 @@
 package org.apache.poi.xwpf.extractor;
 
 import java.io.IOException;
-import java.util.Iterator;
 import java.util.List;
 
 import org.apache.poi.POIXMLDocument;
@@ -34,7 +33,6 @@ import org.apache.poi.xwpf.usermodel.XWPFHyperlink;
 import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun;
 import org.apache.poi.xwpf.usermodel.XWPFParagraph;
 import org.apache.poi.xwpf.usermodel.XWPFRelation;
-import org.apache.poi.xwpf.usermodel.XWPFRun;
 import org.apache.poi.xwpf.usermodel.XWPFSDT;
 import org.apache.poi.xwpf.usermodel.XWPFTable;
 import org.apache.poi.xwpf.usermodel.XWPFTableCell;
@@ -85,24 +83,24 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
                System.out.println(extractor.getText());
        }
        
-       public String getText() {
-               StringBuffer text = new StringBuffer();
-               XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
+    public String getText() {
+        StringBuffer text = new StringBuffer();
+        XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
 
-               // Start out with all headers
-               extractHeaders(text, hfPolicy);
-               
-               // body elements
-      for (IBodyElement e : document.getBodyElements()){
-         appendBodyElementText(text, e);
-         text.append('\n');
-     }
-               
-               // Finish up with all the footers
-               extractFooters(text, hfPolicy);
-               
-               return text.toString();
-       }
+        // Start out with all headers
+        extractHeaders(text, hfPolicy);
+
+        // Process all body elements
+        for (IBodyElement e : document.getBodyElements()){
+               appendBodyElementText(text, e);
+               text.append('\n');
+        }
+
+        // Finish up with all the footers
+        extractFooters(text, hfPolicy);
+
+        return text.toString();
+    }
 
    public void appendBodyElementText(StringBuffer text, IBodyElement e){
       if (e instanceof XWPFParagraph){
@@ -178,6 +176,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
    }
    
        private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
+               if (hfPolicy == null) return;
+               
                if(hfPolicy.getFirstPageFooter() != null) {
                        text.append( hfPolicy.getFirstPageFooter().getText() );
                }
@@ -190,6 +190,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
        }
 
        private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) {
+               if (hfPolicy == null) return;
+               
                if(hfPolicy.getFirstPageHeader() != null) {
                        text.append( hfPolicy.getFirstPageHeader().getText() );
                }
index 9ad3cc102b976b380b95d1fbbcc923f5ae237068..6b9f7125ea87ec935867f7648ee70a51077e8377 100644 (file)
@@ -351,5 +351,16 @@ public class TestXWPFWordExtractor extends TestCase {
             assertEquals("controlled content loading-"+targ, true, hit);
         }
         assertEquals("controlled content loading hit count", targs.length, hits);
+        ex.close();
+    }
+
+    /** No Header or Footer in document */
+    public void testBug55733() throws Exception {
+        XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55733.docx");
+       XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        // Check it gives text without error
+        extractor.getText();
+        extractor.close();
     }
 }
index 4af076e5ed5277c76cfc00e6831ca6e79a7d3258..f236b967416a44a3b40dd105a7bd91823f195578 100644 (file)
@@ -45,5 +45,4 @@ public final class TestWordExtractorBugs extends TestCase {
         // Check it gives text without error
         extractor.getText();
     }
-    
 }
diff --git a/test-data/document/55733.docx b/test-data/document/55733.docx
new file mode 100644 (file)
index 0000000..521f50e
Binary files /dev/null and b/test-data/document/55733.docx differ