From: Nick Burch Date: Fri, 1 Nov 2013 19:43:46 +0000 (+0000) Subject: Fix bug #55733 - XWPFWordExtractor need X-Git-Tag: REL_3_10_FINAL~74 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=2881930651f926afb12ab4aed62b6e7e59dc860d;p=poi.git Fix bug #55733 - XWPFWordExtractor needs to handle .docx files with neither headers nor footers git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1538044 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java index 0f4a52e664..56ba0e7e8d 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java +++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java @@ -17,7 +17,6 @@ package org.apache.poi.xwpf.extractor; import java.io.IOException; -import java.util.Iterator; import java.util.List; import org.apache.poi.POIXMLDocument; @@ -34,7 +33,6 @@ import org.apache.poi.xwpf.usermodel.XWPFHyperlink; import org.apache.poi.xwpf.usermodel.XWPFHyperlinkRun; import org.apache.poi.xwpf.usermodel.XWPFParagraph; import org.apache.poi.xwpf.usermodel.XWPFRelation; -import org.apache.poi.xwpf.usermodel.XWPFRun; import org.apache.poi.xwpf.usermodel.XWPFSDT; import org.apache.poi.xwpf.usermodel.XWPFTable; import org.apache.poi.xwpf.usermodel.XWPFTableCell; @@ -85,24 +83,24 @@ public class XWPFWordExtractor extends POIXMLTextExtractor { System.out.println(extractor.getText()); } - public String getText() { - StringBuffer text = new StringBuffer(); - XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy(); + public String getText() { + StringBuffer text = new StringBuffer(); + XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy(); - // Start out with all headers - extractHeaders(text, hfPolicy); - - // body elements - for (IBodyElement e : document.getBodyElements()){ - appendBodyElementText(text, e); - text.append('\n'); - } - - // Finish 
up with all the footers - extractFooters(text, hfPolicy); - - return text.toString(); - } + // Start out with all headers + extractHeaders(text, hfPolicy); + + // Process all body elements + for (IBodyElement e : document.getBodyElements()){ + appendBodyElementText(text, e); + text.append('\n'); + } + + // Finish up with all the footers + extractFooters(text, hfPolicy); + + return text.toString(); + } public void appendBodyElementText(StringBuffer text, IBodyElement e){ if (e instanceof XWPFParagraph){ @@ -178,6 +176,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor { } private void extractFooters(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) { + if (hfPolicy == null) return; + if(hfPolicy.getFirstPageFooter() != null) { text.append( hfPolicy.getFirstPageFooter().getText() ); } @@ -190,6 +190,8 @@ public class XWPFWordExtractor extends POIXMLTextExtractor { } private void extractHeaders(StringBuffer text, XWPFHeaderFooterPolicy hfPolicy) { + if (hfPolicy == null) return; + if(hfPolicy.getFirstPageHeader() != null) { text.append( hfPolicy.getFirstPageHeader().getText() ); } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 9ad3cc102b..6b9f7125ea 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -351,5 +351,16 @@ public class TestXWPFWordExtractor extends TestCase { assertEquals("controlled content loading-"+targ, true, hit); } assertEquals("controlled content loading hit count", targs.length, hits); + ex.close(); + } + + /** No Header or Footer in document */ + public void testBug55733() throws Exception { + XWPFDocument doc = XWPFTestDataSamples.openSampleDocument("55733.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + // Check it gives text without error + 
extractor.getText(); + extractor.close(); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java index 4af076e5ed..f236b96741 100644 --- a/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java +++ b/src/scratchpad/testcases/org/apache/poi/hwpf/extractor/TestWordExtractorBugs.java @@ -45,5 +45,4 @@ public final class TestWordExtractorBugs extends TestCase { // Check it gives text without error extractor.getText(); } - } diff --git a/test-data/document/55733.docx b/test-data/document/55733.docx new file mode 100644 index 0000000000..521f50ee84 Binary files /dev/null and b/test-data/document/55733.docx differ