about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Nick Burch <nick@apache.org> 2008-08-09 15:08:11 +0000
committer Nick Burch <nick@apache.org> 2008-08-09 15:08:11 +0000
commit 157a78a1d047fc69242ed1fce98969d5f765c963 (patch)
tree 69e392755e1dd9bffbe0c66006c7d873a6537c70
parent 910d9e66f71cadf335c885f51320382ee04200a8 (diff)
download poi-157a78a1d047fc69242ed1fce98969d5f765c963.tar.gz
poi-157a78a1d047fc69242ed1fce98969d5f765c963.zip
Have XWPFWordExtractor extract headers and footers
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684276 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/documentation/content/xdocs/changes.xml | 1
-rw-r--r-- src/documentation/content/xdocs/status.xml | 1
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java | 34
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java | 14
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java | 71
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java | 6
6 files changed, 113 insertions, 14 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index f1dd05950f..8b9af33406 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
<action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
<action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 60ad6c0921..62b1dc4e4f 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
<action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
<action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
diff --git a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
index 64c8e3f780..14031ebc81 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/extractor/XWPFWordExtractor.java
@@ -23,6 +23,7 @@ import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.xwpf.XWPFDocument;
import org.apache.poi.xwpf.model.XWPFCommentsDecorator;
+import org.apache.poi.xwpf.model.XWPFHeaderFooterPolicy;
import org.apache.poi.xwpf.model.XWPFHyperlinkDecorator;
import org.apache.poi.xwpf.model.XWPFParagraphDecorator;
import org.apache.poi.xwpf.usermodel.XWPFParagraph;
@@ -70,21 +71,46 @@ public class XWPFWordExtractor extends POIXMLTextExtractor {
public String getText() {
StringBuffer text = new StringBuffer();
+ XWPFHeaderFooterPolicy hfPolicy = document.getHeaderFooterPolicy();
-
+ // Start out with all headers
+ // TODO - put them in where they're needed
+ if(hfPolicy.getFirstPageHeader() != null) {
+ text.append( hfPolicy.getFirstPageHeader().getText() );
+ }
+ if(hfPolicy.getEvenPageHeader() != null) {
+ text.append( hfPolicy.getEvenPageHeader().getText() );
+ }
+ if(hfPolicy.getDefaultHeader() != null) {
+ text.append( hfPolicy.getDefaultHeader().getText() );
+ }
+
+ // First up, all our paragraph based text
Iterator<XWPFParagraph> i = document.getParagraphsIterator();
while(i.hasNext()) {
XWPFParagraphDecorator decorator = new XWPFCommentsDecorator(
new XWPFHyperlinkDecorator(i.next(), null, fetchHyperlinks));
text.append(decorator.getText()+"\n");
}
-
+
+ // Then our table based text
Iterator<XWPFTable> j = document.getTablesIterator();
- while(j.hasNext())
- {
+ while(j.hasNext()) {
text.append(j.next().getText()+"\n");
}
+ // Finish up with all the footers
+ // TODO - put them in where they're needed
+ if(hfPolicy.getFirstPageFooter() != null) {
+ text.append( hfPolicy.getFirstPageFooter().getText() );
+ }
+ if(hfPolicy.getEvenPageFooter() != null) {
+ text.append( hfPolicy.getEvenPageFooter().getText() );
+ }
+ if(hfPolicy.getDefaultFooter() != null) {
+ text.append( hfPolicy.getDefaultFooter().getText() );
+ }
+
return text.toString();
}
}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
index 36de229193..708944cf7e 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@@ -81,15 +81,21 @@ public abstract class XWPFHeaderFooter {
XWPFParagraph[] paras = getParagraphs();
for(int i=0; i<paras.length; i++) {
if(! paras[i].isEmpty()) {
- t.append(paras[i].getText());
- t.append('\n');
+ String text = paras[i].getText();
+ if(text != null && text.length() > 0) {
+ t.append(text);
+ t.append('\n');
+ }
}
}
XWPFTable[] tables = getTables();
for(int i=0; i<tables.length; i++) {
- t.append(tables[i].getText());
- t.append('\n');
+ String text = tables[i].getText();
+ if(text != null && text.length() > 0) {
+ t.append(text);
+ t.append('\n');
+ }
}
return t.toString();
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index 1b26bb58ac..8fc83bc91e 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -37,12 +37,22 @@ public class TestXWPFWordExtractor extends TestCase {
*/
private XWPFDocument xmlB;
private File fileB;
-
/**
- * File with hyperlinks
+ * With a simplish header+footer
*/
private XWPFDocument xmlC;
private File fileC;
+ /**
+ * With different header+footer on first/rest
+ */
+ private XWPFDocument xmlD;
+ private File fileD;
+
+ /**
+ * File with hyperlinks
+ */
+ private XWPFDocument xmlE;
+ private File fileE;
protected void setUp() throws Exception {
super.setUp();
@@ -57,15 +67,27 @@ public class TestXWPFWordExtractor extends TestCase {
);
fileC = new File(
System.getProperty("HWPF.testdata.path") +
+ File.separator + "ThreeColHeadFoot.docx"
+ );
+ fileD = new File(
+ System.getProperty("HWPF.testdata.path") +
+ File.separator + "DiffFirstPageHeadFoot.docx"
+ );
+ fileE = new File(
+ System.getProperty("HWPF.testdata.path") +
File.separator + "TestDocument.docx"
);
assertTrue(fileA.exists());
assertTrue(fileB.exists());
assertTrue(fileC.exists());
+ assertTrue(fileD.exists());
+ assertTrue(fileE.exists());
xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
+ xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
+ xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
}
/**
@@ -135,7 +157,7 @@ public class TestXWPFWordExtractor extends TestCase {
public void testGetWithHyperlinks() throws Exception {
XWPFWordExtractor extractor =
- new XWPFWordExtractor(xmlC);
+ new XWPFWordExtractor(xmlE);
extractor.getText();
extractor.setFetchHyperlinks(true);
extractor.getText();
@@ -160,4 +182,47 @@ public class TestXWPFWordExtractor extends TestCase {
extractor.getText()
);
}
+
+ public void testHeadersFooters() throws Exception {
+ XWPFWordExtractor extractor =
+ new XWPFWordExtractor(xmlC);
+ extractor.getText();
+
+ assertEquals(
+ "First header column!\tMid header\tRight header!\n" +
+ "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
+ "\n" +
+ "HEADING TEXT\n" +
+ "\n" +
+ "More on page one\n" +
+ "\n\n" +
+ "End of page 1\n\n" +
+ "This is page two. It also has a three column heading, and a three column footer.\n" +
+ "Footer Left\tFooter Middle\tFooter Right\n",
+ extractor.getText()
+ );
+
+
+ // Now another file, expect multiple headers
+ // and multiple footers
+ extractor =
+ new XWPFWordExtractor(xmlD);
+ extractor.getText();
+
+ assertEquals(
+ "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
+ "First header column!\tMid header\tRight header!\n" +
+ "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
+ "\n" +
+ "HEADING TEXT\n" +
+ "\n" +
+ "More on page one\n" +
+ "\n\n" +
+ "End of page 1\n\n" +
+ "This is page two. It also has a three column heading, and a three column footer.\n" +
+ "The footer of the first page\n" +
+ "Footer Left\tFooter Middle\tFooter Right\n",
+ extractor.getText()
+ );
+ }
}
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
index b2269c2908..9d0e96a175 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
@@ -182,12 +182,12 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
policy = oddEven.getHeaderFooterPolicy();
assertEquals(
- "\n[]ODD Page Header text\n\n",
+ "[]ODD Page Header text\n\n",
policy.getDefaultHeader().getText()
);
assertEquals(
- "\n[This is an Even Page, with a Header]\n\n",
- policy.getEvenPageHeader().getText()
+ "[This is an Even Page, with a Header]\n\n",
+ policy.getEvenPageHeader().getText()
);
}
}