about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- src/documentation/content/xdocs/changes.xml 2
-rw-r--r-- src/documentation/content/xdocs/status.xml 2
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java 35
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java 38
-rw-r--r-- src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java 26
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java 4
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java 15
7 files changed, 99 insertions, 23 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index ac0604d774..f1dd05950f 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,6 +37,8 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+ <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 89114557f1..60ad6c0921 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,8 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+ <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
index 7150014e27..36de229193 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter {
* Returns the paragraph(s) that holds
* the text of the header or footer.
* Normally there is only the one paragraph, but
- * there could be more in certain cases.
+ * there could be more in certain cases, or
+ * a table.
*/
public XWPFParagraph[] getParagraphs() {
XWPFParagraph[] paras =
@@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter {
}
return paras;
}
+ /**
+ * Return the table(s) that holds the text
+ * of the header or footer, for complex cases
+ * where a paragraph isn't used.
+ * Normally there's just one paragraph, but some
+ * complex headers/footers have a table or two
+ * in addition.
+ */
+ public XWPFTable[] getTables() {
+ XWPFTable[] tables =
+ new XWPFTable[headerFooter.getTblArray().length];
+ for(int i=0; i<tables.length; i++) {
+ tables[i] = new XWPFTable(
+ headerFooter.getTblArray(i)
+ );
+ }
+ return tables;
+ }
/**
* Returns the textual content of the header/footer,
@@ -58,11 +77,21 @@ public abstract class XWPFHeaderFooter {
*/
public String getText() {
StringBuffer t = new StringBuffer();
+
XWPFParagraph[] paras = getParagraphs();
- for (int i = 0; i < paras.length; i++) {
- t.append(paras[i].getText());
+ for(int i=0; i<paras.length; i++) {
+ if(! paras[i].isEmpty()) {
+ t.append(paras[i].getText());
+ t.append('\n');
+ }
+ }
+
+ XWPFTable[] tables = getTables();
+ for(int i=0; i<tables.length; i++) {
+ t.append(tables[i].getText());
t.append('\n');
}
+
return t.toString();
}
}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
index e42ec0186d..c56aef00d4 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@@ -16,6 +16,8 @@
==================================================================== */
package org.apache.poi.xwpf.usermodel;
+import java.util.ArrayList;
+
import org.apache.poi.xwpf.XWPFDocument;
import org.apache.poi.xwpf.model.XMLParagraph;
import org.apache.xmlbeans.XmlCursor;
@@ -24,6 +26,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
@@ -43,15 +49,37 @@ public class XWPFParagraph extends XMLParagraph
public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
{
super(prgrph);
+ this.docRef = docRef;
+
+ // All the runs to loop over
+ // TODO - replace this with some sort of XPath expression
+ // to directly find all the CTRs, in the right order
+ ArrayList<CTR> rs = new ArrayList<CTR>();
+ CTR[] tmp;
+
+ // Get the main text runs
+ tmp = paragraph.getRArray();
+ for(int i=0; i<tmp.length; i++) {
+ rs.add(tmp[i]);
+ }
- this.docRef = docRef;
- CTR[] rs = paragraph.getRArray();
+ // Structured document tags (w:sdt) wrap further
+ // text runs inside their content, so collect those too
+ CTSdtRun[] sdts = paragraph.getSdtArray();
+ for(int i=0; i<sdts.length; i++) {
+ CTSdtContentRun run = sdts[i].getSdtContent();
+ tmp = run.getRArray();
+ for(int j=0; j<tmp.length; j++) {
+ rs.add(tmp[j]);
+ }
+ }
+
// Get text of the paragraph
- for (int j = 0; j < rs.length; j++) {
+ for (int j = 0; j < rs.size(); j++) {
// Grab the text and tabs of the paragraph
// Do so in a way that preserves the ordering
- XmlCursor c = rs[j].newCursor();
+ XmlCursor c = rs.get(j).newCursor();
c.selectPath( "./*" );
while(c.toNextSelection()) {
XmlObject o = c.getObject();
@@ -65,7 +93,7 @@ public class XWPFParagraph extends XMLParagraph
// Loop over pictures inside our
// paragraph, looking for text in them
- CTPicture[] picts = rs[j].getPictArray();
+ CTPicture[] picts = rs.get(j).getPictArray();
for (int k = 0; k < picts.length; k++) {
XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
for (int m = 0; m < t.length; m++) {
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
index 3f69f41737..aa647503b9 100644
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
@@ -32,24 +32,26 @@ public class XWPFTable
{
protected StringBuffer text=new StringBuffer();
- public XWPFTable(CTTbl table)
- {
- for(CTRow row : table.getTrArray())
- {
- for(CTTc cell : row.getTcArray())
- {
- for(CTP ctp : cell.getPArray())
- {
+ public XWPFTable(CTTbl table) {
+ for(CTRow row : table.getTrArray()) {
+ StringBuffer rowText = new StringBuffer();
+ for(CTTc cell : row.getTcArray()) {
+ for(CTP ctp : cell.getPArray()) {
XWPFParagraph p = new XWPFParagraph(ctp);
- this.text.append(p.getText()+"\t");
+ if(rowText.length() > 0) {
+ rowText.append('\t');
+ }
+ rowText.append(p.getText());
}
}
- this.text.append("\n");
+ if(rowText.length() > 0) {
+ this.text.append(rowText);
+ this.text.append('\n');
+ }
}
}
- public String getText()
- {
+ public String getText() {
return text.toString();
}
}
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
index b61af2f06c..1b26bb58ac 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase {
assertTrue(text.length() > 0);
char euro = '\u20ac';
-// System.err.println("'"+text.substring(text.length() - 20) + "'");
+// System.err.println("'"+text.substring(text.length() - 40) + "'");
// Check contents
assertTrue(text.startsWith(
@@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase {
"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
));
assertTrue(text.endsWith(
- "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
+ "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
));
// Check number of paragraphs
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
index b1f6971652..b2269c2908 100644
--- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
@@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
public void testContents() throws Exception {
XWPFHeaderFooterPolicy policy;
- // Just test a few bits
+ // Test a few simple bits off a simple header
policy = diffFirst.getHeaderFooterPolicy();
assertEquals(
@@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
"First header column!\tMid header\tRight header!\n",
policy.getDefaultHeader().getText()
);
+
+
+ // And a few bits off a more complex header
+ policy = oddEven.getHeaderFooterPolicy();
+
+ assertEquals(
+ "\n[]ODD Page Header text\n\n",
+ policy.getDefaultHeader().getText()
+ );
+ assertEquals(
+ "\n[This is an Even Page, with a Header]\n\n",
+ policy.getEvenPageHeader().getText()
+ );
}
}