<!-- Don't forget to update status.xml too! -->
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+ <action dev="POI-DEVELOPERS" type="add">Support for headers and footers in XWPF</action>
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+ <action dev="POI-DEVELOPERS" type="add">Support for headers and footers in XWPF</action>
<action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
<action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
<action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
* Returns the paragraph(s) that holds
* the text of the header or footer.
* Normally there is only the one paragraph, but
- * there could be more in certain cases.
+ * there could be more in certain cases, or
+ * a table.
*/
public XWPFParagraph[] getParagraphs() {
XWPFParagraph[] paras =
}
return paras;
}
+ /**
+ * Returns the table(s) that hold the text
+ * of the header or footer, for complex cases
+ * where a paragraph isn't used.
+ * Normally there's just one paragraph, but some
+ * complex headers/footers have a table or two
+ * in addition.
+ * A fresh array of freshly created XWPFTable
+ * objects is built on every call, one for each
+ * entry of headerFooter.getTblArray().
+ *
+ * @return the tables of this header/footer, indexed
+ *  the same way as headerFooter.getTblArray();
+ *  a zero-length array if there are no tables
+ */
+ public XWPFTable[] getTables() {
+ XWPFTable[] tables =
+ new XWPFTable[headerFooter.getTblArray().length];
+ for(int i=0; i<tables.length; i++) {
+ tables[i] = new XWPFTable(
+ headerFooter.getTblArray(i)
+ );
+ }
+ return tables;
+ }
/**
* Returns the textual content of the header/footer,
*/
public String getText() {
StringBuffer t = new StringBuffer();
+
XWPFParagraph[] paras = getParagraphs();
- for (int i = 0; i < paras.length; i++) {
- t.append(paras[i].getText());
+ for(int i=0; i<paras.length; i++) {
+ if(! paras[i].isEmpty()) {
+ t.append(paras[i].getText());
+ t.append('\n');
+ }
+ }
+
+ XWPFTable[] tables = getTables();
+ for(int i=0; i<tables.length; i++) {
+ t.append(tables[i].getText());
t.append('\n');
}
+
return t.toString();
}
}
==================================================================== */
package org.apache.poi.xwpf.usermodel;
+import java.util.ArrayList;
+
import org.apache.poi.xwpf.XWPFDocument;
import org.apache.poi.xwpf.model.XMLParagraph;
import org.apache.xmlbeans.XmlCursor;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
{
super(prgrph);
+ this.docRef = docRef;
+
+ // All the runs to loop over
+ // TODO - replace this with some sort of XPath expression
+ // to directly find all the CTRs, in the right order
+ ArrayList<CTR> rs = new ArrayList<CTR>();
+ CTR[] tmp;
+
+ // Get the main text runs
+ tmp = paragraph.getRArray();
+ for(int i=0; i<tmp.length; i++) {
+ rs.add(tmp[i]);
+ }
- this.docRef = docRef;
- CTR[] rs = paragraph.getRArray();
+ // Structured document tags (w:sdt, e.g. content controls)
+ // wrap further text runs, so collect those too
+ CTSdtRun[] sdts = paragraph.getSdtArray();
+ for(int i=0; i<sdts.length; i++) {
+ CTSdtContentRun run = sdts[i].getSdtContent();
+ tmp = run.getRArray();
+ for(int j=0; j<tmp.length; j++) {
+ rs.add(tmp[j]);
+ }
+ }
+
// Get text of the paragraph
- for (int j = 0; j < rs.length; j++) {
+ for (int j = 0; j < rs.size(); j++) {
// Grab the text and tabs of the paragraph
// Do so in a way that preserves the ordering
- XmlCursor c = rs[j].newCursor();
+ XmlCursor c = rs.get(j).newCursor();
c.selectPath( "./*" );
while(c.toNextSelection()) {
XmlObject o = c.getObject();
// Loop over pictures inside our
// paragraph, looking for text in them
- CTPicture[] picts = rs[j].getPictArray();
+ CTPicture[] picts = rs.get(j).getPictArray();
for (int k = 0; k < picts.length; k++) {
XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
for (int m = 0; m < t.length; m++) {
{
protected StringBuffer text=new StringBuffer();
- public XWPFTable(CTTbl table)
- {
- for(CTRow row : table.getTrArray())
- {
- for(CTTc cell : row.getTcArray())
- {
- for(CTP ctp : cell.getPArray())
- {
+ public XWPFTable(CTTbl table) {
+ for(CTRow row : table.getTrArray()) {
+ StringBuffer rowText = new StringBuffer();
+ for(CTTc cell : row.getTcArray()) {
+ for(CTP ctp : cell.getPArray()) {
XWPFParagraph p = new XWPFParagraph(ctp);
- this.text.append(p.getText()+"\t");
+ if(rowText.length() > 0) {
+ rowText.append('\t');
+ }
+ rowText.append(p.getText());
}
}
- this.text.append("\n");
+ if(rowText.length() > 0) {
+ this.text.append(rowText);
+ this.text.append('\n');
+ }
}
}
- public String getText()
- {
+ public String getText() {
return text.toString();
}
}
assertTrue(text.length() > 0);
char euro = '\u20ac';
-// System.err.println("'"+text.substring(text.length() - 20) + "'");
+// System.err.println("'"+text.substring(text.length() - 40) + "'");
// Check contents
assertTrue(text.startsWith(
"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
));
assertTrue(text.endsWith(
- "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
+ "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
));
// Check number of paragraphs
public void testContents() throws Exception {
XWPFHeaderFooterPolicy policy;
- // Just test a few bits
+ // Test a few simple bits off a simple header
policy = diffFirst.getHeaderFooterPolicy();
assertEquals(
"First header column!\tMid header\tRight header!\n",
policy.getDefaultHeader().getText()
);
+
+
+ // And a few bits off a more complex header
+ policy = oddEven.getHeaderFooterPolicy();
+
+ assertEquals(
+ "\n[]ODD Page Header text\n\n",
+ policy.getDefaultHeader().getText()
+ );
+ assertEquals(
+ "\n[This is an Even Page, with a Header]\n\n",
+ policy.getEvenPageHeader().getText()
+ );
}
}