diff options
7 files changed, 99 insertions, 23 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index ac0604d774..f1dd05950f 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,8 @@ <!-- Don't forget to update status.xml too! --> <release version="3.5.1-beta2" date="2008-??-??"> + <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action> + <action dev="POI-DEVELOPERS" type="add">Support headers and footers in XWPF</action> <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action> <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action> <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action> diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 89114557f1..60ad6c0921 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,8 @@ <!-- Don't forget to update changes.xml too! 
--> <changes> <release version="3.5.1-beta2" date="2008-??-??"> + <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action> + <action dev="POI-DEVELOPERS" type="add">Support headers and footers in XWPF</action> <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action> <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action> <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action> diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java index 7150014e27..36de229193 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java @@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter { * Returns the paragraph(s) that holds * the text of the header or footer. * Normally there is only the one paragraph, but - * there could be more in certain cases. + * there could be more in certain cases, or + * a table. */ public XWPFParagraph[] getParagraphs() { XWPFParagraph[] paras = @@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter { } return paras; } + /** + * Return the table(s) that holds the text + * of the header or footer, for complex cases + * where a paragraph isn't used. + * Normally there's just one paragraph, but some + * complex headers/footers have a table or two + * in addition. 
+ */ + public XWPFTable[] getTables() { + XWPFTable[] tables = + new XWPFTable[headerFooter.getTblArray().length]; + for(int i=0; i<tables.length; i++) { + tables[i] = new XWPFTable( + headerFooter.getTblArray(i) + ); + } + return tables; + } /** * Returns the textual content of the header/footer, @@ -58,11 +77,21 @@ public abstract class XWPFHeaderFooter { */ public String getText() { StringBuffer t = new StringBuffer(); + XWPFParagraph[] paras = getParagraphs(); - for (int i = 0; i < paras.length; i++) { - t.append(paras[i].getText()); + for(int i=0; i<paras.length; i++) { + if(! paras[i].isEmpty()) { + t.append(paras[i].getText()); + t.append('\n'); + } + } + + XWPFTable[] tables = getTables(); + for(int i=0; i<tables.length; i++) { + t.append(tables[i].getText()); t.append('\n'); } + return t.toString(); } } diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java index e42ec0186d..c56aef00d4 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java @@ -16,6 +16,8 @@ ==================================================================== */ package org.apache.poi.xwpf.usermodel; +import java.util.ArrayList; + import org.apache.poi.xwpf.XWPFDocument; import org.apache.poi.xwpf.model.XMLParagraph; import org.apache.xmlbeans.XmlCursor; @@ -24,6 +26,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun; +import 
org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; import org.w3c.dom.NodeList; import org.w3c.dom.Text; @@ -43,15 +49,37 @@ public class XWPFParagraph extends XMLParagraph public XWPFParagraph(CTP prgrph, XWPFDocument docRef) { super(prgrph); + this.docRef = docRef; + + // All the runs to loop over + // TODO - replace this with some sort of XPath expression + // to directly find all the CTRs, in the right order + ArrayList<CTR> rs = new ArrayList<CTR>(); + CTR[] tmp; + + // Get the main text runs + tmp = paragraph.getRArray(); + for(int i=0; i<tmp.length; i++) { + rs.add(tmp[i]); + } - this.docRef = docRef; - CTR[] rs = paragraph.getRArray(); + // Run-level structured document tags (w:sdt) wrap + // more text runs inside their sdtContent + CTSdtRun[] sdts = paragraph.getSdtArray(); + for(int i=0; i<sdts.length; i++) { + CTSdtContentRun run = sdts[i].getSdtContent(); + tmp = run.getRArray(); + for(int j=0; j<tmp.length; j++) { + rs.add(tmp[j]); + } + } + // Get text of the paragraph - for (int j = 0; j < rs.length; j++) { + for (int j = 0; j < rs.size(); j++) { // Grab the text and tabs of the paragraph // Do so in a way that preserves the ordering - XmlCursor c = rs[j].newCursor(); + XmlCursor c = rs.get(j).newCursor(); c.selectPath( "./*" ); while(c.toNextSelection()) { XmlObject o = c.getObject(); @@ -65,7 +93,7 @@ public class XWPFParagraph extends XMLParagraph // Loop over pictures inside our // paragraph, looking for text in them - CTPicture[] picts = rs[j].getPictArray(); + CTPicture[] picts = rs.get(j).getPictArray(); for (int k = 0; k < picts.length; k++) { XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t"); for (int m = 0; m < t.length; m++) { diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java index 3f69f41737..aa647503b9 100644 --- 
a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java @@ -32,24 +32,26 @@ public class XWPFTable { protected StringBuffer text=new StringBuffer(); - public XWPFTable(CTTbl table) - { - for(CTRow row : table.getTrArray()) - { - for(CTTc cell : row.getTcArray()) - { - for(CTP ctp : cell.getPArray()) - { + public XWPFTable(CTTbl table) { + for(CTRow row : table.getTrArray()) { + StringBuffer rowText = new StringBuffer(); + for(CTTc cell : row.getTcArray()) { + for(CTP ctp : cell.getPArray()) { XWPFParagraph p = new XWPFParagraph(ctp); - this.text.append(p.getText()+"\t"); + if(rowText.length() > 0) { + rowText.append('\t'); + } + rowText.append(p.getText()); } } - this.text.append("\n"); + if(rowText.length() > 0) { + this.text.append(rowText); + this.text.append('\n'); + } } } - public String getText() - { + public String getText() { return text.toString(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index b61af2f06c..1b26bb58ac 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(text.length() > 0); char euro = '\u20ac'; -// System.err.println("'"+text.substring(text.length() - 20) + "'"); +// System.err.println("'"+text.substring(text.length() - 40) + "'"); // Check contents assertTrue(text.startsWith( @@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase { "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" )); assertTrue(text.endsWith( - "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n" + 
"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n" )); // Check number of paragraphs diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java index b1f6971652..b2269c2908 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java @@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase { public void testContents() throws Exception { XWPFHeaderFooterPolicy policy; - // Just test a few bits + // Test a few simple bits off a simple header policy = diffFirst.getHeaderFooterPolicy(); assertEquals( @@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase { "First header column!\tMid header\tRight header!\n", policy.getDefaultHeader().getText() ); + + + // And a few bits off a more complex header + policy = oddEven.getHeaderFooterPolicy(); + + assertEquals( + "\n[]ODD Page Header text\n\n", + policy.getDefaultHeader().getText() + ); + assertEquals( + "\n[This is an Even Page, with a Header]\n\n", + policy.getEvenPageHeader().getText() + ); } } |