git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684273 13f79535-47bb-0310-9956-ffa450edef68

16 years ago · 910d9e66f7
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,6 +37,8 @@

 		<!-- Don't forget to update status.xml too! -->
        <release version="3.5.1-beta2" date="2008-??-??">
           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
           <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
           <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
           <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,8 @@
 	<!-- Don't forget to update changes.xml too! -->
    <changes>
        <release version="3.5.1-beta2" date="2008-??-??">
           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
           <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
           <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
           <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
           <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter {
 	 * Returns the paragraph(s) that holds
 	 *  the text of the header or footer.
 	 * Normally there is only the one paragraph, but
 	 *  there could be more in certain cases.
 	 *  there could be more in certain cases, or 
 	 *  a table.
 	 */
 	public XWPFParagraph[] getParagraphs() {
 		XWPFParagraph[] paras = 
@@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter {
 		}
 		return paras;
 	}
 	/**
 	 * Wraps each table held by this header or footer
 	 *  in an {@link XWPFTable}.
 	 * Headers/footers normally keep their text in a
 	 *  paragraph, but the more complex ones can carry
 	 *  a table or two as well.
 	 *
 	 * @return one XWPFTable per underlying table, in
 	 *  the same order; empty if there are no tables
 	 */
 	public XWPFTable[] getTables() {
 		int count = headerFooter.getTblArray().length;
 		XWPFTable[] wrapped = new XWPFTable[count];
 		int idx = 0;
 		while(idx < count) {
 			wrapped[idx] = new XWPFTable(headerFooter.getTblArray(idx));
 			idx++;
 		}
 		return wrapped;
 	}
 	
 	/**
 	 * Returns the textual content of the header/footer,
@@ -58,11 +77,21 @@ public abstract class XWPFHeaderFooter {
 	 */
 	public String getText() {
 		StringBuffer t = new StringBuffer();
 		
 		// NOTE(review): two paragraph loops appear back to back below; the
 		//  first, unclosed one looks like a superseded version left in by
 		//  the diff view - confirm against the real file
 		XWPFParagraph[] paras = getParagraphs();
 		for (int i = 0; i < paras.length; i++) {
 			t.append(paras[i].getText());
 		for(int i=0; i<paras.length; i++) {
 			// Empty paragraphs contribute neither text nor a newline
 			if(! paras[i].isEmpty()) {
 				t.append(paras[i].getText());
 				t.append('\n');
 			}
 		}
 		
 		// Table text goes after the paragraph text, with a
 		//  newline appended after each table
 		XWPFTable[] tables = getTables();
 		for(int i=0; i<tables.length; i++) {
 			t.append(tables[i].getText());
 			t.append('\n');
 		}
 		
 		return t.toString(); 
 	}
 }
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@@ -16,6 +16,8 @@
 ==================================================================== */
 package org.apache.poi.xwpf.usermodel;

 import java.util.ArrayList;

 import org.apache.poi.xwpf.XWPFDocument;
 import org.apache.poi.xwpf.model.XMLParagraph;
 import org.apache.xmlbeans.XmlCursor;
@@ -24,6 +26,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;
@@ -43,15 +49,37 @@ public class XWPFParagraph extends XMLParagraph
    public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
    {
        super(prgrph);
        this.docRef = docRef;
        
        // All the runs to loop over
        // TODO - replace this with some sort of XPath expression
        //  to directly find all the CTRs, in the right order
        ArrayList<CTR> rs = new ArrayList<CTR>();
        CTR[] tmp;
        
        // Get the main text runs
        tmp = paragraph.getRArray();
        for(int i=0; i<tmp.length; i++) {
        	rs.add(tmp[i]);
        }
        
        this.docRef = docRef; 
        CTR[] rs = paragraph.getRArray();
        // Not sure quite what these are, but they hold 
        //  more text runs
        CTSdtRun[] sdts = paragraph.getSdtArray();
        for(int i=0; i<sdts.length; i++) {
        	CTSdtContentRun run = sdts[i].getSdtContent();
        	tmp = run.getRArray();
            for(int j=0; j<tmp.length; j++) {
            	rs.add(tmp[j]);
            }
        }
    
        
        // Get text of the paragraph
        for (int j = 0; j < rs.length; j++) {
        for (int j = 0; j < rs.size(); j++) {
            // Grab the text and tabs of the paragraph
        	// Do so in a way that preserves the ordering
        	XmlCursor c = rs[j].newCursor();
        	XmlCursor c = rs.get(j).newCursor();
        	c.selectPath( "./*" );
        	while(c.toNextSelection()) {
        		XmlObject o = c.getObject();
@@ -65,7 +93,7 @@ public class XWPFParagraph extends XMLParagraph
        	
            // Loop over pictures inside our
            //  paragraph, looking for text in them
            CTPicture[] picts = rs[j].getPictArray();
            CTPicture[] picts = rs.get(j).getPictArray();
            for (int k = 0; k < picts.length; k++) {
                XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
                for (int m = 0; m < t.length; m++) {
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
@@ -32,24 +32,26 @@ public class XWPFTable
 {
    protected StringBuffer text=new StringBuffer(); 
    
    // Builds the text of the table by walking its rows, the cells
    //  of each row, and the paragraphs of each cell.
    // NOTE(review): the constructor header and some of the append
    //  statements appear twice below, which looks like old and new
    //  versions merged by the diff view - confirm against the real file
    public XWPFTable(CTTbl table)
    {
        for(CTRow row : table.getTrArray())
        {
            for(CTTc cell : row.getTcArray())
            {
                for(CTP ctp : cell.getPArray())
                {
    public XWPFTable(CTTbl table) {
        for(CTRow row : table.getTrArray()) {
        	// Text of the current row only, built up before it is
        	//  added to the overall table text
        	StringBuffer rowText = new StringBuffer();
            for(CTTc cell : row.getTcArray()) {
                for(CTP ctp : cell.getPArray()) {
                    XWPFParagraph p = new XWPFParagraph(ctp);
                    // Appends a tab after every paragraph, trailing one included
                    this.text.append(p.getText()+"\t");
                    // Tab only between pieces of text, so the row
                    //  neither starts nor ends with one
                    if(rowText.length() > 0) {
                    	rowText.append('\t');
                    }
                    rowText.append(p.getText());
                }
            }
            // Unconditional newline per row
            this.text.append("\n");
            // Rows that produced no text add nothing, not even a newline
            if(rowText.length() > 0) {
            	this.text.append(rowText);
            	this.text.append('\n');
            }
        }
    }
    
    // Returns the contents of the text buffer as a String.
    // NOTE(review): the method header appears twice below (two
    //  brace styles), presumably old and new versions merged by
    //  the diff view - confirm against the real file
    public String getText()
    {
    public String getText() {
        return text.toString();
    }
 }
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase {
 		assertTrue(text.length() > 0);
 		
 		char euro = '\u20ac';
 //		System.err.println("'"+text.substring(text.length() - 20) + "'");
 //		System.err.println("'"+text.substring(text.length() - 40) + "'");
 		
 		// Check contents
 		assertTrue(text.startsWith(
@@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase {
 				"As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
 		));
 		assertTrue(text.endsWith(
 				"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
 				"11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
 		));
 		
 		// Check number of paragraphs
--- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
@@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
 	public void testContents() throws Exception {
 		XWPFHeaderFooterPolicy policy;
 		
 		// Just test a few bits
 		// Test a few simple bits off a simple header
 		policy = diffFirst.getHeaderFooterPolicy();
 		
 		assertEquals(
@@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
 				"First header column!\tMid header\tRight header!\n", 
 				policy.getDefaultHeader().getText()
 		);
 		
 		
 		// And a few bits off a more complex header
 		policy = oddEven.getHeaderFooterPolicy();
 		
 		assertEquals(
 			"\n[]ODD Page Header text\n\n",
 			policy.getDefaultHeader().getText()
 		);
 		assertEquals(
 				"\n[This is an Even Page, with a Header]\n\n", 
 				policy.getEvenPageHeader().getText()
 		);
 	}
 }