]> source.dussan.org Git - poi.git/commitdiff
Improve paragraph text stuff, and further header tests
author Nick Burch <nick@apache.org>
Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
committer Nick Burch <nick@apache.org>
Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@684273 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java

index ac0604d774912af458b912d48e022cebf70a1cfb..f1dd05950fe9fbe0724a4fb6399d7cd5aae9293a 100644 (file)
@@ -37,6 +37,8 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+           <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
            <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
            <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
            <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
index 89114557f1b6184dba7f1ebbcefbef174d5ae979..60ad6c09218a65c584eca53fbffa9508ec18fc6c 100644 (file)
@@ -34,6 +34,8 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+           <action dev="POI-DEVELOPERS" type="add">Support in XWPF for headers and footers</action>
            <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
            <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
            <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
index 7150014e27e3e9e4e2b5d56ef192a5be55ac3cd0..36de2291936c97f2db476ae45118d8bf76d2ee86 100644 (file)
@@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter {
         * Returns the paragraph(s) that holds
         *  the text of the header or footer.
         * Normally there is only the one paragraph, but
-        *  there could be more in certain cases.
+        *  there could be more in certain cases, or 
+        *  a table.
         */
        public XWPFParagraph[] getParagraphs() {
                XWPFParagraph[] paras = 
@@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter {
                }
                return paras;
        }
+       /**
+        * Returns the table(s) that hold the text
+        *  of the header or footer, for complex cases
+        *  where a paragraph isn't used.
+        * Normally there's just one paragraph, but some
+        *  complex headers/footers have a table or two
+        *  in addition. 
+        */
+       public XWPFTable[] getTables() {
+               XWPFTable[] tables = 
+                       new XWPFTable[headerFooter.getTblArray().length];
+               for(int i=0; i<tables.length; i++) {
+                       tables[i] = new XWPFTable(
+                                       headerFooter.getTblArray(i)
+                       );
+               }
+               return tables;
+       }
        
        /**
         * Returns the textual content of the header/footer,
@@ -58,11 +77,21 @@ public abstract class XWPFHeaderFooter {
         */
        public String getText() {
                StringBuffer t = new StringBuffer();
+               
                XWPFParagraph[] paras = getParagraphs();
-               for (int i = 0; i < paras.length; i++) {
-                       t.append(paras[i].getText());
+               for(int i=0; i<paras.length; i++) {
+                       if(! paras[i].isEmpty()) {
+                               t.append(paras[i].getText());
+                               t.append('\n');
+                       }
+               }
+               
+               XWPFTable[] tables = getTables();
+               for(int i=0; i<tables.length; i++) {
+                       t.append(tables[i].getText());
                        t.append('\n');
                }
+               
                return t.toString(); 
        }
 }
index e42ec0186d401cf54b1d21f67686c7d29733c191..c56aef00d4fe2ff084fbfb9aed4122c5c07700ab 100644 (file)
@@ -16,6 +16,8 @@
 ==================================================================== */
 package org.apache.poi.xwpf.usermodel;
 
+import java.util.ArrayList;
+
 import org.apache.poi.xwpf.XWPFDocument;
 import org.apache.poi.xwpf.model.XMLParagraph;
 import org.apache.xmlbeans.XmlCursor;
@@ -24,6 +26,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
 import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
 import org.w3c.dom.NodeList;
 import org.w3c.dom.Text;
@@ -43,15 +49,37 @@ public class XWPFParagraph extends XMLParagraph
     public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
     {
         super(prgrph);
+        this.docRef = docRef;
+        
+        // All the runs to loop over
+        // TODO - replace this with some sort of XPath expression
+        //  to directly find all the CTRs, in the right order
+        ArrayList<CTR> rs = new ArrayList<CTR>();
+        CTR[] tmp;
+        
+        // Get the main text runs
+        tmp = paragraph.getRArray();
+        for(int i=0; i<tmp.length; i++) {
+               rs.add(tmp[i]);
+        }
         
-        this.docRef = docRef; 
-        CTR[] rs = paragraph.getRArray();
+        // Structured document tags (w:sdt, e.g. content controls)
+        //  wrap further text runs, so collect those too
+        CTSdtRun[] sdts = paragraph.getSdtArray();
+        for(int i=0; i<sdts.length; i++) {
+               CTSdtContentRun run = sdts[i].getSdtContent();
+               tmp = run.getRArray();
+            for(int j=0; j<tmp.length; j++) {
+               rs.add(tmp[j]);
+            }
+        }
     
+        
         // Get text of the paragraph
-        for (int j = 0; j < rs.length; j++) {
+        for (int j = 0; j < rs.size(); j++) {
             // Grab the text and tabs of the paragraph
                // Do so in a way that preserves the ordering
-               XmlCursor c = rs[j].newCursor();
+               XmlCursor c = rs.get(j).newCursor();
                c.selectPath( "./*" );
                while(c.toNextSelection()) {
                        XmlObject o = c.getObject();
@@ -65,7 +93,7 @@ public class XWPFParagraph extends XMLParagraph
                
             // Loop over pictures inside our
             //  paragraph, looking for text in them
-            CTPicture[] picts = rs[j].getPictArray();
+            CTPicture[] picts = rs.get(j).getPictArray();
             for (int k = 0; k < picts.length; k++) {
                 XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
                 for (int m = 0; m < t.length; m++) {
index 3f69f41737e6706e7e439c1ec702f90150e8f04f..aa647503b9a5c2893cd5ad0b2a3346e6e5a5e6f9 100644 (file)
@@ -32,24 +32,26 @@ public class XWPFTable
 {
     protected StringBuffer text=new StringBuffer(); 
     
-    public XWPFTable(CTTbl table)
-    {
-        for(CTRow row : table.getTrArray())
-        {
-            for(CTTc cell : row.getTcArray())
-            {
-                for(CTP ctp : cell.getPArray())
-                {
+    public XWPFTable(CTTbl table) {
+        for(CTRow row : table.getTrArray()) {
+               StringBuffer rowText = new StringBuffer();
+            for(CTTc cell : row.getTcArray()) {
+                for(CTP ctp : cell.getPArray()) {
                     XWPFParagraph p = new XWPFParagraph(ctp);
-                    this.text.append(p.getText()+"\t");
+                    if(rowText.length() > 0) {
+                       rowText.append('\t');
+                    }
+                    rowText.append(p.getText());
                 }
             }
-            this.text.append("\n");
+            if(rowText.length() > 0) {
+               this.text.append(rowText);
+               this.text.append('\n');
+            }
         }
     }
     
-    public String getText()
-    {
+    public String getText() {
         return text.toString();
     }
 }
index b61af2f06c3f01d29e26608a6e88698565e055af..1b26bb58ac3a066e896ed10faf4b9e86ddeb1416 100644 (file)
@@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase {
                assertTrue(text.length() > 0);
                
                char euro = '\u20ac';
-//             System.err.println("'"+text.substring(text.length() - 20) + "'");
+//             System.err.println("'"+text.substring(text.length() - 40) + "'");
                
                // Check contents
                assertTrue(text.startsWith(
@@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase {
                                "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
                ));
                assertTrue(text.endsWith(
-                               "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
+                               "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
                ));
                
                // Check number of paragraphs
index b1f6971652aa8348bcb1480281847f36492a4b22..b2269c290842323f2fd5a89ac37ada9b3c718fc0 100644 (file)
@@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
        public void testContents() throws Exception {
                XWPFHeaderFooterPolicy policy;
                
-               // Just test a few bits
+               // Test a few simple bits off a simple header
                policy = diffFirst.getHeaderFooterPolicy();
                
                assertEquals(
@@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
                                "First header column!\tMid header\tRight header!\n", 
                                policy.getDefaultHeader().getText()
                );
+               
+               
+               // And a few bits off a more complex header
+               policy = oddEven.getHeaderFooterPolicy();
+               
+               assertEquals(
+                       "\n[]ODD Page Header text\n\n",
+                       policy.getDefaultHeader().getText()
+               );
+               assertEquals(
+                               "\n[This is an Even Page, with a Header]\n\n", 
+                               policy.getEvenPageHeader().getText()
+               );
        }
 }