Improve XWPF paragraph text handling, and add further header tests

author Nick Burch <nick@apache.org>

Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)

committer Nick Burch <nick@apache.org>

Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
author Nick Burch <nick@apache.org>
Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
committer Nick Burch <nick@apache.org>
Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml

index ac0604d774912af458b912d48e022cebf70a1cfb..f1dd05950fe9fbe0724a4fb6399d7cd5aae9293a 100644 (file)
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,6 +37,8 @@
  
                 <!-- Don't forget to update status.xml too! -->
          <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+           <action dev="POI-DEVELOPERS" type="add">Support for headers and footers in XWPF</action>
             <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
             <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
             <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml

index 89114557f1b6184dba7f1ebbcefbef174d5ae979..60ad6c09218a65c584eca53fbffa9508ec18fc6c 100644 (file)
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,8 @@
         <!-- Don't forget to update changes.xml too! -->
      <changes>
          <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
+           <action dev="POI-DEVELOPERS" type="add">Support for headers and footers in XWPF</action>
             <action dev="POI-DEVELOPERS" type="add">45592 - Improve XWPF text extraction to include tables always, and picture text where possible</action>
             <action dev="POI-DEVELOPERS" type="add">45545 - Improve XSLF usermodel support, and include XSLF comments in extracted text</action>
             <action dev="POI-DEVELOPERS" type="add">45540 - Fix XSSF header and footer support, and include headers and footers in the output of XSSFExcelExtractor</action>
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java

index 7150014e27e3e9e4e2b5d56ef192a5be55ac3cd0..36de2291936c97f2db476ae45118d8bf76d2ee86 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@@ -39,7 +39,8 @@ public abstract class XWPFHeaderFooter {
          * Returns the paragraph(s) that holds
          *  the text of the header or footer.
          * Normally there is only the one paragraph, but
-        *  there could be more in certain cases.
+        *  there could be more in certain cases, or 
+        *  a table.
          */
         public XWPFParagraph[] getParagraphs() {
                 XWPFParagraph[] paras = 
@@ -51,6 +52,24 @@ public abstract class XWPFHeaderFooter {
                 }
                 return paras;
         }
+       /**
+        * Return the table(s) that holds the text
+        *  of the header or footer, for complex cases
+        *  where a paragraph isn't used.
+        * Normally there's just one paragraph, but some
+        *  complex headers/footers have a table or two
+        *  in addition. 
+        */
+       public XWPFTable[] getTables() {
+               XWPFTable[] tables = 
+                       new XWPFTable[headerFooter.getTblArray().length];
+               for(int i=0; i<tables.length; i++) {
+                       tables[i] = new XWPFTable(
+                                       headerFooter.getTblArray(i)
+                       );
+               }
+               return tables;
+       }
         
         /**
          * Returns the textual content of the header/footer,
@@ -58,11 +77,21 @@ public abstract class XWPFHeaderFooter {
          */
         public String getText() {
                 StringBuffer t = new StringBuffer();
+               
                 XWPFParagraph[] paras = getParagraphs();
-               for (int i = 0; i < paras.length; i++) {
-                       t.append(paras[i].getText());
+               for(int i=0; i<paras.length; i++) {
+                       if(! paras[i].isEmpty()) {
+                               t.append(paras[i].getText());
+                               t.append('\n');
+                       }
+               }
+               
+               XWPFTable[] tables = getTables();
+               for(int i=0; i<tables.length; i++) {
+                       t.append(tables[i].getText());
                         t.append('\n');
                 }
+               
                 return t.toString(); 
         }
  }
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java

index e42ec0186d401cf54b1d21f67686c7d29733c191..c56aef00d4fe2ff084fbfb9aed4122c5c07700ab 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@@ -16,6 +16,8 @@
  ==================================================================== */
  package org.apache.poi.xwpf.usermodel;
  
+import java.util.ArrayList;
+
  import org.apache.poi.xwpf.XWPFDocument;
  import org.apache.poi.xwpf.model.XMLParagraph;
  import org.apache.xmlbeans.XmlCursor;
@@ -24,6 +26,10 @@ import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTRPr;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtBlock;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
  import org.w3c.dom.NodeList;
  import org.w3c.dom.Text;
@@ -43,15 +49,37 @@ public class XWPFParagraph extends XMLParagraph
      public XWPFParagraph(CTP prgrph, XWPFDocument docRef)
      {
          super(prgrph);
+        this.docRef = docRef;
+        
+        // All the runs to loop over
+        // TODO - replace this with some sort of XPath expression
+        //  to directly find all the CTRs, in the right order
+        ArrayList<CTR> rs = new ArrayList<CTR>();
+        CTR[] tmp;
+        
+        // Get the main text runs
+        tmp = paragraph.getRArray();
+        for(int i=0; i<tmp.length; i++) {
+               rs.add(tmp[i]);
+        }
          
-        this.docRef = docRef; 
-        CTR[] rs = paragraph.getRArray();
+        // Structured document tags (w:sdt elements) can also
+        //  wrap text runs, so collect the runs inside them too
+        CTSdtRun[] sdts = paragraph.getSdtArray();
+        for(int i=0; i<sdts.length; i++) {
+               CTSdtContentRun run = sdts[i].getSdtContent();
+               tmp = run.getRArray();
+            for(int j=0; j<tmp.length; j++) {
+               rs.add(tmp[j]);
+            }
+        }
      
+        
          // Get text of the paragraph
-        for (int j = 0; j < rs.length; j++) {
+        for (int j = 0; j < rs.size(); j++) {
              // Grab the text and tabs of the paragraph
                 // Do so in a way that preserves the ordering
-               XmlCursor c = rs[j].newCursor();
+               XmlCursor c = rs.get(j).newCursor();
                 c.selectPath( "./*" );
                 while(c.toNextSelection()) {
                         XmlObject o = c.getObject();
@@ -65,7 +93,7 @@ public class XWPFParagraph extends XMLParagraph
                 
              // Loop over pictures inside our
              //  paragraph, looking for text in them
-            CTPicture[] picts = rs[j].getPictArray();
+            CTPicture[] picts = rs.get(j).getPictArray();
              for (int k = 0; k < picts.length; k++) {
                  XmlObject[] t = picts[k].selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
                  for (int m = 0; m < t.length; m++) {
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java

index 3f69f41737e6706e7e439c1ec702f90150e8f04f..aa647503b9a5c2893cd5ad0b2a3346e6e5a5e6f9 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
@@ -32,24 +32,26 @@ public class XWPFTable
  {
      protected StringBuffer text=new StringBuffer(); 
      
-    public XWPFTable(CTTbl table)
-    {
-        for(CTRow row : table.getTrArray())
-        {
-            for(CTTc cell : row.getTcArray())
-            {
-                for(CTP ctp : cell.getPArray())
-                {
+    public XWPFTable(CTTbl table) {
+        for(CTRow row : table.getTrArray()) {
+               StringBuffer rowText = new StringBuffer();
+            for(CTTc cell : row.getTcArray()) {
+                for(CTP ctp : cell.getPArray()) {
                      XWPFParagraph p = new XWPFParagraph(ctp);
-                    this.text.append(p.getText()+"\t");
+                    if(rowText.length() > 0) {
+                       rowText.append('\t');
+                    }
+                    rowText.append(p.getText());
                  }
              }
-            this.text.append("\n");
+            if(rowText.length() > 0) {
+               this.text.append(rowText);
+               this.text.append('\n');
+            }
          }
      }
      
-    public String getText()
-    {
+    public String getText() {
          return text.toString();
      }
  }
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java

index b61af2f06c3f01d29e26608a6e88698565e055af..1b26bb58ac3a066e896ed10faf4b9e86ddeb1416 100644 (file)
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -111,7 +111,7 @@ public class TestXWPFWordExtractor extends TestCase {
                 assertTrue(text.length() > 0);
                 
                 char euro = '\u20ac';
-//             System.err.println("'"+text.substring(text.length() - 20) + "'");
+//             System.err.println("'"+text.substring(text.length() - 40) + "'");
                 
                 // Check contents
                 assertTrue(text.startsWith(
@@ -121,7 +121,7 @@ public class TestXWPFWordExtractor extends TestCase {
                                 "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
                 ));
                 assertTrue(text.endsWith(
-                               "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\t\n\n"
+                               "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
                 ));
                 
                 // Check number of paragraphs
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java

index b1f6971652aa8348bcb1480281847f36492a4b22..b2269c290842323f2fd5a89ac37ada9b3c718fc0 100644 (file)
--- a/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java
@@ -165,7 +165,7 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
         public void testContents() throws Exception {
                 XWPFHeaderFooterPolicy policy;
                 
-               // Just test a few bits
+               // Test a few simple bits off a simple header
                 policy = diffFirst.getHeaderFooterPolicy();
                 
                 assertEquals(
@@ -176,5 +176,18 @@ public class TestXWPFHeaderFooterPolicy extends TestCase {
                                 "First header column!\tMid header\tRight header!\n", 
                                 policy.getDefaultHeader().getText()
                 );
+               
+               
+               // And a few bits off a more complex header
+               policy = oddEven.getHeaderFooterPolicy();
+               
+               assertEquals(
+                       "\n[]ODD Page Header text\n\n",
+                       policy.getDefaultHeader().getText()
+               );
+               assertEquals(
+                               "\n[This is an Even Page, with a Header]\n\n", 
+                               policy.getEvenPageHeader().getText()
+               );
         }
  }
author	Nick Burch <nick@apache.org>
	Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
committer	Nick Burch <nick@apache.org>
	Sat, 9 Aug 2008 14:50:16 +0000 (14:50 +0000)
src/documentation/content/xdocs/changes.xml		patch \| blob \| history
src/documentation/content/xdocs/status.xml		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java		patch \| blob \| history
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java		patch \| blob \| history
src/ooxml/testcases/org/apache/poi/xwpf/model/TestXWPFHeaderFooterPolicy.java		patch \| blob \| history