Support for extraction of footnotes from docx files, see Bugzilla 45556

author Yegor Kozlov <yegor@apache.org>

Sat, 18 Jul 2009 09:09:59 +0000 (09:09 +0000)

committer Yegor Kozlov <yegor@apache.org>

Sat, 18 Jul 2009 09:09:59 +0000 (09:09 +0000)
author Yegor Kozlov <yegor@apache.org>
Sat, 18 Jul 2009 09:09:59 +0000 (09:09 +0000)
committer Yegor Kozlov <yegor@apache.org>
Sat, 18 Jul 2009 09:09:59 +0000 (09:09 +0000)
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml

index 415f5390ecf39b63329019942f332ffaeeb61d34..7e764b2c68b341fe935dde5865f895031aa5e996 100644 (file)
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -33,6 +33,8 @@
  
      <changes>
          <release version="3.5-beta7" date="2009-??-??">
+           <action dev="POI-DEVELOPERS" type="fix">45556 - Fixed ExtractorFactory to support .xltx and .dotx files</action>
+           <action dev="POI-DEVELOPERS" type="add">45556 - Support for extraction of footnotes from docx files</action>
             <action dev="POI-DEVELOPERS" type="add">47520 - Initial support for custom XML mappings in XSSF</action>
             <action dev="POI-DEVELOPERS" type="fix">47460 - Fixed NPE when retrieving core properties from a newly created workbook</action>
             <action dev="POI-DEVELOPERS" type="fix">47498 - Fixed HyperlinkRecord to properly handle URL monikers</action>
@@ -41,7 +43,7 @@
             <action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
             <action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
             <action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
-           <action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
+           <action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
             <action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
             <action dev="POI-DEVELOPERS" type="fix">47412 - Fixed concurrency issue with EscherProperties.initProps()</action>
             <action dev="POI-DEVELOPERS" type="fix">47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles</action>
diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java

index c3b97c6c75012156dc4af1075d08cd75c3db48fa..702f549ba709ea57dc6fcbde4994d6a36f272c31 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java
+++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java
@@ -66,6 +66,24 @@ public final class XSSFRelation extends POIXMLRelation {
                         "/xl/workbook.xml",
                         null
         );
+    public static final XSSFRelation TEMPLATE_WORKBOOK = new XSSFRelation(
+              "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml",
+              "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+              "/xl/workbook.xml",
+              null
+    );
+    public static final XSSFRelation MACRO_TEMPLATE_WORKBOOK = new XSSFRelation(
+              "application/vnd.ms-excel.template.macroEnabled.main+xml",
+              "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+              "/xl/workbook.xml",
+              null
+    );
+    public static final XSSFRelation MACRO_ADDIN_WORKBOOK = new XSSFRelation(
+              "application/vnd.ms-excel.addin.macroEnabled.main+xml",
+              "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+              "/xl/workbook.xml",
+              null
+    );
         public static final XSSFRelation WORKSHEET = new XSSFRelation(
                         "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
                         "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet",
diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java

index 4a246063c973ff6dda0a97c3fc90eff569e3e05b..4be1e302bc7af0d3a0c4897cdc8f7eab23ef162c 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java
@@ -19,7 +19,7 @@ package org.apache.poi.xwpf.model;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
  import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
-import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
  
  /**
   * Decorator class for XWPFParagraph allowing to add hyperlinks 
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java

index 63ba925f8d9fbbf88f9ab121c98a04f6668d5fec..2e86e79c7de1b93980846680ef23f2b787bb6f95 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java
@@ -30,15 +30,7 @@ import org.apache.xmlbeans.XmlOptions;
  import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
  import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
  import org.apache.poi.openxml4j.opc.*;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
  
  import javax.xml.namespace.QName;
  
@@ -60,6 +52,7 @@ public class XWPFDocument extends POIXMLDocument {
      protected List<XWPFHyperlink> hyperlinks;
      protected List<XWPFParagraph> paragraphs;
      protected List<XWPFTable> tables;
+    protected Map<Integer, XWPFFootnote> footnotes;
  
      /** Handles the joy of different headers/footers for different pages */
      private XWPFHeaderFooterPolicy headerFooterPolicy;
@@ -87,6 +80,7 @@ public class XWPFDocument extends POIXMLDocument {
          comments = new ArrayList<XWPFComment>();
          paragraphs = new ArrayList<XWPFParagraph>();
          tables= new ArrayList<XWPFTable>();
+        footnotes = new HashMap<Integer, XWPFFootnote>();
  
          try {
              DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
@@ -94,6 +88,8 @@ public class XWPFDocument extends POIXMLDocument {
  
              CTBody body = ctDocument.getBody();
  
+            initFootnotes();
+
              // filling paragraph list
              for (CTP p : body.getPArray())     {
                  paragraphs.add(new XWPFParagraph(p, this));
@@ -101,7 +97,7 @@ public class XWPFDocument extends POIXMLDocument {
  
              // Get any tables
              for(CTTbl table : body.getTblArray()) {
-                tables.add(new XWPFTable(table));
+                tables.add(new XWPFTable(this, table));
              }
  
              // Sort out headers and footers
@@ -118,7 +114,6 @@ public class XWPFDocument extends POIXMLDocument {
              }
  
              initHyperlinks();
-
          } catch (XmlException e) {
              throw new POIXMLException(e);
          }
@@ -139,6 +134,19 @@ public class XWPFDocument extends POIXMLDocument {
          }
      }
  
+    private void initFootnotes() throws XmlException, IOException {
+        for(POIXMLDocumentPart p : getRelations()){
+            String relation = p.getPackageRelationship().getRelationshipType();
+            if(relation.equals(XWPFRelation.FOOTNOTE.getRelation())){
+                FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream());
+
+                for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) {
+                    footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
+                }
+            }
+        }
+    }
+
      /**
       * Create a new SpreadsheetML package and setup the default minimal content
       */
@@ -205,6 +213,15 @@ public class XWPFDocument extends POIXMLDocument {
  
          return null;
      }
+
+    public XWPFFootnote getFootnoteByID(int id) {
+        return footnotes.get(id);
+    }
+
+    public Collection<XWPFFootnote> getFootnotes() {
+        return footnotes == null ? new ArrayList<XWPFFootnote>() : footnotes.values();
+    }
+
      public XWPFHyperlink[] getHyperlinks() {
          return hyperlinks.toArray(
                  new XWPFHyperlink[hyperlinks.size()]
@@ -323,7 +340,7 @@ public class XWPFDocument extends POIXMLDocument {
       * @return a new table
       */
      public XWPFTable createTable(){
-        return new XWPFTable(ctDocument.getBody().addNewTbl());
+        return new XWPFTable(this, ctDocument.getBody().addNewTbl());
      }
      
      /**
@@ -333,7 +350,7 @@ public class XWPFDocument extends POIXMLDocument {
       * @return table
       */
      public XWPFTable createTable(int rows, int cols) {
-       return new XWPFTable(ctDocument.getBody().addNewTbl(), rows, cols);
+       return new XWPFTable(this, ctDocument.getBody().addNewTbl(), rows, cols);
      }
  }
  
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java

new file mode 100755 (executable)

index 0000000..8ee834d
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java
@@ -0,0 +1,43 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+public class XWPFFootnote implements Iterable<XWPFParagraph> {
+    private List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
+
+    public XWPFFootnote(XWPFDocument document, CTFtnEdn body) {
+        for (CTP p : body.getPArray()) {
+            paragraphs.add(new XWPFParagraph(p, document));
+        }
+    }
+
+    public List<XWPFParagraph> getParagraphs() {
+        return paragraphs;
+    }
+
+    public Iterator<XWPFParagraph> iterator(){
+        return paragraphs.iterator();
+    }
+
+}
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java

index 22ca339ab8a6de8ce47d696bc33849215e12574e..3c84bf228cadbff5118547117a57fba07ee4c0a1 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java
@@ -65,7 +65,8 @@ public abstract class XWPFHeaderFooter {
                         new XWPFTable[headerFooter.getTblArray().length];
                 for(int i=0; i<tables.length; i++) {
                         tables[i] = new XWPFTable(
-                                       headerFooter.getTblArray(i)
+                    null,
+                    headerFooter.getTblArray(i)
                         );
                 }
                 return tables;
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java

index 297120d22862670fd7fff4d42277c4de853775c6..1ddda9d4fc50d8f68296a0328564e1209f01f887 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java
@@ -21,26 +21,7 @@ import java.util.ArrayList;
  
  import org.apache.xmlbeans.XmlCursor;
  import org.apache.xmlbeans.XmlObject;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTextAlignment;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBorder;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STJc;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STLineSpacingRule;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
  import org.w3c.dom.NodeList;
  import org.w3c.dom.Text;
  
@@ -58,6 +39,7 @@ public class XWPFParagraph {
       */
      private StringBuffer text = new StringBuffer();
      private StringBuffer pictureText = new StringBuffer();
+    private StringBuffer footnoteText = new StringBuffer();
  
  
      protected XWPFParagraph(CTP prgrph) {
@@ -66,79 +48,96 @@ public class XWPFParagraph {
  
  
      protected XWPFParagraph(CTP prgrph, XWPFDocument docRef) {
-       this.paragraph = prgrph;
-       this.document = docRef;
-
-       if (!isEmpty()) {
-           // All the runs to loop over
-           // TODO - replace this with some sort of XPath expression
-           // to directly find all the CTRs, in the right order
-           ArrayList<CTR> rs = new ArrayList<CTR>();
-           CTR[] tmp;
-
-           // Get the main text runs
-           tmp = paragraph.getRArray();
-           for (int i = 0; i < tmp.length; i++) {
-               rs.add(tmp[i]);
-           }
-
-           // Not sure quite what these are, but they hold
-           // more text runs
-           CTSdtRun[] sdts = paragraph.getSdtArray();
-           for (int i = 0; i < sdts.length; i++) {
-               CTSdtContentRun run = sdts[i].getSdtContent();
-               tmp = run.getRArray();
-               for (int j = 0; j < tmp.length; j++) {
-                   rs.add(tmp[j]);
-               }
-           }
-
-           // Get text of the paragraph
-           for (int j = 0; j < rs.size(); j++) {
-               // Grab the text and tabs of the paragraph
-               // Do so in a way that preserves the ordering
-               XmlCursor c = rs.get(j).newCursor();
-               c.selectPath("./*");
-               while (c.toNextSelection()) {
-                   XmlObject o = c.getObject();
-                   if (o instanceof CTText) {
-                       text.append(((CTText) o).getStringValue());
-                   }
-                   if (o instanceof CTPTab) {
-                       text.append("\t");
-                   }
-               }
-
-               // Loop over pictures inside our
-               // paragraph, looking for text in them
-               CTPicture[] picts = rs.get(j).getPictArray();
-               for (int k = 0; k < picts.length; k++) {
-                   XmlObject[] t = picts[k]
-                                         .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
-                   for (int m = 0; m < t.length; m++) {
-                       NodeList kids = t[m].getDomNode().getChildNodes();
-                       for (int n = 0; n < kids.getLength(); n++) {
-                           if (kids.item(n) instanceof Text) {
-                               pictureText.append("\n");
-                               pictureText.append(kids.item(n).getNodeValue());
-                           }
-                       }
-                   }
-               }
-           }
-       }
+        this.paragraph = prgrph;
+        this.document = docRef;
+
+        if (!isEmpty()) {
+            // All the runs to loop over
+            // TODO - replace this with some sort of XPath expression
+            // to directly find all the CTRs, in the right order
+            ArrayList<CTR> rs = new ArrayList<CTR>();
+            CTR[] tmp;
+
+            // Get the main text runs
+            tmp = paragraph.getRArray();
+            for (int i = 0; i < tmp.length; i++) {
+                rs.add(tmp[i]);
+            }
+
+            // Not sure quite what these are, but they hold
+            // more text runs
+            CTSdtRun[] sdts = paragraph.getSdtArray();
+            for (int i = 0; i < sdts.length; i++) {
+                CTSdtContentRun run = sdts[i].getSdtContent();
+                tmp = run.getRArray();
+                for (int j = 0; j < tmp.length; j++) {
+                    rs.add(tmp[j]);
+                }
+            }
+
+            // Get text of the paragraph
+            for (int j = 0; j < rs.size(); j++) {
+                // Grab the text and tabs of the paragraph
+                // Do so in a way that preserves the ordering
+                XmlCursor c = rs.get(j).newCursor();
+                c.selectPath("./*");
+                while (c.toNextSelection()) {
+                    XmlObject o = c.getObject();
+                    if (o instanceof CTText) {
+                        text.append(((CTText) o).getStringValue());
+                    }
+                    if (o instanceof CTPTab) {
+                        text.append("\t");
+                    }
+                    // Got a reference to a footnote: render it as "[id: text]"
+                    if (o instanceof CTFtnEdnRef) {
+                        CTFtnEdnRef ftn = (CTFtnEdnRef) o;
+                        footnoteText.append("[").append(ftn.getId()).append(": ");
+                        // No owning document or unknown id: keep the marker, skip the text
+                        XWPFFootnote footnote = document == null ? null
+                                : document.getFootnoteByID(ftn.getId().intValue());
+                        if (footnote != null) {
+                            boolean first = true;
+                            for (XWPFParagraph p : footnote.getParagraphs()) {
+                                if (!first) footnoteText.append("\n");
+                                first = false; // later paragraphs get a newline separator
+                                footnoteText.append(p.getText());
+                            }
+                        }
+                        footnoteText.append("]");
+                    }
+                }
+
+                // Loop over pictures inside our
+                // paragraph, looking for text in them
+                CTPicture[] picts = rs.get(j).getPictArray();
+                for (int k = 0; k < picts.length; k++) {
+                    XmlObject[] t = picts[k]
+                            .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
+                    for (int m = 0; m < t.length; m++) {
+                        NodeList kids = t[m].getDomNode().getChildNodes();
+                        for (int n = 0; n < kids.getLength(); n++) {
+                            if (kids.item(n) instanceof Text) {
+                                pictureText.append("\n");
+                                pictureText.append(kids.item(n).getNodeValue());
+                            }
+                        }
+                    }
+                }
+            }
+        }
      }
  
      public CTP getCTP() {
-       return paragraph;
+        return paragraph;
      }
  
      public boolean isEmpty() {
-       return !paragraph.getDomNode().hasChildNodes();
+        return !paragraph.getDomNode().hasChildNodes();
      }
  
      public XWPFDocument getDocument() {
-       return document;
+        return document;
      }
  
      /**
@@ -146,7 +145,9 @@ public class XWPFParagraph {
       * in it.
       */
      public String getText() {
-       return getParagraphText() + getPictureText();
+        StringBuffer out = new StringBuffer();
+        out.append(text).append(footnoteText).append(pictureText);
+        return out.toString();
      }
  
      /**
@@ -154,14 +155,23 @@ public class XWPFParagraph {
       * paragraph
       */
      public String getParagraphText() {
-       return text.toString();
+        return text.toString();
      }
  
      /**
       * Returns any text from any suitable pictures in the paragraph
       */
      public String getPictureText() {
-       return pictureText.toString();
+        return pictureText.toString();
+    }
+
+    /**
+     * Returns the footnote text of the paragraph
+     *
+     * @return  the footnote text or empty string if the paragraph does not have footnotes
+     */
+    public String getFootnoteText() {
+        return footnoteText.toString();
      }
  
      /**
@@ -170,7 +180,7 @@ public class XWPFParagraph {
       * @return a new text run
       */
      public XWPFRun createRun() {
-       return new XWPFRun(paragraph.addNewR(), this);
+        return new XWPFRun(paragraph.addNewR(), this);
      }
  
      /**
@@ -350,12 +360,12 @@ public class XWPFParagraph {
       * @see Borders a list of all types of borders
       */
      public void setBorderBottom(Borders border) {
-       CTPBdr ct = getCTPBrd(true);
-       CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom();
-       if (border.getValue() == Borders.NONE.getValue())
-           ct.unsetBottom();
-       else
-           pr.setVal(STBorder.Enum.forInt(border.getValue()));
+        CTPBdr ct = getCTPBrd(true);
+        CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom();
+        if (border.getValue() == Borders.NONE.getValue())
+            ct.unsetBottom();
+        else
+            pr.setVal(STBorder.Enum.forInt(border.getValue()));
      }
  
      /**
@@ -367,13 +377,13 @@ public class XWPFParagraph {
       * @see Borders a list of all types of borders
       */
      public Borders getBorderBottom() {
-       CTPBdr border = getCTPBrd(false);
-       CTBorder ct = null;
-       if (border != null) {
-           ct = border.getBottom();
-       }
-       STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
-       return Borders.valueOf(ptrn.intValue());
+        CTPBdr border = getCTPBrd(false);
+        CTBorder ct = null;
+        if (border != null) {
+            ct = border.getBottom();
+        }
+        STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+        return Borders.valueOf(ptrn.intValue());
      }
  
      /**
@@ -399,12 +409,12 @@ public class XWPFParagraph {
       * @see Borders for a list of all possible borders
       */
      public void setBorderLeft(Borders border) {
-       CTPBdr ct = getCTPBrd(true);
-       CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft();
-       if (border.getValue() == Borders.NONE.getValue())
-           ct.unsetLeft();
-       else
-           pr.setVal(STBorder.Enum.forInt(border.getValue()));
+        CTPBdr ct = getCTPBrd(true);
+        CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft();
+        if (border.getValue() == Borders.NONE.getValue())
+            ct.unsetLeft();
+        else
+            pr.setVal(STBorder.Enum.forInt(border.getValue()));
      }
  
      /**
@@ -416,13 +426,13 @@ public class XWPFParagraph {
       * @see Borders for a list of all possible borders
       */
      public Borders getBorderLeft() {
-       CTPBdr border = getCTPBrd(false);
-       CTBorder ct = null;
-       if (border != null) {
-           ct = border.getLeft();
-       }
-       STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
-       return Borders.valueOf(ptrn.intValue());
+        CTPBdr border = getCTPBrd(false);
+        CTBorder ct = null;
+        if (border != null) {
+            ct = border.getLeft();
+        }
+        STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+        return Borders.valueOf(ptrn.intValue());
      }
  
      /**
@@ -448,12 +458,12 @@ public class XWPFParagraph {
       * @see Borders for a list of all possible borders
       */
      public void setBorderRight(Borders border) {
-       CTPBdr ct = getCTPBrd(true);
-       CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight();
-       if (border.getValue() == Borders.NONE.getValue())
-           ct.unsetRight();
-       else
-           pr.setVal(STBorder.Enum.forInt(border.getValue()));
+        CTPBdr ct = getCTPBrd(true);
+        CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight();
+        if (border.getValue() == Borders.NONE.getValue())
+            ct.unsetRight();
+        else
+            pr.setVal(STBorder.Enum.forInt(border.getValue()));
      }
  
      /**
@@ -465,13 +475,13 @@ public class XWPFParagraph {
       * @see Borders for a list of all possible borders
       */
      public Borders getBorderRight() {
-       CTPBdr border = getCTPBrd(false);
-       CTBorder ct = null;
-       if (border != null) {
-           ct = border.getRight();
-       }
-       STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
-       return Borders.valueOf(ptrn.intValue());
+        CTPBdr border = getCTPBrd(false);
+        CTBorder ct = null;
+        if (border != null) {
+            ct = border.getRight();
+        }
+        STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+        return Borders.valueOf(ptrn.intValue());
      }
  
      /**
@@ -501,12 +511,12 @@ public class XWPFParagraph {
       * @see Borders for a list of all possible borders
       */
      public void setBorderBetween(Borders border) {
-       CTPBdr ct = getCTPBrd(true);
-       CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween();
-       if (border.getValue() == Borders.NONE.getValue())
-           ct.unsetBetween();
-       else
-           pr.setVal(STBorder.Enum.forInt(border.getValue()));
+        CTPBdr ct = getCTPBrd(true);
+        CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween();
+        if (border.getValue() == Borders.NONE.getValue())
+            ct.unsetBetween();
+        else
+            pr.setVal(STBorder.Enum.forInt(border.getValue()));
      }
  
      /**
@@ -518,13 +528,13 @@ public class XWPFParagraph {
       * @see Borders for a list of all possible borders
       */
      public Borders getBorderBetween() {
-       CTPBdr border = getCTPBrd(false);
-       CTBorder ct = null;
-       if (border != null) {
-           ct = border.getBetween();
-       }
-       STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
-       return Borders.valueOf(ptrn.intValue());
+        CTPBdr border = getCTPBrd(false);
+        CTBorder ct = null;
+        if (border != null) {
+            ct = border.getBetween();
+        }
+        STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+        return Borders.valueOf(ptrn.intValue());
      }
  
      /**
@@ -544,13 +554,13 @@ public class XWPFParagraph {
       *                  boolean value
       */
      public void setPageBreak(boolean pageBreak) {
-       CTPPr ppr = getCTPPr();
-       CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
-               .getPageBreakBefore() : ppr.addNewPageBreakBefore();
-               if (pageBreak)
-                   ct_pageBreak.setVal(STOnOff.TRUE);
-               else
-                   ct_pageBreak.setVal(STOnOff.FALSE);
+        CTPPr ppr = getCTPPr();
+        CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
+                .getPageBreakBefore() : ppr.addNewPageBreakBefore();
+        if (pageBreak)
+            ct_pageBreak.setVal(STOnOff.TRUE);
+        else
+            ct_pageBreak.setVal(STOnOff.FALSE);
      }
  
      /**
@@ -569,14 +579,14 @@ public class XWPFParagraph {
       * @return boolean - if page break is set
       */
      public boolean isPageBreak() {
-       CTPPr ppr = getCTPPr();
-       CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
-               .getPageBreakBefore() : null;
-               if (ct_pageBreak != null
-                       && ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE)
-                   return true;
-               else
-                   return false;
+        CTPPr ppr = getCTPPr();
+        CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
+                .getPageBreakBefore() : null;
+        if (ct_pageBreak != null
+                && ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE)
+            return true;
+        else
+            return false;
      }
  
      /**
@@ -640,7 +650,7 @@ public class XWPFParagraph {
       * paragraph in the document in absolute units.
       *
       * @return bigInteger - value representing the spacing after the paragraph
-     * @see #setSpacingAfterLines(int) 
+     * @see #setSpacingAfterLines(int)
       */
      public int getSpacingAfterLines() {
          CTSpacing spacing = getCTSpacing(false);
@@ -902,12 +912,12 @@ public class XWPFParagraph {
       * @param wrap - boolean
       */
      public void setWordWrap(boolean wrap) {
-       CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
-               .getWordWrap() : getCTPPr().addNewWordWrap();
-               if (wrap)
-                   wordWrap.setVal(STOnOff.TRUE);
-               else
-                   wordWrap.unsetVal();
+        CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
+                .getWordWrap() : getCTPPr().addNewWordWrap();
+        if (wrap)
+            wordWrap.setVal(STOnOff.TRUE);
+        else
+            wordWrap.unsetVal();
      }
  
      /**
@@ -919,14 +929,14 @@ public class XWPFParagraph {
       * @return boolean
       */
      public boolean isWordWrap() {
-       CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
-               .getWordWrap() : null;
-               if (wordWrap != null) {
-                   return (wordWrap.getVal() == STOnOff.ON
-                           || wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true
-                                   : false;
-               } else
-                   return false;
+        CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
+                .getWordWrap() : null;
+        if (wordWrap != null) {
+            return (wordWrap.getVal() == STOnOff.ON
+                    || wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true
+                    : false;
+        } else
+            return false;
      }
  
      /**
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java

index 5532ee299fc9ce47ac506fd543d08118b9a9a59f..c1b91bb07a17a5a55743e39056e3be66f35749f5 100755 (executable)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java
@@ -40,6 +40,24 @@ public final class XWPFRelation extends POIXMLRelation {
              "/word/document.xml",
              null
      );
+    public static final XWPFRelation TEMPLATE = new XWPFRelation(
+          "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
+          "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+          "/word/document.xml",
+          null
+    );
+    public static final XWPFRelation MACRO_DOCUMENT = new XWPFRelation(
+            "application/vnd.ms-word.document.macroEnabled.main+xml",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+            "/word/document.xml",
+            null
+    );
+    public static final XWPFRelation MACRO_TEMPLATE_DOCUMENT = new XWPFRelation(
+            "application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+            "/word/document.xml",
+            null
+    );
      public static final XWPFRelation FONT_TABLE = new XWPFRelation(
              "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
              "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
@@ -88,6 +106,12 @@ public final class XWPFRelation extends POIXMLRelation {
              null,
              null
      );
+    public static final XWPFRelation FOOTNOTE = new XWPFRelation(
+            null,
+            "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
+            null,
+            null
+    );
  
  
      private XWPFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java

index 59a84976640ad943a25bc0a1eaae1b684ae5cf14..17d0465f585ab37e6e817e67c825003f6b47af7d 100644 (file)
--- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
+++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java
@@ -42,8 +42,8 @@ public class XWPFTable {
      private CTTbl ctTbl;
  
  
-    public XWPFTable(CTTbl table, int row, int col) {
-        this(table);
+    public XWPFTable(XWPFDocument doc, CTTbl table, int row, int col) {
+        this(doc, table);
          for (int i = 0; i < row; i++) {
              XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i);
              for (int k = 0; k < col; k++) {
@@ -54,7 +54,7 @@ public class XWPFTable {
      }
  
  
-    public XWPFTable(CTTbl table) {
+    public XWPFTable(XWPFDocument doc, CTTbl table) {
          this.ctTbl = table;
  
          // is an empty table: I add one row and one column as default
@@ -65,7 +65,7 @@ public class XWPFTable {
              StringBuffer rowText = new StringBuffer();
              for (CTTc cell : row.getTcArray()) {
                  for (CTP ctp : cell.getPArray()) {
-                    XWPFParagraph p = new XWPFParagraph(ctp, null);
+                    XWPFParagraph p = new XWPFParagraph(ctp, doc);
                      if (rowText.length() > 0) {
                          rowText.append('\t');
                      }
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java

index 2704e0371e3dba9f910aeef81f7dc38d03a0833d..1527e562b7519a2191edffded639db6299f7a91c 100644 (file)
--- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
@@ -17,6 +17,7 @@
  package org.apache.poi.xwpf.extractor;
  
  import java.io.File;
+import java.io.IOException;
  
  import org.apache.poi.POIXMLDocument;
  import org.apache.poi.xwpf.usermodel.XWPFDocument;
@@ -27,202 +28,176 @@ import junit.framework.TestCase;
   * Tests for HXFWordExtractor
   */
  public class TestXWPFWordExtractor extends TestCase {
-       /**
-        * A very simple file
-        */
-       private XWPFDocument xmlA;
-       private File fileA;
-       /**
-        * A fairly complex file
-        */
-       private XWPFDocument xmlB;
-       private File fileB;
-       /**
-        * With a simplish header+footer
-        */
-       private XWPFDocument xmlC;
-       private File fileC;
-       /**
-        * With different header+footer on first/rest
-        */
-       private XWPFDocument xmlD;
-       private File fileD;
-       
-       /**
-        * File with hyperlinks
-        */
-       private XWPFDocument xmlE;
-       private File fileE;
-
-       protected void setUp() throws Exception {
-               super.setUp();
-               
-               fileA = new File(
-                               System.getProperty("HWPF.testdata.path") +
-                               File.separator + "sample.docx"
-               );
-               fileB = new File(
-                               System.getProperty("HWPF.testdata.path") +
-                               File.separator + "IllustrativeCases.docx"
-               );
-               fileC = new File(
-                               System.getProperty("HWPF.testdata.path") +
-                               File.separator + "ThreeColHeadFoot.docx"
-               );
-               fileD = new File(
-                               System.getProperty("HWPF.testdata.path") +
-                               File.separator + "DiffFirstPageHeadFoot.docx"
-               );
-               fileE = new File(
-                               System.getProperty("HWPF.testdata.path") +
-                               File.separator + "TestDocument.docx"
-               );
-               assertTrue(fileA.exists());
-               assertTrue(fileB.exists());
-               assertTrue(fileC.exists());
-               assertTrue(fileD.exists());
-               assertTrue(fileE.exists());
-               
-               xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
-               xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
-               xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
-               xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
-               xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
-       }
-
-       /**
-        * Get text out of the simple file
-        */
-       public void testGetSimpleText() throws Exception {
-               new XWPFWordExtractor(xmlA);
-               new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
-               
-               XWPFWordExtractor extractor = 
-                       new XWPFWordExtractor(xmlA);
-               extractor.getText();
-               
-               String text = extractor.getText();
-               assertTrue(text.length() > 0);
-               
-               // Check contents
-               assertTrue(text.startsWith(
-                               "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
-               ));
-               assertTrue(text.endsWith(
-                               "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
-               ));
-               
-               // Check number of paragraphs
-               int ps = 0;
-               char[] t = text.toCharArray();
-               for (int i = 0; i < t.length; i++) {
-                       if(t[i] == '\n') { ps++; }
-               }
-               assertEquals(3, ps);
-       }
-       
-       /**
-        * Tests getting the text out of a complex file
-        */
-       public void testGetComplexText() throws Exception {
-               XWPFWordExtractor extractor = 
-                       new XWPFWordExtractor(xmlB);
-               extractor.getText();
-               
-               String text = extractor.getText();
-               assertTrue(text.length() > 0);
-               
-               char euro = '\u20ac';
+
+    /**
+     * Get text out of the simple file
+     */
+    public void testGetSimpleText() throws Exception {
+        XWPFDocument doc = open("sample.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check contents
+        assertTrue(text.startsWith(
+                "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
+        ));
+        assertTrue(text.endsWith(
+                "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
+        ));
+
+        // Check number of paragraphs
+        int ps = 0;
+        char[] t = text.toCharArray();
+        for (int i = 0; i < t.length; i++) {
+            if (t[i] == '\n') {
+                ps++;
+            }
+        }
+        assertEquals(3, ps);
+    }
+
+    /**
+     * Tests getting the text out of a complex file
+     */
+    public void testGetComplexText() throws Exception {
+        XWPFDocument doc = open("IllustrativeCases.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        char euro = '\u20ac';
  //             System.err.println("'"+text.substring(text.length() - 40) + "'");
-               
-               // Check contents
-               assertTrue(text.startsWith(
-                               "  \n(V) ILLUSTRATIVE CASES\n\n"
-               ));
-               assertTrue(text.contains(
-                               "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
-               ));
-               assertTrue(text.endsWith(
-                               "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
-               ));
-               
-               // Check number of paragraphs
-               int ps = 0;
-               char[] t = text.toCharArray();
-               for (int i = 0; i < t.length; i++) {
-                       if(t[i] == '\n') { ps++; }
-               }
-               assertEquals(103, ps);
-       }
-       
-       public void testGetWithHyperlinks() throws Exception {
-               XWPFWordExtractor extractor = 
-                       new XWPFWordExtractor(xmlE);
-               extractor.getText();
-               extractor.setFetchHyperlinks(true);
-               extractor.getText();
-
-               // Now check contents
-               // TODO - fix once correctly handling contents
-               extractor.setFetchHyperlinks(false);
-               assertEquals(
+
+        // Check contents
+        assertTrue(text.startsWith(
+                "  \n(V) ILLUSTRATIVE CASES\n\n"
+        ));
+        assertTrue(text.contains(
+                "As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
+        ));
+        assertTrue(text.endsWith(
+                "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
+        ));
+
+        // Check number of paragraphs
+        int ps = 0;
+        char[] t = text.toCharArray();
+        for (int i = 0; i < t.length; i++) {
+            if (t[i] == '\n') {
+                ps++;
+            }
+        }
+        assertEquals(103, ps);
+    }
+
+    public void testGetWithHyperlinks() throws Exception {
+        XWPFDocument doc = open("TestDocument.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        // Now check contents
+        // TODO - fix once correctly handling contents
+        extractor.setFetchHyperlinks(false);
+        assertEquals(
  //                             "This is a test document\nThis bit is in bold and italic\n" +
  //                             "Back to normal\nWe have a hyperlink here, and another.\n",
-                               "This is a test document\nThis bit is in bold and italic\n" +
-                               "Back to normal\nWe have a  here, and .hyperlinkanother\n",
-                               extractor.getText()
-               );
-               
-               extractor.setFetchHyperlinks(true);
-               assertEquals(
+                "This is a test document\nThis bit is in bold and italic\n" +
+                        "Back to normal\nWe have a  here, and .hyperlinkanother\n",
+                extractor.getText()
+        );
+
+        extractor.setFetchHyperlinks(true);
+        assertEquals(
  //                             "This is a test document\nThis bit is in bold and italic\n" +
  //                             "Back to normal\nWe have a hyperlink here, and another.\n",
-                               "This is a test document\nThis bit is in bold and italic\n" +
-                               "Back to normal\nWe have a  here, and .hyperlink <http://poi.apache.org/>another\n",
-                               extractor.getText()
-               );
-       }
-       
-       public void testHeadersFooters() throws Exception {
-               XWPFWordExtractor extractor = 
-                       new XWPFWordExtractor(xmlC);
-               extractor.getText();
-               
-               assertEquals(
-                               "First header column!\tMid header\tRight header!\n" +
-                               "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
-                               "\n" +
-                               "HEADING TEXT\n" + 
-                               "\n" +
-                               "More on page one\n" + 
-                               "\n\n" + 
-                               "End of page 1\n\n" +
-                               "This is page two. It also has a three column heading, and a three column footer.\n" +
-                               "Footer Left\tFooter Middle\tFooter Right\n",
-                               extractor.getText()
-               );
-               
-               
-               // Now another file, expect multiple headers
-               //  and multiple footers
-               extractor = 
-                       new XWPFWordExtractor(xmlD);
-               extractor.getText();
-               
-               assertEquals(
-                               "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
-                               "First header column!\tMid header\tRight header!\n" +
-                               "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
-                               "\n" +
-                               "HEADING TEXT\n" + 
-                               "\n" +
-                               "More on page one\n" + 
-                               "\n\n" + 
-                               "End of page 1\n\n" +
-                               "This is page two. It also has a three column heading, and a three column footer.\n" +
-                               "The footer of the first page\n" +
-                               "Footer Left\tFooter Middle\tFooter Right\n",
-                               extractor.getText()
-               );
-       }
+                "This is a test document\nThis bit is in bold and italic\n" +
+                        "Back to normal\nWe have a  here, and .hyperlink <http://poi.apache.org/>another\n",
+                extractor.getText()
+        );
+    }
+
+    public void testHeadersFooters() throws Exception {
+        XWPFDocument doc = open("ThreeColHeadFoot.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        assertEquals(
+                "First header column!\tMid header\tRight header!\n" +
+                        "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
+                        "\n" +
+                        "HEADING TEXT\n" +
+                        "\n" +
+                        "More on page one\n" +
+                        "\n\n" +
+                        "End of page 1\n\n" +
+                        "This is page two. It also has a three column heading, and a three column footer.\n" +
+                        "Footer Left\tFooter Middle\tFooter Right\n",
+                extractor.getText()
+        );
+
+        // Now another file, expect multiple headers
+        //  and multiple footers
+        doc = open("DiffFirstPageHeadFoot.docx");
+        extractor = new XWPFWordExtractor(doc);
+        extractor =
+                new XWPFWordExtractor(doc);
+        extractor.getText();
+
+        assertEquals(
+                "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
+                        "First header column!\tMid header\tRight header!\n" +
+                        "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
+                        "\n" +
+                        "HEADING TEXT\n" +
+                        "\n" +
+                        "More on page one\n" +
+                        "\n\n" +
+                        "End of page 1\n\n" +
+                        "This is page two. It also has a three column heading, and a three column footer.\n" +
+                        "The footer of the first page\n" +
+                        "Footer Left\tFooter Middle\tFooter Right\n",
+                extractor.getText()
+        );
+    }
+
+    public void testFootnotes() throws Exception {
+        XWPFDocument doc = open("footnotes.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        assertTrue(extractor.getText().contains("snoska"));
+    }
+
+
+    public void testTableFootnotes() throws Exception {
+        XWPFDocument doc = open("table_footnotes.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        assertTrue(extractor.getText().contains("snoska"));
+    }
+
+    public void testFormFootnotes() throws Exception {
+        XWPFDocument doc = open("form_footnotes.docx");
+        XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+        String text = extractor.getText();
+        assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
+        assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+    }
+
+    //TODO use the same logic as in HSSFTestDataSamples
+    private XWPFDocument open(String sampleFileName) throws IOException {
+        File file = new File(
+                System.getProperty("HWPF.testdata.path"), sampleFileName);
+
+        try {
+            if(!sampleFileName.equals(file.getCanonicalFile().getName())){
+                throw new RuntimeException("File name is case-sensitive: requested '" + sampleFileName
+                        + "' but actual file is '" + file.getCanonicalFile().getName() + "'");
+            }
+        } catch (IOException e){
+            throw new RuntimeException(e);
+        }
+        return new XWPFDocument(POIXMLDocument.openPackage(file.getPath()));
+    }
  }
diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java

index 0819d4a38cada5d1799e3477c6c809f22d4de554..4d877a9bc4bfc052fbf9371f74b2345dbff4a5ce 100755 (executable)
--- a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java
+++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java
@@ -43,14 +43,14 @@ public class TestXWPFTable extends TestCase {
  
      public void testConstructor() {
         CTTbl ctTable=CTTbl.Factory.newInstance();
-       XWPFTable xtab=new XWPFTable(ctTable);
+       XWPFTable xtab=new XWPFTable(null, ctTable);
         assertNotNull(xtab);
         assertEquals(1,ctTable.sizeOfTrArray());
         assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray());
         assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0));
         
         ctTable=CTTbl.Factory.newInstance();
-       xtab=new XWPFTable(ctTable, 3,2);
+       xtab=new XWPFTable(null, ctTable, 3,2);
         assertNotNull(xtab);
         assertEquals(3,ctTable.sizeOfTrArray());
         assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray());
@@ -67,7 +67,7 @@ public class TestXWPFTable extends TestCase {
         CTText text=run.addNewT();
         text.setStringValue("finally I can write!");
         
-       XWPFTable xtab=new XWPFTable(table);
+       XWPFTable xtab=new XWPFTable(null, table);
         assertEquals("finally I can write!\n",xtab.getText());
      }
      
@@ -84,7 +84,7 @@ public class TestXWPFTable extends TestCase {
         r3.addNewTc().addNewP();
         r3.addNewTc().addNewP();
         
-       XWPFTable xtab=new XWPFTable(table);
+       XWPFTable xtab=new XWPFTable(null, table);
         assertEquals(3,xtab.getNumberOfRows());
         assertNotNull(xtab.getRow(2));
         
@@ -95,7 +95,7 @@ public class TestXWPFTable extends TestCase {
         assertEquals(2,table.getTrArray(0).sizeOfTcArray());
                 
         //check creation of first row
-       xtab=new XWPFTable(CTTbl.Factory.newInstance());
+       xtab=new XWPFTable(null, CTTbl.Factory.newInstance());
         assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray());
      }
      
@@ -104,7 +104,7 @@ public class TestXWPFTable extends TestCase {
         CTTbl table = CTTbl.Factory.newInstance();
         table.addNewTblPr().addNewTblW().setW(new BigInteger("1000"));
         
-       XWPFTable xtab=new XWPFTable(table);
+       XWPFTable xtab=new XWPFTable(null, table);
         
         assertEquals(1000,xtab.getWidth());
         
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx

new file mode 100755 (executable)

index 0000000..db4386c

Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx

new file mode 100755 (executable)

index 0000000..70abb60

Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx differ
diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx

new file mode 100755 (executable)

index 0000000..f4d0b2b

Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx differ
author	Yegor Kozlov <yegor@apache.org>
	Sat, 18 Jul 2009 09:09:59 +0000 (09:09 +0000)
committer	Yegor Kozlov <yegor@apache.org>
	Sat, 18 Jul 2009 09:09:59 +0000 (09:09 +0000)
src/documentation/content/xdocs/status.xml		patch \| blob \| history
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java	[new file with mode: 0755]	patch \| blob
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFParagraph.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java		patch \| blob \| history
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java		patch \| blob \| history
src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java		patch \| blob \| history
src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx	[new file with mode: 0755]	patch \| blob
src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx	[new file with mode: 0755]	patch \| blob
src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx	[new file with mode: 0755]	patch \| blob