From: Yegor Kozlov Date: Sat, 18 Jul 2009 09:09:59 +0000 (+0000) Subject: Support for extraction of footnotes from docx files, see Bugzilla 45556 X-Git-Tag: REL_3_5-FINAL~78 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=1aafa117225908ae3d4f1961de0bfd79fff09ddb;p=poi.git Support for extraction of footnotes from docx files, see Bugzilla 45556 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@795328 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 415f5390ec..7e764b2c68 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -33,6 +33,8 @@ + 45556 - Fixed ExtractorFactory to support .xltx and .dotx files + 45556 - Support for extraction of footnotes from docx files 47520 - Initial support for custom XML mappings in XSSF 47460 - Fixed NPE when retrieving core properties from a newly created workbook 47498 - Fixed HyperlinkRecord to properly handle URL monikers @@ -41,7 +43,7 @@ 47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream 47456 - Support for getting OLE object data in PowerPointExtractor 47411 - Explicitly set the 1900 date system when creating XSSF workbooks - 47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF + 47400 - Support for text extraction of footnotes, endnotes and comments in HWPF 47415 - Fixed PageSettingsBlock to allow multiple PLS records 47412 - Fixed concurrency issue with EscherProperties.initProps() 47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles diff --git a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java index c3b97c6c75..702f549ba7 100644 --- a/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java +++ b/src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFRelation.java @@ -66,6 +66,24 @@ public final class XSSFRelation extends POIXMLRelation { "/xl/workbook.xml", null ); + public static final XSSFRelation TEMPLATE_WORKBOOK = new XSSFRelation( + "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument", + "/xl/workbook.xml", + null + ); + public static final XSSFRelation MACRO_TEMPLATE_WORKBOOK = new XSSFRelation( + "application/vnd.ms-excel.template.macroEnabled.main+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument", + "/xl/workbook.xml", + null + ); + public static final XSSFRelation MACRO_ADDIN_WORKBOOK = new XSSFRelation( + "application/vnd.ms-excel.addin.macroEnabled.main+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument", + "/xl/workbook.xml", + null + ); public static final XSSFRelation WORKSHEET = new XSSFRelation( "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet", diff --git a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java index 4a246063c9..4be1e302bc 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java +++ b/src/ooxml/java/org/apache/poi/xwpf/model/XWPFHyperlinkDecorator.java @@ -19,7 +19,7 @@ package org.apache.poi.xwpf.model; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR; import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText; -import org.apache.poi.xwpf.usermodel.XWPFParagraph;; +import org.apache.poi.xwpf.usermodel.XWPFParagraph; /** * Decorator class for XWPFParagraph allowing to add hyperlinks diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java index 63ba925f8d..2e86e79c7d 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFDocument.java @@ -30,15 +30,7 @@ import org.apache.xmlbeans.XmlOptions; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.*; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; -import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.*; import javax.xml.namespace.QName; @@ -60,6 +52,7 @@ public class XWPFDocument extends POIXMLDocument { protected List hyperlinks; protected List paragraphs; protected List tables; + protected Map footnotes; /** Handles the joy of different headers/footers for different pages */ private XWPFHeaderFooterPolicy headerFooterPolicy; @@ -87,6 +80,7 @@ public class XWPFDocument extends POIXMLDocument { comments = new ArrayList(); paragraphs = new ArrayList(); tables= new ArrayList(); + footnotes = new HashMap(); try { DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream()); @@ -94,6 +88,8 @@ public class XWPFDocument extends POIXMLDocument { CTBody body = ctDocument.getBody(); + initFootnotes(); + // filling paragraph list for (CTP p : body.getPArray()) { paragraphs.add(new XWPFParagraph(p, this)); @@ -101,7 +97,7 @@ public class XWPFDocument extends POIXMLDocument { // Get any tables for(CTTbl table : body.getTblArray()) { - tables.add(new XWPFTable(table)); + tables.add(new XWPFTable(this, table)); } // Sort out headers and footers @@ -118,7 +114,6 @@ public class XWPFDocument extends POIXMLDocument { } initHyperlinks(); - } catch (XmlException e) { throw new POIXMLException(e); } @@ -139,6 +134,19 @@ public class XWPFDocument extends POIXMLDocument { } } + private void initFootnotes() throws XmlException, IOException { + for(POIXMLDocumentPart p : getRelations()){ + String relation = p.getPackageRelationship().getRelationshipType(); + if(relation.equals(XWPFRelation.FOOTNOTE.getRelation())){ + FootnotesDocument footnotesDocument = FootnotesDocument.Factory.parse(p.getPackagePart().getInputStream()); + + for(CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) { + footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn)); + } + } + } + } + /** * Create a new SpreadsheetML package and setup the default minimal content */ @@ -205,6 +213,15 @@ public class XWPFDocument extends POIXMLDocument { return null; } + + public XWPFFootnote getFootnoteByID(int id) { + return footnotes.get(id); + } + + public Collection getFootnotes() { + return footnotes == null ? new ArrayList() : footnotes.values(); + } + public XWPFHyperlink[] getHyperlinks() { return hyperlinks.toArray( new XWPFHyperlink[hyperlinks.size()] @@ -323,7 +340,7 @@ public class XWPFDocument extends POIXMLDocument { * @return a new table */ public XWPFTable createTable(){ - return new XWPFTable(ctDocument.getBody().addNewTbl()); + return new XWPFTable(this, ctDocument.getBody().addNewTbl()); } /** @@ -333,7 +350,7 @@ public class XWPFDocument extends POIXMLDocument { * @return table */ public XWPFTable createTable(int rows, int cols) { - return new XWPFTable(ctDocument.getBody().addNewTbl(), rows, cols); + return new XWPFTable(this, ctDocument.getBody().addNewTbl(), rows, cols); } } diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java new file mode 100755 index 0000000000..8ee834d486 --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFFootnote.java @@ -0,0 +1,43 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xwpf.usermodel; + +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP; + +import java.util.List; +import java.util.ArrayList; +import java.util.Iterator; + +public class XWPFFootnote implements Iterable { + private List paragraphs = new ArrayList(); + + public XWPFFootnote(XWPFDocument document, CTFtnEdn body) { + for (CTP p : body.getPArray()) { + paragraphs.add(new XWPFParagraph(p, document)); + } + } + + public List getParagraphs() { + return paragraphs; + } + + public Iterator iterator(){ + return paragraphs.iterator(); + } + +} diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java index 22ca339ab8..3c84bf228c 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFHeaderFooter.java @@ -65,7 +65,8 @@ public abstract class XWPFHeaderFooter { new XWPFTable[headerFooter.getTblArray().length]; for(int i=0; i rs = new ArrayList(); - CTR[] tmp; - - // Get the main text runs - tmp = paragraph.getRArray(); - for (int i = 0; i < tmp.length; i++) { - rs.add(tmp[i]); - } - - // Not sure quite what these are, but they hold - // more text runs - CTSdtRun[] sdts = paragraph.getSdtArray(); - for (int i = 0; i < sdts.length; i++) { - CTSdtContentRun run = sdts[i].getSdtContent(); - tmp = run.getRArray(); - for (int j = 0; j < tmp.length; j++) { - rs.add(tmp[j]); - } - } - - // Get text of the paragraph - for (int j = 0; j < rs.size(); j++) { - // Grab the text and tabs of the paragraph - // Do so in a way that preserves the ordering - XmlCursor c = rs.get(j).newCursor(); - c.selectPath("./*"); - while (c.toNextSelection()) { - XmlObject o = c.getObject(); - if (o instanceof CTText) { - text.append(((CTText) o).getStringValue()); - } - if (o instanceof CTPTab) { - text.append("\t"); - } - } - - // Loop over pictures inside our - // paragraph, looking for text in them - CTPicture[] picts = rs.get(j).getPictArray(); - for (int k = 0; k < picts.length; k++) { - XmlObject[] t = picts[k] - .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t"); - for (int m = 0; m < t.length; m++) { - NodeList kids = t[m].getDomNode().getChildNodes(); - for (int n = 0; n < kids.getLength(); n++) { - if (kids.item(n) instanceof Text) { - pictureText.append("\n"); - pictureText.append(kids.item(n).getNodeValue()); - } - } - } - } - } - } + this.paragraph = prgrph; + this.document = docRef; + + if (!isEmpty()) { + // All the runs to loop over + // TODO - replace this with some sort of XPath expression + // to directly find all the CTRs, in the right order + ArrayList rs = new ArrayList(); + CTR[] tmp; + + // Get the main text runs + tmp = paragraph.getRArray(); + for (int i = 0; i < tmp.length; i++) { + rs.add(tmp[i]); + } + + // Not sure quite what these are, but they hold + // more text runs + CTSdtRun[] sdts = paragraph.getSdtArray(); + for (int i = 0; i < sdts.length; i++) { + CTSdtContentRun run = sdts[i].getSdtContent(); + tmp = run.getRArray(); + for (int j = 0; j < tmp.length; j++) { + rs.add(tmp[j]); + } + } + + // Get text of the paragraph + for (int j = 0; j < rs.size(); j++) { + // Grab the text and tabs of the paragraph + // Do so in a way that preserves the ordering + XmlCursor c = rs.get(j).newCursor(); + c.selectPath("./*"); + while (c.toNextSelection()) { + XmlObject o = c.getObject(); + if (o instanceof CTText) { + text.append(((CTText) o).getStringValue()); + } + if (o instanceof CTPTab) { + text.append("\t"); + } + //got a reference to a footnote + if (o instanceof CTFtnEdnRef) { + CTFtnEdnRef ftn = (CTFtnEdnRef) o; + footnoteText.append("[").append(ftn.getId()).append(": "); + XWPFFootnote footnote = document.getFootnoteByID(ftn.getId().intValue()); + + boolean first = true; + for (XWPFParagraph p : footnote.getParagraphs()) { + if (!first) { + footnoteText.append("\n"); + first = false; + } + footnoteText.append(p.getText()); + } + + footnoteText.append("]"); + } + } + + // Loop over pictures inside our + // paragraph, looking for text in them + CTPicture[] picts = rs.get(j).getPictArray(); + for (int k = 0; k < picts.length; k++) { + XmlObject[] t = picts[k] + .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t"); + for (int m = 0; m < t.length; m++) { + NodeList kids = t[m].getDomNode().getChildNodes(); + for (int n = 0; n < kids.getLength(); n++) { + if (kids.item(n) instanceof Text) { + pictureText.append("\n"); + pictureText.append(kids.item(n).getNodeValue()); + } + } + } + } + } + } } public CTP getCTP() { - return paragraph; + return paragraph; } public boolean isEmpty() { - return !paragraph.getDomNode().hasChildNodes(); + return !paragraph.getDomNode().hasChildNodes(); } public XWPFDocument getDocument() { - return document; + return document; } /** @@ -146,7 +145,9 @@ public class XWPFParagraph { * in it. */ public String getText() { - return getParagraphText() + getPictureText(); + StringBuffer out = new StringBuffer(); + out.append(text).append(footnoteText).append(pictureText); + return out.toString(); } /** @@ -154,14 +155,23 @@ public class XWPFParagraph { * paragraph */ public String getParagraphText() { - return text.toString(); + return text.toString(); } /** * Returns any text from any suitable pictures in the paragraph */ public String getPictureText() { - return pictureText.toString(); + return pictureText.toString(); + } + + /** + * Returns the footnote text of the paragraph + * + * @return the footnote text or empty string if the paragraph does not have footnotes + */ + public String getFootnoteText() { + return footnoteText.toString(); } /** @@ -170,7 +180,7 @@ public class XWPFParagraph { * @return a new text run */ public XWPFRun createRun() { - return new XWPFRun(paragraph.addNewR(), this); + return new XWPFRun(paragraph.addNewR(), this); } /** @@ -350,12 +360,12 @@ public class XWPFParagraph { * @see Borders a list of all types of borders */ public void setBorderBottom(Borders border) { - CTPBdr ct = getCTPBrd(true); - CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom(); - if (border.getValue() == Borders.NONE.getValue()) - ct.unsetBottom(); - else - pr.setVal(STBorder.Enum.forInt(border.getValue())); + CTPBdr ct = getCTPBrd(true); + CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom(); + if (border.getValue() == Borders.NONE.getValue()) + ct.unsetBottom(); + else + pr.setVal(STBorder.Enum.forInt(border.getValue())); } /** @@ -367,13 +377,13 @@ public class XWPFParagraph { * @see Borders a list of all types of borders */ public Borders getBorderBottom() { - CTPBdr border = getCTPBrd(false); - CTBorder ct = null; - if (border != null) { - ct = border.getBottom(); - } - STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; - return Borders.valueOf(ptrn.intValue()); + CTPBdr border = getCTPBrd(false); + CTBorder ct = null; + if (border != null) { + ct = border.getBottom(); + } + STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; + return Borders.valueOf(ptrn.intValue()); } /** @@ -399,12 +409,12 @@ public class XWPFParagraph { * @see Borders for a list of all possible borders */ public void setBorderLeft(Borders border) { - CTPBdr ct = getCTPBrd(true); - CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft(); - if (border.getValue() == Borders.NONE.getValue()) - ct.unsetLeft(); - else - pr.setVal(STBorder.Enum.forInt(border.getValue())); + CTPBdr ct = getCTPBrd(true); + CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft(); + if (border.getValue() == Borders.NONE.getValue()) + ct.unsetLeft(); + else + pr.setVal(STBorder.Enum.forInt(border.getValue())); } /** @@ -416,13 +426,13 @@ public class XWPFParagraph { * @see Borders for a list of all possible borders */ public Borders getBorderLeft() { - CTPBdr border = getCTPBrd(false); - CTBorder ct = null; - if (border != null) { - ct = border.getLeft(); - } - STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; - return Borders.valueOf(ptrn.intValue()); + CTPBdr border = getCTPBrd(false); + CTBorder ct = null; + if (border != null) { + ct = border.getLeft(); + } + STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; + return Borders.valueOf(ptrn.intValue()); } /** @@ -448,12 +458,12 @@ public class XWPFParagraph { * @see Borders for a list of all possible borders */ public void setBorderRight(Borders border) { - CTPBdr ct = getCTPBrd(true); - CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight(); - if (border.getValue() == Borders.NONE.getValue()) - ct.unsetRight(); - else - pr.setVal(STBorder.Enum.forInt(border.getValue())); + CTPBdr ct = getCTPBrd(true); + CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight(); + if (border.getValue() == Borders.NONE.getValue()) + ct.unsetRight(); + else + pr.setVal(STBorder.Enum.forInt(border.getValue())); } /** @@ -465,13 +475,13 @@ public class XWPFParagraph { * @see Borders for a list of all possible borders */ public Borders getBorderRight() { - CTPBdr border = getCTPBrd(false); - CTBorder ct = null; - if (border != null) { - ct = border.getRight(); - } - STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; - return Borders.valueOf(ptrn.intValue()); + CTPBdr border = getCTPBrd(false); + CTBorder ct = null; + if (border != null) { + ct = border.getRight(); + } + STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; + return Borders.valueOf(ptrn.intValue()); } /** @@ -501,12 +511,12 @@ public class XWPFParagraph { * @see Borders for a list of all possible borders */ public void setBorderBetween(Borders border) { - CTPBdr ct = getCTPBrd(true); - CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween(); - if (border.getValue() == Borders.NONE.getValue()) - ct.unsetBetween(); - else - pr.setVal(STBorder.Enum.forInt(border.getValue())); + CTPBdr ct = getCTPBrd(true); + CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween(); + if (border.getValue() == Borders.NONE.getValue()) + ct.unsetBetween(); + else + pr.setVal(STBorder.Enum.forInt(border.getValue())); } /** @@ -518,13 +528,13 @@ public class XWPFParagraph { * @see Borders for a list of all possible borders */ public Borders getBorderBetween() { - CTPBdr border = getCTPBrd(false); - CTBorder ct = null; - if (border != null) { - ct = border.getBetween(); - } - STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; - return Borders.valueOf(ptrn.intValue()); + CTPBdr border = getCTPBrd(false); + CTBorder ct = null; + if (border != null) { + ct = border.getBetween(); + } + STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE; + return Borders.valueOf(ptrn.intValue()); } /** @@ -544,13 +554,13 @@ public class XWPFParagraph { * boolean value */ public void setPageBreak(boolean pageBreak) { - CTPPr ppr = getCTPPr(); - CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr - .getPageBreakBefore() : ppr.addNewPageBreakBefore(); - if (pageBreak) - ct_pageBreak.setVal(STOnOff.TRUE); - else - ct_pageBreak.setVal(STOnOff.FALSE); + CTPPr ppr = getCTPPr(); + CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr + .getPageBreakBefore() : ppr.addNewPageBreakBefore(); + if (pageBreak) + ct_pageBreak.setVal(STOnOff.TRUE); + else + ct_pageBreak.setVal(STOnOff.FALSE); } /** @@ -569,14 +579,14 @@ public class XWPFParagraph { * @return boolean - if page break is set */ public boolean isPageBreak() { - CTPPr ppr = getCTPPr(); - CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr - .getPageBreakBefore() : null; - if (ct_pageBreak != null - && ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE) - return true; - else - return false; + CTPPr ppr = getCTPPr(); + CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr + .getPageBreakBefore() : null; + if (ct_pageBreak != null + && ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE) + return true; + else + return false; } /** @@ -640,7 +650,7 @@ public class XWPFParagraph { * paragraph in the document in absolute units. * * @return bigInteger - value representing the spacing after the paragraph - * @see #setSpacingAfterLines(int) + * @see #setSpacingAfterLines(int) */ public int getSpacingAfterLines() { CTSpacing spacing = getCTSpacing(false); @@ -902,12 +912,12 @@ public class XWPFParagraph { * @param wrap - boolean */ public void setWordWrap(boolean wrap) { - CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr() - .getWordWrap() : getCTPPr().addNewWordWrap(); - if (wrap) - wordWrap.setVal(STOnOff.TRUE); - else - wordWrap.unsetVal(); + CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr() + .getWordWrap() : getCTPPr().addNewWordWrap(); + if (wrap) + wordWrap.setVal(STOnOff.TRUE); + else + wordWrap.unsetVal(); } /** @@ -919,14 +929,14 @@ public class XWPFParagraph { * @return boolean */ public boolean isWordWrap() { - CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr() - .getWordWrap() : null; - if (wordWrap != null) { - return (wordWrap.getVal() == STOnOff.ON - || wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true - : false; - } else - return false; + CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr() + .getWordWrap() : null; + if (wordWrap != null) { + return (wordWrap.getVal() == STOnOff.ON + || wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true + : false; + } else + return false; } /** diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java index 5532ee299f..c1b91bb07a 100755 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFRelation.java @@ -40,6 +40,24 @@ public final class XWPFRelation extends POIXMLRelation { "/word/document.xml", null ); + public static final XWPFRelation TEMPLATE = new XWPFRelation( + "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument", + "/word/document.xml", + null + ); + public static final XWPFRelation MACRO_DOCUMENT = new XWPFRelation( + "application/vnd.ms-word.document.macroEnabled.main+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument", + "/word/document.xml", + null + ); + public static final XWPFRelation MACRO_TEMPLATE_DOCUMENT = new XWPFRelation( + "application/vnd.ms-word.template.macroEnabledTemplate.main+xml", + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument", + "/word/document.xml", + null + ); public static final XWPFRelation FONT_TABLE = new XWPFRelation( "application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml", "http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable", @@ -88,6 +106,12 @@ public final class XWPFRelation extends POIXMLRelation { null, null ); + public static final XWPFRelation FOOTNOTE = new XWPFRelation( + null, + "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes", + null, + null + ); private XWPFRelation(String type, String rel, String defaultName, Class cls) { diff --git a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java index 59a8497664..17d0465f58 100644 --- a/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java +++ b/src/ooxml/java/org/apache/poi/xwpf/usermodel/XWPFTable.java @@ -42,8 +42,8 @@ public class XWPFTable { private CTTbl ctTbl; - public XWPFTable(CTTbl table, int row, int col) { - this(table); + public XWPFTable(XWPFDocument doc, CTTbl table, int row, int col) { + this(doc, table); for (int i = 0; i < row; i++) { XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i); for (int k = 0; k < col; k++) { @@ -54,7 +54,7 @@ public class XWPFTable { } - public XWPFTable(CTTbl table) { + public XWPFTable(XWPFDocument doc, CTTbl table) { this.ctTbl = table; // is an empty table: I add one row and one column as default @@ -65,7 +65,7 @@ public class XWPFTable { StringBuffer rowText = new StringBuffer(); for (CTTc cell : row.getTcArray()) { for (CTP ctp : cell.getPArray()) { - XWPFParagraph p = new XWPFParagraph(ctp, null); + XWPFParagraph p = new XWPFParagraph(ctp, doc); if (rowText.length() > 0) { rowText.append('\t'); } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 2704e0371e..1527e562b7 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -17,6 +17,7 @@ package org.apache.poi.xwpf.extractor; import java.io.File; +import java.io.IOException; import org.apache.poi.POIXMLDocument; import org.apache.poi.xwpf.usermodel.XWPFDocument; @@ -27,202 +28,176 @@ import junit.framework.TestCase; * Tests for HXFWordExtractor */ public class TestXWPFWordExtractor extends TestCase { - /** - * A very simple file - */ - private XWPFDocument xmlA; - private File fileA; - /** - * A fairly complex file - */ - private XWPFDocument xmlB; - private File fileB; - /** - * With a simplish header+footer - */ - private XWPFDocument xmlC; - private File fileC; - /** - * With different header+footer on first/rest - */ - private XWPFDocument xmlD; - private File fileD; - - /** - * File with hyperlinks - */ - private XWPFDocument xmlE; - private File fileE; - - protected void setUp() throws Exception { - super.setUp(); - - fileA = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "sample.docx" - ); - fileB = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "IllustrativeCases.docx" - ); - fileC = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "ThreeColHeadFoot.docx" - ); - fileD = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "DiffFirstPageHeadFoot.docx" - ); - fileE = new File( - System.getProperty("HWPF.testdata.path") + - File.separator + "TestDocument.docx" - ); - assertTrue(fileA.exists()); - assertTrue(fileB.exists()); - assertTrue(fileC.exists()); - assertTrue(fileD.exists()); - assertTrue(fileE.exists()); - - xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString())); - xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString())); - xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString())); - xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString())); - xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString())); - } - - /** - * Get text out of the simple file - */ - public void testGetSimpleText() throws Exception { - new XWPFWordExtractor(xmlA); - new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString())); - - XWPFWordExtractor extractor = - new XWPFWordExtractor(xmlA); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check contents - assertTrue(text.startsWith( - "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio." - )); - assertTrue(text.endsWith( - "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n" - )); - - // Check number of paragraphs - int ps = 0; - char[] t = text.toCharArray(); - for (int i = 0; i < t.length; i++) { - if(t[i] == '\n') { ps++; } - } - assertEquals(3, ps); - } - - /** - * Tests getting the text out of a complex file - */ - public void testGetComplexText() throws Exception { - XWPFWordExtractor extractor = - new XWPFWordExtractor(xmlB); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - char euro = '\u20ac'; + + /** + * Get text out of the simple file + */ + public void testGetSimpleText() throws Exception { + XWPFDocument doc = open("sample.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check contents + assertTrue(text.startsWith( + "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio." + )); + assertTrue(text.endsWith( + "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n" + )); + + // Check number of paragraphs + int ps = 0; + char[] t = text.toCharArray(); + for (int i = 0; i < t.length; i++) { + if (t[i] == '\n') { + ps++; + } + } + assertEquals(3, ps); + } + + /** + * Tests getting the text out of a complex file + */ + public void testGetComplexText() throws Exception { + XWPFDocument doc = open("IllustrativeCases.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + char euro = '\u20ac'; // System.err.println("'"+text.substring(text.length() - 40) + "'"); - - // Check contents - assertTrue(text.startsWith( - " \n(V) ILLUSTRATIVE CASES\n\n" - )); - assertTrue(text.contains( - "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" - )); - assertTrue(text.endsWith( - "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n" - )); - - // Check number of paragraphs - int ps = 0; - char[] t = text.toCharArray(); - for (int i = 0; i < t.length; i++) { - if(t[i] == '\n') { ps++; } - } - assertEquals(103, ps); - } - - public void testGetWithHyperlinks() throws Exception { - XWPFWordExtractor extractor = - new XWPFWordExtractor(xmlE); - extractor.getText(); - extractor.setFetchHyperlinks(true); - extractor.getText(); - - // Now check contents - // TODO - fix once correctly handling contents - extractor.setFetchHyperlinks(false); - assertEquals( + + // Check contents + assertTrue(text.startsWith( + " \n(V) ILLUSTRATIVE CASES\n\n" + )); + assertTrue(text.contains( + "As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n" + )); + assertTrue(text.endsWith( + "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n" + )); + + // Check number of paragraphs + int ps = 0; + char[] t = text.toCharArray(); + for (int i = 0; i < t.length; i++) { + if (t[i] == '\n') { + ps++; + } + } + assertEquals(103, ps); + } + + public void testGetWithHyperlinks() throws Exception { + XWPFDocument doc = open("TestDocument.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + // Now check contents + // TODO - fix once correctly handling contents + extractor.setFetchHyperlinks(false); + assertEquals( // "This is a test document\nThis bit is in bold and italic\n" + // "Back to normal\nWe have a hyperlink here, and another.\n", - "This is a test document\nThis bit is in bold and italic\n" + - "Back to normal\nWe have a here, and .hyperlinkanother\n", - extractor.getText() - ); - - extractor.setFetchHyperlinks(true); - assertEquals( + "This is a test document\nThis bit is in bold and italic\n" + + "Back to normal\nWe have a here, and .hyperlinkanother\n", + extractor.getText() + ); + + extractor.setFetchHyperlinks(true); + assertEquals( // "This is a test document\nThis bit is in bold and italic\n" + // "Back to normal\nWe have a hyperlink here, and another.\n", - "This is a test document\nThis bit is in bold and italic\n" + - "Back to normal\nWe have a here, and .hyperlink another\n", - extractor.getText() - ); - } - - public void testHeadersFooters() throws Exception { - XWPFWordExtractor extractor = - new XWPFWordExtractor(xmlC); - extractor.getText(); - - assertEquals( - "First header column!\tMid header\tRight header!\n" + - "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" + - "\n" + - "HEADING TEXT\n" + - "\n" + - "More on page one\n" + - "\n\n" + - "End of page 1\n\n" + - "This is page two. It also has a three column heading, and a three column footer.\n" + - "Footer Left\tFooter Middle\tFooter Right\n", - extractor.getText() - ); - - - // Now another file, expect multiple headers - // and multiple footers - extractor = - new XWPFWordExtractor(xmlD); - extractor.getText(); - - assertEquals( - "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" + - "First header column!\tMid header\tRight header!\n" + - "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" + - "\n" + - "HEADING TEXT\n" + - "\n" + - "More on page one\n" + - "\n\n" + - "End of page 1\n\n" + - "This is page two. It also has a three column heading, and a three column footer.\n" + - "The footer of the first page\n" + - "Footer Left\tFooter Middle\tFooter Right\n", - extractor.getText() - ); - } + "This is a test document\nThis bit is in bold and italic\n" + + "Back to normal\nWe have a here, and .hyperlink another\n", + extractor.getText() + ); + } + + public void testHeadersFooters() throws Exception { + XWPFDocument doc = open("ThreeColHeadFoot.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + assertEquals( + "First header column!\tMid header\tRight header!\n" + + "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" + + "\n" + + "HEADING TEXT\n" + + "\n" + + "More on page one\n" + + "\n\n" + + "End of page 1\n\n" + + "This is page two. It also has a three column heading, and a three column footer.\n" + + "Footer Left\tFooter Middle\tFooter Right\n", + extractor.getText() + ); + + // Now another file, expect multiple headers + // and multiple footers + doc = open("DiffFirstPageHeadFoot.docx"); + extractor = new XWPFWordExtractor(doc); + extractor = + new XWPFWordExtractor(doc); + extractor.getText(); + + assertEquals( + "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" + + "First header column!\tMid header\tRight header!\n" + + "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" + + "\n" + + "HEADING TEXT\n" + + "\n" + + "More on page one\n" + + "\n\n" + + "End of page 1\n\n" + + "This is page two. It also has a three column heading, and a three column footer.\n" + + "The footer of the first page\n" + + "Footer Left\tFooter Middle\tFooter Right\n", + extractor.getText() + ); + } + + public void testFootnotes() throws Exception { + XWPFDocument doc = open("footnotes.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + assertTrue(extractor.getText().contains("snoska")); + } + + + public void testTableFootnotes() throws Exception { + XWPFDocument doc = open("table_footnotes.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + assertTrue(extractor.getText().contains("snoska")); + } + + public void testFormFootnotes() throws Exception { + XWPFDocument doc = open("form_footnotes.docx"); + XWPFWordExtractor extractor = new XWPFWordExtractor(doc); + + String text = extractor.getText(); + assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); + assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + } + + //TODO use the same logic as in HSSFTestDataSamples + private XWPFDocument open(String sampleFileName) throws IOException { + File file = new File( + System.getProperty("HWPF.testdata.path"), sampleFileName); + + try { + if(!sampleFileName.equals(file.getCanonicalFile().getName())){ + throw new RuntimeException("File name is case-sensitive: requested '" + sampleFileName + + "' but actual file is '" + file.getCanonicalFile().getName() + "'"); + } + } catch (IOException e){ + throw new RuntimeException(e); + } + return new XWPFDocument(POIXMLDocument.openPackage(file.getPath())); + } } diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java index 0819d4a38c..4d877a9bc4 100755 --- a/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/usermodel/TestXWPFTable.java @@ -43,14 +43,14 @@ public class TestXWPFTable extends TestCase { public void testConstructor() { CTTbl ctTable=CTTbl.Factory.newInstance(); - XWPFTable xtab=new XWPFTable(ctTable); + XWPFTable xtab=new XWPFTable(null, ctTable); assertNotNull(xtab); assertEquals(1,ctTable.sizeOfTrArray()); assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray()); assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0)); ctTable=CTTbl.Factory.newInstance(); - xtab=new XWPFTable(ctTable, 3,2); + xtab=new XWPFTable(null, ctTable, 3,2); assertNotNull(xtab); assertEquals(3,ctTable.sizeOfTrArray()); assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray()); @@ -67,7 +67,7 @@ public class TestXWPFTable extends TestCase { CTText text=run.addNewT(); text.setStringValue("finally I can write!"); - XWPFTable xtab=new XWPFTable(table); + XWPFTable xtab=new XWPFTable(null, table); assertEquals("finally I can write!\n",xtab.getText()); } @@ -84,7 +84,7 @@ public class TestXWPFTable extends TestCase { r3.addNewTc().addNewP(); r3.addNewTc().addNewP(); - XWPFTable xtab=new XWPFTable(table); + XWPFTable xtab=new XWPFTable(null, table); assertEquals(3,xtab.getNumberOfRows()); assertNotNull(xtab.getRow(2)); @@ -95,7 +95,7 @@ public class TestXWPFTable extends TestCase { assertEquals(2,table.getTrArray(0).sizeOfTcArray()); //check creation of first row - xtab=new XWPFTable(CTTbl.Factory.newInstance()); + xtab=new XWPFTable(null, CTTbl.Factory.newInstance()); assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray()); } @@ -104,7 +104,7 @@ public class TestXWPFTable extends TestCase { CTTbl table = CTTbl.Factory.newInstance(); table.addNewTblPr().addNewTblW().setW(new BigInteger("1000")); - XWPFTable xtab=new XWPFTable(table); + XWPFTable xtab=new XWPFTable(null, table); assertEquals(1000,xtab.getWidth()); diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx new file mode 100755 index 0000000000..db4386c09c Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/footnotes.docx differ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx new file mode 100755 index 0000000000..70abb60c9a Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/form_footnotes.docx differ diff --git a/src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx b/src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx new file mode 100755 index 0000000000..f4d0b2bd03 Binary files /dev/null and b/src/scratchpad/testcases/org/apache/poi/hwpf/data/table_footnotes.docx differ