<changes>
<release version="3.5-beta7" date="2009-??-??">
+ <action dev="POI-DEVELOPERS" type="fix">45556 - Fixed ExtractorFactory to support .xltx and .dotx files</action>
+ <action dev="POI-DEVELOPERS" type="add">45556 - Support for extraction of footnotes from docx files</action>
<action dev="POI-DEVELOPERS" type="add">47520 - Initial support for custom XML mappings in XSSF</action>
<action dev="POI-DEVELOPERS" type="fix">47460 - Fixed NPE when retrieving core properties from a newly created workbook</action>
<action dev="POI-DEVELOPERS" type="fix">47498 - Fixed HyperlinkRecord to properly handle URL monikers</action>
<action dev="POI-DEVELOPERS" type="fix">47448 - Allow HSSFEventFactory to handle non-zero padding at the end of the workbook stream</action>
<action dev="POI-DEVELOPERS" type="add">47456 - Support for getting OLE object data in PowerPointExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">47411 - Explicitly set the 1900 date system when creating XSSF workbooks</action>
- <action dev="POI-DEVELOPERS" type="add">47400 - Support fo text extraction of footnotes, endnotes and comments in HWPF</action>
+ <action dev="POI-DEVELOPERS" type="add">47400 - Support for text extraction of footnotes, endnotes and comments in HWPF</action>
<action dev="POI-DEVELOPERS" type="fix">47415 - Fixed PageSettingsBlock to allow multiple PLS records</action>
<action dev="POI-DEVELOPERS" type="fix">47412 - Fixed concurrency issue with EscherProperties.initProps()</action>
<action dev="POI-DEVELOPERS" type="fix">47143 - Fixed OOM in HSSFWorkbook#getAllPictures when reading .xls files containing metafiles</action>
"/xl/workbook.xml",
null
);
+ /**
+ * Main workbook part declared with the SpreadsheetML "template" content type.
+ * Shares the officeDocument relationship type and the /xl/workbook.xml default
+ * name with the other workbook relations below; the last argument is null.
+ */
+ public static final XSSFRelation TEMPLATE_WORKBOOK = new XSSFRelation(
+ "application/vnd.openxmlformats-officedocument.spreadsheetml.template.main+xml",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+ "/xl/workbook.xml",
+ null
+ );
+ /**
+ * Main workbook part declared with the macro-enabled template content type.
+ */
+ public static final XSSFRelation MACRO_TEMPLATE_WORKBOOK = new XSSFRelation(
+ "application/vnd.ms-excel.template.macroEnabled.main+xml",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+ "/xl/workbook.xml",
+ null
+ );
+ /**
+ * Main workbook part declared with the macro-enabled add-in content type.
+ */
+ public static final XSSFRelation MACRO_ADDIN_WORKBOOK = new XSSFRelation(
+ "application/vnd.ms-excel.addin.macroEnabled.main+xml",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+ "/xl/workbook.xml",
+ null
+ );
public static final XSSFRelation WORKSHEET = new XSSFRelation(
"application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml",
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/worksheet",
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTHyperlink;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
-import org.apache.poi.xwpf.usermodel.XWPFParagraph;;
+import org.apache.poi.xwpf.usermodel.XWPFParagraph;
/**
* Decorator class for XWPFParagraph allowing to add hyperlinks
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
import org.apache.poi.openxml4j.opc.*;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTComment;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTbl;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CommentsDocument;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import javax.xml.namespace.QName;
protected List<XWPFHyperlink> hyperlinks;
protected List<XWPFParagraph> paragraphs;
protected List<XWPFTable> tables;
+ protected Map<Integer, XWPFFootnote> footnotes;
/** Handles the joy of different headers/footers for different pages */
private XWPFHeaderFooterPolicy headerFooterPolicy;
comments = new ArrayList<XWPFComment>();
paragraphs = new ArrayList<XWPFParagraph>();
tables= new ArrayList<XWPFTable>();
+ footnotes = new HashMap<Integer, XWPFFootnote>();
try {
DocumentDocument doc = DocumentDocument.Factory.parse(getPackagePart().getInputStream());
CTBody body = ctDocument.getBody();
+ initFootnotes();
+
// filling paragraph list
for (CTP p : body.getPArray()) {
paragraphs.add(new XWPFParagraph(p, this));
// Get any tables
for(CTTbl table : body.getTblArray()) {
- tables.add(new XWPFTable(table));
+ tables.add(new XWPFTable(this, table));
}
// Sort out headers and footers
}
initHyperlinks();
-
} catch (XmlException e) {
throw new POIXMLException(e);
}
}
}
+ /**
+  * Reads every part related to this document through the footnotes
+  * relationship and indexes the footnotes it contains by their id.
+  *
+  * @throws XmlException if a footnotes part cannot be parsed
+  * @throws IOException if a footnotes part cannot be read or its stream cannot be closed
+  */
+ private void initFootnotes() throws XmlException, IOException {
+     for (POIXMLDocumentPart p : getRelations()) {
+         String relation = p.getPackageRelationship().getRelationshipType();
+         if (relation.equals(XWPFRelation.FOOTNOTE.getRelation())) {
+             // Close the part stream as soon as parsing is done so it is not leaked
+             java.io.InputStream in = p.getPackagePart().getInputStream();
+             FootnotesDocument footnotesDocument;
+             try {
+                 footnotesDocument = FootnotesDocument.Factory.parse(in);
+             } finally {
+                 in.close();
+             }
+
+             for (CTFtnEdn ctFtnEdn : footnotesDocument.getFootnotes().getFootnoteArray()) {
+                 footnotes.put(ctFtnEdn.getId().intValue(), new XWPFFootnote(this, ctFtnEdn));
+             }
+         }
+     }
+ }
+
/**
* Create a new SpreadsheetML package and setup the default minimal content
*/
return null;
}
+
+ /**
+  * Looks up a footnote by the id used in footnote references.
+  *
+  * @param id the footnote id
+  * @return the footnote registered under that id, or <code>null</code> if there
+  *  is none or the footnote map has not been initialised
+  */
+ public XWPFFootnote getFootnoteByID(int id) {
+     // getFootnotes() treats the map as possibly null; stay consistent with it
+     return footnotes == null ? null : footnotes.get(id);
+ }
+
+ /**
+  * Returns all footnotes known to this document.
+  *
+  * @return the values of the footnote map, or a new empty list when the map is null
+  */
+ public Collection<XWPFFootnote> getFootnotes() {
+     if (footnotes != null) {
+         return footnotes.values();
+     }
+     return new ArrayList<XWPFFootnote>();
+ }
+
public XWPFHyperlink[] getHyperlinks() {
return hyperlinks.toArray(
new XWPFHyperlink[hyperlinks.size()]
* @return a new table
*/
public XWPFTable createTable(){
- return new XWPFTable(ctDocument.getBody().addNewTbl());
+     // append a new table element to the body and wrap it, passing this document along
+     CTTbl newTbl = ctDocument.getBody().addNewTbl();
+     return new XWPFTable(this, newTbl);
}
/**
* @return table
*/
public XWPFTable createTable(int rows, int cols) {
- return new XWPFTable(ctDocument.getBody().addNewTbl(), rows, cols);
+     // append a new table element to the body and wrap it with the requested dimensions
+     CTTbl newTbl = ctDocument.getBody().addNewTbl();
+     return new XWPFTable(this, newTbl, rows, cols);
}
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xwpf.usermodel;
+
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTFtnEdn;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
+
+import java.util.List;
+import java.util.ArrayList;
+import java.util.Iterator;
+
+/**
+ * A single footnote, exposed as the list of paragraphs it contains.
+ */
+public class XWPFFootnote implements Iterable<XWPFParagraph> {
+    private List<XWPFParagraph> paragraphs = new ArrayList<XWPFParagraph>();
+
+    /**
+     * Wraps each paragraph of the given footnote element.
+     *
+     * @param document the document handed to every created paragraph
+     * @param body the footnote element whose paragraphs are wrapped
+     */
+    public XWPFFootnote(XWPFDocument document, CTFtnEdn body) {
+        CTP[] ctParagraphs = body.getPArray();
+        for (int i = 0; i < ctParagraphs.length; i++) {
+            paragraphs.add(new XWPFParagraph(ctParagraphs[i], document));
+        }
+    }
+
+    /**
+     * @return the paragraphs of this footnote (the internal list itself)
+     */
+    public List<XWPFParagraph> getParagraphs() {
+        return paragraphs;
+    }
+
+    /**
+     * @return an iterator over the paragraphs of this footnote
+     */
+    public Iterator<XWPFParagraph> iterator() {
+        return paragraphs.iterator();
+    }
+
+}
new XWPFTable[headerFooter.getTblArray().length];
for(int i=0; i<tables.length; i++) {
tables[i] = new XWPFTable(
- headerFooter.getTblArray(i)
+ null,
+ headerFooter.getTblArray(i)
);
}
return tables;
import org.apache.xmlbeans.XmlCursor;
import org.apache.xmlbeans.XmlObject;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBorder;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTInd;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTJc;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTOnOff;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTP;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPBdr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPPr;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPTab;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTPicture;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTR;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtContentRun;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSdtRun;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTSpacing;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTText;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTTextAlignment;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STBorder;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STJc;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STLineSpacingRule;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STOnOff;
-import org.openxmlformats.schemas.wordprocessingml.x2006.main.STTextAlignment;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.*;
import org.w3c.dom.NodeList;
import org.w3c.dom.Text;
*/
private StringBuffer text = new StringBuffer();
private StringBuffer pictureText = new StringBuffer();
+ private StringBuffer footnoteText = new StringBuffer();
protected XWPFParagraph(CTP prgrph) {
+/**
+ * Wraps a paragraph and eagerly collects its main text, the text of any
+ * footnotes it references and the text found inside its pictures.
+ *
+ * @param prgrph the underlying paragraph element
+ * @param docRef the owning document; may be <code>null</code>, in which case
+ *  footnote references are recorded without their content
+ */
protected XWPFParagraph(CTP prgrph, XWPFDocument docRef) {
- this.paragraph = prgrph;
- this.document = docRef;
-
- if (!isEmpty()) {
- // All the runs to loop over
- // TODO - replace this with some sort of XPath expression
- // to directly find all the CTRs, in the right order
- ArrayList<CTR> rs = new ArrayList<CTR>();
- CTR[] tmp;
-
- // Get the main text runs
- tmp = paragraph.getRArray();
- for (int i = 0; i < tmp.length; i++) {
- rs.add(tmp[i]);
- }
-
- // Not sure quite what these are, but they hold
- // more text runs
- CTSdtRun[] sdts = paragraph.getSdtArray();
- for (int i = 0; i < sdts.length; i++) {
- CTSdtContentRun run = sdts[i].getSdtContent();
- tmp = run.getRArray();
- for (int j = 0; j < tmp.length; j++) {
- rs.add(tmp[j]);
- }
- }
-
- // Get text of the paragraph
- for (int j = 0; j < rs.size(); j++) {
- // Grab the text and tabs of the paragraph
- // Do so in a way that preserves the ordering
- XmlCursor c = rs.get(j).newCursor();
- c.selectPath("./*");
- while (c.toNextSelection()) {
- XmlObject o = c.getObject();
- if (o instanceof CTText) {
- text.append(((CTText) o).getStringValue());
- }
- if (o instanceof CTPTab) {
- text.append("\t");
- }
- }
-
- // Loop over pictures inside our
- // paragraph, looking for text in them
- CTPicture[] picts = rs.get(j).getPictArray();
- for (int k = 0; k < picts.length; k++) {
- XmlObject[] t = picts[k]
- .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
- for (int m = 0; m < t.length; m++) {
- NodeList kids = t[m].getDomNode().getChildNodes();
- for (int n = 0; n < kids.getLength(); n++) {
- if (kids.item(n) instanceof Text) {
- pictureText.append("\n");
- pictureText.append(kids.item(n).getNodeValue());
- }
- }
- }
- }
- }
- }
+    this.paragraph = prgrph;
+    this.document = docRef;
+
+    if (!isEmpty()) {
+        // All the runs to loop over
+        // TODO - replace this with some sort of XPath expression
+        // to directly find all the CTRs, in the right order
+        ArrayList<CTR> rs = new ArrayList<CTR>();
+        CTR[] tmp;
+
+        // Get the main text runs
+        tmp = paragraph.getRArray();
+        for (int i = 0; i < tmp.length; i++) {
+            rs.add(tmp[i]);
+        }
+
+        // Not sure quite what these are, but they hold
+        // more text runs
+        CTSdtRun[] sdts = paragraph.getSdtArray();
+        for (int i = 0; i < sdts.length; i++) {
+            CTSdtContentRun run = sdts[i].getSdtContent();
+            tmp = run.getRArray();
+            for (int j = 0; j < tmp.length; j++) {
+                rs.add(tmp[j]);
+            }
+        }
+
+        // Get text of the paragraph
+        for (int j = 0; j < rs.size(); j++) {
+            // Grab the text and tabs of the paragraph
+            // Do so in a way that preserves the ordering
+            XmlCursor c = rs.get(j).newCursor();
+            try {
+                c.selectPath("./*");
+                while (c.toNextSelection()) {
+                    XmlObject o = c.getObject();
+                    if (o instanceof CTText) {
+                        text.append(((CTText) o).getStringValue());
+                    }
+                    if (o instanceof CTPTab) {
+                        text.append("\t");
+                    }
+                    //got a reference to a footnote
+                    if (o instanceof CTFtnEdnRef) {
+                        CTFtnEdnRef ftn = (CTFtnEdnRef) o;
+                        footnoteText.append("[").append(ftn.getId()).append(": ");
+
+                        // The owning document is null for paragraphs created without
+                        // one (e.g. tables in headers/footers), and the id may not
+                        // resolve to a footnote; keep the marker, skip the content.
+                        XWPFFootnote footnote = (document == null) ? null
+                                : document.getFootnoteByID(ftn.getId().intValue());
+
+                        if (footnote != null) {
+                            boolean first = true;
+                            for (XWPFParagraph p : footnote.getParagraphs()) {
+                                // separate consecutive footnote paragraphs with a newline
+                                if (!first) {
+                                    footnoteText.append("\n");
+                                }
+                                first = false;
+                                footnoteText.append(p.getText());
+                            }
+                        }
+
+                        footnoteText.append("]");
+                    }
+                }
+            } finally {
+                // release the cursor once the run has been walked
+                c.dispose();
+            }
+
+            // Loop over pictures inside our
+            // paragraph, looking for text in them
+            CTPicture[] picts = rs.get(j).getPictArray();
+            for (int k = 0; k < picts.length; k++) {
+                XmlObject[] t = picts[k]
+                        .selectPath("declare namespace w='http://schemas.openxmlformats.org/wordprocessingml/2006/main' .//w:t");
+                for (int m = 0; m < t.length; m++) {
+                    NodeList kids = t[m].getDomNode().getChildNodes();
+                    for (int n = 0; n < kids.getLength(); n++) {
+                        if (kids.item(n) instanceof Text) {
+                            pictureText.append("\n");
+                            pictureText.append(kids.item(n).getNodeValue());
+                        }
+                    }
+                }
+            }
+        }
+    }
}
public CTP getCTP() {
- return paragraph;
+ return paragraph;
}
public boolean isEmpty() {
- return !paragraph.getDomNode().hasChildNodes();
+ return !paragraph.getDomNode().hasChildNodes();
}
public XWPFDocument getDocument() {
- return document;
+ return document;
}
/**
* in it.
*/
public String getText() {
- return getParagraphText() + getPictureText();
+     // main text first, then the collected footnote text, then picture text
+     return text.toString() + footnoteText.toString() + pictureText.toString();
}
/**
* paragraph
*/
public String getParagraphText() {
- return text.toString();
+ return text.toString();
}
/**
* Returns any text from any suitable pictures in the paragraph
*/
public String getPictureText() {
- return pictureText.toString();
+ return pictureText.toString();
+ }
+
+ /**
+ * Returns the footnote text of the paragraph, as collected by the constructor.
+ * Every footnote reference found in the paragraph's runs contributes one
+ * "[id: text]" entry, where text is the text of the referenced footnote's
+ * paragraphs appended in order.
+ *
+ * @return the footnote text or empty string if the paragraph does not have footnotes
+ */
+ public String getFootnoteText() {
+ return footnoteText.toString();
}
/**
* @return a new text run
*/
public XWPFRun createRun() {
- return new XWPFRun(paragraph.addNewR(), this);
+ return new XWPFRun(paragraph.addNewR(), this);
}
/**
* @see Borders a list of all types of borders
*/
public void setBorderBottom(Borders border) {
- CTPBdr ct = getCTPBrd(true);
- CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom();
- if (border.getValue() == Borders.NONE.getValue())
- ct.unsetBottom();
- else
- pr.setVal(STBorder.Enum.forInt(border.getValue()));
+ CTPBdr ct = getCTPBrd(true);
+ CTBorder pr = ct.isSetBottom() ? ct.getBottom() : ct.addNewBottom();
+ if (border.getValue() == Borders.NONE.getValue())
+ ct.unsetBottom();
+ else
+ pr.setVal(STBorder.Enum.forInt(border.getValue()));
}
/**
* @see Borders a list of all types of borders
*/
public Borders getBorderBottom() {
- CTPBdr border = getCTPBrd(false);
- CTBorder ct = null;
- if (border != null) {
- ct = border.getBottom();
- }
- STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
- return Borders.valueOf(ptrn.intValue());
+ CTPBdr border = getCTPBrd(false);
+ CTBorder ct = null;
+ if (border != null) {
+ ct = border.getBottom();
+ }
+ STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+ return Borders.valueOf(ptrn.intValue());
}
/**
* @see Borders for a list of all possible borders
*/
public void setBorderLeft(Borders border) {
- CTPBdr ct = getCTPBrd(true);
- CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft();
- if (border.getValue() == Borders.NONE.getValue())
- ct.unsetLeft();
- else
- pr.setVal(STBorder.Enum.forInt(border.getValue()));
+ CTPBdr ct = getCTPBrd(true);
+ CTBorder pr = ct.isSetLeft() ? ct.getLeft() : ct.addNewLeft();
+ if (border.getValue() == Borders.NONE.getValue())
+ ct.unsetLeft();
+ else
+ pr.setVal(STBorder.Enum.forInt(border.getValue()));
}
/**
* @see Borders for a list of all possible borders
*/
public Borders getBorderLeft() {
- CTPBdr border = getCTPBrd(false);
- CTBorder ct = null;
- if (border != null) {
- ct = border.getLeft();
- }
- STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
- return Borders.valueOf(ptrn.intValue());
+ CTPBdr border = getCTPBrd(false);
+ CTBorder ct = null;
+ if (border != null) {
+ ct = border.getLeft();
+ }
+ STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+ return Borders.valueOf(ptrn.intValue());
}
/**
* @see Borders for a list of all possible borders
*/
public void setBorderRight(Borders border) {
- CTPBdr ct = getCTPBrd(true);
- CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight();
- if (border.getValue() == Borders.NONE.getValue())
- ct.unsetRight();
- else
- pr.setVal(STBorder.Enum.forInt(border.getValue()));
+ CTPBdr ct = getCTPBrd(true);
+ CTBorder pr = ct.isSetRight() ? ct.getRight() : ct.addNewRight();
+ if (border.getValue() == Borders.NONE.getValue())
+ ct.unsetRight();
+ else
+ pr.setVal(STBorder.Enum.forInt(border.getValue()));
}
/**
* @see Borders for a list of all possible borders
*/
public Borders getBorderRight() {
- CTPBdr border = getCTPBrd(false);
- CTBorder ct = null;
- if (border != null) {
- ct = border.getRight();
- }
- STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
- return Borders.valueOf(ptrn.intValue());
+ CTPBdr border = getCTPBrd(false);
+ CTBorder ct = null;
+ if (border != null) {
+ ct = border.getRight();
+ }
+ STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+ return Borders.valueOf(ptrn.intValue());
}
/**
* @see Borders for a list of all possible borders
*/
public void setBorderBetween(Borders border) {
- CTPBdr ct = getCTPBrd(true);
- CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween();
- if (border.getValue() == Borders.NONE.getValue())
- ct.unsetBetween();
- else
- pr.setVal(STBorder.Enum.forInt(border.getValue()));
+ CTPBdr ct = getCTPBrd(true);
+ CTBorder pr = ct.isSetBetween() ? ct.getBetween() : ct.addNewBetween();
+ if (border.getValue() == Borders.NONE.getValue())
+ ct.unsetBetween();
+ else
+ pr.setVal(STBorder.Enum.forInt(border.getValue()));
}
/**
* @see Borders for a list of all possible borders
*/
public Borders getBorderBetween() {
- CTPBdr border = getCTPBrd(false);
- CTBorder ct = null;
- if (border != null) {
- ct = border.getBetween();
- }
- STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
- return Borders.valueOf(ptrn.intValue());
+ CTPBdr border = getCTPBrd(false);
+ CTBorder ct = null;
+ if (border != null) {
+ ct = border.getBetween();
+ }
+ STBorder.Enum ptrn = ct != null ? ct.getVal() : STBorder.NONE;
+ return Borders.valueOf(ptrn.intValue());
}
/**
* boolean value
*/
public void setPageBreak(boolean pageBreak) {
- CTPPr ppr = getCTPPr();
- CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
- .getPageBreakBefore() : ppr.addNewPageBreakBefore();
- if (pageBreak)
- ct_pageBreak.setVal(STOnOff.TRUE);
- else
- ct_pageBreak.setVal(STOnOff.FALSE);
+ CTPPr ppr = getCTPPr();
+ CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
+ .getPageBreakBefore() : ppr.addNewPageBreakBefore();
+ if (pageBreak)
+ ct_pageBreak.setVal(STOnOff.TRUE);
+ else
+ ct_pageBreak.setVal(STOnOff.FALSE);
}
/**
* @return boolean - if page break is set
*/
public boolean isPageBreak() {
- CTPPr ppr = getCTPPr();
- CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
- .getPageBreakBefore() : null;
- if (ct_pageBreak != null
- && ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE)
- return true;
- else
- return false;
+ CTPPr ppr = getCTPPr();
+ CTOnOff ct_pageBreak = ppr.isSetPageBreakBefore() ? ppr
+ .getPageBreakBefore() : null;
+ if (ct_pageBreak != null
+ && ct_pageBreak.getVal().intValue() == STOnOff.INT_TRUE)
+ return true;
+ else
+ return false;
}
/**
* paragraph in the document in absolute units.
*
* @return bigInteger - value representing the spacing after the paragraph
- * @see #setSpacingAfterLines(int)
+ * @see #setSpacingAfterLines(int)
*/
public int getSpacingAfterLines() {
CTSpacing spacing = getCTSpacing(false);
* @param wrap - boolean
*/
public void setWordWrap(boolean wrap) {
- CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
- .getWordWrap() : getCTPPr().addNewWordWrap();
- if (wrap)
- wordWrap.setVal(STOnOff.TRUE);
- else
- wordWrap.unsetVal();
+ CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
+ .getWordWrap() : getCTPPr().addNewWordWrap();
+ if (wrap)
+ wordWrap.setVal(STOnOff.TRUE);
+ else
+ wordWrap.unsetVal();
}
/**
* @return boolean
*/
public boolean isWordWrap() {
- CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
- .getWordWrap() : null;
- if (wordWrap != null) {
- return (wordWrap.getVal() == STOnOff.ON
- || wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true
- : false;
- } else
- return false;
+ CTOnOff wordWrap = getCTPPr().isSetWordWrap() ? getCTPPr()
+ .getWordWrap() : null;
+ if (wordWrap != null) {
+ return (wordWrap.getVal() == STOnOff.ON
+ || wordWrap.getVal() == STOnOff.TRUE || wordWrap.getVal() == STOnOff.X_1) ? true
+ : false;
+ } else
+ return false;
}
/**
"/word/document.xml",
null
);
+ /**
+ * Main document part declared with the WordprocessingML "template" content type.
+ * Uses the officeDocument relationship type and /word/document.xml as default
+ * name; no part class is given (null).
+ */
+ public static final XWPFRelation TEMPLATE = new XWPFRelation(
+ "application/vnd.openxmlformats-officedocument.wordprocessingml.template.main+xml",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+ "/word/document.xml",
+ null
+ );
+ /**
+ * Main document part declared with the macro-enabled document content type.
+ */
+ public static final XWPFRelation MACRO_DOCUMENT = new XWPFRelation(
+ "application/vnd.ms-word.document.macroEnabled.main+xml",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+ "/word/document.xml",
+ null
+ );
+ /**
+ * Main document part declared with the macro-enabled template content type.
+ */
+ public static final XWPFRelation MACRO_TEMPLATE_DOCUMENT = new XWPFRelation(
+ "application/vnd.ms-word.template.macroEnabledTemplate.main+xml",
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument",
+ "/word/document.xml",
+ null
+ );
public static final XWPFRelation FONT_TABLE = new XWPFRelation(
"application/vnd.openxmlformats-officedocument.wordprocessingml.fontTable+xml",
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/fontTable",
null,
null
);
+ /**
+ * Footnotes part. Only the relationship type is set: content type, default
+ * name and part class are all null. XWPFDocument.initFootnotes() compares a
+ * part's relationship type against getRelation() of this constant.
+ */
+ public static final XWPFRelation FOOTNOTE = new XWPFRelation(
+ null,
+ "http://schemas.openxmlformats.org/officeDocument/2006/relationships/footnotes",
+ null,
+ null
+ );
private XWPFRelation(String type, String rel, String defaultName, Class<? extends POIXMLDocumentPart> cls) {
private CTTbl ctTbl;
- public XWPFTable(CTTbl table, int row, int col) {
- this(table);
+ public XWPFTable(XWPFDocument doc, CTTbl table, int row, int col) {
+ this(doc, table);
for (int i = 0; i < row; i++) {
XWPFTableRow tabRow = (getRow(i) == null) ? createRow() : getRow(i);
for (int k = 0; k < col; k++) {
}
- public XWPFTable(CTTbl table) {
+ public XWPFTable(XWPFDocument doc, CTTbl table) {
this.ctTbl = table;
// is an empty table: I add one row and one column as default
StringBuffer rowText = new StringBuffer();
for (CTTc cell : row.getTcArray()) {
for (CTP ctp : cell.getPArray()) {
- XWPFParagraph p = new XWPFParagraph(ctp, null);
+ XWPFParagraph p = new XWPFParagraph(ctp, doc);
if (rowText.length() > 0) {
rowText.append('\t');
}
package org.apache.poi.xwpf.extractor;
import java.io.File;
+import java.io.IOException;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
* Tests for HXFWordExtractor
*/
public class TestXWPFWordExtractor extends TestCase {
- /**
- * A very simple file
- */
- private XWPFDocument xmlA;
- private File fileA;
- /**
- * A fairly complex file
- */
- private XWPFDocument xmlB;
- private File fileB;
- /**
- * With a simplish header+footer
- */
- private XWPFDocument xmlC;
- private File fileC;
- /**
- * With different header+footer on first/rest
- */
- private XWPFDocument xmlD;
- private File fileD;
-
- /**
- * File with hyperlinks
- */
- private XWPFDocument xmlE;
- private File fileE;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- fileA = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "sample.docx"
- );
- fileB = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "IllustrativeCases.docx"
- );
- fileC = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "ThreeColHeadFoot.docx"
- );
- fileD = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "DiffFirstPageHeadFoot.docx"
- );
- fileE = new File(
- System.getProperty("HWPF.testdata.path") +
- File.separator + "TestDocument.docx"
- );
- assertTrue(fileA.exists());
- assertTrue(fileB.exists());
- assertTrue(fileC.exists());
- assertTrue(fileD.exists());
- assertTrue(fileE.exists());
-
- xmlA = new XWPFDocument(POIXMLDocument.openPackage(fileA.toString()));
- xmlB = new XWPFDocument(POIXMLDocument.openPackage(fileB.toString()));
- xmlC = new XWPFDocument(POIXMLDocument.openPackage(fileC.toString()));
- xmlD = new XWPFDocument(POIXMLDocument.openPackage(fileD.toString()));
- xmlE = new XWPFDocument(POIXMLDocument.openPackage(fileE.toString()));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new XWPFWordExtractor(xmlA);
- new XWPFWordExtractor(POIXMLDocument.openPackage(fileA.toString()));
-
- XWPFWordExtractor extractor =
- new XWPFWordExtractor(xmlA);
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check contents
- assertTrue(text.startsWith(
- "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio."
- ));
- assertTrue(text.endsWith(
- "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n"
- ));
-
- // Check number of paragraphs
- int ps = 0;
- char[] t = text.toCharArray();
- for (int i = 0; i < t.length; i++) {
- if(t[i] == '\n') { ps++; }
- }
- assertEquals(3, ps);
- }
-
- /**
- * Tests getting the text out of a complex file
- */
- public void testGetComplexText() throws Exception {
- XWPFWordExtractor extractor =
- new XWPFWordExtractor(xmlB);
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- char euro = '\u20ac';
+
+ /**
+ * Get text out of the simple file
+ */
+ public void testGetSimpleText() throws Exception {
+     XWPFWordExtractor extractor = new XWPFWordExtractor(open("sample.docx"));
+
+     String text = extractor.getText();
+     assertTrue(text.length() > 0);
+
+     // The extracted text must begin and end with the known sentences
+     String expectedStart =
+         "Lorem ipsum dolor sit amet, consectetuer adipiscing elit. Nunc at risus vel erat tempus posuere. Aenean non ante. Suspendisse vehicula dolor sit amet odio.";
+     String expectedEnd =
+         "Phasellus ultricies mi nec leo. Sed tempus. In sit amet lorem at velit faucibus vestibulum.\n";
+     assertTrue(text.startsWith(expectedStart));
+     assertTrue(text.endsWith(expectedEnd));
+
+     // One newline per paragraph: count them
+     int newlines = 0;
+     for (char ch : text.toCharArray()) {
+         if (ch == '\n') {
+             newlines++;
+         }
+     }
+     assertEquals(3, newlines);
+ }
+
+ /**
+ * Tests getting the text out of a complex file
+ */
+ public void testGetComplexText() throws Exception {
+ XWPFDocument doc = open("IllustrativeCases.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ char euro = '\u20ac';
// System.err.println("'"+text.substring(text.length() - 40) + "'");
-
- // Check contents
- assertTrue(text.startsWith(
- " \n(V) ILLUSTRATIVE CASES\n\n"
- ));
- assertTrue(text.contains(
- "As well as gaining "+euro+"90 from child benefit increases, he will also receive the early childhood supplement of "+euro+"250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
- ));
- assertTrue(text.endsWith(
- "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
- ));
-
- // Check number of paragraphs
- int ps = 0;
- char[] t = text.toCharArray();
- for (int i = 0; i < t.length; i++) {
- if(t[i] == '\n') { ps++; }
- }
- assertEquals(103, ps);
- }
-
- public void testGetWithHyperlinks() throws Exception {
- XWPFWordExtractor extractor =
- new XWPFWordExtractor(xmlE);
- extractor.getText();
- extractor.setFetchHyperlinks(true);
- extractor.getText();
-
- // Now check contents
- // TODO - fix once correctly handling contents
- extractor.setFetchHyperlinks(false);
- assertEquals(
+
+ // Check contents
+ assertTrue(text.startsWith(
+ " \n(V) ILLUSTRATIVE CASES\n\n"
+ ));
+ assertTrue(text.contains(
+ "As well as gaining " + euro + "90 from child benefit increases, he will also receive the early childhood supplement of " + euro + "250 per quarter for Vincent for the full four quarters of the year.\n\n\n\n \n\n\n"
+ ));
+ assertTrue(text.endsWith(
+ "11.4%\t\t90\t\t\t\t\t250\t\t1,310\t\n\n"
+ ));
+
+ // Check number of paragraphs
+ int ps = 0;
+ char[] t = text.toCharArray();
+ for (int i = 0; i < t.length; i++) {
+ if (t[i] == '\n') {
+ ps++;
+ }
+ }
+ assertEquals(103, ps);
+ }
+
+ public void testGetWithHyperlinks() throws Exception {
+ XWPFDocument doc = open("TestDocument.docx");
+ XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+ // Now check contents
+ // TODO - fix once correctly handling contents
+ extractor.setFetchHyperlinks(false);
+ assertEquals(
// "This is a test document\nThis bit is in bold and italic\n" +
// "Back to normal\nWe have a hyperlink here, and another.\n",
- "This is a test document\nThis bit is in bold and italic\n" +
- "Back to normal\nWe have a here, and .hyperlinkanother\n",
- extractor.getText()
- );
-
- extractor.setFetchHyperlinks(true);
- assertEquals(
+ "This is a test document\nThis bit is in bold and italic\n" +
+ "Back to normal\nWe have a here, and .hyperlinkanother\n",
+ extractor.getText()
+ );
+
+ extractor.setFetchHyperlinks(true);
+ assertEquals(
// "This is a test document\nThis bit is in bold and italic\n" +
// "Back to normal\nWe have a hyperlink here, and another.\n",
- "This is a test document\nThis bit is in bold and italic\n" +
- "Back to normal\nWe have a here, and .hyperlink <http://poi.apache.org/>another\n",
- extractor.getText()
- );
- }
-
- public void testHeadersFooters() throws Exception {
- XWPFWordExtractor extractor =
- new XWPFWordExtractor(xmlC);
- extractor.getText();
-
- assertEquals(
- "First header column!\tMid header\tRight header!\n" +
- "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
- "\n" +
- "HEADING TEXT\n" +
- "\n" +
- "More on page one\n" +
- "\n\n" +
- "End of page 1\n\n" +
- "This is page two. It also has a three column heading, and a three column footer.\n" +
- "Footer Left\tFooter Middle\tFooter Right\n",
- extractor.getText()
- );
-
-
- // Now another file, expect multiple headers
- // and multiple footers
- extractor =
- new XWPFWordExtractor(xmlD);
- extractor.getText();
-
- assertEquals(
- "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
- "First header column!\tMid header\tRight header!\n" +
- "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
- "\n" +
- "HEADING TEXT\n" +
- "\n" +
- "More on page one\n" +
- "\n\n" +
- "End of page 1\n\n" +
- "This is page two. It also has a three column heading, and a three column footer.\n" +
- "The footer of the first page\n" +
- "Footer Left\tFooter Middle\tFooter Right\n",
- extractor.getText()
- );
- }
+ "This is a test document\nThis bit is in bold and italic\n" +
+ "Back to normal\nWe have a here, and .hyperlink <http://poi.apache.org/>another\n",
+ extractor.getText()
+ );
+ }
+
+ /**
+  * Verifies that header and footer text is part of the extracted text:
+  * first for a document with a three column header and footer, then for
+  * a document whose first page carries its own header and footer in
+  * addition to the three column ones.
+  */
+ public void testHeadersFooters() throws Exception {
+     XWPFDocument doc = open("ThreeColHeadFoot.docx");
+     XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
+
+     assertEquals(
+             "First header column!\tMid header\tRight header!\n" +
+             "This is a sample word document. It has two pages. It has a three column heading, and a three column footer\n" +
+             "\n" +
+             "HEADING TEXT\n" +
+             "\n" +
+             "More on page one\n" +
+             "\n\n" +
+             "End of page 1\n\n" +
+             "This is page two. It also has a three column heading, and a three column footer.\n" +
+             "Footer Left\tFooter Middle\tFooter Right\n",
+             extractor.getText()
+     );
+
+     // Now another file, expect multiple headers
+     // and multiple footers.
+     // The extractor is built exactly once per document; the asserted
+     // getText() call below is the only extraction that is needed.
+     doc = open("DiffFirstPageHeadFoot.docx");
+     extractor = new XWPFWordExtractor(doc);
+
+     assertEquals(
+             "I am the header on the first page, and I" + '\u2019' + "m nice and simple\n" +
+             "First header column!\tMid header\tRight header!\n" +
+             "This is a sample word document. It has two pages. It has a simple header and footer, which is different to all the other pages.\n" +
+             "\n" +
+             "HEADING TEXT\n" +
+             "\n" +
+             "More on page one\n" +
+             "\n\n" +
+             "End of page 1\n\n" +
+             "This is page two. It also has a three column heading, and a three column footer.\n" +
+             "The footer of the first page\n" +
+             "Footer Left\tFooter Middle\tFooter Right\n",
+             extractor.getText()
+     );
+ }
+
+ /**
+  * Extracts the text of footnotes.docx and checks that it contains the
+  * word "snoska" (presumably the footnote text - confirm against the
+  * sample file).
+  */
+ public void testFootnotes() throws Exception {
+     XWPFWordExtractor extractor = new XWPFWordExtractor(open("footnotes.docx"));
+
+     String extracted = extractor.getText();
+     assertTrue(extracted.contains("snoska"));
+ }
+
+
+ /**
+  * Extracts the text of table_footnotes.docx and checks that it contains
+  * the word "snoska" (presumably a footnote attached to table content -
+  * confirm against the sample file).
+  */
+ public void testTableFootnotes() throws Exception {
+     XWPFWordExtractor extractor = new XWPFWordExtractor(open("table_footnotes.docx"));
+
+     String extracted = extractor.getText();
+     assertTrue(extracted.contains("snoska"));
+ }
+
+ /**
+  * Extracts the text of form_footnotes.docx and checks that both
+  * "testdoc" and "test phrase" occur in it. On failure the full
+  * extracted text is included in the assertion message.
+  */
+ public void testFormFootnotes() throws Exception {
+     XWPFWordExtractor extractor = new XWPFWordExtractor(open("form_footnotes.docx"));
+     String text = extractor.getText();
+
+     // Same failure message for both checks: the dump of the extracted text
+     String failure = "Unable to find expected word in text\n" + text;
+     assertTrue(failure, text.contains("testdoc"));
+     assertTrue(failure, text.contains("test phrase"));
+ }
+
+ //TODO use the same logic as in HSSFTestDataSamples
+ /**
+  * Opens a sample document located in the directory named by the
+  * "HWPF.testdata.path" system property.
+  *
+  * @param sampleFileName file name of the sample, relative to that directory
+  * @return the document read from the opened package
+  * @throws IOException declared for the package-opening call
+  * @throws RuntimeException if the canonical file name differs from the
+  *         requested name, or if resolving the canonical file fails
+  */
+ private XWPFDocument open(String sampleFileName) throws IOException {
+     String dataDir = System.getProperty("HWPF.testdata.path");
+     File sample = new File(dataDir, sampleFileName);
+
+     // Resolve the name as stored on disk so that a request differing
+     // only in case is rejected instead of silently accepted.
+     String actualName;
+     try {
+         actualName = sample.getCanonicalFile().getName();
+     } catch (IOException e) {
+         throw new RuntimeException(e);
+     }
+     if (!sampleFileName.equals(actualName)) {
+         throw new RuntimeException("File name is case-sensitive: requested '" + sampleFileName
+                 + "' but actual file is '" + actualName + "'");
+     }
+     return new XWPFDocument(POIXMLDocument.openPackage(sample.getPath()));
+ }
}
public void testConstructor() {
CTTbl ctTable=CTTbl.Factory.newInstance();
- XWPFTable xtab=new XWPFTable(ctTable);
+ XWPFTable xtab=new XWPFTable(null, ctTable);
assertNotNull(xtab);
assertEquals(1,ctTable.sizeOfTrArray());
assertEquals(1,ctTable.getTrArray(0).sizeOfTcArray());
assertNotNull(ctTable.getTrArray(0).getTcArray(0).getPArray(0));
ctTable=CTTbl.Factory.newInstance();
- xtab=new XWPFTable(ctTable, 3,2);
+ xtab=new XWPFTable(null, ctTable, 3,2);
assertNotNull(xtab);
assertEquals(3,ctTable.sizeOfTrArray());
assertEquals(2,ctTable.getTrArray(0).sizeOfTcArray());
CTText text=run.addNewT();
text.setStringValue("finally I can write!");
- XWPFTable xtab=new XWPFTable(table);
+ XWPFTable xtab=new XWPFTable(null, table);
assertEquals("finally I can write!\n",xtab.getText());
}
r3.addNewTc().addNewP();
r3.addNewTc().addNewP();
- XWPFTable xtab=new XWPFTable(table);
+ XWPFTable xtab=new XWPFTable(null, table);
assertEquals(3,xtab.getNumberOfRows());
assertNotNull(xtab.getRow(2));
assertEquals(2,table.getTrArray(0).sizeOfTcArray());
//check creation of first row
- xtab=new XWPFTable(CTTbl.Factory.newInstance());
+ xtab=new XWPFTable(null, CTTbl.Factory.newInstance());
assertEquals(1,xtab.getCTTbl().getTrArray(0).sizeOfTcArray());
}
CTTbl table = CTTbl.Factory.newInstance();
table.addNewTblPr().addNewTblW().setW(new BigInteger("1000"));
- XWPFTable xtab=new XWPFTable(table);
+ XWPFTable xtab=new XWPFTable(null, table);
assertEquals(1000,xtab.getWidth());