<sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
<sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/>
+ <sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain" usefile="no"/>
<formatter type="xml"/>
<sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
<sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
<sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+ <sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/>
<sysproperty key="java.awt.headless" value="true"/>
<formatter type="plain"/>
<formatter type="xml"/>
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1-final" date="2008-06-??">
+ <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1-final" date="2008-06-??">
+ <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFComment;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
private HSSFWorkbook wb;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
public ExcelExtractor(HSSFWorkbook wb) {
super(wb);
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Should cell comments be included? Default is true
+ */
+ public void setIncludeCellComments(boolean includeCellComments) {
+ this.includeCellComments = includeCellComments;
+ }
/**
* Retreives the text contents of the file
break;
}
+ // Output the comment, if requested and exists
+ HSSFComment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+ }
+
// Output a tab if we're not on the last cell
if(outputContents && k < (lastCell-1)) {
text.append("\t");
==================================================================== */
package org.apache.poi.xssf.extractor;
-import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Comment;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
/**
* Helper class to extract text from an OOXML Excel file
private Workbook workbook;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(path));
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Should cell comments be included? Default is true
+ */
+ public void setIncludeCellComments(boolean includeCellComments) {
+ this.includeCellComments = includeCellComments;
+ }
/**
* Retreives the text contents of the file
for (Object rawR : sheet) {
Row row = (Row)rawR;
- for(Iterator ri = row.cellIterator(); ri.hasNext();) {
- Cell cell = (Cell)ri.next();
+ for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
+ Cell cell = ri.next();
// Is it a formula one?
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
text.append(xc.getRawValue());
}
+ // Output the comment, if requested and exists
+ Comment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+ }
+
if(ri.hasNext())
text.append("\t");
}
--- /dev/null
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi;
+
+import java.io.File;
+import java.util.Iterator;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+/**
+ * Class to test that we handle embeded bits in
+ * OOXML files properly
+ */
+public class TestEmbeded extends TestCase
+{
+ public String dirname;
+
+ public void setUp() {
+ dirname = System.getProperty("OOXML.testdata.path");
+ assertNotNull(dirname);
+ }
+
+ public void testExcel() throws Exception {
+ File f = new File(dirname, "ExcelWithAttachments.xlsx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString()));
+ test(doc, 0);
+ }
+
+ public void testWord() throws Exception {
+ File f = new File(dirname, "WordWithAttachments.docx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XWPFDocument(Package.open(f.toString()));
+ test(doc, 4);
+ }
+
+ public void testPowerPoint() throws Exception {
+ File f = new File(dirname, "PPTWithAttachments.pptx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString()));
+ test(doc, 0);
+ }
+
+ private void test(POIXMLDocument doc, int expectedCount) throws Exception {
+ assertNotNull(doc.getAllEmbedds());
+ assertEquals(expectedCount, doc.getAllEmbedds().size());
+
+ for(int i=0; i<doc.getAllEmbedds().size(); i++) {
+ PackagePart pp = doc.getAllEmbedds().get(i);
+ assertNotNull(pp);
+
+ byte[] b = IOUtils.toByteArray(pp.getInputStream());
+ assertTrue(b.length > 0);
+ }
+ }
+}
);
}
+ public void testWithComments() throws Exception {
+ ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
+ extractor.setIncludeSheetNames(false);
+
+ // Check without comments
+ assertEquals(
+ "1.0\tone\n" +
+ "2.0\ttwo\n" +
+ "3.0\tthree\n",
+ extractor.getText()
+ );
+
+ // Now with
+ extractor.setIncludeCellComments(true);
+ assertEquals(
+ "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
+ "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
+ "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
+ extractor.getText()
+ );
+ }
+
/**
* Embded in a non-excel file