aboutsummaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2008-05-27 12:36:00 +0000
committerNick Burch <nick@apache.org>2008-05-27 12:36:00 +0000
commit21dc97110af112fb338c1b05283a67be70939286 (patch)
tree5d42b73e062260b3898262ae7904236493a84d41 /src
parent429aba5ca8be4f2addbc841a0ec8bc95e532ea64 (diff)
downloadpoi-21dc97110af112fb338c1b05283a67be70939286.tar.gz
poi-21dc97110af112fb338c1b05283a67be70939286.zip
More merging, plus tests for embeded ooxml files
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@660488 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r--src/documentation/content/xdocs/changes.xml1
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java17
-rw-r--r--src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java27
-rw-r--r--src/ooxml/testcases/org/apache/poi/TestEmbeded.java83
-rw-r--r--src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xlsbin0 -> 16896 bytes
-rw-r--r--src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java22
7 files changed, 143 insertions, 8 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index f26c6271b9..1381dd8d28 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -46,6 +46,7 @@
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1-final" date="2008-06-??">
+ <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 4fc778a5f9..35e3ab7514 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -43,6 +43,7 @@
<action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
</release>
<release version="3.1-final" date="2008-06-??">
+ <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
<action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
<action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
<action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
diff --git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
index 2a9c455cac..75a73c654d 100644
--- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
+++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
@@ -20,6 +20,7 @@ import java.io.IOException;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFComment;
import org.apache.poi.hssf.usermodel.HSSFRichTextString;
import org.apache.poi.hssf.usermodel.HSSFRow;
import org.apache.poi.hssf.usermodel.HSSFSheet;
@@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
private HSSFWorkbook wb;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
public ExcelExtractor(HSSFWorkbook wb) {
super(wb);
@@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Should cell comments be included? Default is false
+ */
+ public void setIncludeCellComments(boolean includeCellComments) {
+ this.includeCellComments = includeCellComments;
+ }
/**
* Retrieves the text contents of the file
@@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
break;
}
+ // Output the comment, if requested and exists
+ HSSFComment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+ }
+
// Output a tab if we're not on the last cell
if(outputContents && k < (lastCell-1)) {
text.append("\t");
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
index 9ebb3f053b..2d27f5d33d 100644
--- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
+++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
@@ -16,25 +16,20 @@
==================================================================== */
package org.apache.poi.xssf.extractor;
-import java.io.File;
import java.io.IOException;
import java.util.Iterator;
import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Comment;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.xssf.usermodel.XSSFCell;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
import org.apache.poi.xssf.usermodel.XSSFWorkbook;
import org.apache.xmlbeans.XmlException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
/**
* Helper class to extract text from an OOXML Excel file
@@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
private Workbook workbook;
private boolean includeSheetNames = true;
private boolean formulasNotResults = false;
+ private boolean includeCellComments = false;
public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
this(new XSSFWorkbook(path));
@@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
public void setFormulasNotResults(boolean formulasNotResults) {
this.formulasNotResults = formulasNotResults;
}
+ /**
+ * Should cell comments be included? Default is false
+ */
+ public void setIncludeCellComments(boolean includeCellComments) {
+ this.includeCellComments = includeCellComments;
+ }
/**
* Retrieves the text contents of the file
@@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
for (Object rawR : sheet) {
Row row = (Row)rawR;
- for(Iterator ri = row.cellIterator(); ri.hasNext();) {
- Cell cell = (Cell)ri.next();
+ for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
+ Cell cell = ri.next();
// Is it a formula one?
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
@@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
text.append(xc.getRawValue());
}
+ // Output the comment, if requested and exists
+ Comment comment = cell.getCellComment();
+ if(includeCellComments && comment != null) {
+ // Replace any newlines with spaces, otherwise it
+ // breaks the output
+ String commentText = comment.getString().getString().replace('\n', ' ');
+ text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+ }
+
if(ri.hasNext())
text.append("\t");
}
diff --git a/src/ooxml/testcases/org/apache/poi/TestEmbeded.java b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java
new file mode 100644
index 0000000000..5e127e21c6
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java
@@ -0,0 +1,83 @@
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi;
+
+import java.io.File;
+import java.util.Iterator;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+/**
+ * Class to test that we handle embedded bits in
+ * OOXML files properly
+ */
+public class TestEmbeded extends TestCase
+{
+ public String dirname;
+
+ public void setUp() {
+ dirname = System.getProperty("OOXML.testdata.path");
+ assertNotNull(dirname);
+ }
+
+ public void testExcel() throws Exception {
+ File f = new File(dirname, "ExcelWithAttachments.xlsx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString()));
+ test(doc, 0);
+ }
+
+ public void testWord() throws Exception {
+ File f = new File(dirname, "WordWithAttachments.docx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XWPFDocument(Package.open(f.toString()));
+ test(doc, 4);
+ }
+
+ public void testPowerPoint() throws Exception {
+ File f = new File(dirname, "PPTWithAttachments.pptx");
+ assertTrue(f.exists());
+
+ POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString()));
+ test(doc, 0);
+ }
+
+ private void test(POIXMLDocument doc, int expectedCount) throws Exception {
+ assertNotNull(doc.getAllEmbedds());
+ assertEquals(expectedCount, doc.getAllEmbedds().size());
+
+ for(int i=0; i<doc.getAllEmbedds().size(); i++) {
+ PackagePart pp = doc.getAllEmbedds().get(i);
+ assertNotNull(pp);
+
+ byte[] b = IOUtils.toByteArray(pp.getInputStream());
+ assertTrue(b.length > 0);
+ }
+ }
+}
diff --git a/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
new file mode 100644
index 0000000000..66dd9185ea
--- /dev/null
+++ b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
Binary files differ
diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
index 63d67ee771..9bb137ff69 100644
--- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
+++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
@@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase {
);
}
+ public void testWithComments() throws Exception {
+ ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
+ extractor.setIncludeSheetNames(false);
+
+ // Check without comments
+ assertEquals(
+ "1.0\tone\n" +
+ "2.0\ttwo\n" +
+ "3.0\tthree\n",
+ extractor.getText()
+ );
+
+ // Now with
+ extractor.setIncludeCellComments(true);
+ assertEquals(
+ "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
+ "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" +
+ "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n",
+ extractor.getText()
+ );
+ }
+
/**
* Embded in a non-excel file