diff options
author | Nick Burch <nick@apache.org> | 2008-05-27 12:36:00 +0000 |
---|---|---|
committer | Nick Burch <nick@apache.org> | 2008-05-27 12:36:00 +0000 |
commit | 21dc97110af112fb338c1b05283a67be70939286 (patch) | |
tree | 5d42b73e062260b3898262ae7904236493a84d41 /src | |
parent | 429aba5ca8be4f2addbc841a0ec8bc95e532ea64 (diff) | |
download | poi-21dc97110af112fb338c1b05283a67be70939286.tar.gz poi-21dc97110af112fb338c1b05283a67be70939286.zip |
More merging, plus tests for embedded ooxml files
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@660488 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r-- | src/documentation/content/xdocs/changes.xml | 1 | ||||
-rw-r--r-- | src/documentation/content/xdocs/status.xml | 1 | ||||
-rw-r--r-- | src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java | 17 | ||||
-rw-r--r-- | src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java | 27 | ||||
-rw-r--r-- | src/ooxml/testcases/org/apache/poi/TestEmbeded.java | 83 | ||||
-rw-r--r-- | src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls | bin | 0 -> 16896 bytes | |||
-rw-r--r-- | src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java | 22 |
7 files changed, 143 insertions, 8 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index f26c6271b9..1381dd8d28 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -46,6 +46,7 @@ <action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action> </release> <release version="3.1-final" date="2008-06-??"> + <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action> <action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action> <action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action> <action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action> diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 4fc778a5f9..35e3ab7514 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -43,6 +43,7 @@ <action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action> </release> <release version="3.1-final" date="2008-06-??"> + <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action> <action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action> <action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action> <action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action> diff 
--git a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java index 2a9c455cac..75a73c654d 100644 --- a/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java +++ b/src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java @@ -20,6 +20,7 @@ import java.io.IOException; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.hssf.usermodel.HSSFCell; +import org.apache.poi.hssf.usermodel.HSSFComment; import org.apache.poi.hssf.usermodel.HSSFRichTextString; import org.apache.poi.hssf.usermodel.HSSFRow; import org.apache.poi.hssf.usermodel.HSSFSheet; @@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor { private HSSFWorkbook wb; private boolean includeSheetNames = true; private boolean formulasNotResults = false; + private boolean includeCellComments = false; public ExcelExtractor(HSSFWorkbook wb) { super(wb); @@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor { public void setFormulasNotResults(boolean formulasNotResults) { this.formulasNotResults = formulasNotResults; } + /** + * Should cell comments be included? 
Default is true + */ + public void setIncludeCellComments(boolean includeCellComments) { + this.includeCellComments = includeCellComments; + } /** * Retreives the text contents of the file @@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor { break; } + // Output the comment, if requested and exists + HSSFComment comment = cell.getCellComment(); + if(includeCellComments && comment != null) { + // Replace any newlines with spaces, otherwise it + // breaks the output + String commentText = comment.getString().getString().replace('\n', ' '); + text.append(" Comment by "+comment.getAuthor()+": "+commentText); + } + // Output a tab if we're not on the last cell if(outputContents && k < (lastCell-1)) { text.append("\t"); diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index 9ebb3f053b..2d27f5d33d 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -16,25 +16,20 @@ ==================================================================== */ package org.apache.poi.xssf.extractor; -import java.io.File; import java.io.IOException; import java.util.Iterator; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Comment; import org.apache.poi.ss.usermodel.Row; import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.xssf.usermodel.XSSFCell; -import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; -import 
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; /** * Helper class to extract text from an OOXML Excel file @@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { private Workbook workbook; private boolean includeSheetNames = true; private boolean formulasNotResults = false; + private boolean includeCellComments = false; public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { this(new XSSFWorkbook(path)); @@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { public void setFormulasNotResults(boolean formulasNotResults) { this.formulasNotResults = formulasNotResults; } + /** + * Should cell comments be included? Default is true + */ + public void setIncludeCellComments(boolean includeCellComments) { + this.includeCellComments = includeCellComments; + } /** * Retreives the text contents of the file @@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { for (Object rawR : sheet) { Row row = (Row)rawR; - for(Iterator ri = row.cellIterator(); ri.hasNext();) { - Cell cell = (Cell)ri.next(); + for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) { + Cell cell = ri.next(); // Is it a formula one? 
if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) { @@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { text.append(xc.getRawValue()); } + // Output the comment, if requested and exists + Comment comment = cell.getCellComment(); + if(includeCellComments && comment != null) { + // Replace any newlines with spaces, otherwise it + // breaks the output + String commentText = comment.getString().getString().replace('\n', ' '); + text.append(" Comment by "+comment.getAuthor()+": "+commentText); + } + if(ri.hasNext()) text.append("\t"); } diff --git a/src/ooxml/testcases/org/apache/poi/TestEmbeded.java b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java new file mode 100644 index 0000000000..5e127e21c6 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java @@ -0,0 +1,83 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ + + +package org.apache.poi; + +import java.io.File; +import java.util.Iterator; + +import org.apache.poi.util.IOUtils; +import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.poi.xwpf.XWPFDocument; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; + +import junit.framework.TestCase; + +/** + * Class to test that we handle embeded bits in + * OOXML files properly + */ +public class TestEmbeded extends TestCase +{ + public String dirname; + + public void setUp() { + dirname = System.getProperty("OOXML.testdata.path"); + assertNotNull(dirname); + } + + public void testExcel() throws Exception { + File f = new File(dirname, "ExcelWithAttachments.xlsx"); + assertTrue(f.exists()); + + POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString())); + test(doc, 0); + } + + public void testWord() throws Exception { + File f = new File(dirname, "WordWithAttachments.docx"); + assertTrue(f.exists()); + + POIXMLDocument doc = new XWPFDocument(Package.open(f.toString())); + test(doc, 4); + } + + public void testPowerPoint() throws Exception { + File f = new File(dirname, "PPTWithAttachments.pptx"); + assertTrue(f.exists()); + + POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString())); + test(doc, 0); + } + + private void test(POIXMLDocument doc, int expectedCount) throws Exception { + assertNotNull(doc.getAllEmbedds()); + assertEquals(expectedCount, doc.getAllEmbedds().size()); + + for(int i=0; i<doc.getAllEmbedds().size(); i++) { + PackagePart pp = doc.getAllEmbedds().get(i); + assertNotNull(pp); + + byte[] b = IOUtils.toByteArray(pp.getInputStream()); + assertTrue(b.length > 0); + } + } +} diff --git a/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls Binary files differnew file mode 100644 index 0000000000..66dd9185ea --- /dev/null +++ 
b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 63d67ee771..9bb137ff69 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase { ); } + public void testWithComments() throws Exception { + ExcelExtractor extractor = createExtractor("SimpleWithComments.xls"); + extractor.setIncludeSheetNames(false); + + // Check without comments + assertEquals( + "1.0\tone\n" + + "2.0\ttwo\n" + + "3.0\tthree\n", + extractor.getText() + ); + + // Now with + extractor.setIncludeCellComments(true); + assertEquals( + "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" + + "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + + "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", + extractor.getText() + ); + } + /** * Embded in a non-excel file |