]> source.dussan.org Git - poi.git/commitdiff
More merging, plus tests for embeded ooxml files
authorNick Burch <nick@apache.org>
Tue, 27 May 2008 12:36:00 +0000 (12:36 +0000)
committerNick Burch <nick@apache.org>
Tue, 27 May 2008 12:36:00 +0000 (12:36 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@660488 13f79535-47bb-0310-9956-ffa450edef68

build.xml
src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/hssf/extractor/ExcelExtractor.java
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
src/ooxml/testcases/org/apache/poi/TestEmbeded.java [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls [new file with mode: 0644]
src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java

index e1cb26c35320d877cb3bf9f1955aabfb3dee3a65..bcb25353648dac6c07a3d1060f1dc9b9259cb29c 100644 (file)
--- a/build.xml
+++ b/build.xml
@@ -650,6 +650,7 @@ under the License.
             <sysproperty key="HSMF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hsmf/data"/>
             <sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
             <sysproperty key="POIFS.testdata.path" file="${main.src.test}/org/apache/poi/poifs/data"/> 
+            <sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/> 
                 <sysproperty key="java.awt.headless" value="true"/>
             <formatter type="plain" usefile="no"/>
             <formatter type="xml"/>
@@ -799,6 +800,7 @@ under the License.
             <sysproperty key="HWPF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hwpf/data"/>
             <sysproperty key="HSLF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hslf/data"/>
             <sysproperty key="HDGF.testdata.path" file="${scratchpad.src.test}/org/apache/poi/hdgf/data"/>
+            <sysproperty key="OOXML.testdata.path" file="${ooxml.src.test}/org/apache/poi/ooxml/data"/> 
             <sysproperty key="java.awt.headless" value="true"/>
             <formatter type="plain"/>
             <formatter type="xml"/>
index f26c6271b9be19b5c52855c63446691b5c779d87..1381dd8d28f1094e9cd62e98e93f5fa1dc16eda3 100644 (file)
@@ -46,6 +46,7 @@
            <action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
         </release>
         <release version="3.1-final" date="2008-06-??">
+           <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
            <action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
            <action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
            <action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
index 4fc778a5f97209f69ad9bdb29315d2e48deff6a7..35e3ab751494e4638e8102b8d960b9bc6b0eb6c6 100644 (file)
@@ -43,6 +43,7 @@
            <action dev="POI-DEVELOPERS" type="add">Created a common interface for handling Excel files, irrespective of if they are .xls or .xlsx</action>
         </release>
         <release version="3.1-final" date="2008-06-??">
+           <action dev="POI-DEVELOPERS" type="add">45043 - Support for getting excel cell comments when extracting text</action>
            <action dev="POI-DEVELOPERS" type="add">Extend the support for specifying a policy to HSSF on missing / blank cells when fetching, to be able to specify the policy at the HSSFWorkbook level</action>
            <action dev="POI-DEVELOPERS" type="fix">45025 - improved FormulaParser parse error messages</action>
            <action dev="POI-DEVELOPERS" type="fix">45046 - allowed EXTERNALBOOK(0x01AE) to be optional in the LinkTable</action>
index 2a9c455caccd975696e7e98485a0878ea455e1d3..75a73c654d5c0fef29715de2ed5293a0e81616a7 100644 (file)
@@ -20,6 +20,7 @@ import java.io.IOException;
 
 import org.apache.poi.POIOLE2TextExtractor;
 import org.apache.poi.hssf.usermodel.HSSFCell;
+import org.apache.poi.hssf.usermodel.HSSFComment;
 import org.apache.poi.hssf.usermodel.HSSFRichTextString;
 import org.apache.poi.hssf.usermodel.HSSFRow;
 import org.apache.poi.hssf.usermodel.HSSFSheet;
@@ -39,6 +40,7 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
        private HSSFWorkbook wb;
        private boolean includeSheetNames = true;
        private boolean formulasNotResults = false;
+       private boolean includeCellComments = false;
        
        public ExcelExtractor(HSSFWorkbook wb) {
                super(wb);
@@ -62,6 +64,12 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
        public void setFormulasNotResults(boolean formulasNotResults) {
                this.formulasNotResults = formulasNotResults;
        }
+       /**
+     * Should cell comments be included? Default is false
+     */
+    public void setIncludeCellComments(boolean includeCellComments) {
+        this.includeCellComments = includeCellComments;
+    }
        
        /**
         * Retreives the text contents of the file
@@ -128,6 +136,15 @@ public class ExcelExtractor extends POIOLE2TextExtractor {
                                                        break;
                                        }
                                        
+                                       // Output the comment, if requested and exists
+                                   HSSFComment comment = cell.getCellComment();
+                                       if(includeCellComments && comment != null) {
+                                           // Replace any newlines with spaces, otherwise it
+                                           //  breaks the output
+                                           String commentText = comment.getString().getString().replace('\n', ' ');
+                                           text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+                                       }
+                                       
                                        // Output a tab if we're not on the last cell
                                        if(outputContents && k < (lastCell-1)) {
                                                text.append("\t");
index 9ebb3f053b50264796e8fe22ecf4b563110ec414..2d27f5d33ddba55f11d3c04e3b8de93e6e8e9ce1 100644 (file)
 ==================================================================== */
 package org.apache.poi.xssf.extractor;
 
-import java.io.File;
 import java.io.IOException;
 import java.util.Iterator;
 
 import org.apache.poi.POIXMLTextExtractor;
 import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Comment;
 import org.apache.poi.ss.usermodel.Row;
 import org.apache.poi.ss.usermodel.Sheet;
 import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.xssf.usermodel.XSSFCell;
-import org.apache.poi.xssf.usermodel.XSSFSheet;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
 
 /**
  * Helper class to extract text from an OOXML Excel file
@@ -43,6 +38,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
        private Workbook workbook;
        private boolean includeSheetNames = true;
        private boolean formulasNotResults = false;
+       private boolean includeCellComments = false;
        
        public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
                this(new XSSFWorkbook(path));
@@ -79,6 +75,12 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
        public void setFormulasNotResults(boolean formulasNotResults) {
                this.formulasNotResults = formulasNotResults;
        }
+       /**
+     * Should cell comments be included? Default is false
+     */
+    public void setIncludeCellComments(boolean includeCellComments) {
+        this.includeCellComments = includeCellComments;
+    }
        
        /**
         * Retreives the text contents of the file
@@ -94,8 +96,8 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
                        
                        for (Object rawR : sheet) {
                                Row row = (Row)rawR;
-                               for(Iterator ri = row.cellIterator(); ri.hasNext();) {
-                                       Cell cell = (Cell)ri.next();
+                               for(Iterator<Cell> ri = row.cellIterator(); ri.hasNext();) {
+                                       Cell cell = ri.next();
                                        
                                        // Is it a formula one?
                                        if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
@@ -107,6 +109,15 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
                                                text.append(xc.getRawValue());
                                        }
                                        
+                                       // Output the comment, if requested and exists
+                                   Comment comment = cell.getCellComment();
+                                       if(includeCellComments && comment != null) {
+                                           // Replace any newlines with spaces, otherwise it
+                                           //  breaks the output
+                                           String commentText = comment.getString().getString().replace('\n', ' ');
+                                           text.append(" Comment by "+comment.getAuthor()+": "+commentText);
+                                       }
+                                       
                                        if(ri.hasNext())
                                                text.append("\t");
                                }
diff --git a/src/ooxml/testcases/org/apache/poi/TestEmbeded.java b/src/ooxml/testcases/org/apache/poi/TestEmbeded.java
new file mode 100644 (file)
index 0000000..5e127e2
--- /dev/null
@@ -0,0 +1,83 @@
+
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+        
+
+package org.apache.poi;
+
+import java.io.File;
+import java.util.Iterator;
+
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+/**
+ * Class to test that we handle embedded bits in
+ *  OOXML files properly
+ */
+public class TestEmbeded extends TestCase
+{
+       public String dirname;
+
+       public void setUp() {
+               dirname = System.getProperty("OOXML.testdata.path");
+               assertNotNull(dirname);
+       }
+
+       public void testExcel() throws Exception {
+               File f = new File(dirname, "ExcelWithAttachments.xlsx");
+               assertTrue(f.exists());
+               
+               POIXMLDocument doc = new XSSFWorkbook(Package.open(f.toString()));
+               test(doc, 0);
+       }
+
+       public void testWord() throws Exception {
+               File f = new File(dirname, "WordWithAttachments.docx");
+               assertTrue(f.exists());
+               
+               POIXMLDocument doc = new XWPFDocument(Package.open(f.toString()));
+               test(doc, 4);
+       }
+
+       public void testPowerPoint() throws Exception {
+               File f = new File(dirname, "PPTWithAttachments.pptx");
+               assertTrue(f.exists());
+               
+               POIXMLDocument doc = new XSLFSlideShow(Package.open(f.toString()));
+               test(doc, 0);
+       }
+       
+       private void test(POIXMLDocument doc, int expectedCount) throws Exception {
+               assertNotNull(doc.getAllEmbedds());
+               assertEquals(expectedCount, doc.getAllEmbedds().size());
+
+               for(int i=0; i<doc.getAllEmbedds().size(); i++) {
+                       PackagePart pp = doc.getAllEmbedds().get(i);
+                       assertNotNull(pp);
+                       
+                       byte[] b = IOUtils.toByteArray(pp.getInputStream());
+                       assertTrue(b.length > 0);
+               }
+       }
+}
diff --git a/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls
new file mode 100644 (file)
index 0000000..66dd918
Binary files /dev/null and b/src/testcases/org/apache/poi/hssf/data/WithCheckBoxes.xls differ
index 63d67ee7716b21b499a85bede9e3e7480fe9a701..9bb137ff69cd1b9959344381a546a4508c6e18aa 100644 (file)
@@ -165,6 +165,28 @@ public final class TestExcelExtractor extends TestCase {
                );
        }
        
+       public void testWithComments() throws Exception {
+               ExcelExtractor extractor = createExtractor("SimpleWithComments.xls");
+               extractor.setIncludeSheetNames(false);
+
+               // Check without comments
+               assertEquals(
+                               "1.0\tone\n" +
+                               "2.0\ttwo\n" + 
+                               "3.0\tthree\n", 
+                               extractor.getText()
+               );
+               
+               // Now with
+               extractor.setIncludeCellComments(true);
+               assertEquals(
+                               "1.0\tone Comment by Yegor Kozlov: Yegor Kozlov: first cell\n" +
+                               "2.0\ttwo Comment by Yegor Kozlov: Yegor Kozlov: second cell\n" + 
+                               "3.0\tthree Comment by Yegor Kozlov: Yegor Kozlov: third cell\n", 
+                               extractor.getText()
+               );
+       }
+       
        
        /**
         * Embded in a non-excel file