]> source.dussan.org Git - poi.git/commitdiff
Start updating the excel extractor to the new style code
authorNick Burch <nick@apache.org>
Sat, 8 Mar 2008 17:39:56 +0000 (17:39 +0000)
committerNick Burch <nick@apache.org>
Sat, 8 Mar 2008 17:39:56 +0000 (17:39 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635026 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/ss/quick-guide.xml
src/ooxml/java/org/apache/poi/POIXMLDocument.java
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
src/ooxml/java/org/apache/poi/xssf/usermodel/XSSFWorkbook.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java [deleted file]

index 66da60489237b7d74d36238ddbc91ca4b33d5d54..f334948779110ea97acdec9c713d1059b2878ae6 100644 (file)
@@ -21,7 +21,7 @@
 
 <document>
     <header>
-        <title>Busy Developers' Guide to HSSF Features</title>
+        <title>Busy Developers' Guide to HSSF and XSSF Features</title>
         <authors>
             <person email="user@poi.apache.org" name="Glen Stampoultzis" id="CO"/>
             <person email="user@poi.apache.org" name="Yegor Kozlov" id="YK"/>
@@ -30,8 +30,9 @@
     <body>
         <section><title>Busy Developers' Guide to Features</title>
             <p>
-                Want to use HSSF read and write spreadsheets in a hurry?  This guide is for you.  If you're after
-                more in-depth coverage of the HSSF user-API please consult the <link href="how-to.html">HOWTO</link>
+                Want to use HSSF and XSSF to read and write spreadsheets in a hurry?  This 
+                guide is for you.  If you're after more in-depth coverage of the HSSF and 
+                XSSF user-APIs, please consult the <link href="how-to.html">HOWTO</link>
                 guide as it contains actual descriptions of how to use this stuff.
             </p>
             <section><title>Index of Features</title>
index 54b92e32dee74b0d5ccfd17d3be82f2f95a6aad0..36f195eeb1f1525e008abc02c0af368d8aeca0f0 100644 (file)
@@ -46,18 +46,33 @@ public abstract class POIXMLDocument {
     
     protected POIXMLDocument() {}
     
+    protected POIXMLDocument(Package pkg) throws IOException {
+       try {
+               this.pkg = pkg;
+               
+               PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
+                       PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
+           
+               // Get core part
+               this.corePart = this.pkg.getPart(coreDocRelationship);
+        } catch (OpenXML4JException e) {
+            throw new IOException(e.toString());
+       }
+    }
     protected POIXMLDocument(String path) throws IOException {
+               this(openPackage(path));
+    }
+    
+    /**
+     * Wrapper to open a package, throwing an IOException
+     *  in the event of a problem.
+     * Works around shortcomings in Java's this() constructor calls
+     */
+    protected static Package openPackage(String path) throws IOException {
         try {
-            this.pkg = Package.open(path);
-            PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType(
-                    PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0);
-        
-            // Get core part
-            this.corePart = this.pkg.getPart(coreDocRelationship);
+            return Package.open(path);
         } catch (InvalidFormatException e) {
             throw new IOException(e.toString());
-        } catch (OpenXML4JException e) {
-            throw new IOException(e.toString());
         }
     }
     
index 69361e7b4b6ee7a26d66bec5a1ed69a2b7ca949e..ba3bd1095b116f8e986bc080d47ea35cfaadddaa 100644 (file)
@@ -20,6 +20,11 @@ import java.io.File;
 import java.io.IOException;
 
 import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
 import org.apache.poi.xssf.usermodel.XSSFWorkbook;
 import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
@@ -33,10 +38,13 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
  * Helper class to extract text from an OOXML Excel file
  */
 public class XSSFExcelExtractor extends POIXMLTextExtractor {
-       private XSSFWorkbook workbook;
+       private Workbook workbook;
        private boolean includeSheetNames = true;
        private boolean formulasNotResults = false;
        
+       public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException {
+               this(new XSSFWorkbook(path));
+       }
        public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
                this(new XSSFWorkbook(container));
        }
@@ -52,9 +60,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
                        System.exit(1);
                }
                POIXMLTextExtractor extractor = 
-                       new HXFExcelExtractor(HXFDocument.openPackage(
-                                       new File(args[0])
-                       ));
+                       new XSSFExcelExtractor(args[0]);
                System.out.println(extractor.getText());
        }
 
@@ -78,48 +84,27 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor {
        public String getText() {
                StringBuffer text = new StringBuffer();
                
-               CTSheet[] sheetRefs =
-                       workbook._getHSSFXML().getSheetReferences().getSheetArray();
-               for(int i=0; i<sheetRefs.length; i++) {
-                       try {
-                               CTWorksheet sheet =
-                                       workbook._getHSSFXML().getSheet(sheetRefs[i]);
-                               CTRow[] rows =
-                                       sheet.getSheetData().getRowArray();
-                               
-                               if(i > 0) {
-                                       text.append("\n");
-                               }
-                               if(includeSheetNames) {
-                                       text.append(sheetRefs[i].getName() + "\n");
-                               }
-                               
-                               for(int j=0; j<rows.length; j++) {
-                                       CTCell[] cells = rows[j].getCArray();
-                                       for(int k=0; k<cells.length; k++) {
-                                               CTCell cell = cells[k];
-                                               if(k > 0) {
-                                                       text.append("\t");
-                                               }
-                                               
-                                               boolean done = false;
-                                               
-                                               // Is it a formula one?
-                                               if(cell.getF() != null) {
-                                                       if(formulasNotResults) {
-                                                               text.append(cell.getF().getStringValue());
-                                                               done = true;
-                                                       }
-                                               }
-                                               if(!done) {
-                                                       HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
-                                                       text.append(uCell.getStringValue());
-                                               }
+               for(int i=0; i<workbook.getNumberOfSheets(); i++) {
+                       Sheet sheet = workbook.getSheetAt(i);
+                       if(includeSheetNames) {
+                               text.append(workbook.getSheetName(i) + "\n");
+                       }
+                       
+                       for (Object rawR : sheet) {
+                               Row row = (Row)rawR;
+                               for (Object rawC: row) {
+                                       Cell cell = (Cell)rawC;
+                                       
+                                       // Is it a formula one?
+                                       if(cell.getCellType() == Cell.CELL_TYPE_FORMULA && formulasNotResults) {
+                                               text.append(cell.getCellFormula());
+                                       } else {
+                                               text.append(cell.toString());
                                        }
-                                       text.append("\n");
+                                       
+                                       text.append(",");
                                }
-                       } catch(Exception e) {
-                               throw new RuntimeException(e);
+                               text.append("\n");
                        }
                }
                
index 1f71594e1dd31e4ed0094a56da003908c1298304..2f7442dac2d6650c3a745d8073752bda0d96775e 100644 (file)
@@ -89,7 +89,10 @@ public class XSSFWorkbook extends POIXMLDocument implements Workbook {
     }
     
     public XSSFWorkbook(String path) throws IOException {
-        super(path);
+       this(openPackage(path));
+    }
+    public XSSFWorkbook(Package pkg) throws IOException {
+        super(pkg);
         try {
             WorkbookDocument doc = WorkbookDocument.Factory.parse(getCorePart().getInputStream());
             this.workbook = doc.getWorkbook();
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
new file mode 100644 (file)
index 0000000..de1fc47
--- /dev/null
@@ -0,0 +1,195 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+
+/**
+ * Tests for XSSFExcelExtractor
+ */
+public class TestXSSFExcelExtractor extends TestCase {
+       /**
+        * A very simple file
+        */
+       private XSSFWorkbook xmlA;
+       private File fileA;
+       /**
+        * A fairly complex file
+        */
+       private XSSFWorkbook xmlB;
+       
+       /**
+        * A fairly simple file - ooxml
+        */
+       private XSSFWorkbook simpleXLSX; 
+       /**
+        * A fairly simple file - ole2
+        */
+       private HSSFWorkbook simpleXLS;
+
+       protected void setUp() throws Exception {
+               super.setUp();
+               
+               fileA = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "sample.xlsx"
+               );
+               File fileB = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "AverageTaxRates.xlsx"
+               );
+               
+               File fileSOOXML = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "SampleSS.xlsx"
+               );
+               File fileSOLE2 = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "SampleSS.xls"
+               );
+               
+               xmlA = new XSSFWorkbook(fileA.toString());
+               xmlB = new XSSFWorkbook(fileB.toString());
+               
+               simpleXLSX = new XSSFWorkbook(fileSOOXML.toString());
+               simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
+       }
+
+       /**
+        * Get text out of the simple file
+        */
+       public void testGetSimpleText() throws Exception {
+               new XSSFExcelExtractor(fileA.toString());
+               new XSSFExcelExtractor(xmlA);
+               
+               XSSFExcelExtractor extractor = 
+                       new XSSFExcelExtractor(xmlA);
+               extractor.getText();
+               
+               String text = extractor.getText();
+               assertTrue(text.length() > 0);
+               
+               // Check sheet names
+               assertTrue(text.startsWith("Sheet1"));
+               assertTrue(text.endsWith("Sheet3\n"));
+               
+               // Now without sheet names, will just have the cell text
+               extractor.setIncludeSheetNames(false);
+               text = extractor.getText();
+               assertEquals(
+                               "Lorem\t111\n" +
+                               "ipsum\t222\n" +
+                               "dolor\t333\n" +
+                               "sit\t444\n" +
+                               "amet\t555\n" +
+                               "consectetuer\t666\n" +
+                               "adipiscing\t777\n" +
+                               "elit\t888\n" +
+                               "Nunc\t999\n" +
+                               "at\t4995\n" +
+                               "\n\n", text);
+               
+               // Now get formulas not their values
+               extractor.setFormulasNotResults(true);
+               text = extractor.getText();
+               assertEquals(
+                               "Lorem\t111\n" +
+                               "ipsum\t222\n" +
+                               "dolor\t333\n" +
+                               "sit\t444\n" +
+                               "amet\t555\n" +
+                               "consectetuer\t666\n" +
+                               "adipiscing\t777\n" +
+                               "elit\t888\n" +
+                               "Nunc\t999\n" +
+                               "at\tSUM(B1:B9)\n" +
+                               "\n\n", text);
+               
+               // With sheet names too
+               extractor.setIncludeSheetNames(true);
+               text = extractor.getText();
+               assertEquals(
+                               "Sheet1\n" +
+                               "Lorem\t111\n" +
+                               "ipsum\t222\n" +
+                               "dolor\t333\n" +
+                               "sit\t444\n" +
+                               "amet\t555\n" +
+                               "consectetuer\t666\n" +
+                               "adipiscing\t777\n" +
+                               "elit\t888\n" +
+                               "Nunc\t999\n" +
+                               "at\tSUM(B1:B9)\n\n" +
+                               "Sheet2\n\n" +
+                               "Sheet3\n"
+                               , text);
+       }
+       
+       public void testGetComplexText() throws Exception {
+               new XSSFExcelExtractor(xmlB);
+               
+               XSSFExcelExtractor extractor = 
+                       new XSSFExcelExtractor(xmlB);
+               extractor.getText();
+               
+               String text = extractor.getText();
+               assertTrue(text.length() > 0);
+               
+               // Might not have all formatting it should do!
+               // TODO decide if we should really have the "null" in there
+               assertTrue(text.startsWith(
+                                               "Avgtxfull\n" +
+                                               "null\t(iii) AVERAGE TAX RATES ON ANNUAL"       
+               ));
+       }
+       
+       /**
+        * Test that we return pretty much the same as
+        *  ExcelExtractor does, when we're both passed
+        *  the same file, just saved as xls and xlsx
+        */
+       public void testComparedToOLE2() throws Exception {
+               XSSFExcelExtractor ooxmlExtractor =
+                       new XSSFExcelExtractor(simpleXLSX);
+               ExcelExtractor ole2Extractor =
+                       new ExcelExtractor(simpleXLS);
+               
+               POITextExtractor[] extractors =
+                       new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
+               for (int i = 0; i < extractors.length; i++) {
+                       POITextExtractor extractor = extractors[i];
+                       
+                       String text = extractor.getText().replaceAll("[\r\t]", "");
+                       //System.out.println(text.length());
+                       //System.out.println(text);
+                       assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
+                       Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
+                       Matcher m = pattern.matcher(text);
+                       assertTrue(m.matches());                        
+               }
+       }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java
deleted file mode 100644 (file)
index a73b60b..0000000
+++ /dev/null
@@ -1,196 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import junit.framework.TestCase;
-
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hssf.HSSFXML;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
-import org.apache.poi.hxf.HXFDocument;
-
-/**
- * Tests for HXFExcelExtractor
- */
-public class TestHXFExcelExtractor extends TestCase {
-       /**
-        * A very simple file
-        */
-       private HSSFXML xmlA;
-       /**
-        * A fairly complex file
-        */
-       private HSSFXML xmlB;
-       
-       /**
-        * A fairly simple file - ooxml
-        */
-       private HSSFXML simpleXLSX; 
-       /**
-        * A fairly simple file - ole2
-        */
-       private HSSFWorkbook simpleXLS;
-
-       protected void setUp() throws Exception {
-               super.setUp();
-               
-               File fileA = new File(
-                               System.getProperty("HSSF.testdata.path") +
-                               File.separator + "sample.xlsx"
-               );
-               File fileB = new File(
-                               System.getProperty("HSSF.testdata.path") +
-                               File.separator + "AverageTaxRates.xlsx"
-               );
-               
-               File fileSOOXML = new File(
-                               System.getProperty("HSSF.testdata.path") +
-                               File.separator + "SampleSS.xlsx"
-               );
-               File fileSOLE2 = new File(
-                               System.getProperty("HSSF.testdata.path") +
-                               File.separator + "SampleSS.xls"
-               );
-               
-               xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
-               xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
-               
-               simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML));
-               simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
-       }
-
-       /**
-        * Get text out of the simple file
-        */
-       public void testGetSimpleText() throws Exception {
-               new HXFExcelExtractor(xmlA.getPackage());
-               new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
-               
-               HXFExcelExtractor extractor = 
-                       new HXFExcelExtractor(xmlA.getPackage());
-               extractor.getText();
-               
-               String text = extractor.getText();
-               assertTrue(text.length() > 0);
-               
-               // Check sheet names
-               assertTrue(text.startsWith("Sheet1"));
-               assertTrue(text.endsWith("Sheet3\n"));
-               
-               // Now without, will have text
-               extractor.setIncludeSheetNames(false);
-               text = extractor.getText();
-               assertEquals(
-                               "Lorem\t111\n" +
-                               "ipsum\t222\n" +
-                               "dolor\t333\n" +
-                               "sit\t444\n" +
-                               "amet\t555\n" +
-                               "consectetuer\t666\n" +
-                               "adipiscing\t777\n" +
-                               "elit\t888\n" +
-                               "Nunc\t999\n" +
-                               "at\t4995\n" +
-                               "\n\n", text);
-               
-               // Now get formulas not their values
-               extractor.setFormulasNotResults(true);
-               text = extractor.getText();
-               assertEquals(
-                               "Lorem\t111\n" +
-                               "ipsum\t222\n" +
-                               "dolor\t333\n" +
-                               "sit\t444\n" +
-                               "amet\t555\n" +
-                               "consectetuer\t666\n" +
-                               "adipiscing\t777\n" +
-                               "elit\t888\n" +
-                               "Nunc\t999\n" +
-                               "at\tSUM(B1:B9)\n" +
-                               "\n\n", text);
-               
-               // With sheet names too
-               extractor.setIncludeSheetNames(true);
-               text = extractor.getText();
-               assertEquals(
-                               "Sheet1\n" +
-                               "Lorem\t111\n" +
-                               "ipsum\t222\n" +
-                               "dolor\t333\n" +
-                               "sit\t444\n" +
-                               "amet\t555\n" +
-                               "consectetuer\t666\n" +
-                               "adipiscing\t777\n" +
-                               "elit\t888\n" +
-                               "Nunc\t999\n" +
-                               "at\tSUM(B1:B9)\n\n" +
-                               "Sheet2\n\n" +
-                               "Sheet3\n"
-                               , text);
-       }
-       
-       public void testGetComplexText() throws Exception {
-               new HXFExcelExtractor(xmlB.getPackage());
-               new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
-               
-               HXFExcelExtractor extractor = 
-                       new HXFExcelExtractor(xmlB.getPackage());
-               extractor.getText();
-               
-               String text = extractor.getText();
-               assertTrue(text.length() > 0);
-               
-               // Might not have all formatting it should do!
-               // TODO decide if we should really have the "null" in there
-               assertTrue(text.startsWith(
-                                               "Avgtxfull\n" +
-                                               "null\t(iii) AVERAGE TAX RATES ON ANNUAL"       
-               ));
-       }
-       
-       /**
-        * Test that we return pretty much the same as
-        *  ExcelExtractor does, when we're both passed
-        *  the same file, just saved as xls and xlsx
-        */
-       public void testComparedToOLE2() throws Exception {
-               HXFExcelExtractor ooxmlExtractor =
-                       new HXFExcelExtractor(simpleXLSX.getPackage());
-               ExcelExtractor ole2Extractor =
-                       new ExcelExtractor(simpleXLS);
-               
-               POITextExtractor[] extractors =
-                       new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
-               for (int i = 0; i < extractors.length; i++) {
-                       POITextExtractor extractor = extractors[i];
-                       
-                       String text = extractor.getText().replaceAll("[\r\t]", "");
-                       //System.out.println(text.length());
-                       //System.out.println(text);
-                       assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
-                       Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
-                       Matcher m = pattern.matcher(text);
-                       assertTrue(m.matches());                        
-               }
-       }
-}