From: Nick Burch Date: Sat, 8 Mar 2008 17:39:56 +0000 (+0000) Subject: Start updating the excel extractor to the new style code X-Git-Tag: REL_3_5_BETA2~199 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=2a7d3ad1545d135548d168010151eea4168c7e78;p=poi.git Start updating the excel extractor to the new style code git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635026 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/ss/quick-guide.xml b/src/documentation/content/xdocs/ss/quick-guide.xml index 66da604892..f334948779 100644 --- a/src/documentation/content/xdocs/ss/quick-guide.xml +++ b/src/documentation/content/xdocs/ss/quick-guide.xml @@ -21,7 +21,7 @@
- Busy Developers' Guide to HSSF Features + Busy Developers' Guide to HSSF and XSSF Features @@ -30,8 +30,9 @@
Busy Developers' Guide to Features

- Want to use HSSF read and write spreadsheets in a hurry? This guide is for you. If you're after - more in-depth coverage of the HSSF user-API please consult the HOWTO + Want to use HSSF and XSSF read and write spreadsheets in a hurry? This + guide is for you. If you're after more in-depth coverage of the HSSF and + XSSF user-APIs, please consult the HOWTO guide as it contains actual descriptions of how to use this stuff.

Index of Features diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 54b92e32de..36f195eeb1 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -46,18 +46,33 @@ public abstract class POIXMLDocument { protected POIXMLDocument() {} + protected POIXMLDocument(Package pkg) throws IOException { + try { + this.pkg = pkg; + + PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType( + PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); + + // Get core part + this.corePart = this.pkg.getPart(coreDocRelationship); + } catch (OpenXML4JException e) { + throw new IOException(e.toString()); + } + } protected POIXMLDocument(String path) throws IOException { + this(openPackage(path)); + } + + /** + * Wrapper to open a package, returning an IOException + * in the event of a problem. + * Works around shortcomings in java's this() constructor calls + */ + protected static Package openPackage(String path) throws IOException { try { - this.pkg = Package.open(path); - PackageRelationship coreDocRelationship = this.pkg.getRelationshipsByType( - PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); - - // Get core part - this.corePart = this.pkg.getPart(coreDocRelationship); + return Package.open(path); } catch (InvalidFormatException e) { throw new IOException(e.toString()); - } catch (OpenXML4JException e) { - throw new IOException(e.toString()); } } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java index 69361e7b4b..ba3bd1095b 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -20,6 +20,11 @@ import java.io.File; import java.io.IOException; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.xssf.usermodel.XSSFSheet; import org.apache.poi.xssf.usermodel.XSSFWorkbook; import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.OpenXML4JException; @@ -33,10 +38,13 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; * Helper class to extract text from an OOXML Excel file */ public class XSSFExcelExtractor extends POIXMLTextExtractor { - private XSSFWorkbook workbook; + private Workbook workbook; private boolean includeSheetNames = true; private boolean formulasNotResults = false; + public XSSFExcelExtractor(String path) throws XmlException, OpenXML4JException, IOException { + this(new XSSFWorkbook(path)); + } public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException { this(new XSSFWorkbook(container)); } @@ -52,9 +60,7 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { System.exit(1); } POIXMLTextExtractor extractor = - new HXFExcelExtractor(HXFDocument.openPackage( - new File(args[0]) - )); + new XSSFExcelExtractor(args[0]); System.out.println(extractor.getText()); } @@ -78,48 +84,27 @@ public class XSSFExcelExtractor extends POIXMLTextExtractor { public String getText() { StringBuffer text = new StringBuffer(); - CTSheet[] sheetRefs = - workbook._getHSSFXML().getSheetReferences().getSheetArray(); - for(int i=0; i 0) { - text.append("\n"); - } - if(includeSheetNames) { - text.append(sheetRefs[i].getName() + "\n"); - } - - for(int j=0; j 0) { - text.append("\t"); - } - - boolean done = false; - - // Is it a formula one? - if(cell.getF() != null) { - if(formulasNotResults) { - text.append(cell.getF().getStringValue()); - done = true; - } - } - if(!done) { - HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook); - text.append(uCell.getStringValue()); - } + for(int i=0; i 0); + + // Check sheet names + assertTrue(text.startsWith("Sheet1")); + assertTrue(text.endsWith("Sheet3\n")); + + // Now without, will have text + extractor.setIncludeSheetNames(false); + text = extractor.getText(); + assertEquals( + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n" + + "at\t4995\n" + + "\n\n", text); + + // Now get formulas not their values + extractor.setFormulasNotResults(true); + text = extractor.getText(); + assertEquals( + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n" + + "at\tSUM(B1:B9)\n" + + "\n\n", text); + + // With sheet names too + extractor.setIncludeSheetNames(true); + text = extractor.getText(); + assertEquals( + "Sheet1\n" + + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n" + + "at\tSUM(B1:B9)\n\n" + + "Sheet2\n\n" + + "Sheet3\n" + , text); + } + + public void testGetComplexText() throws Exception { + new XSSFExcelExtractor(xmlB); + + XSSFExcelExtractor extractor = + new XSSFExcelExtractor(xmlB); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Might not have all formatting it should do! + // TODO decide if we should really have the "null" in there + assertTrue(text.startsWith( + "Avgtxfull\n" + + "null\t(iii) AVERAGE TAX RATES ON ANNUAL" + )); + } + + /** + * Test that we return pretty much the same as + * ExcelExtractor does, when we're both passed + * the same file, just saved as xls and xlsx + */ + public void testComparedToOLE2() throws Exception { + XSSFExcelExtractor ooxmlExtractor = + new XSSFExcelExtractor(simpleXLSX); + ExcelExtractor ole2Extractor = + new ExcelExtractor(simpleXLS); + + POITextExtractor[] extractors = + new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; + for (int i = 0; i < extractors.length; i++) { + POITextExtractor extractor = extractors[i]; + + String text = extractor.getText().replaceAll("[\r\t]", ""); + //System.out.println(text.length()); + //System.out.println(text); + assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n")); + Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); + Matcher m = pattern.matcher(text); + assertTrue(m.matches()); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java deleted file mode 100644 index a73b60bf72..0000000000 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java +++ /dev/null @@ -1,196 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.extractor; - -import java.io.File; -import java.io.FileInputStream; -import java.util.regex.Matcher; -import java.util.regex.Pattern; - -import junit.framework.TestCase; - -import org.apache.poi.POITextExtractor; -import org.apache.poi.hssf.HSSFXML; -import org.apache.poi.hssf.usermodel.HSSFWorkbook; -import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook; -import org.apache.poi.hxf.HXFDocument; - -/** - * Tests for HXFExcelExtractor - */ -public class TestHXFExcelExtractor extends TestCase { - /** - * A very simple file - */ - private HSSFXML xmlA; - /** - * A fairly complex file - */ - private HSSFXML xmlB; - - /** - * A fairly simple file - ooxml - */ - private HSSFXML simpleXLSX; - /** - * A fairly simple file - ole2 - */ - private HSSFWorkbook simpleXLS; - - protected void setUp() throws Exception { - super.setUp(); - - File fileA = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "sample.xlsx" - ); - File fileB = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "AverageTaxRates.xlsx" - ); - - File fileSOOXML = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "SampleSS.xlsx" - ); - File fileSOLE2 = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "SampleSS.xls" - ); - - xmlA = new HSSFXML(HXFDocument.openPackage(fileA)); - xmlB = new HSSFXML(HXFDocument.openPackage(fileB)); - - simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML)); - simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2)); - } - - /** - * Get text out of the simple file - */ - public void testGetSimpleText() throws Exception { - new HXFExcelExtractor(xmlA.getPackage()); - new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA)); - - HXFExcelExtractor extractor = - new HXFExcelExtractor(xmlA.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Check sheet names - assertTrue(text.startsWith("Sheet1")); - assertTrue(text.endsWith("Sheet3\n")); - - // Now without, will have text - extractor.setIncludeSheetNames(false); - text = extractor.getText(); - assertEquals( - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\t4995\n" + - "\n\n", text); - - // Now get formulas not their values - extractor.setFormulasNotResults(true); - text = extractor.getText(); - assertEquals( - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\tSUM(B1:B9)\n" + - "\n\n", text); - - // With sheet names too - extractor.setIncludeSheetNames(true); - text = extractor.getText(); - assertEquals( - "Sheet1\n" + - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\tSUM(B1:B9)\n\n" + - "Sheet2\n\n" + - "Sheet3\n" - , text); - } - - public void testGetComplexText() throws Exception { - new HXFExcelExtractor(xmlB.getPackage()); - new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB)); - - HXFExcelExtractor extractor = - new HXFExcelExtractor(xmlB.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Might not have all formatting it should do! - // TODO decide if we should really have the "null" in there - assertTrue(text.startsWith( - "Avgtxfull\n" + - "null\t(iii) AVERAGE TAX RATES ON ANNUAL" - )); - } - - /** - * Test that we return pretty much the same as - * ExcelExtractor does, when we're both passed - * the same file, just saved as xls and xlsx - */ - public void testComparedToOLE2() throws Exception { - HXFExcelExtractor ooxmlExtractor = - new HXFExcelExtractor(simpleXLSX.getPackage()); - ExcelExtractor ole2Extractor = - new ExcelExtractor(simpleXLS); - - POITextExtractor[] extractors = - new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; - for (int i = 0; i < extractors.length; i++) { - POITextExtractor extractor = extractors[i]; - - String text = extractor.getText().replaceAll("[\r\t]", ""); - //System.out.println(text.length()); - //System.out.println(text); - assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n")); - Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); - Matcher m = pattern.matcher(text); - assertTrue(m.matches()); - } - } -}