aboutsummaryrefslogtreecommitdiffstats
path: root/src/ooxml
diff options
context:
space:
mode:
author: Nick Burch <nick@apache.org> 2008-03-08 17:21:29 +0000
committer: Nick Burch <nick@apache.org> 2008-03-08 17:21:29 +0000
commit: 2cc22cb45b19b057f6061feac08d18fdc0504d31 (patch)
tree: 510ce2c0248e7d22d0b8e576395859237c954ba2 /src/ooxml
parent: 4eb4e8eeef6eb96b869943ee5bb2ef4f89dd89df (diff)
download: poi-2cc22cb45b19b057f6061feac08d18fdc0504d31.tar.gz
download: poi-2cc22cb45b19b057f6061feac08d18fdc0504d31.zip
More shuffling of things out of src/scratchpad/ooxml-*
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635021 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/ooxml')
-rw-r--r-- src/ooxml/java/org/apache/poi/POIXMLDocument.java | 4
-rw-r--r-- src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java | 128
-rw-r--r-- src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java | 65
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java | 196
4 files changed, 393 insertions, 0 deletions
diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java
index a9cceb525a..54b92e32de 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java
@@ -17,7 +17,11 @@
package org.apache.poi;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.util.IOUtils;
import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
new file mode 100644
index 0000000000..69361e7b4b
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java
@@ -0,0 +1,128 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
+
+/**
+ * Helper class to extract text from an OOXML Excel file (.xlsx). getText() writes each sheet in turn: one line per row, cells separated by tabs.
+ */
+public class XSSFExcelExtractor extends POIXMLTextExtractor {
+ private XSSFWorkbook workbook; // workbook whose sheets getText() walks
+ private boolean includeSheetNames = true; // changed via setIncludeSheetNames
+ private boolean formulasNotResults = false; // changed via setFormulasNotResults
+
+ public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException { // builds an XSSFWorkbook from the package, then delegates
+ this(new XSSFWorkbook(container));
+ }
+ public XSSFExcelExtractor(XSSFWorkbook workbook) { // hands the workbook to the superclass and keeps it for getText()
+ super(workbook);
+ this.workbook = workbook;
+ }
+
+ public static void main(String[] args) throws Exception { // command-line entry point: prints the extracted text of the file named by args[0]
+ if(args.length < 1) { // no filename given: print usage to stderr and exit with status 1
+ System.err.println("Use:");
+ System.err.println(" HXFExcelExtractor <filename.xlsx>");
+ System.exit(1);
+ }
+ POIXMLTextExtractor extractor =
+ new HXFExcelExtractor(HXFDocument.openPackage( // NOTE(review): HXFExcelExtractor and HXFDocument are neither imported nor declared in this file - possibly leftover pre-move names; confirm
+ new File(args[0])
+ ));
+ System.out.println(extractor.getText());
+ }
+
+ /**
+ * Should each sheet's name be written on its own line ahead of that sheet's rows? Default is true
+ */
+ public void setIncludeSheetNames(boolean includeSheetNames) {
+ this.includeSheetNames = includeSheetNames;
+ }
+ /**
+ * For formula cells, should we return the formula itself, and not
+ * the result it produces? Default is false
+ */
+ public void setFormulasNotResults(boolean formulasNotResults) {
+ this.formulasNotResults = formulasNotResults;
+ }
+
+ /**
+ * Retrieves the text contents of the file. Every row ends in "\n", cells in a row are joined by "\t", and an extra "\n" is written before each sheet after the first. Any exception raised while reading a sheet is rethrown wrapped in a RuntimeException.
+ */
+ public String getText() {
+ StringBuffer text = new StringBuffer();
+
+ CTSheet[] sheetRefs =
+ workbook._getHSSFXML().getSheetReferences().getSheetArray();
+ for(int i=0; i<sheetRefs.length; i++) {
+ try {
+ CTWorksheet sheet =
+ workbook._getHSSFXML().getSheet(sheetRefs[i]);
+ CTRow[] rows =
+ sheet.getSheetData().getRowArray();
+
+ if(i > 0) {
+ text.append("\n");
+ }
+ if(includeSheetNames) {
+ text.append(sheetRefs[i].getName() + "\n");
+ }
+
+ for(int j=0; j<rows.length; j++) {
+ CTCell[] cells = rows[j].getCArray();
+ for(int k=0; k<cells.length; k++) {
+ CTCell cell = cells[k];
+ if(k > 0) {
+ text.append("\t");
+ }
+
+ boolean done = false;
+
+ // Is it a formula cell? If so, and formulas were asked for, emit the formula text instead of the value
+ if(cell.getF() != null) {
+ if(formulasNotResults) {
+ text.append(cell.getF().getStringValue());
+ done = true;
+ }
+ }
+ if(!done) { // not a formula, or results were wanted: emit the cell's string value
+ HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook); // NOTE(review): HSSFXMLCell is neither imported nor declared in this file - confirm it resolves
+ text.append(uCell.getStringValue());
+ }
+ }
+ text.append("\n");
+ }
+ } catch(Exception e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ return text.toString();
+ }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java
new file mode 100644
index 0000000000..36adb497cd
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java
@@ -0,0 +1,65 @@
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hxf;
+
+import junit.framework.TestCase;
+import java.io.*;
+
+/**
+ * Class to test that HXF correctly detects OOXML documents. Sample files are read from the directory named by the HSSF.testdata.path system property.
+ * NOTE(review): the file declares package org.apache.poi.hxf, but this diff adds it under src/ooxml/testcases/org/apache/poi/ - confirm the intended package
+ */
+public class TestDetectAsOOXML extends TestCase
+{
+ public String dirname;
+
+ public void setUp() {
+ dirname = System.getProperty("HSSF.testdata.path");
+ }
+
+ public void testOpensProperly() throws Exception
+ {
+ File f = new File(dirname + "/sample.xlsx");
+
+ HXFDocument.openPackage(f); // no assertion follows: the test fails only if this call throws
+ }
+
+ public void testDetectAsPOIFS() throws Exception {
+ InputStream in; // NOTE(review): reassigned three times below and never closed
+
+ // ooxml file is reported as having the OOXML header (stream wrapped with a 10 byte pushback buffer)
+ in = new PushbackInputStream(
+ new FileInputStream(dirname + "/SampleSS.xlsx"), 10
+ );
+ assertTrue(HXFDocument.hasOOXMLHeader(in));
+
+ // xls file isn't
+ in = new PushbackInputStream(
+ new FileInputStream(dirname + "/SampleSS.xls"), 10
+ );
+ assertFalse(HXFDocument.hasOOXMLHeader(in));
+
+ // text file isn't
+ in = new PushbackInputStream(
+ new FileInputStream(dirname + "/SampleSS.txt"), 10
+ );
+ assertFalse(HXFDocument.hasOOXMLHeader(in));
+ }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java
new file mode 100644
index 0000000000..a73b60bf72
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java
@@ -0,0 +1,196 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
+import org.apache.poi.hxf.HXFDocument;
+
+/**
+ * Tests for HXFExcelExtractor. NOTE(review): this class is TestHXFExcelExtractor in package org.apache.poi.hssf.extractor, yet the diff adds it as org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java - confirm the intended class, package and file names
+ */
+public class TestHXFExcelExtractor extends TestCase {
+ /**
+ * A very simple file (sample.xlsx)
+ */
+ private HSSFXML xmlA;
+ /**
+ * A fairly complex file (AverageTaxRates.xlsx)
+ */
+ private HSSFXML xmlB;
+
+ /**
+ * A fairly simple file - ooxml (SampleSS.xlsx)
+ */
+ private HSSFXML simpleXLSX;
+ /**
+ * A fairly simple file - ole2 (SampleSS.xls)
+ */
+ private HSSFWorkbook simpleXLS;
+
+ protected void setUp() throws Exception { // opens all four sample files from the directory named by the HSSF.testdata.path system property
+ super.setUp();
+
+ File fileA = new File(
+ System.getProperty("HSSF.testdata.path") +
+ File.separator + "sample.xlsx"
+ );
+ File fileB = new File(
+ System.getProperty("HSSF.testdata.path") +
+ File.separator + "AverageTaxRates.xlsx"
+ );
+
+ File fileSOOXML = new File(
+ System.getProperty("HSSF.testdata.path") +
+ File.separator + "SampleSS.xlsx"
+ );
+ File fileSOLE2 = new File(
+ System.getProperty("HSSF.testdata.path") +
+ File.separator + "SampleSS.xls"
+ );
+
+ xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
+ xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
+
+ simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML));
+ simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2)); // NOTE(review): this FileInputStream is never closed here
+ }
+
+ /**
+ * Get text out of the simple file, checking the output for each combination of the sheet-name and formula options
+ */
+ public void testGetSimpleText() throws Exception {
+ new HXFExcelExtractor(xmlA.getPackage()); // both constructor forms are invoked; the instances are discarded
+ new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
+
+ HXFExcelExtractor extractor =
+ new HXFExcelExtractor(xmlA.getPackage());
+ extractor.getText();
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Check sheet names: output starts with the first sheet's name and ends with the last one's
+ assertTrue(text.startsWith("Sheet1"));
+ assertTrue(text.endsWith("Sheet3\n"));
+
+ // Now without sheet names: only the cell text and the sheet-separating newlines remain
+ extractor.setIncludeSheetNames(false);
+ text = extractor.getText();
+ assertEquals(
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n" +
+ "at\t4995\n" +
+ "\n\n", text);
+
+ // Now get formulas not their values: the last row shows SUM(B1:B9) where 4995 was
+ extractor.setFormulasNotResults(true);
+ text = extractor.getText();
+ assertEquals(
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n" +
+ "at\tSUM(B1:B9)\n" +
+ "\n\n", text);
+
+ // With sheet names too (formulas still switched on)
+ extractor.setIncludeSheetNames(true);
+ text = extractor.getText();
+ assertEquals(
+ "Sheet1\n" +
+ "Lorem\t111\n" +
+ "ipsum\t222\n" +
+ "dolor\t333\n" +
+ "sit\t444\n" +
+ "amet\t555\n" +
+ "consectetuer\t666\n" +
+ "adipiscing\t777\n" +
+ "elit\t888\n" +
+ "Nunc\t999\n" +
+ "at\tSUM(B1:B9)\n\n" +
+ "Sheet2\n\n" +
+ "Sheet3\n"
+ , text);
+ }
+
+ public void testGetComplexText() throws Exception { // only the start of the complex file's output is checked
+ new HXFExcelExtractor(xmlB.getPackage());
+ new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
+
+ HXFExcelExtractor extractor =
+ new HXFExcelExtractor(xmlB.getPackage());
+ extractor.getText();
+
+ String text = extractor.getText();
+ assertTrue(text.length() > 0);
+
+ // Might not have all formatting it should do!
+ // TODO decide if we should really have the "null" in there
+ assertTrue(text.startsWith(
+ "Avgtxfull\n" +
+ "null\t(iii) AVERAGE TAX RATES ON ANNUAL"
+ ));
+ }
+
+ /**
+ * Test that we return pretty much the same as
+ * ExcelExtractor does, when we're both passed
+ * the same file, just saved as xls and xlsx
+ */
+ public void testComparedToOLE2() throws Exception {
+ HXFExcelExtractor ooxmlExtractor =
+ new HXFExcelExtractor(simpleXLSX.getPackage());
+ ExcelExtractor ole2Extractor =
+ new ExcelExtractor(simpleXLS);
+
+ POITextExtractor[] extractors =
+ new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
+ for (int i = 0; i < extractors.length; i++) {
+ POITextExtractor extractor = extractors[i];
+
+ String text = extractor.getText().replaceAll("[\r\t]", ""); // drop carriage returns and tabs so both extractors are held to the same expected text
+ //System.out.println(text.length());
+ //System.out.println(text);
+ assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
+ Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); // accepts "13" with or without a ".0..." suffix ahead of Sheet3
+ Matcher m = pattern.matcher(text);
+ assertTrue(m.matches());
+ }
+ }
+}