package org.apache.poi;
import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import org.apache.poi.poifs.common.POIFSConstants;
+import org.apache.poi.util.IOUtils;
import org.openxml4j.exceptions.InvalidFormatException;
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
+
+/**
+ * Helper class to extract text from an OOXML Excel file.
+ * Sheets are written in the order the workbook lists them, with a
+ *  newline between sheets. Each row becomes one line, and the
+ *  cells of a row are separated by tabs.
+ */
+public class XSSFExcelExtractor extends POIXMLTextExtractor {
+    private XSSFWorkbook workbook;
+    private boolean includeSheetNames = true;
+    private boolean formulasNotResults = false;
+
+    /**
+     * Creates an extractor for the workbook stored in the given package.
+     * @param container The OOXML package holding the workbook
+     */
+    public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
+        this(new XSSFWorkbook(container));
+    }
+    /**
+     * Creates an extractor for an already loaded workbook.
+     * @param workbook The workbook to pull the text from
+     */
+    public XSSFExcelExtractor(XSSFWorkbook workbook) {
+        super(workbook);
+        this.workbook = workbook;
+    }
+
+    /**
+     * Command line entry point. Prints the text of the file named
+     *  by the first argument to standard output, or prints a usage
+     *  message and exits with status 1 if no file was given.
+     */
+    public static void main(String[] args) throws Exception {
+        if(args.length < 1) {
+            System.err.println("Use:");
+            System.err.println(" XSSFExcelExtractor <filename.xlsx>");
+            System.exit(1);
+        }
+        // Open the package straight from the supplied path, and hand
+        //  it to this class rather than to the old HXF extractor
+        POIXMLTextExtractor extractor =
+            new XSSFExcelExtractor(Package.open(args[0]));
+        System.out.println(extractor.getText());
+    }
+
+    /**
+     * Should sheet names be included? Default is true
+     */
+    public void setIncludeSheetNames(boolean includeSheetNames) {
+        this.includeSheetNames = includeSheetNames;
+    }
+    /**
+     * Should we return the formula itself, and not
+     *  the result it produces? Default is false
+     */
+    public void setFormulasNotResults(boolean formulasNotResults) {
+        this.formulasNotResults = formulasNotResults;
+    }
+
+    /**
+     * Retrieves the text contents of the file
+     * @return The text of every sheet, in workbook order
+     * @throws RuntimeException wrapping any exception raised while
+     *  a sheet is being loaded or read
+     */
+    public String getText() {
+        StringBuilder text = new StringBuilder();
+
+        // NOTE(review): _getHSSFXML() and HSSFXMLCell belong to the
+        //  HSSFXML based workbook classes - confirm that XSSFWorkbook
+        //  still offers an equivalent before relying on this method
+        CTSheet[] sheetRefs =
+            workbook._getHSSFXML().getSheetReferences().getSheetArray();
+        for(int i=0; i<sheetRefs.length; i++) {
+            try {
+                // Load the sheet before writing anything for it, so a
+                //  failure leaves no half written sheet header behind
+                CTWorksheet sheet =
+                    workbook._getHSSFXML().getSheet(sheetRefs[i]);
+                CTRow[] rows =
+                    sheet.getSheetData().getRowArray();
+
+                if(i > 0) {
+                    text.append("\n");
+                }
+                if(includeSheetNames) {
+                    text.append(sheetRefs[i].getName()).append("\n");
+                }
+
+                for(int j=0; j<rows.length; j++) {
+                    CTCell[] cells = rows[j].getCArray();
+                    for(int k=0; k<cells.length; k++) {
+                        CTCell cell = cells[k];
+                        if(k > 0) {
+                            text.append("\t");
+                        }
+
+                        // Formula cells give their formula only when asked
+                        //  to, everything else gives its formatted value
+                        if(formulasNotResults && cell.getF() != null) {
+                            text.append(cell.getF().getStringValue());
+                        } else {
+                            HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
+                            text.append(uCell.getStringValue());
+                        }
+                    }
+                    text.append("\n");
+                }
+            } catch(Exception e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        return text.toString();
+    }
+}
--- /dev/null
+
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+
+package org.apache.poi.hxf;
+
+import junit.framework.TestCase;
+import java.io.*;
+
+/**
+ * Class to test that HXF correctly detects OOXML
+ *  documents
+ */
+public class TestDetectAsOOXML extends TestCase
+{
+    // NOTE(review): this patch also removes org.apache.poi.hxf.HXFDocument,
+    //  which every test here calls - confirm which class should be used
+
+    /** Directory holding the test files, from HSSF.testdata.path */
+    public String dirname;
+
+    public void setUp() {
+        dirname = System.getProperty("HSSF.testdata.path");
+    }
+
+    /**
+     * Checks that a sample .xlsx file can be opened as a package
+     *  without an exception being thrown
+     */
+    public void testOpensProperly() throws Exception
+    {
+        File f = new File(dirname + "/sample.xlsx");
+
+        HXFDocument.openPackage(f);
+    }
+
+    /**
+     * Checks that only the OOXML file is reported as having
+     *  the OOXML header
+     */
+    public void testDetectAsPOIFS() throws Exception {
+        // ooxml file is
+        assertTrue(hasOOXMLHeader("SampleSS.xlsx"));
+
+        // xls file isn't
+        assertFalse(hasOOXMLHeader("SampleSS.xls"));
+
+        // text file isn't
+        assertFalse(hasOOXMLHeader("SampleSS.txt"));
+    }
+
+    /**
+     * Runs the header check against the named test file, and
+     *  always closes the file again afterwards.
+     * @param filename Name of a file within the test data directory
+     */
+    private boolean hasOOXMLHeader(String filename) throws IOException {
+        InputStream in = new PushbackInputStream(
+            new FileInputStream(dirname + "/" + filename), 10
+        );
+        try {
+            return HXFDocument.hasOOXMLHeader(in);
+        } finally {
+            // Closing the wrapper closes the underlying file too
+            in.close();
+        }
+    }
+}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import junit.framework.TestCase;
+
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
+import org.apache.poi.hxf.HXFDocument;
+
+/**
+ * Tests for HXFExcelExtractor
+ */
+public class TestHXFExcelExtractor extends TestCase {
+    // NOTE(review): this patch also removes HSSFXML, HSSFXMLWorkbook,
+    //  HXFDocument and HXFExcelExtractor, which these tests are built
+    //  on - confirm whether they should target the XSSF classes instead
+
+    /**
+     * The rows of the simple file which read the same whether
+     *  formulas or their results are asked for
+     */
+    private static final String SIMPLE_ROWS =
+        "Lorem\t111\n" +
+        "ipsum\t222\n" +
+        "dolor\t333\n" +
+        "sit\t444\n" +
+        "amet\t555\n" +
+        "consectetuer\t666\n" +
+        "adipiscing\t777\n" +
+        "elit\t888\n" +
+        "Nunc\t999\n";
+
+    /**
+     * A very simple file
+     */
+    private HSSFXML xmlA;
+    /**
+     * A fairly complex file
+     */
+    private HSSFXML xmlB;
+
+    /**
+     * A fairly simple file - ooxml
+     */
+    private HSSFXML simpleXLSX;
+    /**
+     * A fairly simple file - ole2
+     */
+    private HSSFWorkbook simpleXLS;
+
+    /**
+     * Loads all four sample workbooks from the test data directory
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        xmlA = new HSSFXML(HXFDocument.openPackage(sampleFile("sample.xlsx")));
+        xmlB = new HSSFXML(HXFDocument.openPackage(sampleFile("AverageTaxRates.xlsx")));
+
+        simpleXLSX = new HSSFXML(HXFDocument.openPackage(sampleFile("SampleSS.xlsx")));
+
+        // Don't leave the file open once the workbook has been built
+        FileInputStream xlsStream = new FileInputStream(sampleFile("SampleSS.xls"));
+        try {
+            simpleXLS = new HSSFWorkbook(xlsStream);
+        } finally {
+            xlsStream.close();
+        }
+    }
+
+    /**
+     * Locates the named file within the directory given by the
+     *  HSSF.testdata.path system property
+     */
+    private static File sampleFile(String name) {
+        return new File(
+            System.getProperty("HSSF.testdata.path") +
+            File.separator + name
+        );
+    }
+
+    /**
+     * Get text out of the simple file
+     */
+    public void testGetSimpleText() throws Exception {
+        // Both constructors should work
+        new HXFExcelExtractor(xmlA.getPackage());
+        new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
+
+        HXFExcelExtractor extractor =
+            new HXFExcelExtractor(xmlA.getPackage());
+        extractor.getText();
+
+        // Asking a second time should work too
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Check sheet names
+        assertTrue(text.startsWith("Sheet1"));
+        assertTrue(text.endsWith("Sheet3\n"));
+
+        // Now without, will have text
+        extractor.setIncludeSheetNames(false);
+        text = extractor.getText();
+        assertEquals(
+            SIMPLE_ROWS +
+            "at\t4995\n" +
+            "\n\n", text);
+
+        // Now get formulas not their values
+        extractor.setFormulasNotResults(true);
+        text = extractor.getText();
+        assertEquals(
+            SIMPLE_ROWS +
+            "at\tSUM(B1:B9)\n" +
+            "\n\n", text);
+
+        // With sheet names too
+        extractor.setIncludeSheetNames(true);
+        text = extractor.getText();
+        assertEquals(
+            "Sheet1\n" +
+            SIMPLE_ROWS +
+            "at\tSUM(B1:B9)\n\n" +
+            "Sheet2\n\n" +
+            "Sheet3\n"
+            , text);
+    }
+
+    /**
+     * Get text out of the complex file
+     */
+    public void testGetComplexText() throws Exception {
+        new HXFExcelExtractor(xmlB.getPackage());
+        new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
+
+        HXFExcelExtractor extractor =
+            new HXFExcelExtractor(xmlB.getPackage());
+        extractor.getText();
+
+        String text = extractor.getText();
+        assertTrue(text.length() > 0);
+
+        // Might not have all formatting it should do!
+        // TODO decide if we should really have the "null" in there
+        assertTrue(text.startsWith(
+            "Avgtxfull\n" +
+            "null\t(iii) AVERAGE TAX RATES ON ANNUAL"
+        ));
+    }
+
+    /**
+     * Test that we return pretty much the same as
+     *  ExcelExtractor does, when we're both passed
+     *  the same file, just saved as xls and xlsx
+     */
+    public void testComparedToOLE2() throws Exception {
+        HXFExcelExtractor ooxmlExtractor =
+            new HXFExcelExtractor(simpleXLSX.getPackage());
+        ExcelExtractor ole2Extractor =
+            new ExcelExtractor(simpleXLS);
+
+        // The same pattern is used for both, so only build it once.
+        // The last number may or may not have a trailing ".0"
+        Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
+
+        POITextExtractor[] extractors =
+            new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
+        for (int i = 0; i < extractors.length; i++) {
+            POITextExtractor extractor = extractors[i];
+
+            // Ignore differences in tabs and carriage returns
+            String text = extractor.getText().replaceAll("[\r\t]", "");
+            assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
+            Matcher m = pattern.matcher(text);
+            assertTrue(m.matches());
+        }
+    }
+}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf;
-
-import java.io.IOException;
-
-import org.apache.poi.hssf.model.SharedStringsTable;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheets;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument;
-
-/**
- * Experimental class to do low level processing
- * of xlsx files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public class HSSFXML extends HXFDocument {
- /** Content type handed to the parent class to locate the base (workbook) part */
- public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
- /** Worksheet content type string. Not referenced elsewhere in this class */
- public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
- /** Shared strings content type string. Not referenced elsewhere in this class */
- public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
- /** Relation type used to find the shared strings part from the base part */
- public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings";
-
- /** The base part, parsed as a workbook document */
- private WorkbookDocument workbookDoc;
- /** The shared strings, or null if the base part has no shared strings relation */
- private SharedStringsTable sharedStrings;
-
- /**
- * Parses the workbook from the base part of the supplied
- * package, and loads the shared strings table if the
- * workbook has a relation to one.
- * @param container The package to read the workbook from
- */
- public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
- super(container, MAIN_CONTENT_TYPE);
-
- workbookDoc =
- WorkbookDocument.Factory.parse(basePart.getInputStream());
-
- // This gives null, rather than an exception, if there is no such relation
- PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart);
- if (ssPart != null) {
- sharedStrings = new SharedStringsTable(ssPart);
- } else {
- // Nothing to load, so sharedStrings is left as null
-
- }
- }
-
- /**
- * Returns the low level workbook base object
- */
- public CTWorkbook getWorkbook() {
- return workbookDoc.getWorkbook();
- }
- /**
- * Returns the references from the workbook to its
- * sheets.
- * You'll need these to figure out the sheet ordering,
- * and to get at the actual sheets themselves
- */
- public CTSheets getSheetReferences() {
- return getWorkbook().getSheets();
- }
- /**
- * Returns the low level (work)sheet object from
- * the supplied sheet reference.
- * The sheet part is looked up by the relation id of the
- * reference, and is parsed afresh on every call.
- */
- public CTWorksheet getSheet(CTSheet sheet) throws IOException, XmlException {
- PackagePart sheetPart =
- getRelatedPackagePart(sheet.getId());
- WorksheetDocument sheetDoc =
- WorksheetDocument.Factory.parse(sheetPart.getInputStream());
- return sheetDoc.getWorksheet();
- }
-
- /**
- * Returns the shared string at the given index.
- * NOTE(review): sharedStrings is null when the workbook had
- * no shared strings part, so this would then fail with a
- * NullPointerException.
- */
- public String getSharedString(int index) {
- return this.sharedStrings.get(index);
- }
- /**
- * Returns the shared strings table itself, which may be null
- */
- protected SharedStringsTable _getSharedStringsTable() {
- return sharedStrings;
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.extractor;
-
-import java.io.File;
-import java.io.IOException;
-
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hssf.HSSFXML;
-import org.apache.poi.hssf.usermodel.HSSFXMLCell;
-import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
-import org.apache.poi.hxf.HXFDocument;
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
-
-/**
- * Helper class to extract text from an OOXML Excel file
- */
-public class HXFExcelExtractor extends POIXMLTextExtractor {
- private HSSFXMLWorkbook workbook;
- private boolean includeSheetNames = true;
- private boolean formulasNotResults = false;
-
- /**
- * Creates an extractor for the workbook stored in the given package
- */
- public HXFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException {
- this(new HSSFXMLWorkbook(
- new HSSFXML(container)
- ));
- }
- /**
- * Creates an extractor for an already loaded workbook
- */
- public HXFExcelExtractor(HSSFXMLWorkbook workbook) {
- super(workbook);
- this.workbook = workbook;
- }
-
- /**
- * Command line entry point. Prints the text of the file named
- * by the first argument to standard output, or prints a usage
- * message and exits with status 1 if no file was given.
- */
- public static void main(String[] args) throws Exception {
- if(args.length < 1) {
- System.err.println("Use:");
- System.err.println(" HXFExcelExtractor <filename.xlsx>");
- System.exit(1);
- }
- POIXMLTextExtractor extractor =
- new HXFExcelExtractor(HXFDocument.openPackage(
- new File(args[0])
- ));
- System.out.println(extractor.getText());
- }
-
- /**
- * Should sheet names be included? Default is true
- */
- public void setIncludeSheetNames(boolean includeSheetNames) {
- this.includeSheetNames = includeSheetNames;
- }
- /**
- * Should we return the formula itself, and not
- * the result it produces? Default is false
- */
- public void setFormulasNotResults(boolean formulasNotResults) {
- this.formulasNotResults = formulasNotResults;
- }
-
- /**
- * Retrieves the text contents of the file.
- * Sheets are written in the order the workbook lists them,
- * with a newline between sheets. Each row becomes one line,
- * with its cells separated by tabs.
- * Any exception hit while reading a sheet is rethrown
- * wrapped in a RuntimeException.
- */
- public String getText() {
- StringBuffer text = new StringBuffer();
-
- CTSheet[] sheetRefs =
- workbook._getHSSFXML().getSheetReferences().getSheetArray();
- for(int i=0; i<sheetRefs.length; i++) {
- try {
- // Load the sheet before writing anything out for it
- CTWorksheet sheet =
- workbook._getHSSFXML().getSheet(sheetRefs[i]);
- CTRow[] rows =
- sheet.getSheetData().getRowArray();
-
- // Newline between this sheet and the one before
- if(i > 0) {
- text.append("\n");
- }
- if(includeSheetNames) {
- text.append(sheetRefs[i].getName() + "\n");
- }
-
- for(int j=0; j<rows.length; j++) {
- CTCell[] cells = rows[j].getCArray();
- for(int k=0; k<cells.length; k++) {
- CTCell cell = cells[k];
- // Tab between cells, but not before the first
- if(k > 0) {
- text.append("\t");
- }
-
- // Set once the cell's text has been written
- boolean done = false;
-
- // Is it a formula one?
- if(cell.getF() != null) {
- if(formulasNotResults) {
- text.append(cell.getF().getStringValue());
- done = true;
- }
- }
- // Otherwise, use the formatted value of the cell
- if(!done) {
- HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook);
- text.append(uCell.getStringValue());
- }
- }
- text.append("\n");
- }
- } catch(Exception e) {
- throw new RuntimeException(e);
- }
- }
-
- return text.toString();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-package org.apache.poi.hssf.model;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.LinkedList;
-
-import org.apache.xmlbeans.XmlException;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument;
-
-
-/**
- * The shared strings of a workbook, held as a list of Strings
- * which is filled from a package part on construction, and
- * which can be written back to that part with write()
- */
-public class SharedStringsTable extends LinkedList<String> {
- public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main";
-
- /** The parsed shared strings document, reused when writing */
- private SstDocument doc;
- /** The part the strings were read from, and get written back to */
- private PackagePart part;
-
- /**
- * Parses the given part, and fills this list from it
- * @param part The package part holding the shared strings
- */
- public SharedStringsTable(PackagePart part) throws IOException, XmlException {
- this.part = part;
- doc = SstDocument.Factory.parse(
- part.getInputStream()
- );
- read();
- }
-
- /**
- * Adds the text of each entry in the document to this
- * list, in document order
- */
- private void read() {
- CTRst[] sts = doc.getSst().getSiArray();
- for (int i = 0; i < sts.length; i++) {
- add(sts[i].getT());
- }
- }
-
- /**
- * Writes the current shared strings table into
- * the associated OOXML PackagePart
- */
- public void write() throws IOException {
- CTSst sst = doc.getSst();
-
- // Remove the old list
- // (working backwards, so the remaining indexes stay valid)
- for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) {
- sst.removeSi(i);
- }
-
- // Add the new one
- for(String s : this) {
- sst.addNewSi().setT(s);
- }
-
- // Update the counts
- // (both are set to the size of this list)
- sst.setCount(this.size());
- sst.setUniqueCount(this.size());
-
- // Write out
- // NOTE(review): the stream is not closed if save() throws
- OutputStream out = part.getOutputStream();
- doc.save(out);
- out.close();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.usermodel;
-
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType;
-
-/**
- * User facing wrapper around an underlying cell object
- */
-public class HSSFXMLCell {
- /** The underlying low level cell */
- private CTCell cell;
-
- /** The workbook to which this cell belongs */
- private final HSSFXMLWorkbook workbook;
-
- /**
- * Wraps the supplied low level cell
- * @param rawCell The low level cell to wrap
- * @param workbook The workbook used to look up shared strings
- */
- public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) {
- this.cell = rawCell;
- this.workbook = workbook;
- }
-
- /**
- * Formats the cell's contents, based on its type,
- * and returns it as a string.
- * For any type not handled below, a string reporting the
- * unsupported type is returned instead.
- */
- public String getStringValue() {
-
- switch (cell.getT().intValue()) {
- // The value is an index into the workbook's shared strings
- case STCellType.INT_S:
- return this.workbook.getSharedString(Integer.valueOf(cell.getV()));
- // For these two, the value is returned just as stored
- case STCellType.INT_INLINE_STR:
- return cell.getV();
- case STCellType.INT_N:
- return cell.getV();
- // TODO: support other types
- default:
- return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'";
- }
- }
-
- /**
- * Returns the cell's reference, followed by its formatted value
- */
- public String toString() {
- return cell.getR() + " - " + getStringValue();
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.usermodel;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.hssf.HSSFXML;
-
-/**
- * High level representation of a ooxml workbook.
- * This is the first object most users will construct whether
- * they are reading or writing a workbook. It is also the
- * top level object for creating new sheets/etc.
- */
-public class HSSFXMLWorkbook extends POIXMLDocument {
- /** The low level document which this workbook wraps */
- private HSSFXML hssfXML;
-
- /**
- * Creates a workbook on top of the supplied low level document
- */
- public HSSFXMLWorkbook(HSSFXML xml) {
- super(xml);
- this.hssfXML = xml;
- }
-
- /**
- * Returns the low level document behind this workbook
- */
- public HSSFXML _getHSSFXML() {
- return hssfXML;
- }
-
- /**
- * Returns the shared string at the given index, as
- * supplied by the low level document
- */
- public String getSharedString(int index) {
- return hssfXML.getSharedString(index);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hxf;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PushbackInputStream;
-import java.util.ArrayList;
-
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.poifs.common.POIFSConstants;
-import org.apache.poi.util.IOUtils;
-import org.apache.xmlbeans.XmlException;
-import org.dom4j.Document;
-import org.dom4j.DocumentException;
-import org.dom4j.io.SAXReader;
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackageAccess;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackagePartName;
-import org.openxml4j.opc.PackageRelationship;
-import org.openxml4j.opc.PackageRelationshipCollection;
-import org.openxml4j.opc.PackagingURIHelper;
-import org.openxml4j.opc.RelationshipSource;
-import org.openxml4j.opc.internal.PackagePropertiesPart;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
-
-/**
- * Parent class of the low level interface to
- * all POI XML (OOXML) implementations.
- * Normal users should probably deal with things that
- * extends {@link POIXMLDocument}, unless they really
- * do need to get low level access to the files.
- *
- * If you are using these low level classes, then you
- * will almost certainly need to refer to the OOXML
- * specifications from
- * http://www.ecma-international.org/publications/standards/Ecma-376.htm
- *
- * WARNING - APIs expected to change rapidly
- */
-public abstract class HXFDocument {
- public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties";
- public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties";
-
- /**
- * File package/container.
- */
- protected Package container;
- /**
- * The Package Part for our base document
- */
- protected PackagePart basePart;
- /**
- * The base document of this instance, eg Workbook for
- * xslsx
- */
- protected Document baseDocument;
-
- protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException {
- this.container = container;
-
- // Find the base document
- basePart = getSinglePartByType(baseContentType);
-
- // And load it up
- try {
- SAXReader reader = new SAXReader();
- baseDocument = reader.read(basePart.getInputStream());
- } catch (DocumentException e) {
- throw new OpenXML4JException(e.getMessage());
- } catch (IOException ioe) {
- throw new OpenXML4JException(ioe.getMessage());
- }
- }
-
- /**
- * Checks that the supplied InputStream (which MUST
- * support mark and reset, or be a PushbackInputStream)
- * has a OOXML (zip) header at the start of it.
- * If your InputStream does not support mark / reset,
- * then wrap it in a PushBackInputStream, then be
- * sure to always use that, and not the original!
- * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream
- */
- public static boolean hasOOXMLHeader(InputStream inp) throws IOException {
- // We want to peek at the first 4 bytes
- inp.mark(4);
-
- byte[] header = new byte[4];
- IOUtils.readFully(inp, header);
-
- // Wind back those 4 bytes
- if(inp instanceof PushbackInputStream) {
- PushbackInputStream pin = (PushbackInputStream)inp;
- pin.unread(header);
- } else {
- inp.reset();
- }
-
- // Did it match the ooxml zip signature?
- return (
- header[0] == POIFSConstants.OOXML_FILE_HEADER[0] &&
- header[1] == POIFSConstants.OOXML_FILE_HEADER[1] &&
- header[2] == POIFSConstants.OOXML_FILE_HEADER[2] &&
- header[3] == POIFSConstants.OOXML_FILE_HEADER[3]
- );
- }
-
- /**
- * Fetches the (single) PackagePart with the supplied
- * content type.
- * @param contentType The content type to search for
- * @throws IllegalArgumentException If we don't find a single part of that type
- */
- private PackagePart getSinglePartByType(String contentType) throws IllegalArgumentException {
- ArrayList<PackagePart> parts =
- container.getPartsByContentType(contentType);
- if(parts.size() != 1) {
- throw new IllegalArgumentException("Expecting one entry with content type of " + contentType + ", but found " + parts.size());
- }
- return parts.get(0);
- }
-
- /**
- * Fetches the (single) PackagePart which is defined as
- * the supplied relation content type of the specified part,
- * or null if none found.
- * @param relationType The relation content type to search for
- * @throws IllegalArgumentException If we find more than one part of that type
- */
- protected PackagePart getSinglePartByRelationType(String relationType, RelationshipSource part) throws IllegalArgumentException, OpenXML4JException {
- PackageRelationshipCollection rels =
- part.getRelationshipsByType(relationType);
- if(rels.size() == 0) {
- return null;
- }
- if(rels.size() > 1) {
- throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!");
- }
- PackageRelationship rel = rels.getRelationship(0);
- return getPackagePart(rel);
- }
-
- /**
- * Fetches the (single) PackagePart which is defined as
- * the supplied relation content type of the base
- * container, or null if none found.
- * @param relationType The relation content type to search for
- * @throws IllegalArgumentException If we find more than one part of that type
- */
- protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException {
- return getSinglePartByRelationType(relationType, container);
- }
-
- /**
- * Retrieves the PackagePart for the given relation
- * id. This will normally come from a r:id attribute
- * on part of the base document.
- * @param partId The r:id pointing to the other PackagePart
- */
- protected PackagePart getRelatedPackagePart(String partId) {
- PackageRelationship rel =
- basePart.getRelationship(partId);
- return getPackagePart(rel);
- }
-
- /**
- * Retrieves the PackagePart for the given Relationship
- * object. Normally you'll want to go via a content type
- * or r:id to get one of those.
- */
- protected PackagePart getPackagePart(PackageRelationship rel) {
- PackagePartName relName;
- try {
- relName = PackagingURIHelper.createPartName(rel.getTargetURI());
- } catch(InvalidFormatException e) {
- throw new InternalError(e.getMessage());
- }
-
- PackagePart part = container.getPart(relName);
- if(part == null) {
- throw new IllegalArgumentException("No part found for rel " + rel);
- }
- return part;
- }
-
- /**
- * Retrieves all the PackageParts which are defined as
- * relationships of the base document with the
- * specified content type.
- */
- protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
- PackageRelationshipCollection partsC =
- basePart.getRelationshipsByType(contentType);
-
- PackagePart[] parts = new PackagePart[partsC.size()];
- int count = 0;
- for (PackageRelationship rel : partsC) {
- parts[count] = getPackagePart(rel);
- count++;
- }
- return parts;
- }
-
- /**
- * Get the package container.
- * @return The package associated to this document.
- */
- public Package getPackage() {
- return container;
- }
-
- /**
- * Get the core document properties (core ooxml properties).
- */
- public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, XmlException, IOException {
- PackagePart propsPart = getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE);
- if(propsPart == null) {
- return null;
- }
- return (PackagePropertiesPart)propsPart;
- }
-
- /**
- * Get the extended document properties (extended ooxml properties)
- */
- public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException {
- PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE);
-
- PropertiesDocument props = PropertiesDocument.Factory.parse(
- propsPart.getInputStream());
- return props.getProperties();
- }
-
- /**
- * Returns an opened OOXML Package for the supplied File
- * @param f File to open
- */
- public static Package openPackage(File f) throws InvalidFormatException {
- return Package.open(f.toString(), PackageAccess.READ_WRITE);
- }
-}
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf;
-
-import java.io.File;
-
-import org.apache.poi.hssf.model.SharedStringsTable;
-import org.apache.poi.hxf.HXFDocument;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
-
-import junit.framework.TestCase;
-
-public class TestHSSFXML extends TestCase {
- /**
- * Uses the old style schemas.microsoft.com schema uri
- */
- private File sampleFileBeta;
- /**
- * Uses the new style schemas.openxmlformats.org schema uri
- */
- private File sampleFile;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- sampleFile = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "sample.xlsx"
- );
- sampleFileBeta = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "sample-beta.xlsx"
- );
- }
-
- public void testContainsMainContentType() throws Exception {
- Package pack = HXFDocument.openPackage(sampleFile);
-
- boolean found = false;
- for(PackagePart part : pack.getParts()) {
- if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) {
- found = true;
- }
- System.out.println(part);
- }
- assertTrue(found);
- }
-
- public void testOpen() throws Exception {
- HXFDocument.openPackage(sampleFile);
- HXFDocument.openPackage(sampleFileBeta);
-
- HSSFXML xml;
-
- // With an old-style uri, as found in a file produced
- // with the office 2007 beta, will fail, as we don't
- // translate things
- try {
- xml = new HSSFXML(
- HXFDocument.openPackage(sampleFileBeta)
- );
- fail();
- } catch(Exception e) {}
-
- // With the finalised uri, should be fine
- xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Check it has a workbook
- assertNotNull(xml.getWorkbook());
- }
-
- public void testSheetBasics() throws Exception {
- HSSFXML xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
-
- // Should have three sheets
- assertEquals(3, xml.getSheetReferences().sizeOfSheetArray());
- assertEquals(3, xml.getSheetReferences().getSheetArray().length);
-
- // Check they're as expected
- CTSheet[] sheets = xml.getSheetReferences().getSheetArray();
- assertEquals("Sheet1", sheets[0].getName());
- assertEquals("Sheet2", sheets[1].getName());
- assertEquals("Sheet3", sheets[2].getName());
- assertEquals("rId1", sheets[0].getId());
- assertEquals("rId2", sheets[1].getId());
- assertEquals("rId3", sheets[2].getId());
-
- // Now get those objects
- assertNotNull(xml.getSheet(sheets[0]));
- assertNotNull(xml.getSheet(sheets[1]));
- assertNotNull(xml.getSheet(sheets[2]));
- }
-
- public void testMetadataBasics() throws Exception {
- HSSFXML xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
- assertNotNull(xml.getCoreProperties());
- assertNotNull(xml.getExtendedProperties());
-
- assertEquals("Microsoft Excel", xml.getExtendedProperties().getApplication());
- assertEquals(0, xml.getExtendedProperties().getCharacters());
- assertEquals(0, xml.getExtendedProperties().getLines());
-
- assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue());
- assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue());
- }
-
- public void testSharedStringBasics() throws Exception {
- HSSFXML xml = new HSSFXML(
- HXFDocument.openPackage(sampleFile)
- );
- assertNotNull(xml._getSharedStringsTable());
-
- SharedStringsTable sst = xml._getSharedStringsTable();
- assertEquals(10, sst.size());
-
- assertEquals("Lorem", sst.get(0));
- for(int i=0; i<sst.size(); i++) {
- assertEquals(sst.get(i), xml.getSharedString(i));
- }
-
- // Add a few more, then save and reload, checking
- // changes have been kept
- sst.add("Foo");
- sst.add("Bar");
- sst.set(0, "LoremLorem");
-
- sst.write();
-
- xml = new HSSFXML(xml.getPackage());
- sst = xml._getSharedStringsTable();
- assertEquals(12, sst.size());
-
- assertEquals("LoremLorem", sst.get(0));
- for(int i=0; i<sst.size(); i++) {
- assertEquals(sst.get(i), xml.getSharedString(i));
- }
- }
-}
\ No newline at end of file
+++ /dev/null
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.hssf.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-import junit.framework.TestCase;
-
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hssf.HSSFXML;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook;
-import org.apache.poi.hxf.HXFDocument;
-
-/**
- * Tests for HXFExcelExtractor
- */
-public class TestHXFExcelExtractor extends TestCase {
- /**
- * A very simple file
- */
- private HSSFXML xmlA;
- /**
- * A fairly complex file
- */
- private HSSFXML xmlB;
-
- /**
- * A fairly simple file - ooxml
- */
- private HSSFXML simpleXLSX;
- /**
- * A fairly simple file - ole2
- */
- private HSSFWorkbook simpleXLS;
-
- protected void setUp() throws Exception {
- super.setUp();
-
- File fileA = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "sample.xlsx"
- );
- File fileB = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "AverageTaxRates.xlsx"
- );
-
- File fileSOOXML = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "SampleSS.xlsx"
- );
- File fileSOLE2 = new File(
- System.getProperty("HSSF.testdata.path") +
- File.separator + "SampleSS.xls"
- );
-
- xmlA = new HSSFXML(HXFDocument.openPackage(fileA));
- xmlB = new HSSFXML(HXFDocument.openPackage(fileB));
-
- simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML));
- simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2));
- }
-
- /**
- * Get text out of the simple file
- */
- public void testGetSimpleText() throws Exception {
- new HXFExcelExtractor(xmlA.getPackage());
- new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA));
-
- HXFExcelExtractor extractor =
- new HXFExcelExtractor(xmlA.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Check sheet names
- assertTrue(text.startsWith("Sheet1"));
- assertTrue(text.endsWith("Sheet3\n"));
-
- // Now without, will have text
- extractor.setIncludeSheetNames(false);
- text = extractor.getText();
- assertEquals(
- "Lorem\t111\n" +
- "ipsum\t222\n" +
- "dolor\t333\n" +
- "sit\t444\n" +
- "amet\t555\n" +
- "consectetuer\t666\n" +
- "adipiscing\t777\n" +
- "elit\t888\n" +
- "Nunc\t999\n" +
- "at\t4995\n" +
- "\n\n", text);
-
- // Now get formulas not their values
- extractor.setFormulasNotResults(true);
- text = extractor.getText();
- assertEquals(
- "Lorem\t111\n" +
- "ipsum\t222\n" +
- "dolor\t333\n" +
- "sit\t444\n" +
- "amet\t555\n" +
- "consectetuer\t666\n" +
- "adipiscing\t777\n" +
- "elit\t888\n" +
- "Nunc\t999\n" +
- "at\tSUM(B1:B9)\n" +
- "\n\n", text);
-
- // With sheet names too
- extractor.setIncludeSheetNames(true);
- text = extractor.getText();
- assertEquals(
- "Sheet1\n" +
- "Lorem\t111\n" +
- "ipsum\t222\n" +
- "dolor\t333\n" +
- "sit\t444\n" +
- "amet\t555\n" +
- "consectetuer\t666\n" +
- "adipiscing\t777\n" +
- "elit\t888\n" +
- "Nunc\t999\n" +
- "at\tSUM(B1:B9)\n\n" +
- "Sheet2\n\n" +
- "Sheet3\n"
- , text);
- }
-
- public void testGetComplexText() throws Exception {
- new HXFExcelExtractor(xmlB.getPackage());
- new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB));
-
- HXFExcelExtractor extractor =
- new HXFExcelExtractor(xmlB.getPackage());
- extractor.getText();
-
- String text = extractor.getText();
- assertTrue(text.length() > 0);
-
- // Might not have all formatting it should do!
- // TODO decide if we should really have the "null" in there
- assertTrue(text.startsWith(
- "Avgtxfull\n" +
- "null\t(iii) AVERAGE TAX RATES ON ANNUAL"
- ));
- }
-
- /**
- * Test that we return pretty much the same as
- * ExcelExtractor does, when we're both passed
- * the same file, just saved as xls and xlsx
- */
- public void testComparedToOLE2() throws Exception {
- HXFExcelExtractor ooxmlExtractor =
- new HXFExcelExtractor(simpleXLSX.getPackage());
- ExcelExtractor ole2Extractor =
- new ExcelExtractor(simpleXLS);
-
- POITextExtractor[] extractors =
- new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
- for (int i = 0; i < extractors.length; i++) {
- POITextExtractor extractor = extractors[i];
-
- String text = extractor.getText().replaceAll("[\r\t]", "");
- //System.out.println(text.length());
- //System.out.println(text);
- assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n"));
- Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL);
- Matcher m = pattern.matcher(text);
- assertTrue(m.matches());
- }
- }
-}
+++ /dev/null
-
-/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-
-
-package org.apache.poi.hxf;
-
-import junit.framework.TestCase;
-import java.io.*;
-
-/**
- * Class to test that HXF correctly detects OOXML
- * documents
- */
-public class TestDetectAsOOXML extends TestCase
-{
- public String dirname;
-
- public void setUp() {
- dirname = System.getProperty("HSSF.testdata.path");
- }
-
- public void testOpensProperly() throws Exception
- {
- File f = new File(dirname + "/sample.xlsx");
-
- HXFDocument.openPackage(f);
- }
-
- public void testDetectAsPOIFS() throws Exception {
- InputStream in;
-
- // ooxml file is
- in = new PushbackInputStream(
- new FileInputStream(dirname + "/SampleSS.xlsx"), 10
- );
- assertTrue(HXFDocument.hasOOXMLHeader(in));
-
- // xls file isn't
- in = new PushbackInputStream(
- new FileInputStream(dirname + "/SampleSS.xls"), 10
- );
- assertFalse(HXFDocument.hasOOXMLHeader(in));
-
- // text file isn't
- in = new PushbackInputStream(
- new FileInputStream(dirname + "/SampleSS.txt"), 10
- );
- assertFalse(HXFDocument.hasOOXMLHeader(in));
- }
-}