From: Nick Burch Date: Sat, 8 Mar 2008 17:21:29 +0000 (+0000) Subject: More shuffling of things out of src/scratchpad/ooxml-* X-Git-Tag: REL_3_5_BETA2~200 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=2cc22cb45b19b057f6061feac08d18fdc0504d31;p=poi.git More shuffling of things out of src/scratchpad/ooxml-* git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@635021 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index a9cceb525a..54b92e32de 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -17,7 +17,11 @@ package org.apache.poi; import java.io.IOException; +import java.io.InputStream; +import java.io.PushbackInputStream; +import org.apache.poi.poifs.common.POIFSConstants; +import org.apache.poi.util.IOUtils; import org.openxml4j.exceptions.InvalidFormatException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java new file mode 100644 index 0000000000..69361e7b4b --- /dev/null +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFExcelExtractor.java @@ -0,0 +1,128 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xssf.extractor; + +import java.io.File; +import java.io.IOException; + +import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.Package; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; +import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; + +/** + * Helper class to extract text from an OOXML Excel file + */ +public class XSSFExcelExtractor extends POIXMLTextExtractor { + private XSSFWorkbook workbook; + private boolean includeSheetNames = true; + private boolean formulasNotResults = false; + + public XSSFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException { + this(new XSSFWorkbook(container)); + } + public XSSFExcelExtractor(XSSFWorkbook workbook) { + super(workbook); + this.workbook = workbook; + } + + public static void main(String[] args) throws Exception { + if(args.length < 1) { + System.err.println("Use:"); + System.err.println(" HXFExcelExtractor "); + System.exit(1); + } + POIXMLTextExtractor extractor = + new HXFExcelExtractor(HXFDocument.openPackage( + new File(args[0]) + )); + System.out.println(extractor.getText()); + } + + /** + * Should sheet names be included? Default is true + */ + public void setIncludeSheetNames(boolean includeSheetNames) { + this.includeSheetNames = includeSheetNames; + } + /** + * Should we return the formula itself, and not + * the result it produces? Default is false + */ + public void setFormulasNotResults(boolean formulasNotResults) { + this.formulasNotResults = formulasNotResults; + } + + /** + * Retreives the text contents of the file + */ + public String getText() { + StringBuffer text = new StringBuffer(); + + CTSheet[] sheetRefs = + workbook._getHSSFXML().getSheetReferences().getSheetArray(); + for(int i=0; i 0) { + text.append("\n"); + } + if(includeSheetNames) { + text.append(sheetRefs[i].getName() + "\n"); + } + + for(int j=0; j 0) { + text.append("\t"); + } + + boolean done = false; + + // Is it a formula one? + if(cell.getF() != null) { + if(formulasNotResults) { + text.append(cell.getF().getStringValue()); + done = true; + } + } + if(!done) { + HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook); + text.append(uCell.getStringValue()); + } + } + text.append("\n"); + } + } catch(Exception e) { + throw new RuntimeException(e); + } + } + + return text.toString(); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java new file mode 100644 index 0000000000..36adb497cd --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/TestDetectAsOOXML.java @@ -0,0 +1,65 @@ + +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + + +package org.apache.poi.hxf; + +import junit.framework.TestCase; +import java.io.*; + +/** + * Class to test that HXF correctly detects OOXML + * documents + */ +public class TestDetectAsOOXML extends TestCase +{ + public String dirname; + + public void setUp() { + dirname = System.getProperty("HSSF.testdata.path"); + } + + public void testOpensProperly() throws Exception + { + File f = new File(dirname + "/sample.xlsx"); + + HXFDocument.openPackage(f); + } + + public void testDetectAsPOIFS() throws Exception { + InputStream in; + + // ooxml file is + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.xlsx"), 10 + ); + assertTrue(HXFDocument.hasOOXMLHeader(in)); + + // xls file isn't + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.xls"), 10 + ); + assertFalse(HXFDocument.hasOOXMLHeader(in)); + + // text file isn't + in = new PushbackInputStream( + new FileInputStream(dirname + "/SampleSS.txt"), 10 + ); + assertFalse(HXFDocument.hasOOXMLHeader(in)); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java new file mode 100644 index 0000000000..a73b60bf72 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TextXSSFExcelExtractor.java @@ -0,0 +1,196 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hssf.extractor; + +import java.io.File; +import java.io.FileInputStream; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import junit.framework.TestCase; + +import org.apache.poi.POITextExtractor; +import org.apache.poi.hssf.HSSFXML; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook; +import org.apache.poi.hxf.HXFDocument; + +/** + * Tests for HXFExcelExtractor + */ +public class TestHXFExcelExtractor extends TestCase { + /** + * A very simple file + */ + private HSSFXML xmlA; + /** + * A fairly complex file + */ + private HSSFXML xmlB; + + /** + * A fairly simple file - ooxml + */ + private HSSFXML simpleXLSX; + /** + * A fairly simple file - ole2 + */ + private HSSFWorkbook simpleXLS; + + protected void setUp() throws Exception { + super.setUp(); + + File fileA = new File( + System.getProperty("HSSF.testdata.path") + + File.separator + "sample.xlsx" + ); + File fileB = new File( + System.getProperty("HSSF.testdata.path") + + File.separator + "AverageTaxRates.xlsx" + ); + + File fileSOOXML = new File( + System.getProperty("HSSF.testdata.path") + + File.separator + "SampleSS.xlsx" + ); + File fileSOLE2 = new File( + System.getProperty("HSSF.testdata.path") + + File.separator + "SampleSS.xls" + ); + + xmlA = new HSSFXML(HXFDocument.openPackage(fileA)); + xmlB = new HSSFXML(HXFDocument.openPackage(fileB)); + + simpleXLSX = new HSSFXML(HXFDocument.openPackage(fileSOOXML)); + simpleXLS = new HSSFWorkbook(new FileInputStream(fileSOLE2)); + } + + /** + * Get text out of the simple file + */ + public void testGetSimpleText() throws Exception { + new HXFExcelExtractor(xmlA.getPackage()); + new HXFExcelExtractor(new HSSFXMLWorkbook(xmlA)); + + HXFExcelExtractor extractor = + new HXFExcelExtractor(xmlA.getPackage()); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Check sheet names + assertTrue(text.startsWith("Sheet1")); + assertTrue(text.endsWith("Sheet3\n")); + + // Now without, will have text + extractor.setIncludeSheetNames(false); + text = extractor.getText(); + assertEquals( + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n" + + "at\t4995\n" + + "\n\n", text); + + // Now get formulas not their values + extractor.setFormulasNotResults(true); + text = extractor.getText(); + assertEquals( + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n" + + "at\tSUM(B1:B9)\n" + + "\n\n", text); + + // With sheet names too + extractor.setIncludeSheetNames(true); + text = extractor.getText(); + assertEquals( + "Sheet1\n" + + "Lorem\t111\n" + + "ipsum\t222\n" + + "dolor\t333\n" + + "sit\t444\n" + + "amet\t555\n" + + "consectetuer\t666\n" + + "adipiscing\t777\n" + + "elit\t888\n" + + "Nunc\t999\n" + + "at\tSUM(B1:B9)\n\n" + + "Sheet2\n\n" + + "Sheet3\n" + , text); + } + + public void testGetComplexText() throws Exception { + new HXFExcelExtractor(xmlB.getPackage()); + new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB)); + + HXFExcelExtractor extractor = + new HXFExcelExtractor(xmlB.getPackage()); + extractor.getText(); + + String text = extractor.getText(); + assertTrue(text.length() > 0); + + // Might not have all formatting it should do! + // TODO decide if we should really have the "null" in there + assertTrue(text.startsWith( + "Avgtxfull\n" + + "null\t(iii) AVERAGE TAX RATES ON ANNUAL" + )); + } + + /** + * Test that we return pretty much the same as + * ExcelExtractor does, when we're both passed + * the same file, just saved as xls and xlsx + */ + public void testComparedToOLE2() throws Exception { + HXFExcelExtractor ooxmlExtractor = + new HXFExcelExtractor(simpleXLSX.getPackage()); + ExcelExtractor ole2Extractor = + new ExcelExtractor(simpleXLS); + + POITextExtractor[] extractors = + new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; + for (int i = 0; i < extractors.length; i++) { + POITextExtractor extractor = extractors[i]; + + String text = extractor.getText().replaceAll("[\r\t]", ""); + //System.out.println(text.length()); + //System.out.println(text); + assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n")); + Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); + Matcher m = pattern.matcher(text); + assertTrue(m.matches()); + } + } +} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java deleted file mode 100644 index 3766a046a1..0000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java +++ /dev/null @@ -1,104 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf; - -import java.io.IOException; - -import org.apache.poi.hssf.model.SharedStringsTable; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheets; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument; - -/** - * Experimental class to do low level processing - * of xlsx files. - * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public class HSSFXML extends HXFDocument { - public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml"; - public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml"; - public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml"; - public static final String SHARED_STRINGS_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/sharedStrings"; - - private WorkbookDocument workbookDoc; - private SharedStringsTable sharedStrings; - - public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException { - super(container, MAIN_CONTENT_TYPE); - - workbookDoc = - WorkbookDocument.Factory.parse(basePart.getInputStream()); - - PackagePart ssPart = getSinglePartByRelationType(SHARED_STRINGS_RELATION_TYPE, basePart); - if (ssPart != null) { - sharedStrings = new SharedStringsTable(ssPart); - } else { - - } - } - - /** - * Returns the low level workbook base object - */ - public CTWorkbook getWorkbook() { - return workbookDoc.getWorkbook(); - } - /** - * Returns the references from the workbook to its - * sheets. - * You'll need these to figure out the sheet ordering, - * and to get at the actual sheets themselves - */ - public CTSheets getSheetReferences() { - return getWorkbook().getSheets(); - } - /** - * Returns the low level (work)sheet object from - * the supplied sheet reference - */ - public CTWorksheet getSheet(CTSheet sheet) throws IOException, XmlException { - PackagePart sheetPart = - getRelatedPackagePart(sheet.getId()); - WorksheetDocument sheetDoc = - WorksheetDocument.Factory.parse(sheetPart.getInputStream()); - return sheetDoc.getWorksheet(); - } - - /** - * Returns the shared string at the given index - */ - public String getSharedString(int index) { - return this.sharedStrings.get(index); - } - protected SharedStringsTable _getSharedStringsTable() { - return sharedStrings; - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java deleted file mode 100644 index 34ae068005..0000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java +++ /dev/null @@ -1,133 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.extractor; - -import java.io.File; -import java.io.IOException; - -import org.apache.poi.POIXMLTextExtractor; -import org.apache.poi.hssf.HSSFXML; -import org.apache.poi.hssf.usermodel.HSSFXMLCell; -import org.apache.poi.hssf.usermodel.HSSFXMLWorkbook; -import org.apache.poi.hxf.HXFDocument; -import org.apache.xmlbeans.XmlException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; - -/** - * Helper class to extract text from an OOXML Excel file - */ -public class HXFExcelExtractor extends POIXMLTextExtractor { - private HSSFXMLWorkbook workbook; - private boolean includeSheetNames = true; - private boolean formulasNotResults = false; - - public HXFExcelExtractor(Package container) throws XmlException, OpenXML4JException, IOException { - this(new HSSFXMLWorkbook( - new HSSFXML(container) - )); - } - public HXFExcelExtractor(HSSFXMLWorkbook workbook) { - super(workbook); - this.workbook = workbook; - } - - public static void main(String[] args) throws Exception { - if(args.length < 1) { - System.err.println("Use:"); - System.err.println(" HXFExcelExtractor "); - System.exit(1); - } - POIXMLTextExtractor extractor = - new HXFExcelExtractor(HXFDocument.openPackage( - new File(args[0]) - )); - System.out.println(extractor.getText()); - } - - /** - * Should sheet names be included? Default is true - */ - public void setIncludeSheetNames(boolean includeSheetNames) { - this.includeSheetNames = includeSheetNames; - } - /** - * Should we return the formula itself, and not - * the result it produces? Default is false - */ - public void setFormulasNotResults(boolean formulasNotResults) { - this.formulasNotResults = formulasNotResults; - } - - /** - * Retreives the text contents of the file - */ - public String getText() { - StringBuffer text = new StringBuffer(); - - CTSheet[] sheetRefs = - workbook._getHSSFXML().getSheetReferences().getSheetArray(); - for(int i=0; i 0) { - text.append("\n"); - } - if(includeSheetNames) { - text.append(sheetRefs[i].getName() + "\n"); - } - - for(int j=0; j 0) { - text.append("\t"); - } - - boolean done = false; - - // Is it a formula one? - if(cell.getF() != null) { - if(formulasNotResults) { - text.append(cell.getF().getStringValue()); - done = true; - } - } - if(!done) { - HSSFXMLCell uCell = new HSSFXMLCell(cell, workbook); - text.append(uCell.getStringValue()); - } - } - text.append("\n"); - } - } catch(Exception e) { - throw new RuntimeException(e); - } - } - - return text.toString(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java deleted file mode 100644 index b3e2192566..0000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/model/SharedStringsTable.java +++ /dev/null @@ -1,78 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - -package org.apache.poi.hssf.model; - -import java.io.IOException; -import java.io.OutputStream; -import java.util.LinkedList; - -import org.apache.xmlbeans.XmlException; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRst; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSst; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.SstDocument; - - -public class SharedStringsTable extends LinkedList { - public static final String MAIN_SML_NS_URI = "http://schemas.openxmlformats.org/spreadsheetml/2006/main"; - - private SstDocument doc; - private PackagePart part; - - public SharedStringsTable(PackagePart part) throws IOException, XmlException { - this.part = part; - doc = SstDocument.Factory.parse( - part.getInputStream() - ); - read(); - } - - private void read() { - CTRst[] sts = doc.getSst().getSiArray(); - for (int i = 0; i < sts.length; i++) { - add(sts[i].getT()); - } - } - - /** - * Writes the current shared strings table into - * the associated OOXML PackagePart - */ - public void write() throws IOException { - CTSst sst = doc.getSst(); - - // Remove the old list - for(int i=sst.sizeOfSiArray() - 1; i>=0; i--) { - sst.removeSi(i); - } - - // Add the new one - for(String s : this) { - sst.addNewSi().setT(s); - } - - // Update the counts - sst.setCount(this.size()); - sst.setUniqueCount(this.size()); - - // Write out - OutputStream out = part.getOutputStream(); - doc.save(out); - out.close(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java deleted file mode 100644 index b24556cd8e..0000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLCell.java +++ /dev/null @@ -1,58 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.usermodel; - -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.STCellType; - -/** - * User facing wrapper around an underlying cell object - */ -public class HSSFXMLCell { - private CTCell cell; - - /** The workbook to which this cell belongs */ - private final HSSFXMLWorkbook workbook; - - public HSSFXMLCell(CTCell rawCell, HSSFXMLWorkbook workbook) { - this.cell = rawCell; - this.workbook = workbook; - } - - /** - * Formats the cell's contents, based on its type, - * and returns it as a string. - */ - public String getStringValue() { - - switch (cell.getT().intValue()) { - case STCellType.INT_S: - return this.workbook.getSharedString(Integer.valueOf(cell.getV())); - case STCellType.INT_INLINE_STR: - return cell.getV(); - case STCellType.INT_N: - return cell.getV(); - // TODO: support other types - default: - return "UNSUPPORTED CELL TYPE: '" + cell.getT() + "'"; - } - } - - public String toString() { - return cell.getR() + " - " + getStringValue(); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java deleted file mode 100644 index 023b80f4d1..0000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/usermodel/HSSFXMLWorkbook.java +++ /dev/null @@ -1,43 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf.usermodel; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.hssf.HSSFXML; - -/** - * High level representation of a ooxml workbook. - * This is the first object most users will construct whether - * they are reading or writing a workbook. It is also the - * top level object for creating new sheets/etc. - */ -public class HSSFXMLWorkbook extends POIXMLDocument { - private HSSFXML hssfXML; - - public HSSFXMLWorkbook(HSSFXML xml) { - super(xml); - this.hssfXML = xml; - } - - public HSSFXML _getHSSFXML() { - return hssfXML; - } - - public String getSharedString(int index) { - return hssfXML.getSharedString(index); - } -} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java deleted file mode 100644 index 2e2900f082..0000000000 --- a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java +++ /dev/null @@ -1,261 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hxf; - -import java.io.File; -import java.io.IOException; -import java.io.InputStream; -import java.io.PushbackInputStream; -import java.util.ArrayList; - -import org.apache.poi.POIXMLDocument; -import org.apache.poi.poifs.common.POIFSConstants; -import org.apache.poi.util.IOUtils; -import org.apache.xmlbeans.XmlException; -import org.dom4j.Document; -import org.dom4j.DocumentException; -import org.dom4j.io.SAXReader; -import org.openxml4j.exceptions.InvalidFormatException; -import org.openxml4j.exceptions.OpenXML4JException; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackageAccess; -import org.openxml4j.opc.PackagePart; -import org.openxml4j.opc.PackagePartName; -import org.openxml4j.opc.PackageRelationship; -import org.openxml4j.opc.PackageRelationshipCollection; -import org.openxml4j.opc.PackagingURIHelper; -import org.openxml4j.opc.RelationshipSource; -import org.openxml4j.opc.internal.PackagePropertiesPart; -import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; -import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument; - -/** - * Parent class of the low level interface to - * all POI XML (OOXML) implementations. - * Normal users should probably deal with things that - * extends {@link POIXMLDocument}, unless they really - * do need to get low level access to the files. - * - * If you are using these low level classes, then you - * will almost certainly need to refer to the OOXML - * specifications from - * http://www.ecma-international.org/publications/standards/Ecma-376.htm - * - * WARNING - APIs expected to change rapidly - */ -public abstract class HXFDocument { - public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"; - public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; - - /** - * File package/container. - */ - protected Package container; - /** - * The Package Part for our base document - */ - protected PackagePart basePart; - /** - * The base document of this instance, eg Workbook for - * xslsx - */ - protected Document baseDocument; - - protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException { - this.container = container; - - // Find the base document - basePart = getSinglePartByType(baseContentType); - - // And load it up - try { - SAXReader reader = new SAXReader(); - baseDocument = reader.read(basePart.getInputStream()); - } catch (DocumentException e) { - throw new OpenXML4JException(e.getMessage()); - } catch (IOException ioe) { - throw new OpenXML4JException(ioe.getMessage()); - } - } - - /** - * Checks that the supplied InputStream (which MUST - * support mark and reset, or be a PushbackInputStream) - * has a OOXML (zip) header at the start of it. - * If your InputStream does not support mark / reset, - * then wrap it in a PushBackInputStream, then be - * sure to always use that, and not the original! - * @param inp An InputStream which supports either mark/reset, or is a PushbackInputStream - */ - public static boolean hasOOXMLHeader(InputStream inp) throws IOException { - // We want to peek at the first 4 bytes - inp.mark(4); - - byte[] header = new byte[4]; - IOUtils.readFully(inp, header); - - // Wind back those 4 bytes - if(inp instanceof PushbackInputStream) { - PushbackInputStream pin = (PushbackInputStream)inp; - pin.unread(header); - } else { - inp.reset(); - } - - // Did it match the ooxml zip signature? - return ( - header[0] == POIFSConstants.OOXML_FILE_HEADER[0] && - header[1] == POIFSConstants.OOXML_FILE_HEADER[1] && - header[2] == POIFSConstants.OOXML_FILE_HEADER[2] && - header[3] == POIFSConstants.OOXML_FILE_HEADER[3] - ); - } - - /** - * Fetches the (single) PackagePart with the supplied - * content type. - * @param contentType The content type to search for - * @throws IllegalArgumentException If we don't find a single part of that type - */ - private PackagePart getSinglePartByType(String contentType) throws IllegalArgumentException { - ArrayList parts = - container.getPartsByContentType(contentType); - if(parts.size() != 1) { - throw new IllegalArgumentException("Expecting one entry with content type of " + contentType + ", but found " + parts.size()); - } - return parts.get(0); - } - - /** - * Fetches the (single) PackagePart which is defined as - * the supplied relation content type of the specified part, - * or null if none found. - * @param relationType The relation content type to search for - * @throws IllegalArgumentException If we find more than one part of that type - */ - protected PackagePart getSinglePartByRelationType(String relationType, RelationshipSource part) throws IllegalArgumentException, OpenXML4JException { - PackageRelationshipCollection rels = - part.getRelationshipsByType(relationType); - if(rels.size() == 0) { - return null; - } - if(rels.size() > 1) { - throw new IllegalArgumentException("Found " + rels.size() + " relations for the type " + relationType + ", should only ever be one!"); - } - PackageRelationship rel = rels.getRelationship(0); - return getPackagePart(rel); - } - - /** - * Fetches the (single) PackagePart which is defined as - * the supplied relation content type of the base - * container, or null if none found. - * @param relationType The relation content type to search for - * @throws IllegalArgumentException If we find more than one part of that type - */ - protected PackagePart getSinglePartByRelationType(String relationType) throws IllegalArgumentException, OpenXML4JException { - return getSinglePartByRelationType(relationType, container); - } - - /** - * Retrieves the PackagePart for the given relation - * id. This will normally come from a r:id attribute - * on part of the base document. - * @param partId The r:id pointing to the other PackagePart - */ - protected PackagePart getRelatedPackagePart(String partId) { - PackageRelationship rel = - basePart.getRelationship(partId); - return getPackagePart(rel); - } - - /** - * Retrieves the PackagePart for the given Relationship - * object. Normally you'll want to go via a content type - * or r:id to get one of those. - */ - protected PackagePart getPackagePart(PackageRelationship rel) { - PackagePartName relName; - try { - relName = PackagingURIHelper.createPartName(rel.getTargetURI()); - } catch(InvalidFormatException e) { - throw new InternalError(e.getMessage()); - } - - PackagePart part = container.getPart(relName); - if(part == null) { - throw new IllegalArgumentException("No part found for rel " + rel); - } - return part; - } - - /** - * Retrieves all the PackageParts which are defined as - * relationships of the base document with the - * specified content type. - */ - protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { - PackageRelationshipCollection partsC = - basePart.getRelationshipsByType(contentType); - - PackagePart[] parts = new PackagePart[partsC.size()]; - int count = 0; - for (PackageRelationship rel : partsC) { - parts[count] = getPackagePart(rel); - count++; - } - return parts; - } - - /** - * Get the package container. - * @return The package associated to this document. - */ - public Package getPackage() { - return container; - } - - /** - * Get the core document properties (core ooxml properties). - */ - public PackagePropertiesPart getCoreProperties() throws OpenXML4JException, XmlException, IOException { - PackagePart propsPart = getSinglePartByRelationType(CORE_PROPERTIES_REL_TYPE); - if(propsPart == null) { - return null; - } - return (PackagePropertiesPart)propsPart; - } - - /** - * Get the extended document properties (extended ooxml properties) - */ - public CTProperties getExtendedProperties() throws OpenXML4JException, XmlException, IOException { - PackagePart propsPart = getSinglePartByRelationType(EXTENDED_PROPERTIES_REL_TYPE); - - PropertiesDocument props = PropertiesDocument.Factory.parse( - propsPart.getInputStream()); - return props.getProperties(); - } - - /** - * Returns an opened OOXML Package for the supplied File - * @param f File to open - */ - public static Package openPackage(File f) throws InvalidFormatException { - return Package.open(f.toString(), PackageAccess.READ_WRITE); - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java deleted file mode 100644 index 97453265c4..0000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java +++ /dev/null @@ -1,160 +0,0 @@ -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ -package org.apache.poi.hssf; - -import java.io.File; - -import org.apache.poi.hssf.model.SharedStringsTable; -import org.apache.poi.hxf.HXFDocument; -import org.openxml4j.opc.Package; -import org.openxml4j.opc.PackagePart; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; - -import junit.framework.TestCase; - -public class TestHSSFXML extends TestCase { - /** - * Uses the old style schemas.microsoft.com schema uri - */ - private File sampleFileBeta; - /** - * Uses the new style schemas.openxmlformats.org schema uri - */ - private File sampleFile; - - protected void setUp() throws Exception { - super.setUp(); - - sampleFile = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "sample.xlsx" - ); - sampleFileBeta = new File( - System.getProperty("HSSF.testdata.path") + - File.separator + "sample-beta.xlsx" - ); - } - - public void testContainsMainContentType() throws Exception { - Package pack = HXFDocument.openPackage(sampleFile); - - boolean found = false; - for(PackagePart part : pack.getParts()) { - if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) { - found = true; - } - System.out.println(part); - } - assertTrue(found); - } - - public void testOpen() throws Exception { - HXFDocument.openPackage(sampleFile); - HXFDocument.openPackage(sampleFileBeta); - - HSSFXML xml; - - // With an old-style uri, as found in a file produced - // with the office 2007 beta, will fail, as we don't - // translate things - try { - xml = new HSSFXML( - HXFDocument.openPackage(sampleFileBeta) - ); - fail(); - } catch(Exception e) {} - - // With the finalised uri, should be fine - xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Check it has a workbook - assertNotNull(xml.getWorkbook()); - } - - public void testSheetBasics() throws Exception { - HSSFXML xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - - // Should have three sheets - assertEquals(3, xml.getSheetReferences().sizeOfSheetArray()); - assertEquals(3, xml.getSheetReferences().getSheetArray().length); - - // Check they're as expected - CTSheet[] sheets = xml.getSheetReferences().getSheetArray(); - assertEquals("Sheet1", sheets[0].getName()); - assertEquals("Sheet2", sheets[1].getName()); - assertEquals("Sheet3", sheets[2].getName()); - assertEquals("rId1", sheets[0].getId()); - assertEquals("rId2", sheets[1].getId()); - assertEquals("rId3", sheets[2].getId()); - - // Now get those objects - assertNotNull(xml.getSheet(sheets[0])); - assertNotNull(xml.getSheet(sheets[1])); - assertNotNull(xml.getSheet(sheets[2])); - } - - public void testMetadataBasics() throws Exception { - HSSFXML xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - assertNotNull(xml.getCoreProperties()); - assertNotNull(xml.getExtendedProperties()); - - assertEquals("Microsoft Excel", xml.getExtendedProperties().getApplication()); - assertEquals(0, xml.getExtendedProperties().getCharacters()); - assertEquals(0, xml.getExtendedProperties().getLines()); - - assertEquals(null, xml.getCoreProperties().getTitleProperty().getValue()); - assertEquals(null, xml.getCoreProperties().getSubjectProperty().getValue()); - } - - public void testSharedStringBasics() throws Exception { - HSSFXML xml = new HSSFXML( - HXFDocument.openPackage(sampleFile) - ); - assertNotNull(xml._getSharedStringsTable()); - - SharedStringsTable sst = xml._getSharedStringsTable(); - assertEquals(10, sst.size()); - - assertEquals("Lorem", sst.get(0)); - for(int i=0; i 0); - - // Check sheet names - assertTrue(text.startsWith("Sheet1")); - assertTrue(text.endsWith("Sheet3\n")); - - // Now without, will have text - extractor.setIncludeSheetNames(false); - text = extractor.getText(); - assertEquals( - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\t4995\n" + - "\n\n", text); - - // Now get formulas not their values - extractor.setFormulasNotResults(true); - text = extractor.getText(); - assertEquals( - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\tSUM(B1:B9)\n" + - "\n\n", text); - - // With sheet names too - extractor.setIncludeSheetNames(true); - text = extractor.getText(); - assertEquals( - "Sheet1\n" + - "Lorem\t111\n" + - "ipsum\t222\n" + - "dolor\t333\n" + - "sit\t444\n" + - "amet\t555\n" + - "consectetuer\t666\n" + - "adipiscing\t777\n" + - "elit\t888\n" + - "Nunc\t999\n" + - "at\tSUM(B1:B9)\n\n" + - "Sheet2\n\n" + - "Sheet3\n" - , text); - } - - public void testGetComplexText() throws Exception { - new HXFExcelExtractor(xmlB.getPackage()); - new HXFExcelExtractor(new HSSFXMLWorkbook(xmlB)); - - HXFExcelExtractor extractor = - new HXFExcelExtractor(xmlB.getPackage()); - extractor.getText(); - - String text = extractor.getText(); - assertTrue(text.length() > 0); - - // Might not have all formatting it should do! - // TODO decide if we should really have the "null" in there - assertTrue(text.startsWith( - "Avgtxfull\n" + - "null\t(iii) AVERAGE TAX RATES ON ANNUAL" - )); - } - - /** - * Test that we return pretty much the same as - * ExcelExtractor does, when we're both passed - * the same file, just saved as xls and xlsx - */ - public void testComparedToOLE2() throws Exception { - HXFExcelExtractor ooxmlExtractor = - new HXFExcelExtractor(simpleXLSX.getPackage()); - ExcelExtractor ole2Extractor = - new ExcelExtractor(simpleXLS); - - POITextExtractor[] extractors = - new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; - for (int i = 0; i < extractors.length; i++) { - POITextExtractor extractor = extractors[i]; - - String text = extractor.getText().replaceAll("[\r\t]", ""); - //System.out.println(text.length()); - //System.out.println(text); - assertTrue(text.startsWith("First Sheet\nTest spreadsheet\n2nd row2nd row 2nd column\n")); - Pattern pattern = Pattern.compile(".*13(\\.0+)?\\s+Sheet3.*", Pattern.DOTALL); - Matcher m = pattern.matcher(text); - assertTrue(m.matches()); - } - } -} diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java deleted file mode 100644 index 36adb497cd..0000000000 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hxf/TestDetectAsOOXML.java +++ /dev/null @@ -1,65 +0,0 @@ - -/* ==================================================================== - Licensed to the Apache Software Foundation (ASF) under one or more - contributor license agreements. See the NOTICE file distributed with - this work for additional information regarding copyright ownership. - The ASF licenses this file to You under the Apache License, Version 2.0 - (the "License"); you may not use this file except in compliance with - the License. You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - - Unless required by applicable law or agreed to in writing, software - distributed under the License is distributed on an "AS IS" BASIS, - WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - See the License for the specific language governing permissions and - limitations under the License. -==================================================================== */ - - -package org.apache.poi.hxf; - -import junit.framework.TestCase; -import java.io.*; - -/** - * Class to test that HXF correctly detects OOXML - * documents - */ -public class TestDetectAsOOXML extends TestCase -{ - public String dirname; - - public void setUp() { - dirname = System.getProperty("HSSF.testdata.path"); - } - - public void testOpensProperly() throws Exception - { - File f = new File(dirname + "/sample.xlsx"); - - HXFDocument.openPackage(f); - } - - public void testDetectAsPOIFS() throws Exception { - InputStream in; - - // ooxml file is - in = new PushbackInputStream( - new FileInputStream(dirname + "/SampleSS.xlsx"), 10 - ); - assertTrue(HXFDocument.hasOOXMLHeader(in)); - - // xls file isn't - in = new PushbackInputStream( - new FileInputStream(dirname + "/SampleSS.xls"), 10 - ); - assertFalse(HXFDocument.hasOOXMLHeader(in)); - - // text file isn't - in = new PushbackInputStream( - new FileInputStream(dirname + "/SampleSS.txt"), 10 - ); - assertFalse(HXFDocument.hasOOXMLHeader(in)); - } -}