]> source.dussan.org Git - poi.git/commitdiff
A quick play with OOXML parsing. Uses XmlBeans and OpenXml4J to get at the data....
authorNick Burch <nick@apache.org>
Wed, 26 Dec 2007 17:47:27 +0000 (17:47 +0000)
committerNick Burch <nick@apache.org>
Wed, 26 Dec 2007 17:47:27 +0000 (17:47 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@606923 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java [new file with mode: 0644]
src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java [new file with mode: 0644]
src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java [new file with mode: 0644]
src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java [new file with mode: 0644]

diff --git a/src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/HXFDocument.java
new file mode 100644 (file)
index 0000000..427e377
--- /dev/null
@@ -0,0 +1,89 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+
+import org.dom4j.Document;
+import org.dom4j.DocumentException;
+import org.dom4j.io.SAXReader;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackageAccess;
+import org.openxml4j.opc.PackagePart;
+
+/**
+ * Parent class of the low level interface to  
+ *  all POI XML (OOXML) implementations.
+ * Normal users should probably deal with things that
+ *  extends {@link POIXMLDocument}, unless they really
+ *  do need to get low level access to the files.
+ *  
+ * WARNING - APIs expected to change rapidly
+ */
+public abstract class HXFDocument {
+       /**
+        * File package/container.
+        */
+       protected Package container;
+       /**
+        * The Package Part for our base document
+        */
+       protected PackagePart basePart;
+       /**
+        * The base document of this instance, eg Workbook for
+        *  xslsx
+        */
+       protected Document baseDocument;
+       
+       protected HXFDocument(Package container, String baseContentType) throws OpenXML4JException {
+               this.container = container;
+               
+               // Find the base document
+               ArrayList<PackagePart> baseParts =
+                       container.getPartsByContentType(baseContentType);
+               if(baseParts.size() != 1) {
+                       throw new OpenXML4JException("Expecting one entry with content type of " + baseContentType + ", but found " + baseParts.size());
+               }
+               basePart = baseParts.get(0);
+               
+               // And load it up
+               try {
+                       SAXReader reader = new SAXReader();
+                       baseDocument = reader.read(basePart.getInputStream());
+               } catch (DocumentException e) {
+                       throw new OpenXML4JException(e.getMessage());
+               } catch (IOException ioe) {
+                       throw new OpenXML4JException(ioe.getMessage());
+               }
+       }
+       
+       public static Package openPackage(File f) throws InvalidFormatException {
+               return Package.open(f.toString(), PackageAccess.READ_WRITE);
+       }
+
+       /**
+        * Get the package container.
+        * @return The package associated to this document.
+        */
+       public Package getPackage() {
+               return container;
+       }
+}
diff --git a/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/POIXMLDocument.java
new file mode 100644 (file)
index 0000000..a070e9f
--- /dev/null
@@ -0,0 +1,27 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+/**
+ * Parent class of all UserModel POI XML (ooxml)
+ *  implementations.
+ * Provides a similar function to {@link POIDocument},
+ *  for the XML based classes.
+ */
+public abstract class POIXMLDocument {
+       // TODO - nothing implemented yet, this is an empty placeholder
+}
diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
new file mode 100644 (file)
index 0000000..20707aa
--- /dev/null
@@ -0,0 +1,51 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hssf;
+
+import java.io.IOException;
+
+import org.apache.poi.HXFDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorkbook;
+import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorkbookDocument;
+
+/**
+ * Experimental class to do low level processing
+ *  of xlsx files.
+ * 
+ * WARNING - APIs expected to change rapidly
+ */
+public class HSSFXML extends HXFDocument {
+       public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet.main+xml";
+       public static final String SHEET_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.worksheet+xml";
+       public static final String SHARED_STRINGS_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.spreadsheetml.sharedStrings+xml";
+       
+       private WorkbookDocument workbookDoc;
+       
+       public HSSFXML(Package container) throws OpenXML4JException, IOException, XmlException {
+               super(container, MAIN_CONTENT_TYPE);
+               
+               workbookDoc =
+                       WorkbookDocument.Factory.parse(basePart.getInputStream());
+       }
+       
+       public CTWorkbook getWorkbook() {
+               return workbookDoc.getWorkbook();
+       }
+}
diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java
new file mode 100644 (file)
index 0000000..1013d4f
--- /dev/null
@@ -0,0 +1,68 @@
+package org.apache.poi.hssf;
+
+import java.io.File;
+
+import org.apache.poi.HXFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+public class TestHSSFXML extends TestCase {
+       /**
+        * Uses the old style schemas.microsoft.com schema uri
+        */
+       private File sampleFileBeta;
+       /**
+        * Uses the new style schemas.openxmlformats.org schema uri
+        */
+       private File sampleFile;
+
+       protected void setUp() throws Exception {
+               super.setUp();
+               
+               sampleFile = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "sample.xlsx"
+               );
+               sampleFileBeta = new File(
+                               System.getProperty("HSSF.testdata.path") +
+                               File.separator + "sample-beta.xlsx"
+               );
+       }
+       
+       public void testContainsMainContentType() throws Exception {
+               Package pack = HXFDocument.openPackage(sampleFile);
+               
+               boolean found = false;
+               for(PackagePart part : pack.getParts()) {
+                       if(part.getContentType().equals(HSSFXML.MAIN_CONTENT_TYPE)) {
+                               found = true;
+                       }
+                       System.out.println(part);
+               }
+               assertTrue(found);
+       }
+
+       public void testOpen() throws Exception {
+               HXFDocument.openPackage(sampleFile);
+               HXFDocument.openPackage(sampleFileBeta);
+               
+               HSSFXML xml;
+               
+               // With an old-style uri, as found in a file produced
+               //  with the office 2007 beta, will fail, as we don't
+               //  translate things 
+               try {
+                       xml = new HSSFXML(
+                                       HXFDocument.openPackage(sampleFileBeta)
+                       );
+                       fail();
+               } catch(Exception e) {}
+               
+               // With the finalised uri, should be fine
+               xml = new HSSFXML(
+                               HXFDocument.openPackage(sampleFile)
+               );
+       }
+}
\ No newline at end of file