]> source.dussan.org Git - poi.git/commitdiff
A few small updates to HSSFXML, and start on HWPFXML
authorNick Burch <nick@apache.org>
Sun, 30 Dec 2007 16:21:22 +0000 (16:21 +0000)
committerNick Burch <nick@apache.org>
Sun, 30 Dec 2007 16:21:22 +0000 (16:21 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@607554 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java
src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java
src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java [new file with mode: 0644]
src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java
src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java
src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java
src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java [new file with mode: 0644]

index b85ec33732a86c7b8d72f72cf7b06bf7d1f6fa10..bf2b1b1131cf9eb27c055c6861ace07b66636cbe 100644 (file)
@@ -33,6 +33,11 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument;
 /**
  * Experimental class to do low level processing
  *  of xlsx files.
+ *  
+ * If you are using these low level classes, then you
+ *  will almost certainly need to refer to the OOXML
+ *  specifications from
+ *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
  * 
  * WARNING - APIs expected to change rapidly
  */
index d2092c422b907db904d401439c58e2fb6e0c2188..d91f049364cdfb1862106fc5cf19ab87b6f7b6be 100644 (file)
@@ -26,7 +26,6 @@ import org.apache.xmlbeans.XmlException;
 import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellFormula;
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
 import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java
new file mode 100644 (file)
index 0000000..66bba7e
--- /dev/null
@@ -0,0 +1,92 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.hxf.HXFDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
+
+/**
+ * Experimental class to do low level processing
+ *  of docx files.
+ * 
+ * If you are using these low level classes, then you
+ *  will almost certainly need to refer to the OOXML
+ *  specifications from
+ *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ *  
+ * WARNING - APIs expected to change rapidly
+ */
+public class HWPFXML extends HXFDocument {
+       /** Content type of the main document part */
+       public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
+       /** Content type of a footer part */
+       public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
+       /** Content type of a header part */
+       public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
+       /** Content type of the styles part */
+       public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
+       /** Relationship type used to find the styles part from the main document */
+       public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
+       
+       /** The parsed main document part */
+       private DocumentDocument wordDoc;
+       
+       /**
+        * Hands the package to the parent class along with the
+        *  main document content type, then parses the base part
+        *  as a WordprocessingML document.
+        * @param container The OOXML package holding the docx file
+        * @throws OpenXML4JException if the package cannot be processed
+        * @throws IOException if the base part cannot be read or its stream closed
+        * @throws XmlException if the base part is not a valid document
+        */
+       public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
+               super(container, MAIN_CONTENT_TYPE);
+               
+               InputStream docStream = basePart.getInputStream();
+               try {
+                       wordDoc = DocumentDocument.Factory.parse(docStream);
+               } finally {
+                       // Release the part's stream whether or not parsing worked
+                       docStream.close();
+               }
+       }
+       
+       /**
+        * Returns the low level document base object
+        */
+       public CTDocument1 getDocument() {
+               return wordDoc.getDocument();
+       }
+       
+       /**
+        * Returns the low level body of the document
+        */
+       public CTBody getDocumentBody() {
+               return getDocument().getBody();
+       }
+       
+       /**
+        * Returns the styles object used. The styles part is
+        *  located via the {@link #STYLES_RELATION_TYPE}
+        *  relationship of the base part, and is parsed afresh
+        *  on every call.
+        * @throws IllegalStateException if the relationship lookup fails,
+        *  or if there isn't exactly one styles part
+        * @throws XmlException if the styles part cannot be parsed
+        * @throws IOException if the styles part cannot be read or its stream closed
+        */
+       public CTStyles getStyle() throws XmlException, IOException {
+               PackagePart[] parts;
+               try {
+                       parts = getRelatedByType(STYLES_RELATION_TYPE);
+               } catch(InvalidFormatException e) {
+                       throw new IllegalStateException(e);
+               }
+               if(parts.length != 1) {
+                       throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
+               }
+               
+               InputStream stylesStream = parts[0].getInputStream();
+               try {
+                       StylesDocument sd =
+                               StylesDocument.Factory.parse(stylesStream);
+                       return sd.getStyles();
+               } finally {
+                       // Release the part's stream whether or not parsing worked
+                       stylesStream.close();
+               }
+       }
+}
index a7a17f942138ad2911eabe75616fa7372b652bd6..b9fdfab36d2861d80a66b45eaf93fb0043fb7a75 100644 (file)
@@ -21,6 +21,7 @@ import java.io.IOException;
 import java.util.ArrayList;
 
 import org.apache.poi.POIXMLDocument;
+import org.apache.xmlbeans.XmlException;
 import org.dom4j.Document;
 import org.dom4j.DocumentException;
 import org.dom4j.io.SAXReader;
@@ -31,7 +32,10 @@ import org.openxml4j.opc.PackageAccess;
 import org.openxml4j.opc.PackagePart;
 import org.openxml4j.opc.PackagePartName;
 import org.openxml4j.opc.PackageRelationship;
+import org.openxml4j.opc.PackageRelationshipCollection;
 import org.openxml4j.opc.PackagingURIHelper;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
 
 /**
  * Parent class of the low level interface to  
@@ -40,6 +44,11 @@ import org.openxml4j.opc.PackagingURIHelper;
  *  extends {@link POIXMLDocument}, unless they really
  *  do need to get low level access to the files.
  *  
+ * If you are using these low level classes, then you
+ *  will almost certainly need to refer to the OOXML
+ *  specifications from
+ *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ *  
  * WARNING - APIs expected to change rapidly
  */
 public abstract class HXFDocument {
@@ -81,14 +90,22 @@ public abstract class HXFDocument {
        
        /**
         * Retrieves the PackagePart for the given relation
-        *  id. This will normally come from a  r:id attribute
+        *  id. This will normally come from a r:id attribute
         *  on part of the base document. 
         * @param partId The r:id pointing to the other PackagePart
         */
        protected PackagePart getRelatedPackagePart(String partId) {
                PackageRelationship rel =
                        basePart.getRelationship(partId);
+               // NOTE(review): rel is handed on unchecked. If no relationship
+               //  matches partId and getRelationship returns null, then
+               //  getPackagePart would fail on rel.getTargetURI() - confirm
+               //  what getRelationship does for an unknown id.
+               return getPackagePart(rel);
+       }
 
+       /**
+        * Retrieves the PackagePart for the given Relationship
+        *  object. Normally you'll want to go via a content type
+        *  or r:id to get one of those.
+        */
+       protected PackagePart getPackagePart(PackageRelationship rel) {
                PackagePartName relName;
                try {
                        relName = PackagingURIHelper.createPartName(rel.getTargetURI());
@@ -102,6 +119,24 @@ public abstract class HXFDocument {
                }
                return part;
        }
+       
+       /**
+        * Finds the relationships of the base document which
+        *  have the given relationship type, and resolves each
+        *  of them to the PackagePart it points at.
+        * @param contentType The relationship type to look for. Despite
+        *  the name, this is handed to getRelationshipsByType, so it
+        *  is a relationship type and not a part content type.
+        * @return One PackagePart per matching relationship, in the
+        *  order the relationships are iterated. Empty if none match.
+        */
+       protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
+               PackageRelationshipCollection matches =
+                       basePart.getRelationshipsByType(contentType);
+               PackagePart[] related = new PackagePart[matches.size()];
+               
+               int idx = 0;
+               for (PackageRelationship match : matches) {
+                       related[idx++] = getPackagePart(match);
+               }
+               return related;
+       }
 
        /**
         * Get the package container.
@@ -111,6 +146,26 @@ public abstract class HXFDocument {
                return container;
        }
        
+       /**
+        * Get the document properties (extended ooxml properties).
+        * The properties part is found via the extended-properties
+        *  relationship of the package, and is parsed afresh on
+        *  every call.
+        * @return The extended properties, or null if the package
+        *  has no extended-properties relationship
+        * @throws IllegalStateException if the package has more than
+        *  one extended-properties relationship
+        * @throws XmlException if the properties part cannot be parsed
+        * @throws IOException if the properties part cannot be read or its stream closed
+        */
+       public CTProperties getDocumentProperties() throws OpenXML4JException, XmlException, IOException {
+               PackageRelationshipCollection docProps =
+                       container.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
+               if(docProps.size() == 0) {
+                       return null;
+               }
+               if(docProps.size() > 1) {
+                       throw new IllegalStateException("Found " + docProps.size() + " relations for the extended properties, should only ever be one!");
+               }
+               PackageRelationship rel = docProps.getRelationship(0);
+               PackagePart propsPart = getPackagePart(rel);
+               
+               // Type is fully qualified so that no extra import is needed
+               java.io.InputStream propsStream = propsPart.getInputStream();
+               try {
+                       PropertiesDocument props =
+                               PropertiesDocument.Factory.parse(propsStream);
+                       return props.getProperties();
+               } finally {
+                       // Release the part's stream whether or not parsing worked
+                       propsStream.close();
+               }
+       }
+       
        /**
         * Returns an opened OOXML Package for the supplied File
         * @param f File to open
index 3b0a540d3733ab5e9a9660c018b7ee456a44c93a..032b74b6f16328fdf05ab3f707d48360279075d6 100644 (file)
@@ -102,6 +102,7 @@ public class HXFLister {
                disp.println(indent+"Relationship:");
                disp.println(indent+"\tFrom: "+ rel.getSourceURI());
                disp.println(indent+"\tTo:   " + rel.getTargetURI());
+               disp.println(indent+"\tID:   " + rel.getId());
                disp.println(indent+"\tMode: " + rel.getTargetMode());
                disp.println(indent+"\tType: " + rel.getRelationshipType());
        }
index 4634060399c51cbdc501ec17a5a8913e13dc1f9f..c4b21dffabd36a1c3d9ca22152b37a9c01e1a9b8 100644 (file)
@@ -1,3 +1,19 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
 package org.apache.poi.hssf;
 
 import java.io.File;
@@ -93,4 +109,15 @@ public class TestHSSFXML extends TestCase {
                assertNotNull(xml.getSheet(sheets[1]));
                assertNotNull(xml.getSheet(sheets[2]));
        }
+       
+       /**
+        * Checks that the extended document properties of the
+        *  sample file can be fetched, and hold the expected
+        *  application name, character count and line count.
+        */
+       public void testMetadataBasics() throws Exception {
+               HSSFXML xml = new HSSFXML(HXFDocument.openPackage(sampleFile));
+               
+               // The properties must be there before their values are checked
+               assertNotNull(xml.getDocumentProperties());
+               
+               assertEquals("Microsoft Excel", xml.getDocumentProperties().getApplication());
+               assertEquals(0, xml.getDocumentProperties().getCharacters());
+               assertEquals(0, xml.getDocumentProperties().getLines());
+       }
 }
\ No newline at end of file
diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java
new file mode 100644 (file)
index 0000000..53f2b02
--- /dev/null
@@ -0,0 +1,92 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf;
+
+import java.io.File;
+
+import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hxf.HXFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests for the low level docx support in {@link HWPFXML}.
+ * The sample files are looked up in the directory given by
+ *  the HWPF.testdata.path system property.
+ */
+public class TestHWPFXML extends TestCase {
+       private File sampleFile;
+       private File complexFile;
+
+       protected void setUp() throws Exception {
+               super.setUp();
+               
+               String dataDir = System.getProperty("HWPF.testdata.path");
+               sampleFile = new File(
+                               dataDir + File.separator + "sample.docx"
+               );
+               complexFile = new File(
+                               dataDir + File.separator + "IllustrativeCases.docx"
+               );
+       }
+
+       /**
+        * The sample package must hold a part with the main
+        *  document content type
+        */
+       public void testContainsMainContentType() throws Exception {
+               Package pack = HXFDocument.openPackage(sampleFile);
+               
+               boolean found = false;
+               for(PackagePart part : pack.getParts()) {
+                       if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
+                               found = true;
+                               // One match is all that's needed
+                               break;
+                       }
+               }
+               assertTrue("No part with the main document content type in " + sampleFile, found);
+       }
+
+       /**
+        * Both sample files must open as packages and as HWPFXML
+        *  documents, and expose their document, body and styles
+        */
+       public void testOpen() throws Exception {
+               // Check the packages themselves open first
+               HXFDocument.openPackage(sampleFile);
+               HXFDocument.openPackage(complexFile);
+               
+               HWPFXML xml;
+               
+               // Simple file
+               xml = new HWPFXML(
+                               HXFDocument.openPackage(sampleFile)
+               );
+               // Check it has key parts
+               assertNotNull(xml.getDocument());
+               assertNotNull(xml.getDocumentBody());
+               assertNotNull(xml.getStyle());
+               
+               // Complex file
+               xml = new HWPFXML(
+                               HXFDocument.openPackage(complexFile)
+               );
+               assertNotNull(xml.getDocument());
+               assertNotNull(xml.getDocumentBody());
+               assertNotNull(xml.getStyle());
+       }
+       
+       /**
+        * The extended document properties of the sample file
+        *  must be readable and hold the expected values
+        */
+       public void testMetadataBasics() throws Exception {
+               HWPFXML xml = new HWPFXML(
+                               HXFDocument.openPackage(sampleFile)
+               );
+               assertNotNull(xml.getDocumentProperties());
+               
+               assertEquals("Microsoft Office Word", xml.getDocumentProperties().getApplication());
+               assertEquals(1315, xml.getDocumentProperties().getCharacters());
+               assertEquals(10, xml.getDocumentProperties().getLines());
+       }
+}