From: Nick Burch Date: Sun, 30 Dec 2007 16:21:22 +0000 (+0000) Subject: A few small updates to HSSFXML, and start on HWPFXML X-Git-Tag: REL_3_0_3_BETA1~236 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=9b96000af0c686768e57dacc77358f6e7be1e0c2;p=poi.git A few small updates to HSSFXML, and start on HWPFXML git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@607554 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java index b85ec33732..bf2b1b1131 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hssf/HSSFXML.java @@ -33,6 +33,11 @@ import org.openxmlformats.schemas.spreadsheetml.x2006.main.WorksheetDocument; /** * Experimental class to do low level processing * of xlsx files. + * + * If you are using these low level classes, then you + * will almost certainly need to refer to the OOXML + * specifications from + * http://www.ecma-international.org/publications/standards/Ecma-376.htm * * WARNING - APIs expected to change rapidly */ diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java b/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java index d2092c422b..d91f049364 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hssf/extractor/HXFExcelExtractor.java @@ -26,7 +26,6 @@ import org.apache.xmlbeans.XmlException; import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell; -import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellFormula; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow; import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet; import 
org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet; diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java b/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java new file mode 100644 index 0000000000..66bba7ee1b --- /dev/null +++ b/src/scratchpad/ooxml-src/org/apache/poi/hwpf/HWPFXML.java @@ -0,0 +1,92 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.hwpf; + +import java.io.IOException; + +import org.apache.poi.hxf.HXFDocument; +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.InvalidFormatException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument; +import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument; + +/** + * Experimental class to do low level processing + * of docx files. + * + * If you are using these low level classes, then you + * will almost certainly need to refer to the OOXML + * specifications from + * http://www.ecma-international.org/publications/standards/Ecma-376.htm + * + * WARNING - APIs expected to change rapidly + */ +public class HWPFXML extends HXFDocument { + public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml"; + public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml"; + public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml"; + public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml"; + public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles"; + + private DocumentDocument wordDoc; + + public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException { + super(container, MAIN_CONTENT_TYPE); + + wordDoc = + 
DocumentDocument.Factory.parse(basePart.getInputStream()); + } + + /** + * Returns the low level document base object of the DocumentDocument that the constructor parsed from the base part + */ + public CTDocument1 getDocument() { + return wordDoc.getDocument(); + } + + /** + * Returns the low level body of the document, i.e. the body of {@link #getDocument()} + */ + public CTBody getDocumentBody() { + return getDocument().getBody(); + } + + /** + * Returns the styles object used, parsed on every call from the part related to the base document by STYLES_RELATION_TYPE. Throws IllegalStateException if the relation lookup raises InvalidFormatException, or if the number of such parts is not exactly one + */ + public CTStyles getStyle() throws XmlException, IOException { + PackagePart[] parts; + try { + parts = getRelatedByType(STYLES_RELATION_TYPE); + } catch(InvalidFormatException e) { + throw new IllegalStateException(e); + } + if(parts.length != 1) { + throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length); + } + + StylesDocument sd = + StylesDocument.Factory.parse(parts[0].getInputStream()); + return sd.getStyles(); + } +} diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java b/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java index a7a17f9421..b9fdfab36d 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hxf/HXFDocument.java @@ -21,6 +21,7 @@ import java.io.IOException; import java.util.ArrayList; import org.apache.poi.POIXMLDocument; +import org.apache.xmlbeans.XmlException; import org.dom4j.Document; import org.dom4j.DocumentException; import org.dom4j.io.SAXReader; @@ -31,7 +32,10 @@ import org.openxml4j.opc.PackageAccess; import org.openxml4j.opc.PackagePart; import org.openxml4j.opc.PackagePartName; import org.openxml4j.opc.PackageRelationship; +import org.openxml4j.opc.PackageRelationshipCollection; import org.openxml4j.opc.PackagingURIHelper; +import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; +import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument; /** * Parent class of the low level interface to @@ -40,6 +44,11 @@ import org.openxml4j.opc.PackagingURIHelper; * extends 
{@link POIXMLDocument}, unless they really * do need to get low level access to the files. * + * If you are using these low level classes, then you + * will almost certainly need to refer to the OOXML + * specifications from + * http://www.ecma-international.org/publications/standards/Ecma-376.htm + * * WARNING - APIs expected to change rapidly */ public abstract class HXFDocument { @@ -81,14 +90,22 @@ public abstract class HXFDocument { /** * Retrieves the PackagePart for the given relation - * id. This will normally come from a r:id attribute + * id. This will normally come from a r:id attribute * on part of the base document. * @param partId The r:id pointing to the other PackagePart */ protected PackagePart getRelatedPackagePart(String partId) { PackageRelationship rel = basePart.getRelationship(partId); + return getPackagePart(rel); + } + /** + * Retrieves the PackagePart for the given Relationship + * object. Normally you'll want to go via a relationship type + * or r:id to get one of those. + */ + protected PackagePart getPackagePart(PackageRelationship rel) { PackagePartName relName; try { relName = PackagingURIHelper.createPartName(rel.getTargetURI()); @@ -102,6 +119,24 @@ public abstract class HXFDocument { } return part; } + + /** + * Retrieves all the PackageParts which are defined as + * relationships of the base document with the + * specified relationship type. Despite its name, the contentType argument is handed to getRelationshipsByType, so pass a relationship type URI rather than a part content type. + */ + protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException { + PackageRelationshipCollection partsC = + basePart.getRelationshipsByType(contentType); + + PackagePart[] parts = new PackagePart[partsC.size()]; + int count = 0; + for (PackageRelationship rel : partsC) { + parts[count] = getPackagePart(rel); + count++; + } + return parts; + } /** * Get the package container. 
@@ -111,6 +146,26 @@ public abstract class HXFDocument { return container; } + /** + * Get the document properties (extended ooxml properties), looked up through the extended-properties relationship of the package container. Returns null when the container has no such relationship; throws IllegalStateException when it has more than one. The properties part is parsed again on every call. + */ + public CTProperties getDocumentProperties() throws OpenXML4JException, XmlException, IOException { + PackageRelationshipCollection docProps = + container.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"); + if(docProps.size() == 0) { + return null; + } + if(docProps.size() > 1) { + throw new IllegalStateException("Found " + docProps.size() + " relations for the extended properties, should only ever be one!"); + } + PackageRelationship rel = docProps.getRelationship(0); + PackagePart propsPart = getPackagePart(rel); + + PropertiesDocument props = PropertiesDocument.Factory.parse( + propsPart.getInputStream()); + return props.getProperties(); + } + /** * Returns an opened OOXML Package for the supplied File * @param f File to open diff --git a/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java b/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java index 3b0a540d37..032b74b6f1 100644 --- a/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java +++ b/src/scratchpad/ooxml-src/org/apache/poi/hxf/dev/HXFLister.java @@ -102,6 +102,7 @@ public class HXFLister { disp.println(indent+"Relationship:"); disp.println(indent+"\tFrom: "+ rel.getSourceURI()); disp.println(indent+"\tTo: " + rel.getTargetURI()); + disp.println(indent+"\tID: " + rel.getId()); disp.println(indent+"\tMode: " + rel.getTargetMode()); disp.println(indent+"\tType: " + rel.getRelationshipType()); } diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java index 4634060399..c4b21dffab 100644 --- a/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java +++ b/src/scratchpad/ooxml-testcases/org/apache/poi/hssf/TestHSSFXML.java @@ -1,3 +1,19 @@ +/* 
==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ package org.apache.poi.hssf; import java.io.File; @@ -93,4 +109,15 @@ public class TestHSSFXML extends TestCase { assertNotNull(xml.getSheet(sheets[1])); assertNotNull(xml.getSheet(sheets[2])); } + + public void testMetadataBasics() throws Exception { // extended properties of the sample file: present, application name, character and line counts + HSSFXML xml = new HSSFXML( + HXFDocument.openPackage(sampleFile) + ); + assertNotNull(xml.getDocumentProperties()); + + assertEquals("Microsoft Excel", xml.getDocumentProperties().getApplication()); + assertEquals(0, xml.getDocumentProperties().getCharacters()); + assertEquals(0, xml.getDocumentProperties().getLines()); + } } \ No newline at end of file diff --git a/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java b/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java new file mode 100644 index 0000000000..53f2b025ea --- /dev/null +++ b/src/scratchpad/ooxml-testcases/org/apache/poi/hwpf/TestHWPFXML.java @@ -0,0 +1,92 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license 
agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.hwpf; + +import java.io.File; + +import org.apache.poi.hssf.HSSFXML; // NOTE(review): not referenced anywhere in this test class +import org.apache.poi.hxf.HXFDocument; +import org.openxml4j.opc.Package; +import org.openxml4j.opc.PackagePart; + +import junit.framework.TestCase; + +public class TestHWPFXML extends TestCase { + private File sampleFile; + private File complexFile; + + protected void setUp() throws Exception { + super.setUp(); + + sampleFile = new File( + System.getProperty("HWPF.testdata.path") + + File.separator + "sample.docx" + ); + complexFile = new File( + System.getProperty("HWPF.testdata.path") + + File.separator + "IllustrativeCases.docx" + ); + } + + public void testContainsMainContentType() throws Exception { + Package pack = HXFDocument.openPackage(sampleFile); + + boolean found = false; + for(PackagePart part : pack.getParts()) { + if(part.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) { + found = true; + } + System.out.println(part); // NOTE(review): debug listing of every part; nothing is asserted on it + } + assertTrue(found); + } + + public void testOpen() throws Exception { + HXFDocument.openPackage(sampleFile); + HXFDocument.openPackage(complexFile); + + HWPFXML xml; + + // Simple file (sample.docx) + xml = new HWPFXML( + HXFDocument.openPackage(sampleFile) + ); + // Check it has key parts: document, body and styles + 
assertNotNull(xml.getDocument()); + assertNotNull(xml.getDocumentBody()); + assertNotNull(xml.getStyle()); + + // Complex file (IllustrativeCases.docx) must expose the same key parts + xml = new HWPFXML( + HXFDocument.openPackage(complexFile) + ); + assertNotNull(xml.getDocument()); + assertNotNull(xml.getDocumentBody()); + assertNotNull(xml.getStyle()); + } + + public void testMetadataBasics() throws Exception { + HWPFXML xml = new HWPFXML( + HXFDocument.openPackage(sampleFile) + ); + assertNotNull(xml.getDocumentProperties()); + + assertEquals("Microsoft Office Word", xml.getDocumentProperties().getApplication()); + assertEquals(1315, xml.getDocumentProperties().getCharacters()); + assertEquals(10, xml.getDocumentProperties().getLines()); + } +}