/**
* Experimental class to do low level processing
* of xlsx files.
+ *
+ * If you are using these low level classes, then you
+ * will almost certainly need to refer to the OOXML
+ * specifications from
+ * http://www.ecma-international.org/publications/standards/Ecma-376.htm
*
* WARNING - APIs expected to change rapidly
*/
import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCell;
-import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTCellFormula;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTRow;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTSheet;
import org.openxmlformats.schemas.spreadsheetml.x2006.main.CTWorksheet;
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.hxf.HXFDocument;
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTBody;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTDocument1;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.CTStyles;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.DocumentDocument;
+import org.openxmlformats.schemas.wordprocessingml.x2006.main.StylesDocument;
+
+/**
+ * Experimental class to do low level processing
+ *  of docx files.
+ *
+ * If you are using these low level classes, then you
+ *  will almost certainly need to refer to the OOXML
+ *  specifications from
+ *  http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ *
+ * WARNING - APIs expected to change rapidly
+ */
+public class HWPFXML extends HXFDocument {
+    /** Content type of the main document part; passed to the HXFDocument constructor. */
+    public static final String MAIN_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.document.main+xml";
+    /** Content type string for footer parts. */
+    public static final String FOOTER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.footer+xml";
+    /** Content type string for header parts. */
+    public static final String HEADER_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.header+xml";
+    /** Content type string for the styles part. */
+    public static final String STYLES_CONTENT_TYPE = "application/vnd.openxmlformats-officedocument.wordprocessingml.styles+xml";
+    /** Relationship type used by {@link #getStyle()} to locate the styles part. */
+    public static final String STYLES_RELATION_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/styles";
+
+    /** Parsed form of the base part, populated by the constructor. */
+    private DocumentDocument wordDoc;
+
+    /**
+     * Opens the supplied package as a word processing document
+     *  and parses its base part.
+     * @param container The OOXML package holding the document
+     * @throws OpenXML4JException declared for the package handling
+     *  done by the parent constructor
+     * @throws IOException if the base part cannot be read or its
+     *  stream cannot be closed
+     * @throws XmlException if the base part is not a parseable
+     *  DocumentDocument
+     */
+    public HWPFXML(Package container) throws OpenXML4JException, IOException, XmlException {
+        super(container, MAIN_CONTENT_TYPE);
+
+        // We opened the part stream, so we release it once the
+        //  parse has finished (successfully or not)
+        InputStream in = basePart.getInputStream();
+        try {
+            wordDoc = DocumentDocument.Factory.parse(in);
+        } finally {
+            in.close();
+        }
+    }
+
+    /**
+     * Returns the low level document base object
+     */
+    public CTDocument1 getDocument() {
+        return wordDoc.getDocument();
+    }
+
+    /**
+     * Returns the low level body of the document
+     */
+    public CTBody getDocumentBody() {
+        return getDocument().getBody();
+    }
+
+    /**
+     * Returns the styles object used. The styles part is looked
+     *  up through {@link #STYLES_RELATION_TYPE} and parsed afresh
+     *  on every call.
+     * @return the styles held by the single related styles part
+     * @throws IllegalStateException if the relationship lookup
+     *  reports an invalid format, or if the number of related
+     *  styles parts is anything other than one
+     * @throws XmlException if the styles part cannot be parsed
+     * @throws IOException if the styles part cannot be read or
+     *  its stream cannot be closed
+     */
+    public CTStyles getStyle() throws XmlException, IOException {
+        PackagePart[] parts;
+        try {
+            parts = getRelatedByType(STYLES_RELATION_TYPE);
+        } catch(InvalidFormatException e) {
+            throw new IllegalStateException(e);
+        }
+        if(parts.length != 1) {
+            throw new IllegalStateException("Expecting one Styles document part, but found " + parts.length);
+        }
+
+        // Close the part stream ourselves rather than leaving
+        //  it open after the parse
+        InputStream in = parts[0].getInputStream();
+        try {
+            StylesDocument sd = StylesDocument.Factory.parse(in);
+            return sd.getStyles();
+        } finally {
+            in.close();
+        }
+    }
+}
import java.util.ArrayList;
import org.apache.poi.POIXMLDocument;
+import org.apache.xmlbeans.XmlException;
import org.dom4j.Document;
import org.dom4j.DocumentException;
import org.dom4j.io.SAXReader;
import org.openxml4j.opc.PackagePart;
import org.openxml4j.opc.PackagePartName;
import org.openxml4j.opc.PackageRelationship;
+import org.openxml4j.opc.PackageRelationshipCollection;
import org.openxml4j.opc.PackagingURIHelper;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
+import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
/**
* Parent class of the low level interface to
* extends {@link POIXMLDocument}, unless they really
* do need to get low level access to the files.
*
+ * If you are using these low level classes, then you
+ * will almost certainly need to refer to the OOXML
+ * specifications from
+ * http://www.ecma-international.org/publications/standards/Ecma-376.htm
+ *
* WARNING - APIs expected to change rapidly
*/
public abstract class HXFDocument {
/**
* Retrieves the PackagePart for the given relation
- * id. This will normally come from a r:id attribute
+ * id. This will normally come from a r:id attribute
* on part of the base document.
* @param partId The r:id pointing to the other PackagePart
+ * @return the PackagePart that getPackagePart(PackageRelationship)
+ *  resolves for the relationship the base part holds under
+ *  that id
+ * NOTE(review): the looked-up relationship is passed on without
+ *  a null check - confirm basePart.getRelationship() cannot
+ *  return null for an unknown id
*/
protected PackagePart getRelatedPackagePart(String partId) {
PackageRelationship rel =
basePart.getRelationship(partId);
+ return getPackagePart(rel);
+ }
+ /**
+ * Retrieves the PackagePart for the given Relationship
+ * object. Normally you'll want to go via a content type
+ * or r:id to get one of those.
+ */
+ protected PackagePart getPackagePart(PackageRelationship rel) {
PackagePartName relName;
try {
relName = PackagingURIHelper.createPartName(rel.getTargetURI());
}
return part;
}
+
+    /**
+     * Collects the PackageParts that the base part points at
+     *  through relationships of one particular type.
+     * The argument is handed straight to
+     *  basePart.getRelationshipsByType(), so despite the
+     *  parameter name it is treated as a relationship type
+     *  (HWPFXML passes its STYLES_RELATION_TYPE here).
+     * @param contentType The relationship type to look up
+     * @return an array with one PackagePart for each relationship
+     *  in the returned collection, in iteration order
+     * @throws InvalidFormatException propagated from the
+     *  underlying package lookups
+     */
+    protected PackagePart[] getRelatedByType(String contentType) throws InvalidFormatException {
+        PackageRelationshipCollection matches = basePart.getRelationshipsByType(contentType);
+        PackagePart[] resolved = new PackagePart[matches.size()];
+
+        // Resolve each relationship into the next free slot
+        int next = 0;
+        for (PackageRelationship match : matches) {
+            resolved[next++] = getPackagePart(match);
+        }
+        return resolved;
+    }
/**
* Get the package container.
return container;
}
+    /**
+     * Get the document properties (extended ooxml properties).
+     * The properties part is located through the package level
+     *  extended-properties relationship and parsed on every call.
+     * @return the parsed extended properties, or null if the
+     *  package has no extended-properties relationship
+     * @throws IllegalStateException if the package has more than
+     *  one extended-properties relationship
+     * @throws OpenXML4JException declared for the package
+     *  relationship / part lookups
+     * @throws XmlException if the properties part cannot be parsed
+     * @throws IOException if the properties part cannot be read
+     *  or its stream cannot be closed
+     */
+    public CTProperties getDocumentProperties() throws OpenXML4JException, XmlException, IOException {
+        PackageRelationshipCollection docProps =
+            container.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties");
+        if(docProps.size() == 0) {
+            return null;
+        }
+        if(docProps.size() > 1) {
+            throw new IllegalStateException("Found " + docProps.size() + " relations for the extended properties, should only ever be one!");
+        }
+        PackageRelationship rel = docProps.getRelationship(0);
+        PackagePart propsPart = getPackagePart(rel);
+
+        // Release the part stream once parsing is done. The type is
+        //  fully qualified so no extra import is required here.
+        java.io.InputStream in = propsPart.getInputStream();
+        try {
+            PropertiesDocument props = PropertiesDocument.Factory.parse(in);
+            return props.getProperties();
+        } finally {
+            in.close();
+        }
+    }
+
/**
* Returns an opened OOXML Package for the supplied File
* @param f File to open
disp.println(indent+"Relationship:");
disp.println(indent+"\tFrom: "+ rel.getSourceURI());
disp.println(indent+"\tTo: " + rel.getTargetURI());
+ disp.println(indent+"\tID: " + rel.getId());
disp.println(indent+"\tMode: " + rel.getTargetMode());
disp.println(indent+"\tType: " + rel.getRelationshipType());
}
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
package org.apache.poi.hssf;
import java.io.File;
assertNotNull(xml.getSheet(sheets[1]));
assertNotNull(xml.getSheet(sheets[2]));
}
+
+    /**
+     * Checks that the extended document properties of the sample
+     *  file can be read, and that they hold the expected
+     *  application name and character / line counts.
+     */
+    public void testMetadataBasics() throws Exception {
+        HSSFXML workbook = new HSSFXML(HXFDocument.openPackage(sampleFile));
+
+        // The properties must be present before their values are checked
+        assertNotNull(workbook.getDocumentProperties());
+
+        assertEquals("Microsoft Excel", workbook.getDocumentProperties().getApplication());
+        // This sample is expected to report zero characters and lines
+        assertEquals(0, workbook.getDocumentProperties().getCharacters());
+        assertEquals(0, workbook.getDocumentProperties().getLines());
+    }
}
\ No newline at end of file
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hwpf;
+
+import java.io.File;
+
+import org.apache.poi.hssf.HSSFXML;
+import org.apache.poi.hxf.HXFDocument;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+
+import junit.framework.TestCase;
+
+/**
+ * Exercises the low level HWPFXML class against the
+ *  sample.docx and IllustrativeCases.docx test documents.
+ */
+public class TestHWPFXML extends TestCase {
+    private File sampleFile;
+    private File complexFile;
+
+    /**
+     * Locates both test documents beneath the directory named
+     *  by the HWPF.testdata.path system property.
+     */
+    protected void setUp() throws Exception {
+        super.setUp();
+
+        sampleFile = testFile("sample.docx");
+        complexFile = testFile("IllustrativeCases.docx");
+    }
+
+    /**
+     * Builds the File for a named document in the test data directory.
+     */
+    private File testFile(String name) {
+        return new File(
+            System.getProperty("HWPF.testdata.path") + File.separator + name
+        );
+    }
+
+    /**
+     * Opens a document and checks its document, body and styles
+     *  objects are all available.
+     */
+    private void assertKeyPartsPresent(File docx) throws Exception {
+        HWPFXML xml = new HWPFXML(HXFDocument.openPackage(docx));
+        assertNotNull(xml.getDocument());
+        assertNotNull(xml.getDocumentBody());
+        assertNotNull(xml.getStyle());
+    }
+
+    /**
+     * At least one part of the sample package must carry the
+     *  main document content type.
+     */
+    public void testContainsMainContentType() throws Exception {
+        boolean sawMainPart = false;
+        for (PackagePart candidate : HXFDocument.openPackage(sampleFile).getParts()) {
+            if (candidate.getContentType().equals(HWPFXML.MAIN_CONTENT_TYPE)) {
+                sawMainPart = true;
+            }
+            // NOTE(review): diagnostic output only, nothing asserts on it
+            System.out.println(candidate);
+        }
+        assertTrue(sawMainPart);
+    }
+
+    /**
+     * Both documents must open as plain packages, and then as
+     *  HWPFXML documents exposing their key parts.
+     */
+    public void testOpen() throws Exception {
+        // Plain package open, results deliberately unused
+        HXFDocument.openPackage(sampleFile);
+        HXFDocument.openPackage(complexFile);
+
+        // Simple file first, then the complex one
+        assertKeyPartsPresent(sampleFile);
+        assertKeyPartsPresent(complexFile);
+    }
+
+    /**
+     * Checks the extended document properties of the sample
+     *  document: application name, character and line counts.
+     */
+    public void testMetadataBasics() throws Exception {
+        Package pack = HXFDocument.openPackage(sampleFile);
+        HWPFXML doc = new HWPFXML(pack);
+
+        // The properties must be present before their values are checked
+        assertNotNull(doc.getDocumentProperties());
+
+        assertEquals("Microsoft Office Word", doc.getDocumentProperties().getApplication());
+        assertEquals(1315, doc.getDocumentProperties().getCharacters());
+        assertEquals(10, doc.getDocumentProperties().getLines());
+    }
+}