aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2008-08-12 20:58:31 +0000
committerNick Burch <nick@apache.org>2008-08-12 20:58:31 +0000
commit8f94b59e7ef22d7d56f124179828cbe3e8fe96bd (patch)
treecf4881865d19e094946248a0370dca77b409e471
parent3638f76a8aa229cae91dc0f16dbf7297f0cec725 (diff)
downloadpoi-8f94b59e7ef22d7d56f124179828cbe3e8fe96bd.tar.gz
poi-8f94b59e7ef22d7d56f124179828cbe3e8fe96bd.zip
Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@685315 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/documentation/content/xdocs/changes.xml1
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/ooxml/java/org/apache/poi/POIXMLDocument.java2
-rw-r--r--src/ooxml/java/org/apache/poi/POIXMLProperties.java50
-rw-r--r--src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java142
-rw-r--r--src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java6
-rw-r--r--src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java74
7 files changed, 268 insertions, 8 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index 020c6c9602..04547c7a91 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,6 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor</action>
<action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
<action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
<action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 998263d8dd..a75dc2837a 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.5.1-beta2" date="2008-??-??">
+ <action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor</action>
<action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
<action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
<action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java
index 1f61a5cdcf..500d09a829 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java
@@ -38,8 +38,8 @@ import org.openxml4j.opc.PackagingURIHelper;
public abstract class POIXMLDocument {
public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties";
-
public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties";
+ public static final String CUSTOM_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties";
// OLE embeddings relation name
public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
diff --git a/src/ooxml/java/org/apache/poi/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/POIXMLProperties.java
index 7806c9b781..894f2f800d 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLProperties.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLProperties.java
@@ -23,8 +23,6 @@ import org.openxml4j.exceptions.OpenXML4JException;
import org.openxml4j.opc.Package;
import org.openxml4j.opc.PackageRelationshipCollection;
import org.openxml4j.opc.internal.PackagePropertiesPart;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
/**
* Wrapper around the different kinds of OOXML properties (core, extended and custom)
@@ -34,6 +32,7 @@ public class POIXMLProperties {
private Package pkg;
private CoreProperties core;
private ExtendedProperties ext;
+ private CustomProperties cust;
public POIXMLProperties(Package docPackage) throws IOException, OpenXML4JException, XmlException {
this.pkg = docPackage;
@@ -52,12 +51,24 @@ public class POIXMLProperties {
PackageRelationshipCollection extRel =
pkg.getRelationshipsByType(POIXMLDocument.EXTENDED_PROPERTIES_REL_TYPE);
if(extRel.size() == 1) {
- PropertiesDocument props = PropertiesDocument.Factory.parse(
+ org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse(
pkg.getPart( extRel.getRelationship(0) ).getInputStream()
);
ext = new ExtendedProperties(props);
} else {
- ext = new ExtendedProperties(PropertiesDocument.Factory.newInstance());
+ ext = new ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance());
+ }
+
+ // Custom properties
+ PackageRelationshipCollection custRel =
+ pkg.getRelationshipsByType(POIXMLDocument.CUSTOM_PROPERTIES_REL_TYPE);
+ if(custRel.size() == 1) {
+ org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse(
+ pkg.getPart( custRel.getRelationship(0) ).getInputStream()
+ );
+ cust = new CustomProperties(props);
+ } else {
+ cust = new CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance());
}
}
@@ -76,6 +87,13 @@ public class POIXMLProperties {
}
/**
+ * Returns the custom document properties wrapper built by the constructor: parsed from the custom-properties part when exactly one such relationship exists, otherwise wrapping a newly created properties document
+ */
+ public CustomProperties getCustomProperties() {
+ return cust;
+ }
+
+ /**
* Writes out the ooxml properties into the supplied,
* new Package
*/
@@ -108,8 +126,26 @@ public class POIXMLProperties {
* Extended document properties
*/
public class ExtendedProperties {
- private PropertiesDocument props;
- private ExtendedProperties(PropertiesDocument props) {
+ private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props;
+ private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) {
+ this.props = props;
+
+ if(props.getProperties() == null) {
+ props.addNewProperties();
+ }
+ }
+
+ public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() {
+ return props.getProperties();
+ }
+ }
+
+ /**
+ * Custom document properties
+ */
+ public class CustomProperties {
+ private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props;
+ private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) {
this.props = props;
if(props.getProperties() == null) {
@@ -117,7 +153,7 @@ public class POIXMLProperties {
}
}
- public CTProperties getUnderlyingProperties() {
+ public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() {
return props.getProperties();
}
}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
new file mode 100644
index 0000000000..455b8ab1bb
--- /dev/null
+++ b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
@@ -0,0 +1,142 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.IOException;
+
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.internal.PackagePropertiesPart;
+import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
+
+/**
+ * A {@link POITextExtractor} for returning the textual
+ *  content of the OOXML file properties, eg author
+ *  and title.
+ * Every property is rendered on its own line, in the
+ *  form <code>Name = value</code>. Properties with no
+ *  value are still listed, with the text "null".
+ */
+public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
+    /** Text used for custom property types that are not rendered yet */
+    private static final String UNSUPPORTED_VALUE = "(not implemented!)";
+
+    /**
+     * Creates a new POIXMLPropertiesTextExtractor for the
+     *  given open document.
+     * @param doc the open document to read the properties of
+     */
+    public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
+        super(doc);
+    }
+    /**
+     * Creates a new POIXMLPropertiesTextExtractor, for the
+     *  same file that another TextExtractor is already
+     *  working on.
+     * @param otherExtractor the extractor whose document is shared
+     */
+    public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
+        super(otherExtractor.document);
+    }
+
+    /**
+     * Appends a single <code>name = value</code> line.
+     * The value is converted with the usual string
+     *  conversion, so a null value shows up as "null".
+     */
+    private static void appendLine(StringBuilder text, String name, Object value) {
+        text.append(name).append(" = ").append(value).append("\n");
+    }
+
+    /**
+     * Returns the core document properties, eg author
+     * @return one <code>Name = value</code> line per core property
+     */
+    public String getCorePropertiesText() throws IOException, OpenXML4JException, XmlException {
+        StringBuilder text = new StringBuilder();
+        PackagePropertiesPart props =
+            document.getProperties().getCoreProperties().getUnderlyingProperties();
+
+        appendLine(text, "Category", props.getCategoryProperty().getValue());
+        appendLine(text, "ContentStatus", props.getContentStatusProperty().getValue());
+        appendLine(text, "ContentType", props.getContentTypeProperty().getValue());
+        appendLine(text, "Created", props.getCreatedProperty().getValue());
+        appendLine(text, "CreatedString", props.getCreatedPropertyString());
+        appendLine(text, "Creator", props.getCreatorProperty().getValue());
+        appendLine(text, "Description", props.getDescriptionProperty().getValue());
+        appendLine(text, "Identifier", props.getIdentifierProperty().getValue());
+        appendLine(text, "Keywords", props.getKeywordsProperty().getValue());
+        appendLine(text, "Language", props.getLanguageProperty().getValue());
+        appendLine(text, "LastModifiedBy", props.getLastModifiedByProperty().getValue());
+        appendLine(text, "LastPrinted", props.getLastPrintedProperty().getValue());
+        appendLine(text, "LastPrintedString", props.getLastPrintedPropertyString());
+        appendLine(text, "Modified", props.getModifiedProperty().getValue());
+        appendLine(text, "ModifiedString", props.getModifiedPropertyString());
+        appendLine(text, "Revision", props.getRevisionProperty().getValue());
+        appendLine(text, "Subject", props.getSubjectProperty().getValue());
+        appendLine(text, "Title", props.getTitleProperty().getValue());
+        appendLine(text, "Version", props.getVersionProperty().getValue());
+
+        return text.toString();
+    }
+    /**
+     * Returns the extended document properties, eg
+     *  application
+     * @return one <code>Name = value</code> line per extended property
+     */
+    public String getExtendedPropertiesText() throws IOException, OpenXML4JException, XmlException {
+        StringBuilder text = new StringBuilder();
+        org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+            props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
+
+        appendLine(text, "Application", props.getApplication());
+        appendLine(text, "AppVersion", props.getAppVersion());
+        appendLine(text, "Characters", props.getCharacters());
+        appendLine(text, "CharactersWithSpaces", props.getCharactersWithSpaces());
+        appendLine(text, "Company", props.getCompany());
+        appendLine(text, "HyperlinkBase", props.getHyperlinkBase());
+        appendLine(text, "HyperlinksChanged", props.getHyperlinksChanged());
+        appendLine(text, "Lines", props.getLines());
+        appendLine(text, "LinksUpToDate", props.getLinksUpToDate());
+        appendLine(text, "Manager", props.getManager());
+        appendLine(text, "Pages", props.getPages());
+        appendLine(text, "Paragraphs", props.getParagraphs());
+        appendLine(text, "PresentationFormat", props.getPresentationFormat());
+        appendLine(text, "Template", props.getTemplate());
+        appendLine(text, "TotalTime", props.getTotalTime());
+
+        return text.toString();
+    }
+    /**
+     * Returns the custom document properties, if
+     *  there are any
+     * @return one <code>Name = value</code> line per custom
+     *  property, or an empty string if there are none
+     */
+    public String getCustomPropertiesText() throws IOException, OpenXML4JException, XmlException {
+        StringBuilder text = new StringBuilder();
+        org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
+            props = document.getProperties().getCustomProperties().getUnderlyingProperties();
+
+        CTProperty[] properties = props.getPropertyArray();
+        for(int i = 0; i<properties.length; i++) {
+            appendLine(text, properties[i].getName(), customPropertyValue(properties[i]));
+        }
+
+        return text.toString();
+    }
+
+    /**
+     * Renders the value of one custom property as text.
+     * A custom property holds exactly one typed child
+     *  element; the string, date, boolean, integer and
+     *  decimal variants are handled here. Any other type
+     *  (eg vectors, blobs, streams) still yields the
+     *  "(not implemented!)" placeholder.
+     */
+    private static String customPropertyValue(CTProperty property) {
+        // Strings
+        if(property.isSetLpwstr()) { return property.getLpwstr(); }
+        if(property.isSetLpstr()) { return property.getLpstr(); }
+        if(property.isSetBstr()) { return property.getBstr(); }
+        // Dates
+        if(property.isSetDate()) { return String.valueOf(property.getDate()); }
+        if(property.isSetFiletime()) { return String.valueOf(property.getFiletime()); }
+        // Booleans
+        if(property.isSetBool()) { return String.valueOf(property.getBool()); }
+        // Signed integers
+        if(property.isSetI1()) { return String.valueOf(property.getI1()); }
+        if(property.isSetI2()) { return String.valueOf(property.getI2()); }
+        if(property.isSetI4()) { return String.valueOf(property.getI4()); }
+        if(property.isSetI8()) { return String.valueOf(property.getI8()); }
+        if(property.isSetInt()) { return String.valueOf(property.getInt()); }
+        // Unsigned integers
+        if(property.isSetUi1()) { return String.valueOf(property.getUi1()); }
+        if(property.isSetUi2()) { return String.valueOf(property.getUi2()); }
+        if(property.isSetUi4()) { return String.valueOf(property.getUi4()); }
+        if(property.isSetUi8()) { return String.valueOf(property.getUi8()); }
+        if(property.isSetUint()) { return String.valueOf(property.getUint()); }
+        // Reals
+        if(property.isSetR4()) { return String.valueOf(property.getR4()); }
+        if(property.isSetR8()) { return String.valueOf(property.getR8()); }
+        if(property.isSetDecimal()) { return String.valueOf(property.getDecimal()); }
+
+        // TODO - vectors, arrays, blobs, streams etc
+        return UNSUPPORTED_VALUE;
+    }
+
+    /**
+     * Returns the core, extended and custom properties
+     *  text, in that order.
+     * @throws RuntimeException wrapping any exception hit
+     *  while reading the properties
+     */
+    public String getText() {
+        try {
+            return
+                getCorePropertiesText() +
+                getExtendedPropertiesText() +
+                getCustomPropertiesText();
+        } catch(Exception e) {
+            // getText() declares no checked exceptions, so rethrow unchecked
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Not supported, as this already is the metadata
+     *  text extractor.
+     * @throws IllegalStateException always
+     */
+    public POITextExtractor getMetadataTextExtractor() {
+        throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+    }
+}
diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
index 7f09e5e990..7c04aa079f 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
@@ -47,6 +47,12 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
public ExtendedProperties getExtendedProperties() throws IOException, OpenXML4JException, XmlException {
return document.getProperties().getExtendedProperties();
}
+ /**
+ * Returns the custom document properties, by delegating to <code>getCustomProperties()</code> on the properties of the underlying document
+ */
+ public CustomProperties getCustomProperties() throws IOException, OpenXML4JException, XmlException {
+ return document.getProperties().getCustomProperties();
+ }
/**
* Returns opened document
diff --git a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java
new file mode 100644
index 0000000000..a8cf7ea992
--- /dev/null
+++ b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java
@@ -0,0 +1,74 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.File;
+
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.openxml4j.opc.Package;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that {@link POIXMLPropertiesTextExtractor} returns
+ *  the expected core and extended properties text for a
+ *  sample workbook.
+ */
+public class TestXMLPropertiesTextExtractor extends TestCase {
+    /** Name of the sample file, found in the test data directory */
+    private static final String SAMPLE_FILE = "ExcelWithAttachments.xlsx";
+
+    /** Test data directory, from the OOXML.testdata.path system property */
+    private String dirname;
+
+    protected void setUp() throws Exception {
+        dirname = System.getProperty("OOXML.testdata.path");
+        // Fail with a clear message, rather than a NullPointerException from File
+        assertNotNull("System property OOXML.testdata.path is not set", dirname);
+        assertTrue(
+            "Test data directory does not exist: " + dirname,
+            (new File(dirname)).exists()
+        );
+    }
+
+    /**
+     * Opens the sample workbook, and wraps it in a
+     *  properties text extractor.
+     */
+    private POIXMLPropertiesTextExtractor openSampleExtractor() throws Exception {
+        org.openxml4j.opc.Package pkg = Package.open(
+            (new File(dirname, SAMPLE_FILE)).toString()
+        );
+        XSSFWorkbook wb = new XSSFWorkbook(pkg);
+        return new POIXMLPropertiesTextExtractor(wb);
+    }
+
+    public void testCore() throws Exception {
+        POIXMLPropertiesTextExtractor ext = openSampleExtractor();
+        // Result discarded - presumably checks that a repeat call works, TODO confirm
+        ext.getText();
+
+        // Now check
+        String text = ext.getText();
+        String cText = ext.getCorePropertiesText();
+
+        assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
+        assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
+    }
+
+    public void testExtended() throws Exception {
+        POIXMLPropertiesTextExtractor ext = openSampleExtractor();
+        // Result discarded - presumably checks that a repeat call works, TODO confirm
+        ext.getText();
+
+        // Now check
+        String text = ext.getText();
+        String eText = ext.getExtendedPropertiesText();
+
+        assertTrue(text.contains("Application = Microsoft Excel"));
+        assertTrue(text.contains("Company = Mera"));
+        assertTrue(eText.contains("Application = Microsoft Excel"));
+        assertTrue(eText.contains("Company = Mera"));
+    }
+
+    public void testCustom() throws Exception {
+        // TODO!
+    }
+}