From 8f94b59e7ef22d7d56f124179828cbe3e8fe96bd Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 12 Aug 2008 20:58:31 +0000 Subject: [PATCH] Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@685315 13f79535-47bb-0310-9956-ffa450edef68 --- src/documentation/content/xdocs/changes.xml | 1 + src/documentation/content/xdocs/status.xml | 1 + .../java/org/apache/poi/POIXMLDocument.java | 2 +- .../java/org/apache/poi/POIXMLProperties.java | 50 +++++- .../poi/POIXMLPropertiesTextExtractor.java | 142 ++++++++++++++++++ .../org/apache/poi/POIXMLTextExtractor.java | 6 + .../poi/TestXMLPropertiesTextExtractor.java | 74 +++++++++ 7 files changed, 268 insertions(+), 8 deletions(-) create mode 100644 src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java create mode 100644 src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 020c6c9602..04547c7a91 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,6 +37,7 @@ + Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor 45539 - Improve XWPFWordExtractor to extract headers and footers Improve how XWPF handles paragraph text Support in XWPF handles headers and footers diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 998263d8dd..a75dc2837a 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar function to HPSF's HPSFPropertiesExtractor 45539 - Improve XWPFWordExtractor to extract headers and footers Improve how XWPF handles paragraph text Support in XWPF handles headers and footers diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocument.java b/src/ooxml/java/org/apache/poi/POIXMLDocument.java index 1f61a5cdcf..500d09a829 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocument.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocument.java @@ -38,8 +38,8 @@ import org.openxml4j.opc.PackagingURIHelper; public abstract class POIXMLDocument { public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties"; - public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties"; + public static final String CUSTOM_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties"; // OLE embeddings relation name public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject"; diff --git a/src/ooxml/java/org/apache/poi/POIXMLProperties.java b/src/ooxml/java/org/apache/poi/POIXMLProperties.java index 7806c9b781..894f2f800d 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLProperties.java +++ b/src/ooxml/java/org/apache/poi/POIXMLProperties.java @@ -23,8 +23,6 @@ import org.openxml4j.exceptions.OpenXML4JException; import org.openxml4j.opc.Package; import org.openxml4j.opc.PackageRelationshipCollection; import org.openxml4j.opc.internal.PackagePropertiesPart; -import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties; -import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument; /** * Wrapper around the two different kinds of OOXML properties @@ -34,6 +32,7 @@ public class POIXMLProperties { private Package pkg; private CoreProperties core; private ExtendedProperties ext; + private CustomProperties cust; public POIXMLProperties(Package docPackage) throws IOException, OpenXML4JException, XmlException { this.pkg = docPackage; @@ -52,12 +51,24 @@ public class POIXMLProperties { PackageRelationshipCollection extRel = pkg.getRelationshipsByType(POIXMLDocument.EXTENDED_PROPERTIES_REL_TYPE); if(extRel.size() == 1) { - PropertiesDocument props = PropertiesDocument.Factory.parse( + org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse( pkg.getPart( extRel.getRelationship(0) ).getInputStream() ); ext = new ExtendedProperties(props); } else { - ext = new ExtendedProperties(PropertiesDocument.Factory.newInstance()); + ext = new ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance()); + } + + // Custom properties + PackageRelationshipCollection custRel = + pkg.getRelationshipsByType(POIXMLDocument.CUSTOM_PROPERTIES_REL_TYPE); + if(custRel.size() == 1) { + org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse( + pkg.getPart( custRel.getRelationship(0) ).getInputStream() + ); + cust = new CustomProperties(props); + } else { + cust = new CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance()); } } @@ -75,6 +86,13 @@ public class POIXMLProperties { return ext; } + /** + * Returns the custom document properties + */ + public CustomProperties getCustomProperties() { + return cust; + } + /** * Writes out the ooxml properties into the supplied, * new Package @@ -108,8 +126,26 @@ public class POIXMLProperties { * Extended document properties */ public class ExtendedProperties { - private PropertiesDocument props; - private ExtendedProperties(PropertiesDocument props) { + private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props; + private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) { + this.props = props; + + if(props.getProperties() == null) { + props.addNewProperties(); + } + } + + public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() { + return props.getProperties(); + } + } + + /** + * Custom document properties + */ + public class CustomProperties { + private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props; + private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) { this.props = props; if(props.getProperties() == null) { @@ -117,7 +153,7 @@ public class POIXMLProperties { } } - public CTProperties getUnderlyingProperties() { + public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() { return props.getProperties(); } } diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java new file mode 100644 index 0000000000..455b8ab1bb --- /dev/null +++ b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java @@ -0,0 +1,142 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi; + +import java.io.IOException; + +import org.apache.xmlbeans.XmlException; +import org.openxml4j.exceptions.OpenXML4JException; +import org.openxml4j.opc.internal.PackagePropertiesPart; +import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty; + +/** + * A {@link POITextExtractor} for returning the textual + * content of the OOXML file properties, eg author + * and title. + */ +public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { + /** + * Creates a new POIXMLPropertiesTextExtractor for the + * given open document. + */ + public POIXMLPropertiesTextExtractor(POIXMLDocument doc) { + super(doc); + } + /** + * Creates a new POIXMLPropertiesTextExtractor, for the + * same file that another TextExtractor is already + * working on. + */ + public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) { + super(otherExtractor.document); + } + + /** + * Returns the core document properties, eg author + */ + public String getCorePropertiesText() throws IOException, OpenXML4JException, XmlException { + StringBuffer text = new StringBuffer(); + PackagePropertiesPart props = + document.getProperties().getCoreProperties().getUnderlyingProperties(); + + text.append("Category = " + props.getCategoryProperty().getValue() + "\n"); + text.append("ContentStatus = " + props.getContentStatusProperty().getValue() + "\n"); + text.append("ContentType = " + props.getContentTypeProperty().getValue() + "\n"); + text.append("Created = " + props.getCreatedProperty().getValue() + "\n"); + text.append("CreatedString = " + props.getCreatedPropertyString() + "\n"); + text.append("Creator = " + props.getCreatorProperty().getValue() + "\n"); + text.append("Description = " + props.getDescriptionProperty().getValue() + "\n"); + text.append("Identifier = " + props.getIdentifierProperty().getValue() + "\n"); + text.append("Keywords = " + props.getKeywordsProperty().getValue() + "\n"); + text.append("Language = " + props.getLanguageProperty().getValue() + "\n"); + text.append("LastModifiedBy = " + props.getLastModifiedByProperty().getValue() + "\n"); + text.append("LastPrinted = " + props.getLastPrintedProperty().getValue() + "\n"); + text.append("LastPrintedString = " + props.getLastPrintedPropertyString() + "\n"); + text.append("Modified = " + props.getModifiedProperty().getValue() + "\n"); + text.append("ModifiedString = " + props.getModifiedPropertyString() + "\n"); + text.append("Revision = " + props.getRevisionProperty().getValue() + "\n"); + text.append("Subject = " + props.getSubjectProperty().getValue() + "\n"); + text.append("Title = " + props.getTitleProperty().getValue() + "\n"); + text.append("Version = " + props.getVersionProperty().getValue() + "\n"); + + return text.toString(); + } + /** + * Returns the extended document properties, eg + * application + */ + public String getExtendedPropertiesText() throws IOException, OpenXML4JException, XmlException { + StringBuffer text = new StringBuffer(); + org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties + props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); + + text.append("Application = " + props.getApplication() + "\n"); + text.append("AppVersion = " + props.getAppVersion() + "\n"); + text.append("Characters = " + props.getCharacters() + "\n"); + text.append("CharactersWithSpaces = " + props.getCharactersWithSpaces() + "\n"); + text.append("Company = " + props.getCompany() + "\n"); + text.append("HyperlinkBase = " + props.getHyperlinkBase() + "\n"); + text.append("HyperlinksChanged = " + props.getHyperlinksChanged() + "\n"); + text.append("Lines = " + props.getLines() + "\n"); + text.append("LinksUpToDate = " + props.getLinksUpToDate() + "\n"); + text.append("Manager = " + props.getManager() + "\n"); + text.append("Pages = " + props.getPages() + "\n"); + text.append("Paragraphs = " + props.getParagraphs() + "\n"); + text.append("PresentationFormat = " + props.getPresentationFormat() + "\n"); + text.append("Template = " + props.getTemplate() + "\n"); + text.append("TotalTime = " + props.getTotalTime() + "\n"); + + return text.toString(); + } + /** + * Returns the custom document properties, if + * there are any + */ + public String getCustomPropertiesText() throws IOException, OpenXML4JException, XmlException { + StringBuffer text = new StringBuffer(); + org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties + props = document.getProperties().getCustomProperties().getUnderlyingProperties(); + + CTProperty[] properties = props.getPropertyArray(); + for(int i = 0; i