]> source.dussan.org Git - poi.git/commitdiff
Add POIXMLPropertiesTextExtractor, which provides to the OOXML file formats a similar...
authorNick Burch <nick@apache.org>
Tue, 12 Aug 2008 20:58:31 +0000 (20:58 +0000)
committerNick Burch <nick@apache.org>
Tue, 12 Aug 2008 20:58:31 +0000 (20:58 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@685315 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/POIXMLDocument.java
src/ooxml/java/org/apache/poi/POIXMLProperties.java
src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java [new file with mode: 0644]
src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java [new file with mode: 0644]

index 020c6c9602c7c5efffd2900302b687f10bd07c54..04547c7a91799c2c93472720bc8d8ee6070d022d 100644 (file)
@@ -37,6 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides the OOXML file formats with functionality similar to HPSF's HPSFPropertiesExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
            <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
            <action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
index 998263d8dd73c9062169e26a4ff3a3ec65991ab8..a75dc2837a8d9093e18dc275a877b7884b8cb089 100644 (file)
@@ -34,6 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.5.1-beta2" date="2008-??-??">
+           <action dev="POI-DEVELOPERS" type="add">Add POIXMLPropertiesTextExtractor, which provides the OOXML file formats with functionality similar to HPSF's HPSFPropertiesExtractor</action>
            <action dev="POI-DEVELOPERS" type="add">45539 - Improve XWPFWordExtractor to extract headers and footers</action>
            <action dev="POI-DEVELOPERS" type="fix">Improve how XWPF handles paragraph text</action>
            <action dev="POI-DEVELOPERS" type="add">Support in XWPF handles headers and footers</action>
index 1f61a5cdcf86c1516e4cac101dfa85da03991fd8..500d09a8292b97fe75acafe1e0b330e2008df80a 100644 (file)
@@ -38,8 +38,8 @@ import org.openxml4j.opc.PackagingURIHelper;
 public abstract class POIXMLDocument {
 
     public static final String CORE_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties";
-    
     public static final String EXTENDED_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/extended-properties";
+    public static final String CUSTOM_PROPERTIES_REL_TYPE = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/custom-properties";
     
     // OLE embeddings relation name
     public static final String OLE_OBJECT_REL_TYPE="http://schemas.openxmlformats.org/officeDocument/2006/relationships/oleObject";
index 7806c9b78151b2c6cec414e27656b16b83fa7f5d..894f2f800d32d7c0c0d1545293f4fe8aa8c89901 100644 (file)
@@ -23,8 +23,6 @@ import org.openxml4j.exceptions.OpenXML4JException;
 import org.openxml4j.opc.Package;
 import org.openxml4j.opc.PackageRelationshipCollection;
 import org.openxml4j.opc.internal.PackagePropertiesPart;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties;
-import org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument;
 
 /**
  * Wrapper around the two different kinds of OOXML properties 
@@ -34,6 +32,7 @@ public class POIXMLProperties {
        private Package pkg;
        private CoreProperties core;
        private ExtendedProperties ext;
+       private CustomProperties cust;
        
        public POIXMLProperties(Package docPackage) throws IOException, OpenXML4JException, XmlException {
                this.pkg = docPackage;
@@ -52,12 +51,24 @@ public class POIXMLProperties {
                PackageRelationshipCollection extRel =
                        pkg.getRelationshipsByType(POIXMLDocument.EXTENDED_PROPERTIES_REL_TYPE);
                if(extRel.size() == 1) {
-                       PropertiesDocument props = PropertiesDocument.Factory.parse(
+                       org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.parse(
                                        pkg.getPart( extRel.getRelationship(0) ).getInputStream()
                        );
                        ext = new ExtendedProperties(props);
                } else {
-                       ext = new ExtendedProperties(PropertiesDocument.Factory.newInstance());
+                       ext = new ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument.Factory.newInstance());
+               }
+               
+               // Custom properties
+               PackageRelationshipCollection custRel =
+                       pkg.getRelationshipsByType(POIXMLDocument.CUSTOM_PROPERTIES_REL_TYPE);
+               if(custRel.size() == 1) {
+                       org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props = org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.parse(
+                                       pkg.getPart( custRel.getRelationship(0) ).getInputStream()
+                       );
+                       cust = new CustomProperties(props);
+               } else {
+                       cust = new CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument.Factory.newInstance());
                }
        }
        
@@ -75,6 +86,13 @@ public class POIXMLProperties {
                return ext;
        }
        
+       /**
+        * Returns the custom document properties
+        */
+       public CustomProperties getCustomProperties() {
+               return cust;
+       }
+       
        /**
         * Writes out the ooxml properties into the supplied,
         *  new Package
@@ -108,8 +126,26 @@ public class POIXMLProperties {
         * Extended document properties
         */
        public class ExtendedProperties {
-               private PropertiesDocument props;
-               private ExtendedProperties(PropertiesDocument props) {
+               private org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props;
+               private ExtendedProperties(org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.PropertiesDocument props) {
+                       this.props = props;
+                       
+                       if(props.getProperties() == null) {
+                               props.addNewProperties();
+                       }
+               }
+               
+               public org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties getUnderlyingProperties() {
+                       return props.getProperties();
+               }
+       }
+       
+       /**
+        * Custom document properties
+        */
+       public class CustomProperties {
+               private org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props;
+               private CustomProperties(org.openxmlformats.schemas.officeDocument.x2006.customProperties.PropertiesDocument props) {
                        this.props = props;
                        
                        if(props.getProperties() == null) {
@@ -117,7 +153,7 @@ public class POIXMLProperties {
                        }
                }
                
-               public CTProperties getUnderlyingProperties() {
+               public org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties getUnderlyingProperties() {
                        return props.getProperties();
                }
        }
diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
new file mode 100644 (file)
index 0000000..455b8ab
--- /dev/null
@@ -0,0 +1,142 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.IOException;
+
+import org.apache.xmlbeans.XmlException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.internal.PackagePropertiesPart;
+import org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperty;
+
+/**
+ * A {@link POITextExtractor} that returns the textual
+ *  content of the OOXML file properties, e.g. author
+ *  and title.
+ */
+public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
+       /**
+        * Creates a new POIXMLPropertiesTextExtractor for the
+        *  given open document.
+        */
+       public POIXMLPropertiesTextExtractor(POIXMLDocument doc) {
+               super(doc);
+       }
+       /**
+        * Creates a new POIXMLPropertiesTextExtractor, for the
+        *  same file that another TextExtractor is already
+        *  working on.
+        */
+       public POIXMLPropertiesTextExtractor(POIXMLTextExtractor otherExtractor) {
+               super(otherExtractor.document);
+       }
+       
+       /**
+        * Returns the core document properties (e.g. author) as text, one "Name = value" line per property
+        */
+       public String getCorePropertiesText() throws IOException, OpenXML4JException, XmlException {
+               StringBuffer text = new StringBuffer();
+               PackagePropertiesPart props =
+                       document.getProperties().getCoreProperties().getUnderlyingProperties();
+               
+               text.append("Category = " + props.getCategoryProperty().getValue() + "\n");
+               text.append("ContentStatus = " + props.getContentStatusProperty().getValue() + "\n");
+               text.append("ContentType = " + props.getContentTypeProperty().getValue() + "\n");
+               text.append("Created = " + props.getCreatedProperty().getValue() + "\n");
+               text.append("CreatedString = " + props.getCreatedPropertyString() + "\n");
+               text.append("Creator = " + props.getCreatorProperty().getValue() + "\n");
+               text.append("Description = " + props.getDescriptionProperty().getValue() + "\n");
+               text.append("Identifier = " + props.getIdentifierProperty().getValue() + "\n");
+               text.append("Keywords = " + props.getKeywordsProperty().getValue() + "\n");
+               text.append("Language = " + props.getLanguageProperty().getValue() + "\n");
+               text.append("LastModifiedBy = " + props.getLastModifiedByProperty().getValue() + "\n");
+               text.append("LastPrinted = " + props.getLastPrintedProperty().getValue() + "\n");
+               text.append("LastPrintedString = " + props.getLastPrintedPropertyString() + "\n");
+               text.append("Modified = " + props.getModifiedProperty().getValue() + "\n");
+               text.append("ModifiedString = " + props.getModifiedPropertyString() + "\n");
+               text.append("Revision = " + props.getRevisionProperty().getValue() + "\n");
+               text.append("Subject = " + props.getSubjectProperty().getValue() + "\n");
+               text.append("Title = " + props.getTitleProperty().getValue() + "\n");
+               text.append("Version = " + props.getVersionProperty().getValue() + "\n");
+
+               return text.toString();
+       }
+       /**
+        * Returns the extended document properties (e.g.
+        *  application) as text, one "Name = value" line per property
+        */
+       public String getExtendedPropertiesText() throws IOException, OpenXML4JException, XmlException {
+               StringBuffer text = new StringBuffer();
+               org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
+                       props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
+
+               text.append("Application = " + props.getApplication() + "\n");
+               text.append("AppVersion = " + props.getAppVersion() + "\n");
+               text.append("Characters = " + props.getCharacters() + "\n");
+               text.append("CharactersWithSpaces = " + props.getCharactersWithSpaces() + "\n");
+               text.append("Company = " + props.getCompany() + "\n");
+               text.append("HyperlinkBase = " + props.getHyperlinkBase() + "\n");
+               text.append("HyperlinksChanged = " + props.getHyperlinksChanged() + "\n");
+               text.append("Lines = " + props.getLines() + "\n");
+               text.append("LinksUpToDate = " + props.getLinksUpToDate() + "\n");
+               text.append("Manager = " + props.getManager() + "\n");
+               text.append("Pages = " + props.getPages() + "\n");
+               text.append("Paragraphs = " + props.getParagraphs() + "\n");
+               text.append("PresentationFormat = " + props.getPresentationFormat() + "\n");
+               text.append("Template = " + props.getTemplate() + "\n");
+               text.append("TotalTime = " + props.getTotalTime() + "\n");
+               
+               return text.toString();
+       }
+       /**
+        * Returns the names of the custom document properties, if
+        *  there are any; values are not extracted yet and are reported as "(not implemented!)"
+        */
+       public String getCustomPropertiesText() throws IOException, OpenXML4JException, XmlException {
+               StringBuffer text = new StringBuffer();
+               org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
+                       props = document.getProperties().getCustomProperties().getUnderlyingProperties();
+               
+               CTProperty[] properties = props.getPropertyArray();
+               for(int i = 0; i<properties.length; i++) {
+                       // TODO - finish off
+                       String val = "(not implemented!)";
+                       
+                       text.append(
+                                       properties[i].getName() +
+                                       " = " + val + "\n"
+                       );
+               }
+               
+               return text.toString();
+       }
+
+       public String getText() {
+               try {
+                       return 
+                               getCorePropertiesText() + 
+                               getExtendedPropertiesText() +
+                               getCustomPropertiesText();
+               } catch(Exception e) {
+                       throw new RuntimeException(e);
+               }
+       }
+
+       public POITextExtractor getMetadataTextExtractor() {
+               throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+       }
+}
index 7f09e5e99057f8c19d602823466306df30303e51..7c04aa079fb7c905bf7c4670892940f251de9d39 100644 (file)
@@ -47,6 +47,12 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
        public ExtendedProperties getExtendedProperties() throws IOException, OpenXML4JException, XmlException {
                return document.getProperties().getExtendedProperties();
        }
+       /**
+        * Returns the custom document properties
+        */
+       public CustomProperties getCustomProperties() throws IOException, OpenXML4JException, XmlException {
+               return document.getProperties().getCustomProperties();
+       }
 
        /**
         * Returns opened document 
diff --git a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java
new file mode 100644 (file)
index 0000000..a8cf7ea
--- /dev/null
@@ -0,0 +1,74 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import java.io.File;
+
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.openxml4j.opc.Package;
+
+import junit.framework.TestCase;
+
+public class TestXMLPropertiesTextExtractor extends TestCase {
+       private String dirname;
+       
+       protected void setUp() throws Exception {
+               dirname = System.getProperty("OOXML.testdata.path");
+               assertTrue( (new File(dirname)).exists() );
+       }
+
+       public void testCore() throws Exception {
+               org.openxml4j.opc.Package pkg = Package.open(
+                               (new File(dirname, "ExcelWithAttachments.xlsx")).toString()
+               );
+               XSSFWorkbook wb = new XSSFWorkbook(pkg);
+               
+               POIXMLPropertiesTextExtractor ext = new POIXMLPropertiesTextExtractor(wb);
+               ext.getText();
+               
+               // Now check
+               String text = ext.getText();
+               String cText = ext.getCorePropertiesText();
+               
+               assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
+               assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
+       }
+       
+       public void testExtended() throws Exception {
+               org.openxml4j.opc.Package pkg = Package.open(
+                               (new File(dirname, "ExcelWithAttachments.xlsx")).toString()
+               );
+               XSSFWorkbook wb = new XSSFWorkbook(pkg);
+               
+               POIXMLPropertiesTextExtractor ext = new POIXMLPropertiesTextExtractor(wb);
+               ext.getText();
+               
+               // Now check
+               String text = ext.getText();
+               String eText = ext.getExtendedPropertiesText();
+               System.out.println(eText);
+               
+               assertTrue(text.contains("Application = Microsoft Excel"));
+               assertTrue(text.contains("Company = Mera"));
+               assertTrue(eText.contains("Application = Microsoft Excel"));
+               assertTrue(eText.contains("Company = Mera"));
+       }
+       
+       public void testCustom() throws Exception {
+               // TODO!
+       }
+}