aboutsummaryrefslogtreecommitdiffstats
path: root/src/java/org/apache/poi
diff options
context:
space:
mode:
author Nick Burch <nick@apache.org> 2008-08-12 18:44:50 +0000
committer Nick Burch <nick@apache.org> 2008-08-12 18:44:50 +0000
commit 67de5e70b49cb58017bffef33a8c3075d2698911 (patch)
tree d692c7384570f1246f44dfc0f8f41bf5a93e1f23 /src/java/org/apache/poi
parent 13d89f642c5bda25e447970b2f5ca8b7790cb727 (diff)
download poi-67de5e70b49cb58017bffef33a8c3075d2698911.tar.gz
poi-67de5e70b49cb58017bffef33a8c3075d2698911.zip
New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@685260 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/java/org/apache/poi')
-rw-r--r-- src/java/org/apache/poi/POITextExtractor.java 8
-rw-r--r-- src/java/org/apache/poi/hpsf/CustomProperties.java 13
-rw-r--r-- src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java 3
-rw-r--r-- src/java/org/apache/poi/hpsf/SpecialPropertySet.java 6
-rw-r--r-- src/java/org/apache/poi/hpsf/SummaryInformation.java 3
-rw-r--r-- src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java 144
6 files changed, 176 insertions, 1 deletions
diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java
index 3ba71880eb..a7ffd44197 100644
--- a/src/java/org/apache/poi/POITextExtractor.java
+++ b/src/java/org/apache/poi/POITextExtractor.java
@@ -37,6 +37,14 @@ public abstract class POITextExtractor {
public POITextExtractor(POIDocument document) {
this.document = document;
}
+ /**
+ * Creates a new text extractor, using the same
+ * document as another text extractor. Normally
+ * only used by properties extractors.
+ *
+ * @param otherExtractor the extractor whose document this
+ * new extractor will share; it is dereferenced
+ * directly, so it must not be null
+ */
+ protected POITextExtractor(POITextExtractor otherExtractor) {
+ this.document = otherExtractor.document;
+ }
/**
* Retrieves all the text from the document.
diff --git a/src/java/org/apache/poi/hpsf/CustomProperties.java b/src/java/org/apache/poi/hpsf/CustomProperties.java
index 24b19e5d04..420fc2f9bb 100644
--- a/src/java/org/apache/poi/hpsf/CustomProperties.java
+++ b/src/java/org/apache/poi/hpsf/CustomProperties.java
@@ -21,6 +21,7 @@ import java.util.Date;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
+import java.util.Set;
import org.apache.poi.hpsf.wellknown.PropertyIDMap;
@@ -293,8 +294,18 @@ public class CustomProperties extends HashMap
final CustomProperty cp = new CustomProperty(p, name);
return put(cp);
}
-
+
/**
+ * Returns a set of all the names of our
+ * custom properties. This overrides the
+ * <code>keySet()</code> inherited from the map this class
+ * extends, and hands back the key set of the
+ * name-to-ID dictionary as-is (no copy is made here).
+ *
+ * @return the key set of <code>dictionaryNameToID</code>
+ */
+ public Set keySet() {
+ return dictionaryNameToID.keySet();
+ }
+
+
+
+ /**
* <p>Sets the codepage.</p>
*
* @param codepage the codepage
diff --git a/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java b/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java
index b7a7c9ae6d..62c6127ee4 100644
--- a/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java
+++ b/src/java/org/apache/poi/hpsf/DocumentSummaryInformation.java
@@ -45,6 +45,9 @@ public class DocumentSummaryInformation extends SpecialPropertySet
public static final String DEFAULT_STREAM_NAME =
"\005DocumentSummaryInformation";
+ public PropertyIDMap getPropertySetIDMap() { // Implements SpecialPropertySet's abstract accessor: the id-to-name map for this set
+ return PropertyIDMap.getDocumentSummaryInformationProperties();
+ }
/**
diff --git a/src/java/org/apache/poi/hpsf/SpecialPropertySet.java b/src/java/org/apache/poi/hpsf/SpecialPropertySet.java
index 6a02bbc188..f415bd5d12 100644
--- a/src/java/org/apache/poi/hpsf/SpecialPropertySet.java
+++ b/src/java/org/apache/poi/hpsf/SpecialPropertySet.java
@@ -22,6 +22,7 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.util.List;
+import org.apache.poi.hpsf.wellknown.PropertyIDMap;
import org.apache.poi.poifs.filesystem.DirectoryEntry;
/**
@@ -57,6 +58,11 @@ import org.apache.poi.poifs.filesystem.DirectoryEntry;
*/
public abstract class SpecialPropertySet extends MutablePropertySet
{
+ /**
+ * The id to name mapping of the properties
+ * in this set.
+ *
+ * @return the map used to look up a name for each
+ * property ID of this set
+ */
+ public abstract PropertyIDMap getPropertySetIDMap();
/**
* <p>The "real" property set <code>SpecialPropertySet</code>
diff --git a/src/java/org/apache/poi/hpsf/SummaryInformation.java b/src/java/org/apache/poi/hpsf/SummaryInformation.java
index 66d9ce0937..a143e2bad0 100644
--- a/src/java/org/apache/poi/hpsf/SummaryInformation.java
+++ b/src/java/org/apache/poi/hpsf/SummaryInformation.java
@@ -40,6 +40,9 @@ public class SummaryInformation extends SpecialPropertySet
*/
public static final String DEFAULT_STREAM_NAME = "\005SummaryInformation";
+ public PropertyIDMap getPropertySetIDMap() { // Implements SpecialPropertySet's abstract accessor: the id-to-name map for this set
+ return PropertyIDMap.getSummaryInformationProperties();
+ }
/**
diff --git a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java
new file mode 100644
index 0000000000..c85f1bb04c
--- /dev/null
+++ b/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java
@@ -0,0 +1,144 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.hpsf.extractor;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.apache.poi.POIDocument;
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.hpsf.CustomProperties;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.Property;
+import org.apache.poi.hpsf.SpecialPropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.wellknown.PropertyIDMap;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * Extracts all of the HPSF properties, both
+ *  built in and custom, returning them in
+ *  textual form.
+ */
+public class HPFSPropertiesExtractor extends POITextExtractor {
+    /**
+     * Creates a properties extractor that works on the same
+     *  document as an existing text extractor.
+     *
+     * @param mainExtractor the extractor whose document is shared
+     */
+    public HPFSPropertiesExtractor(POITextExtractor mainExtractor) {
+        super(mainExtractor);
+    }
+
+    /**
+     * Creates a properties extractor for the given document.
+     *
+     * @param doc the document to read the properties from
+     */
+    public HPFSPropertiesExtractor(POIDocument doc) {
+        super(doc);
+    }
+
+    /**
+     * Creates a properties extractor directly on a POIFS
+     *  filesystem, by wrapping it in a properties-only document.
+     *
+     * @param fs the filesystem to read the properties from
+     */
+    public HPFSPropertiesExtractor(POIFSFileSystem fs) {
+        super(new PropertiesOnlyDocument(fs));
+    }
+
+    /**
+     * Returns the document summary information properties as
+     *  <code>name = value</code> lines, one per property. The
+     *  normal properties come first, followed by any custom ones.
+     *
+     * @return the properties text, or an empty string if the
+     *  document has no document summary information
+     */
+    public String getDocumentSummaryInformationText() {
+        DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
+        if(dsi == null) {
+            // Not defined, so there is nothing to ask for custom
+            //  properties either (doing so would be a NullPointerException)
+            return "";
+        }
+
+        StringBuffer text = new StringBuffer();
+
+        // Normal properties
+        text.append( getPropertiesText(dsi) );
+
+        // Now custom ones. Guard against the set having no
+        //  custom properties section at all
+        CustomProperties cps = dsi.getCustomProperties();
+        if(cps != null) {
+            Iterator keys = cps.keySet().iterator();
+            while(keys.hasNext()) {
+                String key = (String)keys.next();
+                String val = getPropertyValueText( cps.get(key) );
+                text.append(key).append(" = ").append(val).append("\n");
+            }
+        }
+
+        // All done
+        return text.toString();
+    }
+
+    /**
+     * Returns the summary information properties as
+     *  <code>name = value</code> lines, one per property.
+     *
+     * @return the properties text, or an empty string if the
+     *  document has no summary information
+     */
+    public String getSummaryInformationText() {
+        SummaryInformation si = document.getSummaryInformation();
+
+        // Just normal properties
+        return getPropertiesText(si);
+    }
+
+    /**
+     * Renders every property of the given set as a
+     *  <code>name = value</code> line. The name is looked up in
+     *  the set's ID map; if the map has no entry for a property,
+     *  its numeric ID is used instead.
+     *
+     * @param ps the property set to render, may be null
+     * @return the rendered lines, or an empty string if ps is null
+     */
+    private static String getPropertiesText(SpecialPropertySet ps) {
+        if(ps == null) {
+            // Not defined, oh well
+            return "";
+        }
+
+        StringBuffer text = new StringBuffer();
+
+        PropertyIDMap idMap = ps.getPropertySetIDMap();
+        Property[] props = ps.getProperties();
+        for(int i=0; i<props.length; i++) {
+            // Fall back to the raw ID when there is no known name
+            String type = Long.toString( props[i].getID() );
+            Object typeObj = idMap.get(props[i].getID());
+            if(typeObj != null) {
+                type = typeObj.toString();
+            }
+
+            String val = getPropertyValueText( props[i].getValue() );
+            text.append(type).append(" = ").append(val).append("\n");
+        }
+
+        return text.toString();
+    }
+
+    /**
+     * Turns a single property value into text. Byte arrays of
+     *  length 1, 2 or 4 are rendered as numbers, any other
+     *  non-empty byte array is treated as string data, and all
+     *  other values are rendered via <code>toString()</code>.
+     *
+     * @param val the property value, may be null
+     * @return the textual form, or "(not set)" if val is null
+     */
+    private static String getPropertyValueText(Object val) {
+        if(val == null) {
+            return "(not set)";
+        }
+        if(val instanceof byte[]) {
+            byte[] b = (byte[])val;
+            if(b.length == 0) {
+                return "";
+            }
+            if(b.length == 1) {
+                return Byte.toString(b[0]);
+            }
+            if(b.length == 2) {
+                return Integer.toString( LittleEndian.getUShort(b) );
+            }
+            if(b.length == 4) {
+                return Long.toString( LittleEndian.getUInt(b) );
+            }
+            // Maybe it's a string? who knows!
+            // NOTE(review): this decodes with the platform default
+            //  charset, so the result can differ between machines
+            return new String(b);
+        }
+        return val.toString();
+    }
+
+    /**
+     * Return the text of all the properties defined in
+     *  the document: the summary information first, then
+     *  the document summary information.
+     *
+     * @return the combined properties text
+     */
+    public String getText() {
+        return getSummaryInformationText() + getDocumentSummaryInformationText();
+    }
+
+    /**
+     * So we can get at the properties of any
+     *  random OLE2 document.
+     */
+    private static class PropertiesOnlyDocument extends POIDocument {
+        private PropertiesOnlyDocument(POIFSFileSystem fs) {
+            super(fs);
+        }
+
+        /**
+         * Always fails, as this document only exists for
+         *  reading properties.
+         *
+         * @throws IllegalStateException on every call
+         */
+        public void write(OutputStream out) throws IOException {
+            throw new IllegalStateException("Unable to write, only for properties!");
+        }
+    }
+}