]> source.dussan.org Git - poi.git/commitdiff
Fix a typo in the file name, and add a generic method to POITextExtractor to get...
author Nick Burch <nick@apache.org>
Tue, 12 Aug 2008 19:02:41 +0000 (19:02 +0000)
committer Nick Burch <nick@apache.org>
Tue, 12 Aug 2008 19:02:41 +0000 (19:02 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@685267 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/changes.xml
src/documentation/content/xdocs/hpsf/how-to.xml
src/documentation/content/xdocs/status.xml
src/java/org/apache/poi/POIOLE2TextExtractor.java
src/java/org/apache/poi/POITextExtractor.java
src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java [deleted file]
src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java [new file with mode: 0644]
src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java [deleted file]
src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java [new file with mode: 0644]

index 6a0cae2672ab48e11f0fd72189a942342ec1608e..b1cfff6b912520feb16dff69d78b31fec378c2f5 100644 (file)
@@ -37,7 +37,7 @@
 
                <!-- Don't forget to update status.xml too! -->
         <release version="3.1.1-alpha1" date="2008-??-??">
-           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action>
+           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action>
            <action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action>
            <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
index aadf753a44f3678c0b61408e24351c519a9c1a29..964005bf244963385949d67b6a8e47fc814917bd 100644 (file)
@@ -95,7 +95,7 @@
     <p>If all you are interested in is getting the textual content of
      all the document properties, such as for full text indexing, then
      take a look at 
-     <code>org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</code>. However,
+     <code>org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</code>. However,
      if you want full access to the properties, please read on!</p>
 
     <p>The first thing you should understand is that a Microsoft Office file is
index 0146f55e7928d07093e1c97c750b78ef4736da97..fbe242aa2ac22e71834c9e5d65d055d5a72bd43a 100644 (file)
@@ -34,7 +34,7 @@
        <!-- Don't forget to update changes.xml too! -->
     <changes>
         <release version="3.1.1-alpha1" date="2008-??-??">
-           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action>
+           <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action>
            <action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action>
            <action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action>
            <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
index f5aee4cc6dc7a3ee6f59d666f39e6439915739e9..d46c7e4aadd893a74d42736493eedb6b9f8c6776 100644 (file)
@@ -18,6 +18,7 @@ package org.apache.poi;
 
 import org.apache.poi.hpsf.DocumentSummaryInformation;
 import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
 
 /**
  * Common Parent for OLE2 based Text Extractors
@@ -50,4 +51,12 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
        public SummaryInformation getSummaryInformation() {
                return document.getSummaryInformation();
        }
+       
+       /**
+        * Returns an HPSF powered text extractor for the 
+        *  document properties metadata, such as title and author.
+        */
+       public POITextExtractor getMetadataTextExtractor() {
+               return new HPSFPropertiesExtractor(this);
+       }
 }
index a7ffd44197936dde97006093787d9588f4b6bea7..0b69894d0853c66fbc86992a45dedd6d1707c997 100644 (file)
@@ -54,4 +54,11 @@ public abstract class POITextExtractor {
         * @return All the text from the document
         */
        public abstract String getText();
+       
+       /**
+        * Returns another text extractor, which is able to
+        *  output the textual content of the document
+        *  metadata / properties, such as author and title.
+        */
+       public abstract POITextExtractor getMetadataTextExtractor();
 }
diff --git a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java
deleted file mode 100644 (file)
index c85f1bb..0000000
+++ /dev/null
@@ -1,144 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hpsf.extractor;
-
-import java.io.IOException;
-import java.io.OutputStream;
-import java.util.Iterator;
-
-import org.apache.poi.POIDocument;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.hpsf.CustomProperties;
-import org.apache.poi.hpsf.DocumentSummaryInformation;
-import org.apache.poi.hpsf.Property;
-import org.apache.poi.hpsf.SpecialPropertySet;
-import org.apache.poi.hpsf.SummaryInformation;
-import org.apache.poi.hpsf.wellknown.PropertyIDMap;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.LittleEndian;
-
-/**
- * Extracts all of the HPSF properties, both
- *  build in and custom, returning them in 
- *  textual form.
- */
-public class HPFSPropertiesExtractor extends POITextExtractor {
-       public HPFSPropertiesExtractor(POITextExtractor mainExtractor) {
-               super(mainExtractor);
-       }
-       public HPFSPropertiesExtractor(POIDocument doc) {
-               super(doc);
-       }
-       public HPFSPropertiesExtractor(POIFSFileSystem fs) {
-               super(new PropertiesOnlyDocument(fs));
-       }
-       
-       public String getDocumentSummaryInformationText() {
-               DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
-               StringBuffer text = new StringBuffer();
-
-               // Normal properties
-               text.append( getPropertiesText(dsi) );
-               
-               // Now custom ones
-               CustomProperties cps = dsi.getCustomProperties();
-               Iterator keys = cps.keySet().iterator();
-               while(keys.hasNext()) {
-                       String key = (String)keys.next();
-                       String val = getPropertyValueText( cps.get(key) );
-                       text.append(key + " = " + val + "\n");
-               }
-               
-               // All done
-               return text.toString();
-       }
-       public String getSummaryInformationText() {
-               SummaryInformation si = document.getSummaryInformation();
-               
-               // Just normal properties
-               return getPropertiesText(si);
-       }
-       
-       private static String getPropertiesText(SpecialPropertySet ps) {
-               if(ps == null) {
-                       // Not defined, oh well
-                       return "";
-               }
-               
-               StringBuffer text = new StringBuffer();
-               
-               PropertyIDMap idMap = ps.getPropertySetIDMap();
-               Property[] props = ps.getProperties();
-               for(int i=0; i<props.length; i++) {
-                       String type = Long.toString( props[i].getID() ); 
-                       Object typeObj = idMap.get(props[i].getID());
-                       if(typeObj != null) {
-                               type = typeObj.toString();
-                       }
-                       
-                       String val = getPropertyValueText( props[i].getValue() );
-                       text.append(type + " = " + val + "\n");
-               }
-               
-               return text.toString();
-       }
-       private static String getPropertyValueText(Object val) {
-               if(val == null) {
-                       return "(not set)";
-               }
-               if(val instanceof byte[]) {
-                       byte[] b = (byte[])val;
-                       if(b.length == 0) {
-                               return "";
-                       }
-                       if(b.length == 1) {
-                               return Byte.toString(b[0]);
-                       }
-                       if(b.length == 2) {
-                               return Integer.toString( LittleEndian.getUShort(b) );
-                       }
-                       if(b.length == 4) {
-                               return Long.toString( LittleEndian.getUInt(b) );
-                       }
-                       // Maybe it's a string? who knows!
-                       return new String(b);
-               }
-               return val.toString();
-       }
-
-       /**
-        * Return the text of all the properties defined in
-        *  the document.
-        */
-       public String getText() {
-               return getSummaryInformationText() + getDocumentSummaryInformationText();
-       }
-
-       /**
-        * So we can get at the properties of any 
-        *  random OLE2 document.
-        */
-       private static class PropertiesOnlyDocument extends POIDocument {
-               private PropertiesOnlyDocument(POIFSFileSystem fs) {
-                       super(fs);
-               }
-
-               public void write(OutputStream out) throws IOException {
-                       throw new IllegalStateException("Unable to write, only for properties!");
-               }
-       }
-}
diff --git a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
new file mode 100644 (file)
index 0000000..ecad5c0
--- /dev/null
@@ -0,0 +1,151 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hpsf.extractor;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.util.Iterator;
+
+import org.apache.poi.POIDocument;
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.hpsf.CustomProperties;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.Property;
+import org.apache.poi.hpsf.SpecialPropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.wellknown.PropertyIDMap;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.LittleEndian;
+
+/**
+ * Extracts all of the HPSF properties, both
+ *  built in and custom, returning them in 
+ *  textual form.
+ */
+public class HPSFPropertiesExtractor extends POITextExtractor {
+       public HPSFPropertiesExtractor(POITextExtractor mainExtractor) {
+               super(mainExtractor);
+       }
+       public HPSFPropertiesExtractor(POIDocument doc) {
+               super(doc);
+       }
+       public HPSFPropertiesExtractor(POIFSFileSystem fs) {
+               super(new PropertiesOnlyDocument(fs));
+       }
+       
+       public String getDocumentSummaryInformationText() {
+               DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
+               StringBuffer text = new StringBuffer();
+
+               // Normal properties
+               text.append( getPropertiesText(dsi) );
+               
+               // Now custom ones
+               CustomProperties cps = dsi.getCustomProperties();
+               Iterator keys = cps.keySet().iterator();
+               while(keys.hasNext()) {
+                       String key = (String)keys.next();
+                       String val = getPropertyValueText( cps.get(key) );
+                       text.append(key + " = " + val + "\n");
+               }
+               
+               // All done
+               return text.toString();
+       }
+       public String getSummaryInformationText() {
+               SummaryInformation si = document.getSummaryInformation();
+               
+               // Just normal properties
+               return getPropertiesText(si);
+       }
+       
+       private static String getPropertiesText(SpecialPropertySet ps) {
+               if(ps == null) {
+                       // Not defined, oh well
+                       return "";
+               }
+               
+               StringBuffer text = new StringBuffer();
+               
+               PropertyIDMap idMap = ps.getPropertySetIDMap();
+               Property[] props = ps.getProperties();
+               for(int i=0; i<props.length; i++) {
+                       String type = Long.toString( props[i].getID() ); 
+                       Object typeObj = idMap.get(props[i].getID());
+                       if(typeObj != null) {
+                               type = typeObj.toString();
+                       }
+                       
+                       String val = getPropertyValueText( props[i].getValue() );
+                       text.append(type + " = " + val + "\n");
+               }
+               
+               return text.toString();
+       }
+       private static String getPropertyValueText(Object val) {
+               if(val == null) {
+                       return "(not set)";
+               }
+               if(val instanceof byte[]) {
+                       byte[] b = (byte[])val;
+                       if(b.length == 0) {
+                               return "";
+                       }
+                       if(b.length == 1) {
+                               return Byte.toString(b[0]);
+                       }
+                       if(b.length == 2) {
+                               return Integer.toString( LittleEndian.getUShort(b) );
+                       }
+                       if(b.length == 4) {
+                               return Long.toString( LittleEndian.getUInt(b) );
+                       }
+                       // Maybe it's a string? who knows!
+                       return new String(b);
+               }
+               return val.toString();
+       }
+
+       /**
+        * Return the text of all the properties defined in
+        *  the document.
+        */
+       public String getText() {
+               return getSummaryInformationText() + getDocumentSummaryInformationText();
+       }
+       
+       /**
+        * Prevent recursion!
+        */
+       public POITextExtractor getMetadataTextExtractor() {
+               throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+       }
+
+       /**
+        * So we can get at the properties of any 
+        *  random OLE2 document.
+        */
+       private static class PropertiesOnlyDocument extends POIDocument {
+               private PropertiesOnlyDocument(POIFSFileSystem fs) {
+                       super(fs);
+               }
+
+               public void write(OutputStream out) throws IOException {
+                       throw new IllegalStateException("Unable to write, only for properties!");
+               }
+       }
+}
diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java
deleted file mode 100644 (file)
index 7d96787..0000000
+++ /dev/null
@@ -1,115 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.hpsf.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hssf.usermodel.HSSFWorkbook;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-
-import junit.framework.TestCase;
-
-public class TestHPFSPropertiesExtractor extends TestCase {
-       private String dir;
-       
-    protected void setUp() throws Exception {
-       dir = System.getProperty("HPSF.testdata.path");
-       assertNotNull("HPSF.testdata.path not set", dir);
-       }
-    
-       public void testNormalProperties() throws Exception {
-               POIFSFileSystem fs = new POIFSFileSystem(
-                               new FileInputStream(new File(dir, "TestMickey.doc"))
-               );
-               HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
-               ext.getText();
-               
-               // Check each bit in turn
-               String sinfText = ext.getSummaryInformationText();
-               String dinfText = ext.getDocumentSummaryInformationText();
-               
-               assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
-               assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
-               assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
-               assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
-               
-               // Now overall
-               String text = ext.getText();
-               assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
-               assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
-               assertTrue(text.indexOf("MANAGER = sample manager") > -1);
-               assertTrue(text.indexOf("COMPANY = sample company") > -1);
-       }
-       public void testNormalUnicodeProperties() throws Exception {
-               POIFSFileSystem fs = new POIFSFileSystem(
-                               new FileInputStream(new File(dir, "TestUnicode.xls"))
-               );
-               HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
-               ext.getText();
-               
-               // Check each bit in turn
-               String sinfText = ext.getSummaryInformationText();
-               String dinfText = ext.getDocumentSummaryInformationText();
-               
-               assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
-               assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
-               assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
-               assertTrue(dinfText.indexOf("SCALE = false") > -1);
-               
-               // Now overall
-               String text = ext.getText();
-               assertTrue(text.indexOf("AUTHOR = marshall") > -1);
-               assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
-               assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
-               assertTrue(text.indexOf("SCALE = false") > -1);
-       }
-       public void testCustomProperties() throws Exception {
-               POIFSFileSystem fs = new POIFSFileSystem(
-                               new FileInputStream(new File(dir, "TestMickey.doc"))
-               );
-               HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
-
-               // Custom properties are part of the document info stream
-               String dinfText = ext.getDocumentSummaryInformationText();
-               assertTrue(dinfText.indexOf("Client = sample client") > -1);
-               assertTrue(dinfText.indexOf("Division = sample division") > -1);
-               
-               String text = ext.getText();
-               assertTrue(text.indexOf("Client = sample client") > -1);
-               assertTrue(text.indexOf("Division = sample division") > -1);
-       }
-    
-    public void testConstructors() throws Exception {
-               POIFSFileSystem fs = new POIFSFileSystem(
-                               new FileInputStream(new File(dir, "TestUnicode.xls"))
-               );
-               HSSFWorkbook wb = new HSSFWorkbook(fs);
-               ExcelExtractor excelExt = new ExcelExtractor(wb);
-               
-               String fsText = (new HPFSPropertiesExtractor(fs)).getText();
-               String hwText = (new HPFSPropertiesExtractor(wb)).getText();
-               String eeText = (new HPFSPropertiesExtractor(excelExt)).getText();
-               
-               assertEquals(fsText, hwText);
-               assertEquals(fsText, eeText);
-               
-               assertTrue(fsText.indexOf("AUTHOR = marshall") > -1);
-               assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1);
-    }
-}
diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
new file mode 100644 (file)
index 0000000..3a18935
--- /dev/null
@@ -0,0 +1,115 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.hpsf.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+
+import junit.framework.TestCase;
+
+public class TestHPSFPropertiesExtractor extends TestCase {
+       private String dir;
+       
+    protected void setUp() throws Exception {
+       dir = System.getProperty("HPSF.testdata.path");
+       assertNotNull("HPSF.testdata.path not set", dir);
+       }
+    
+       public void testNormalProperties() throws Exception {
+               POIFSFileSystem fs = new POIFSFileSystem(
+                               new FileInputStream(new File(dir, "TestMickey.doc"))
+               );
+               HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
+               ext.getText();
+               
+               // Check each bit in turn
+               String sinfText = ext.getSummaryInformationText();
+               String dinfText = ext.getDocumentSummaryInformationText();
+               
+               assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
+               assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
+               assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
+               assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
+               
+               // Now overall
+               String text = ext.getText();
+               assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
+               assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
+               assertTrue(text.indexOf("MANAGER = sample manager") > -1);
+               assertTrue(text.indexOf("COMPANY = sample company") > -1);
+       }
+       public void testNormalUnicodeProperties() throws Exception {
+               POIFSFileSystem fs = new POIFSFileSystem(
+                               new FileInputStream(new File(dir, "TestUnicode.xls"))
+               );
+               HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
+               ext.getText();
+               
+               // Check each bit in turn
+               String sinfText = ext.getSummaryInformationText();
+               String dinfText = ext.getDocumentSummaryInformationText();
+               
+               assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
+               assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
+               assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
+               assertTrue(dinfText.indexOf("SCALE = false") > -1);
+               
+               // Now overall
+               String text = ext.getText();
+               assertTrue(text.indexOf("AUTHOR = marshall") > -1);
+               assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
+               assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
+               assertTrue(text.indexOf("SCALE = false") > -1);
+       }
+       public void testCustomProperties() throws Exception {
+               POIFSFileSystem fs = new POIFSFileSystem(
+                               new FileInputStream(new File(dir, "TestMickey.doc"))
+               );
+               HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
+
+               // Custom properties are part of the document info stream
+               String dinfText = ext.getDocumentSummaryInformationText();
+               assertTrue(dinfText.indexOf("Client = sample client") > -1);
+               assertTrue(dinfText.indexOf("Division = sample division") > -1);
+               
+               String text = ext.getText();
+               assertTrue(text.indexOf("Client = sample client") > -1);
+               assertTrue(text.indexOf("Division = sample division") > -1);
+       }
+    
+    public void testConstructors() throws Exception {
+               POIFSFileSystem fs = new POIFSFileSystem(
+                               new FileInputStream(new File(dir, "TestUnicode.xls"))
+               );
+               HSSFWorkbook wb = new HSSFWorkbook(fs);
+               ExcelExtractor excelExt = new ExcelExtractor(wb);
+               
+               String fsText = (new HPSFPropertiesExtractor(fs)).getText();
+               String hwText = (new HPSFPropertiesExtractor(wb)).getText();
+               String eeText = (new HPSFPropertiesExtractor(excelExt)).getText();
+               
+               assertEquals(fsText, hwText);
+               assertEquals(fsText, eeText);
+               
+               assertTrue(fsText.indexOf("AUTHOR = marshall") > -1);
+               assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1);
+    }
+}