aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--src/documentation/content/xdocs/changes.xml2
-rw-r--r--src/documentation/content/xdocs/hpsf/how-to.xml2
-rw-r--r--src/documentation/content/xdocs/status.xml2
-rw-r--r--src/java/org/apache/poi/POIOLE2TextExtractor.java9
-rw-r--r--src/java/org/apache/poi/POITextExtractor.java7
-rw-r--r--src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java (renamed from src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java)15
-rw-r--r--src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java (renamed from src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java)14
7 files changed, 37 insertions, 14 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml
index 6a0cae2672..b1cfff6b91 100644
--- a/src/documentation/content/xdocs/changes.xml
+++ b/src/documentation/content/xdocs/changes.xml
@@ -37,7 +37,7 @@
<!-- Don't forget to update status.xml too! -->
<release version="3.1.1-alpha1" date="2008-??-??">
- <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action>
+ <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action>
<action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action>
<action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
diff --git a/src/documentation/content/xdocs/hpsf/how-to.xml b/src/documentation/content/xdocs/hpsf/how-to.xml
index aadf753a44..964005bf24 100644
--- a/src/documentation/content/xdocs/hpsf/how-to.xml
+++ b/src/documentation/content/xdocs/hpsf/how-to.xml
@@ -95,7 +95,7 @@
<p>If all you are interested in is getting the textual content of
all the document properties, such as for full text indexing, then
take a look at
- <code>org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</code>. However,
+ <code>org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</code>. However,
if you want full access to the properties, please read on!</p>
<p>The first thing you should understand is that a Microsoft Office file is
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 0146f55e79..fbe242aa2a 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,7 +34,7 @@
<!-- Don't forget to update changes.xml too! -->
<changes>
<release version="3.1.1-alpha1" date="2008-??-??">
- <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action>
+ <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action>
<action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action>
<action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action>
<action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action>
diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java
index f5aee4cc6d..d46c7e4aad 100644
--- a/src/java/org/apache/poi/POIOLE2TextExtractor.java
+++ b/src/java/org/apache/poi/POIOLE2TextExtractor.java
@@ -18,6 +18,7 @@ package org.apache.poi;
import org.apache.poi.hpsf.DocumentSummaryInformation;
import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
/**
* Common Parent for OLE2 based Text Extractors
@@ -50,4 +51,12 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor {
public SummaryInformation getSummaryInformation() {
return document.getSummaryInformation();
}
+
+ /**
+ * Returns an HPSF powered text extractor for the
+ * document properties metadata, such as title and author.
+ */
+ public POITextExtractor getMetadataTextExtractor() {
+ return new HPSFPropertiesExtractor(this);
+ }
}
diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java
index a7ffd44197..0b69894d08 100644
--- a/src/java/org/apache/poi/POITextExtractor.java
+++ b/src/java/org/apache/poi/POITextExtractor.java
@@ -54,4 +54,11 @@ public abstract class POITextExtractor {
* @return All the text from the document
*/
public abstract String getText();
+
+ /**
+ * Returns another text extractor, which is able to
+ * output the textual content of the document
+ * metadata / properties, such as author and title.
+ */
+ public abstract POITextExtractor getMetadataTextExtractor();
}
diff --git a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
index c85f1bb04c..ecad5c05be 100644
--- a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java
+++ b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
@@ -36,14 +36,14 @@ import org.apache.poi.util.LittleEndian;
* built in and custom, returning them in
* textual form.
*/
-public class HPFSPropertiesExtractor extends POITextExtractor {
- public HPFSPropertiesExtractor(POITextExtractor mainExtractor) {
+public class HPSFPropertiesExtractor extends POITextExtractor {
+ public HPSFPropertiesExtractor(POITextExtractor mainExtractor) {
super(mainExtractor);
}
- public HPFSPropertiesExtractor(POIDocument doc) {
+ public HPSFPropertiesExtractor(POIDocument doc) {
super(doc);
}
- public HPFSPropertiesExtractor(POIFSFileSystem fs) {
+ public HPSFPropertiesExtractor(POIFSFileSystem fs) {
super(new PropertiesOnlyDocument(fs));
}
@@ -127,6 +127,13 @@ public class HPFSPropertiesExtractor extends POITextExtractor {
public String getText() {
return getSummaryInformationText() + getDocumentSummaryInformationText();
}
+
+ /**
+ * Prevent recursion!
+ */
+ public POITextExtractor getMetadataTextExtractor() {
+ throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!");
+ }
/**
* So we can get at the properties of any
diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
index 7d967874a4..3a189353d0 100644
--- a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java
+++ b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
@@ -25,7 +25,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import junit.framework.TestCase;
-public class TestHPFSPropertiesExtractor extends TestCase {
+public class TestHPSFPropertiesExtractor extends TestCase {
private String dir;
protected void setUp() throws Exception {
@@ -37,7 +37,7 @@ public class TestHPFSPropertiesExtractor extends TestCase {
POIFSFileSystem fs = new POIFSFileSystem(
new FileInputStream(new File(dir, "TestMickey.doc"))
);
- HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
+ HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
ext.getText();
// Check each bit in turn
@@ -60,7 +60,7 @@ public class TestHPFSPropertiesExtractor extends TestCase {
POIFSFileSystem fs = new POIFSFileSystem(
new FileInputStream(new File(dir, "TestUnicode.xls"))
);
- HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
+ HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
ext.getText();
// Check each bit in turn
@@ -83,7 +83,7 @@ public class TestHPFSPropertiesExtractor extends TestCase {
POIFSFileSystem fs = new POIFSFileSystem(
new FileInputStream(new File(dir, "TestMickey.doc"))
);
- HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs);
+ HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
// Custom properties are part of the document info stream
String dinfText = ext.getDocumentSummaryInformationText();
@@ -102,9 +102,9 @@ public class TestHPFSPropertiesExtractor extends TestCase {
HSSFWorkbook wb = new HSSFWorkbook(fs);
ExcelExtractor excelExt = new ExcelExtractor(wb);
- String fsText = (new HPFSPropertiesExtractor(fs)).getText();
- String hwText = (new HPFSPropertiesExtractor(wb)).getText();
- String eeText = (new HPFSPropertiesExtractor(excelExt)).getText();
+ String fsText = (new HPSFPropertiesExtractor(fs)).getText();
+ String hwText = (new HPSFPropertiesExtractor(wb)).getText();
+ String eeText = (new HPSFPropertiesExtractor(excelExt)).getText();
assertEquals(fsText, hwText);
assertEquals(fsText, eeText);