diff options
-rw-r--r-- | src/documentation/content/xdocs/changes.xml | 2 | ||||
-rw-r--r-- | src/documentation/content/xdocs/hpsf/how-to.xml | 2 | ||||
-rw-r--r-- | src/documentation/content/xdocs/status.xml | 2 | ||||
-rw-r--r-- | src/java/org/apache/poi/POIOLE2TextExtractor.java | 9 | ||||
-rw-r--r-- | src/java/org/apache/poi/POITextExtractor.java | 7 | ||||
-rw-r--r-- | src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java (renamed from src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java) | 15 | ||||
-rw-r--r-- | src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java (renamed from src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java) | 14 |
7 files changed, 37 insertions, 14 deletions
diff --git a/src/documentation/content/xdocs/changes.xml b/src/documentation/content/xdocs/changes.xml index 6a0cae2672..b1cfff6b91 100644 --- a/src/documentation/content/xdocs/changes.xml +++ b/src/documentation/content/xdocs/changes.xml @@ -37,7 +37,7 @@ <!-- Don't forget to update status.xml too! --> <release version="3.1.1-alpha1" date="2008-??-??"> - <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action> + <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action> <action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action> <action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action> <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action> diff --git a/src/documentation/content/xdocs/hpsf/how-to.xml b/src/documentation/content/xdocs/hpsf/how-to.xml index aadf753a44..964005bf24 100644 --- a/src/documentation/content/xdocs/hpsf/how-to.xml +++ b/src/documentation/content/xdocs/hpsf/how-to.xml @@ -95,7 +95,7 @@ <p>If all you are interested in is getting the textual content of all the document properties, such as for full text indexing, then take a look at - <code>org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</code>. However, + <code>org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</code>. However, if you want full access to the properties, please read on!</p> <p>The first thing you should understand is that a Microsoft Office file is diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 0146f55e79..fbe242aa2a 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,7 +34,7 @@ <!-- Don't forget to update changes.xml too! --> <changes> <release version="3.1.1-alpha1" date="2008-??-??"> - <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPFSPropertiesExtractor</action> + <action dev="POI-DEVELOPERS" type="add">New HPSF based TextExtractor for document metadata, org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor</action> <action dev="POI-DEVELOPERS" type="fix">Properly update the array of Slide's text runs in HSLF when new text shapes are added</action> <action dev="POI-DEVELOPERS" type="fix">45590 - Fix for Header/footer extraction for .ppt files saved in Office 2007</action> <action dev="POI-DEVELOPERS" type="fix">Big improvement in how HWPF handles unicode text, and more sanity checking of text ranges within HWPF</action> diff --git a/src/java/org/apache/poi/POIOLE2TextExtractor.java b/src/java/org/apache/poi/POIOLE2TextExtractor.java index f5aee4cc6d..d46c7e4aad 100644 --- a/src/java/org/apache/poi/POIOLE2TextExtractor.java +++ b/src/java/org/apache/poi/POIOLE2TextExtractor.java @@ -18,6 +18,7 @@ package org.apache.poi; import org.apache.poi.hpsf.DocumentSummaryInformation; import org.apache.poi.hpsf.SummaryInformation; +import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; /** * Common Parent for OLE2 based Text Extractors @@ -50,4 +51,12 @@ public abstract class POIOLE2TextExtractor extends POITextExtractor { public SummaryInformation getSummaryInformation() { return document.getSummaryInformation(); } + + /** + * Returns an HPSF powered text extractor for the + * document properties metadata, such as title and author. + */ + public POITextExtractor getMetadataTextExtractor() { + return new HPSFPropertiesExtractor(this); + } } diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java index a7ffd44197..0b69894d08 100644 --- a/src/java/org/apache/poi/POITextExtractor.java +++ b/src/java/org/apache/poi/POITextExtractor.java @@ -54,4 +54,11 @@ public abstract class POITextExtractor { * @return All the text from the document */ public abstract String getText(); + + /** + * Returns another text extractor, which is able to + * output the textual content of the document + * metadata / properties, such as author and title. + */ + public abstract POITextExtractor getMetadataTextExtractor(); } diff --git a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java index c85f1bb04c..ecad5c05be 100644 --- a/src/java/org/apache/poi/hpsf/extractor/HPFSPropertiesExtractor.java +++ b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java @@ -36,14 +36,14 @@ import org.apache.poi.util.LittleEndian; * build in and custom, returning them in * textual form. */ -public class HPFSPropertiesExtractor extends POITextExtractor { - public HPFSPropertiesExtractor(POITextExtractor mainExtractor) { +public class HPSFPropertiesExtractor extends POITextExtractor { + public HPSFPropertiesExtractor(POITextExtractor mainExtractor) { super(mainExtractor); } - public HPFSPropertiesExtractor(POIDocument doc) { + public HPSFPropertiesExtractor(POIDocument doc) { super(doc); } - public HPFSPropertiesExtractor(POIFSFileSystem fs) { + public HPSFPropertiesExtractor(POIFSFileSystem fs) { super(new PropertiesOnlyDocument(fs)); } @@ -127,6 +127,13 @@ public class HPFSPropertiesExtractor extends POITextExtractor { public String getText() { return getSummaryInformationText() + getDocumentSummaryInformationText(); } + + /** + * Prevent recursion! + */ + public POITextExtractor getMetadataTextExtractor() { + throw new IllegalStateException("You already have the Metadata Text Extractor, not recursing!"); + } /** * So we can get at the properties of any diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java index 7d967874a4..3a189353d0 100644 --- a/src/testcases/org/apache/poi/hpsf/extractor/TestHPFSPropertiesExtractor.java +++ b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java @@ -25,7 +25,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; import junit.framework.TestCase; -public class TestHPFSPropertiesExtractor extends TestCase { +public class TestHPSFPropertiesExtractor extends TestCase { private String dir; protected void setUp() throws Exception { @@ -37,7 +37,7 @@ public class TestHPFSPropertiesExtractor extends TestCase { POIFSFileSystem fs = new POIFSFileSystem( new FileInputStream(new File(dir, "TestMickey.doc")) ); - HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs); + HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); ext.getText(); // Check each bit in turn @@ -60,7 +60,7 @@ public class TestHPFSPropertiesExtractor extends TestCase { POIFSFileSystem fs = new POIFSFileSystem( new FileInputStream(new File(dir, "TestUnicode.xls")) ); - HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs); + HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); ext.getText(); // Check each bit in turn @@ -83,7 +83,7 @@ public class TestHPFSPropertiesExtractor extends TestCase { POIFSFileSystem fs = new POIFSFileSystem( new FileInputStream(new File(dir, "TestMickey.doc")) ); - HPFSPropertiesExtractor ext = new HPFSPropertiesExtractor(fs); + HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); // Custom properties are part of the document info stream String dinfText = ext.getDocumentSummaryInformationText(); @@ -102,9 +102,9 @@ public class TestHPFSPropertiesExtractor extends TestCase { HSSFWorkbook wb = new HSSFWorkbook(fs); ExcelExtractor excelExt = new ExcelExtractor(wb); - String fsText = (new HPFSPropertiesExtractor(fs)).getText(); - String hwText = (new HPFSPropertiesExtractor(wb)).getText(); - String eeText = (new HPFSPropertiesExtractor(excelExt)).getText(); + String fsText = (new HPSFPropertiesExtractor(fs)).getText(); + String hwText = (new HPSFPropertiesExtractor(wb)).getText(); + String eeText = (new HPSFPropertiesExtractor(excelExt)).getText(); assertEquals(fsText, hwText); assertEquals(fsText, eeText); |