about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Nick Burch <nick@apache.org> 2008-04-08 12:03:05 +0000
committer Nick Burch <nick@apache.org> 2008-04-08 12:03:05 +0000
commit 84a1727a6de1ce85a5e6e47f8070dc2b40c08c25 (patch)
tree 435f0009a7243e0a3086536d30e5fa1e2273223c
parent 586fc030ce5d36b9bdff1becfa442867dc710e37 (diff)
download poi-84a1727a6de1ce85a5e6e47f8070dc2b40c08c25.tar.gz
poi-84a1727a6de1ce85a5e6e47f8070dc2b40c08c25.zip
More ExtractorFactory support and tests
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@645870 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java 30
-rw-r--r-- src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java 140
2 files changed, 161 insertions, 9 deletions
diff --git a/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
index 548697c3c0..d6c7a1810d 100644
--- a/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
+++ b/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
@@ -32,7 +32,9 @@ import org.openxml4j.opc.PackageRelationshipCollection;
import org.apache.poi.POITextExtractor;
import org.apache.poi.POIXMLDocument;
import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.apache.poi.xslf.XSLFSlideShow;
@@ -51,20 +53,21 @@ public class ExtractorFactory {
public static final String CORE_DOCUMENT_REL =
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
- public POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
- FileInputStream finp = new FileInputStream(f);
+ /**
+ * Opens the given file, checks its header, and returns a text
+ * extractor for it. OLE2 files are loaded through POIFSFileSystem,
+ * OOXML files through Package.open.
+ *
+ * @param f the document to build an extractor for
+ * @return an extractor matching the file's format
+ * @throws IllegalArgumentException if the file is neither OLE2 nor OOXML
+ */
+ public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+ // Wrapped in an 8 byte PushbackInputStream - presumably so the
+ // header checks can un-read the bytes they look at
+ InputStream inp = new PushbackInputStream(
+ new FileInputStream(f), 8);
+ try {
- if(POIFSFileSystem.hasPOIFSHeader(finp)) {
- return createExtractor(new POIFSFileSystem(finp));
+ if(POIFSFileSystem.hasPOIFSHeader(inp)) {
+ // NOTE(review): assumes POIFSFileSystem has finished reading
+ // the stream by the time its constructor returns - confirm
+ return createExtractor(new POIFSFileSystem(inp));
}
- if(POIXMLDocument.hasOOXMLHeader(finp)) {
- finp.close();
+ if(POIXMLDocument.hasOOXMLHeader(inp)) {
+ // The package is re-opened by path, so release our handle first
+ inp.close();
return createExtractor(Package.open(f.toString()));
}
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
+ } finally {
+ // Also covers the OLE2 and unsupported-format paths, which
+ // otherwise leave the FileInputStream open. Closing twice
+ // on the OOXML path is harmless.
+ inp.close();
+ }
}
- public POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+ public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
// Figure out the kind of stream
// If clearly doesn't do mark/reset, wrap up
if(! inp.markSupported()) {
@@ -80,7 +83,7 @@ public class ExtractorFactory {
throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
}
- public POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
+ public static POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
PackageRelationshipCollection core =
pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
if(core.size() != 1) {
@@ -100,14 +103,23 @@ public class ExtractorFactory {
throw new IllegalArgumentException("No supported documents found in the OOXML package");
}
- public POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
+ /**
+ * Picks an extractor for an OLE2 file system based on the names
+ * of the entries in its root directory.
+ *
+ * @param fs the OLE2 file system to inspect
+ * @return an Excel, Word or PowerPoint extractor built on fs
+ * @throws IllegalArgumentException if no recognised entry name is found
+ */
+ public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
// Look for certain entries in the stream, to figure it
// out from
for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
Entry entry = (Entry)entries.next();
+
if(entry.getName().equals("Workbook")) {
return new ExcelExtractor(fs);
}
+ if(entry.getName().equals("WordDocument")) {
+ return new WordExtractor(fs);
+ }
+ if(entry.getName().equals("PowerPoint Document")) {
+ return new PowerPointExtractor(fs);
+ }
+ // TODO - visio
}
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
}
diff --git a/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
new file mode 100644
index 0000000000..40f9462c58
--- /dev/null
+++ b/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
@@ -0,0 +1,140 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.File;
+
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+
+import junit.framework.TestCase;
+
+/**
+ * Checks that ExtractorFactory.createExtractor returns the expected extractor type, with some text, for each sample file
+ */
+public class TestExtractorFactory extends TestCase {
+ private String excel_dir;
+ private String word_dir;
+ private String powerpoint_dir;
+
+ private File txt;
+
+ private File xls;
+ private File xlsx;
+
+ private File doc;
+ private File docx;
+
+ private File ppt;
+ private File pptx;
+
+ protected void setUp() throws Exception {
+ super.setUp();
+ // The sample file directories are read from system properties
+ excel_dir = System.getProperty("HSSF.testdata.path");
+ word_dir = System.getProperty("HWPF.testdata.path");
+ powerpoint_dir = System.getProperty("HSLF.testdata.path");
+
+ txt = new File(excel_dir, "SampleSS.txt");
+
+ xls = new File(excel_dir, "SampleSS.xls");
+ xlsx = new File(excel_dir, "SampleSS.xlsx");
+
+ doc = new File(word_dir, "SampleDoc.doc");
+ docx = new File(word_dir, "SampleDoc.docx");
+
+ ppt = new File(powerpoint_dir, "SampleShow.ppt");
+ pptx = new File(powerpoint_dir, "SampleShow.pptx");
+ }
+
+ public void testFile() throws Exception {
+ // Excel - .xls should give an ExcelExtractor, .xlsx an XSSFExcelExtractor
+ assertTrue(
+ ExtractorFactory.createExtractor(xls)
+ instanceof ExcelExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(xls).getText().length() > 200
+ );
+
+ assertTrue(
+ ExtractorFactory.createExtractor(xlsx)
+ instanceof XSSFExcelExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(xlsx).getText().length() > 200
+ );
+
+ // Word - .doc should give a WordExtractor, .docx an XWPFWordExtractor
+ assertTrue(
+ ExtractorFactory.createExtractor(doc)
+ instanceof WordExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(doc).getText().length() > 120
+ );
+
+ assertTrue(
+ ExtractorFactory.createExtractor(docx)
+ instanceof XWPFWordExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(docx).getText().length() > 120
+ );
+
+ // PowerPoint - .ppt should give a PowerPointExtractor, .pptx an XSLFPowerPointExtractor
+ assertTrue(
+ ExtractorFactory.createExtractor(ppt)
+ instanceof PowerPointExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(ppt).getText().length() > 120
+ );
+
+ assertTrue(
+ ExtractorFactory.createExtractor(pptx)
+ instanceof XSLFPowerPointExtractor
+ );
+ assertTrue(
+ ExtractorFactory.createExtractor(pptx).getText().length() > 120
+ );
+
+ // Visio
+ // TODO - no Visio assertions yet
+
+ // Text - the .txt sample must be rejected with an IllegalArgumentException
+ try {
+ ExtractorFactory.createExtractor(txt);
+ fail();
+ } catch(IllegalArgumentException e) {
+ // Expected - the factory refused the .txt sample
+ }
+ }
+ public void testInputStream() throws Exception {
+ // TODO - empty placeholder, nothing is asserted yet
+ }
+ public void testPOIFS() throws Exception {
+ // TODO - empty placeholder, nothing is asserted yet
+ }
+ public void testPackage() throws Exception {
+ // TODO - empty placeholder, nothing is asserted yet
+ }
+}