]> source.dussan.org Git - poi.git/commitdiff
Shuffle the common text extractor stuff from scratchpad to ooxml, to match the compil...
authorNick Burch <nick@apache.org>
Wed, 9 Apr 2008 09:20:20 +0000 (09:20 +0000)
committerNick Burch <nick@apache.org>
Wed, 9 Apr 2008 09:20:20 +0000 (09:20 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@646239 13f79535-47bb-0310-9956-ffa450edef68

src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java [new file with mode: 0644]
src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java [deleted file]
src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java [deleted file]

diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
new file mode 100644 (file)
index 0000000..318b68d
--- /dev/null
@@ -0,0 +1,128 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+import java.util.Iterator;
+
+import org.openxml4j.exceptions.InvalidFormatException;
+import org.openxml4j.exceptions.OpenXML4JException;
+import org.openxml4j.opc.Package;
+import org.openxml4j.opc.PackagePart;
+import org.openxml4j.opc.PackageRelationshipCollection;
+
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIXMLDocument;
+import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.Entry;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.apache.poi.xwpf.XWPFDocument;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+import org.apache.xmlbeans.XmlException;
+
+/**
+ * Figures out the correct POITextExtractor for your supplied document (given as a
+ *  File, InputStream, OOXML Package or POIFSFileSystem), and returns it.
+ */
+public class ExtractorFactory {
+       public static final String CORE_DOCUMENT_REL = // relationship type looked up in createExtractor(Package) to find the core document
+               "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
+       
+       public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { // File entry point: sniffs the header, then delegates to the POIFS or Package overload
+               InputStream inp = new PushbackInputStream( 
+                       new FileInputStream(f), 8); // NOTE(review): the 8-byte pushback presumably lets the header checks un-read what they peek - confirm
+               
+               if(POIFSFileSystem.hasPOIFSHeader(inp)) { // OLE2 (POIFS) container?
+                       return createExtractor(new POIFSFileSystem(inp)); // NOTE(review): inp is not closed by this method on this path - confirm POIFSFileSystem takes care of it
+               }
+               if(POIXMLDocument.hasOOXMLHeader(inp)) { // OOXML package?
+                       inp.close(); // the stream was only needed for sniffing; the package is opened from the file path instead
+                       return createExtractor(Package.open(f.toString()));
+               }
+               throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file"); // NOTE(review): inp is never closed before this throw - the FileInputStream is left open on this path
+       }
+       
+       public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { // stream entry point: same sniff-and-delegate logic, but the document is loaded from inp itself
+               // Figure out the kind of stream
+               // If clearly doesn't do mark/reset, wrap up
+               if(! inp.markSupported()) {
+                       inp = new PushbackInputStream(inp, 8);
+               }
+               
+               if(POIFSFileSystem.hasPOIFSHeader(inp)) {
+                       return createExtractor(new POIFSFileSystem(inp));
+               }
+               if(POIXMLDocument.hasOOXMLHeader(inp)) {
+                       return createExtractor(Package.open(inp));
+               }
+               throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
+       }
+       
+       public static POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException { // OOXML entry point: dispatches on the content type of the package's core document part
+               PackageRelationshipCollection core = 
+                       pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
+               if(core.size() != 1) { // exactly one core document relationship is required
+                       throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
+               }
+               
+               PackagePart corePart = pkg.getPart(core.getRelationship(0));
+               if(corePart.getContentType().equals(XSSFWorkbook.WORKBOOK.getContentType())) {
+                       return new XSSFExcelExtractor(pkg);
+               }
+               if(corePart.getContentType().equals(XWPFDocument.MAIN_CONTENT_TYPE)) {
+                       return new XWPFWordExtractor(pkg);
+               }
+               if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
+                       return new XSLFPowerPointExtractor(pkg);
+               }
+               throw new IllegalArgumentException("No supported documents found in the OOXML package"); // the core part had none of the three content types checked above
+       }
+       
+       public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException { // OLE2 entry point: dispatches on the names of the root directory entries
+               // Look for certain entries in the stream, to figure it
+               //  out from
+               for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) { // the first root entry with a recognised name decides the extractor
+                       Entry entry = (Entry)entries.next();
+                       
+                       if(entry.getName().equals("Workbook")) {
+                               return new ExcelExtractor(fs);
+                       }
+                       if(entry.getName().equals("WordDocument")) {
+                               return new WordExtractor(fs);
+                       }
+                       if(entry.getName().equals("PowerPoint Document")) {
+                               return new PowerPointExtractor(fs);
+                       }
+                       if(entry.getName().equals("VisioDocument")) {
+                               return new VisioTextExtractor(fs);
+                       }
+               }
+               throw new IllegalArgumentException("No supported documents found in the OLE2 stream"); // no root entry matched any of the four names above
+       }
+}
diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
new file mode 100644 (file)
index 0000000..762eb92
--- /dev/null
@@ -0,0 +1,303 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+
+import junit.framework.TestCase;
+
+import org.openxml4j.exceptions.InvalidOperationException;
+import org.openxml4j.opc.Package;
+
+/**
+ * Test that the extractor factory plays nicely: each createExtractor overload must return the matching extractor type, and some text, for the sample files.
+ */
+public class TestExtractorFactory extends TestCase {
+       private String excel_dir; // test data directories, read from system properties in setUp()
+       private String word_dir;
+       private String powerpoint_dir;
+       private String visio_dir;
+       
+       private File txt; // plain-text sample: the negative case in every test
+       
+       private File xls;
+       private File xlsx;
+       
+       private File doc;
+       private File docx;
+
+       private File ppt;
+       private File pptx;
+       
+       private File vsd;
+
+       protected void setUp() throws Exception { // resolves the sample files from the per-format test data directories
+               super.setUp();
+               
+               excel_dir = System.getProperty("HSSF.testdata.path"); // NOTE: getProperty returns null when a property is unset; new File(null, name) then resolves name on its own
+               word_dir = System.getProperty("HWPF.testdata.path");
+               powerpoint_dir = System.getProperty("HSLF.testdata.path");
+               visio_dir = System.getProperty("HDGF.testdata.path");
+               
+               txt = new File(powerpoint_dir, "SampleShow.txt");
+               
+               xls = new File(excel_dir, "SampleSS.xls");
+               xlsx = new File(excel_dir, "SampleSS.xlsx");
+               
+               doc = new File(word_dir, "SampleDoc.doc");
+               docx = new File(word_dir, "SampleDoc.docx");
+               
+               ppt = new File(powerpoint_dir, "SampleShow.ppt");
+               pptx = new File(powerpoint_dir, "SampleShow.pptx");
+               
+               vsd = new File(visio_dir, "Test_Visio-Some_Random_Text.vsd");
+       }
+
+       public void testFile() throws Exception { // exercises the createExtractor(File) overload
+               // Excel
+               assertTrue(
+                               ExtractorFactory.createExtractor(xls)
+                               instanceof ExcelExtractor
+               );
+               assertTrue( // a second, fresh extractor is created just for the text-length sanity check
+                               ExtractorFactory.createExtractor(xls).getText().length() > 200
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(xlsx)
+                               instanceof XSSFExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(xlsx).getText().length() > 200
+               );
+               
+               // Word
+               assertTrue(
+                               ExtractorFactory.createExtractor(doc)
+                               instanceof WordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(doc).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(docx)
+                               instanceof XWPFWordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(docx).getText().length() > 120
+               );
+               
+               // PowerPoint
+               assertTrue(
+                               ExtractorFactory.createExtractor(ppt)
+                               instanceof PowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(ppt).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(pptx)
+                               instanceof XSLFPowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(pptx).getText().length() > 120
+               );
+               
+               // Visio
+               assertTrue(
+                               ExtractorFactory.createExtractor(vsd)
+                               instanceof VisioTextExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(vsd).getText().length() > 50
+               );
+               
+               // Text
+               try { // plain text is neither OLE2 nor OOXML, so the factory must reject it
+                       ExtractorFactory.createExtractor(txt);
+                       fail(); // reached only if no exception was thrown
+               } catch(IllegalArgumentException e) {
+                       // Good
+               }
+       }
+       
+       public void testInputStream() throws Exception { // exercises the createExtractor(InputStream) overload; NOTE(review): the FileInputStreams opened here are never closed by the test
+               // Excel
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xls))
+                               instanceof ExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xlsx))
+                               instanceof XSSFExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
+               );
+               
+               // Word
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(doc))
+                               instanceof WordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(docx))
+                               instanceof XWPFWordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
+               );
+               
+               // PowerPoint
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(ppt))
+                               instanceof PowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(pptx))
+                               instanceof XSLFPowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
+               );
+               
+               // Visio
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(vsd))
+                               instanceof VisioTextExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
+               );
+               
+               // Text
+               try { // a plain-text stream has neither header, so the factory must reject it
+                       ExtractorFactory.createExtractor(new FileInputStream(txt));
+                       fail(); // reached only if no exception was thrown
+               } catch(IllegalArgumentException e) {
+                       // Good
+               }
+       }
+       
+       public void testPOIFS() throws Exception { // exercises the createExtractor(POIFSFileSystem) overload, so only the OLE2 samples are used
+               // Excel
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
+                               instanceof ExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
+               );
+               
+               // Word
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
+                               instanceof WordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
+               );
+               
+               // PowerPoint
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
+                               instanceof PowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
+               );
+               
+               // Visio
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
+                               instanceof VisioTextExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
+               );
+               
+               // Text
+               try {
+                       ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
+                       fail(); // reached only if no exception was thrown
+               } catch(IOException e) { // NOTE(review): presumably thrown by the POIFSFileSystem constructor, before the factory is reached - confirm
+                       // Good
+               }
+       }
+       
+       public void testPackage() throws Exception { // exercises the createExtractor(Package) overload, so only the OOXML samples are used
+               // Excel
+               assertTrue(
+                               ExtractorFactory.createExtractor(Package.open(xlsx.toString()))
+                               instanceof XSSFExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(Package.open(xlsx.toString())).getText().length() > 200
+               );
+               
+               // Word
+               assertTrue(
+                               ExtractorFactory.createExtractor(Package.open(docx.toString()))
+                               instanceof XWPFWordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(Package.open(docx.toString())).getText().length() > 120
+               );
+               
+               // PowerPoint
+               assertTrue(
+                               ExtractorFactory.createExtractor(Package.open(pptx.toString()))
+                               instanceof XSLFPowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(Package.open(pptx.toString())).getText().length() > 120
+               );
+               
+               // Text
+               try {
+                       ExtractorFactory.createExtractor(Package.open(txt.toString()));
+                       fail(); // reached only if no exception was thrown
+               } catch(InvalidOperationException e) { // NOTE(review): presumably raised while the plain-text file is opened or read as a Package - confirm
+                       // Good
+               }
+       }
+}
diff --git a/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
deleted file mode 100644 (file)
index 318b68d..0000000
+++ /dev/null
@@ -1,128 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.io.PushbackInputStream;
-import java.util.Iterator;
-
-import org.openxml4j.exceptions.InvalidFormatException;
-import org.openxml4j.exceptions.OpenXML4JException;
-import org.openxml4j.opc.Package;
-import org.openxml4j.opc.PackagePart;
-import org.openxml4j.opc.PackageRelationshipCollection;
-
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.POIXMLDocument;
-import org.apache.poi.POIXMLTextExtractor;
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.poifs.filesystem.Entry;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.xslf.XSLFSlideShow;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xssf.usermodel.XSSFWorkbook;
-import org.apache.poi.xwpf.XWPFDocument;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-import org.apache.xmlbeans.XmlException;
-
-/**
- * Figures out the correct POITextExtractor for your supplied
- *  document, and returns it.
- */
-public class ExtractorFactory {
-       public static final String CORE_DOCUMENT_REL =
-               "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
-       
-       public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-               InputStream inp = new PushbackInputStream( 
-                       new FileInputStream(f), 8);
-               
-               if(POIFSFileSystem.hasPOIFSHeader(inp)) {
-                       return createExtractor(new POIFSFileSystem(inp));
-               }
-               if(POIXMLDocument.hasOOXMLHeader(inp)) {
-                       inp.close();
-                       return createExtractor(Package.open(f.toString()));
-               }
-               throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
-       }
-       
-       public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-               // Figure out the kind of stream
-               // If clearly doesn't do mark/reset, wrap up
-               if(! inp.markSupported()) {
-                       inp = new PushbackInputStream(inp, 8);
-               }
-               
-               if(POIFSFileSystem.hasPOIFSHeader(inp)) {
-                       return createExtractor(new POIFSFileSystem(inp));
-               }
-               if(POIXMLDocument.hasOOXMLHeader(inp)) {
-                       return createExtractor(Package.open(inp));
-               }
-               throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
-       }
-       
-       public static POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
-               PackageRelationshipCollection core = 
-                       pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
-               if(core.size() != 1) {
-                       throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size());
-               }
-               
-               PackagePart corePart = pkg.getPart(core.getRelationship(0));
-               if(corePart.getContentType().equals(XSSFWorkbook.WORKBOOK.getContentType())) {
-                       return new XSSFExcelExtractor(pkg);
-               }
-               if(corePart.getContentType().equals(XWPFDocument.MAIN_CONTENT_TYPE)) {
-                       return new XWPFWordExtractor(pkg);
-               }
-               if(corePart.getContentType().equals(XSLFSlideShow.MAIN_CONTENT_TYPE)) {
-                       return new XSLFPowerPointExtractor(pkg);
-               }
-               throw new IllegalArgumentException("No supported documents found in the OOXML package");
-       }
-       
-       public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
-               // Look for certain entries in the stream, to figure it
-               //  out from
-               for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
-                       Entry entry = (Entry)entries.next();
-                       
-                       if(entry.getName().equals("Workbook")) {
-                               return new ExcelExtractor(fs);
-                       }
-                       if(entry.getName().equals("WordDocument")) {
-                               return new WordExtractor(fs);
-                       }
-                       if(entry.getName().equals("PowerPoint Document")) {
-                               return new PowerPointExtractor(fs);
-                       }
-                       if(entry.getName().equals("VisioDocument")) {
-                               return new VisioTextExtractor(fs);
-                       }
-               }
-               throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
-       }
-}
diff --git a/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
deleted file mode 100644 (file)
index 762eb92..0000000
+++ /dev/null
@@ -1,303 +0,0 @@
-/* ====================================================================
-   Licensed to the Apache Software Foundation (ASF) under one or more
-   contributor license agreements.  See the NOTICE file distributed with
-   this work for additional information regarding copyright ownership.
-   The ASF licenses this file to You under the Apache License, Version 2.0
-   (the "License"); you may not use this file except in compliance with
-   the License.  You may obtain a copy of the License at
-
-       http://www.apache.org/licenses/LICENSE-2.0
-
-   Unless required by applicable law or agreed to in writing, software
-   distributed under the License is distributed on an "AS IS" BASIS,
-   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-   See the License for the specific language governing permissions and
-   limitations under the License.
-==================================================================== */
-package org.apache.poi.extractor;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-
-import org.apache.poi.hdgf.extractor.VisioTextExtractor;
-import org.apache.poi.hslf.extractor.PowerPointExtractor;
-import org.apache.poi.hssf.extractor.ExcelExtractor;
-import org.apache.poi.hwpf.extractor.WordExtractor;
-import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
-import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
-
-import junit.framework.TestCase;
-
-import org.openxml4j.exceptions.InvalidOperationException;
-import org.openxml4j.opc.Package;
-
-/**
- * Test that the extractor factory plays nicely
- */
-public class TestExtractorFactory extends TestCase {
-       private String excel_dir;
-       private String word_dir;
-       private String powerpoint_dir;
-       private String visio_dir;
-       
-       private File txt;
-       
-       private File xls;
-       private File xlsx;
-       
-       private File doc;
-       private File docx;
-
-       private File ppt;
-       private File pptx;
-       
-       private File vsd;
-
-       protected void setUp() throws Exception {
-               super.setUp();
-               
-               excel_dir = System.getProperty("HSSF.testdata.path");
-               word_dir = System.getProperty("HWPF.testdata.path");
-               powerpoint_dir = System.getProperty("HSLF.testdata.path");
-               visio_dir = System.getProperty("HDGF.testdata.path");
-               
-               txt = new File(powerpoint_dir, "SampleShow.txt");
-               
-               xls = new File(excel_dir, "SampleSS.xls");
-               xlsx = new File(excel_dir, "SampleSS.xlsx");
-               
-               doc = new File(word_dir, "SampleDoc.doc");
-               docx = new File(word_dir, "SampleDoc.docx");
-               
-               ppt = new File(powerpoint_dir, "SampleShow.ppt");
-               pptx = new File(powerpoint_dir, "SampleShow.pptx");
-               
-               vsd = new File(visio_dir, "Test_Visio-Some_Random_Text.vsd");
-       }
-
-       public void testFile() throws Exception {
-               // Excel
-               assertTrue(
-                               ExtractorFactory.createExtractor(xls)
-                               instanceof ExcelExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(xls).getText().length() > 200
-               );
-               
-               assertTrue(
-                               ExtractorFactory.createExtractor(xlsx)
-                               instanceof XSSFExcelExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(xlsx).getText().length() > 200
-               );
-               
-               // Word
-               assertTrue(
-                               ExtractorFactory.createExtractor(doc)
-                               instanceof WordExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(doc).getText().length() > 120
-               );
-               
-               assertTrue(
-                               ExtractorFactory.createExtractor(docx)
-                               instanceof XWPFWordExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(docx).getText().length() > 120
-               );
-               
-               // PowerPoint
-               assertTrue(
-                               ExtractorFactory.createExtractor(ppt)
-                               instanceof PowerPointExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(ppt).getText().length() > 120
-               );
-               
-               assertTrue(
-                               ExtractorFactory.createExtractor(pptx)
-                               instanceof XSLFPowerPointExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(pptx).getText().length() > 120
-               );
-               
-               // Visio
-               assertTrue(
-                               ExtractorFactory.createExtractor(vsd)
-                               instanceof VisioTextExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(vsd).getText().length() > 50
-               );
-               
-               // Text
-               try {
-                       ExtractorFactory.createExtractor(txt);
-                       fail();
-               } catch(IllegalArgumentException e) {
-                       // Good
-               }
-       }
-       
-       public void testInputStream() throws Exception {
-               // Excel
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(xls))
-                               instanceof ExcelExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
-               );
-               
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(xlsx))
-                               instanceof XSSFExcelExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
-               );
-               
-               // Word
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(doc))
-                               instanceof WordExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
-               );
-               
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(docx))
-                               instanceof XWPFWordExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
-               );
-               
-               // PowerPoint
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(ppt))
-                               instanceof PowerPointExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
-               );
-               
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(pptx))
-                               instanceof XSLFPowerPointExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
-               );
-               
-               // Visio
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(vsd))
-                               instanceof VisioTextExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
-               );
-               
-               // Text
-               try {
-                       ExtractorFactory.createExtractor(new FileInputStream(txt));
-                       fail();
-               } catch(IllegalArgumentException e) {
-                       // Good
-               }
-       }
-       
-       public void testPOIFS() throws Exception {
-               // Excel
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
-                               instanceof ExcelExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
-               );
-               
-               // Word
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
-                               instanceof WordExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
-               );
-               
-               // PowerPoint
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
-                               instanceof PowerPointExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
-               );
-               
-               // Visio
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
-                               instanceof VisioTextExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
-               );
-               
-               // Text
-               try {
-                       ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
-                       fail();
-               } catch(IOException e) {
-                       // Good
-               }
-       }
-       
-       public void testPackage() throws Exception {
-               // Excel
-               assertTrue(
-                               ExtractorFactory.createExtractor(Package.open(xlsx.toString()))
-                               instanceof XSSFExcelExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(Package.open(xlsx.toString())).getText().length() > 200
-               );
-               
-               // Word
-               assertTrue(
-                               ExtractorFactory.createExtractor(Package.open(docx.toString()))
-                               instanceof XWPFWordExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(Package.open(docx.toString())).getText().length() > 120
-               );
-               
-               // PowerPoint
-               assertTrue(
-                               ExtractorFactory.createExtractor(Package.open(pptx.toString()))
-                               instanceof XSLFPowerPointExtractor
-               );
-               assertTrue(
-                               ExtractorFactory.createExtractor(Package.open(pptx.toString())).getText().length() > 120
-               );
-               
-               // Text
-               try {
-                       ExtractorFactory.createExtractor(Package.open(txt.toString()));
-                       fail();
-               } catch(InvalidOperationException e) {
-                       // Good
-               }
-       }
-}