source.dussan.org Git - poi.git/commitdiff
More ExtractorFactory support and tests
author: Nick Burch <nick@apache.org>
Tue, 8 Apr 2008 12:03:05 +0000 (12:03 +0000)
committer: Nick Burch <nick@apache.org>
Tue, 8 Apr 2008 12:03:05 +0000 (12:03 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@645870 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java [new file with mode: 0644]

index 548697c3c0234370d9aaebac5822430c38ca0887..d6c7a1810d7872d32156a2bcee4676c7ae60ae0f 100644 (file)
@@ -32,7 +32,9 @@ import org.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
 import org.apache.poi.poifs.filesystem.Entry;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.xslf.XSLFSlideShow;
@@ -51,20 +53,21 @@ public class ExtractorFactory {
        public static final String CORE_DOCUMENT_REL =
                "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument";
        
-       public POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
-               FileInputStream finp = new FileInputStream(f);
+       public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+               InputStream inp = new PushbackInputStream( 
+                       new FileInputStream(f), 8);
                
-               if(POIFSFileSystem.hasPOIFSHeader(finp)) {
-                       return createExtractor(new POIFSFileSystem(finp));
+               if(POIFSFileSystem.hasPOIFSHeader(inp)) {
+                       return createExtractor(new POIFSFileSystem(inp));
                }
-               if(POIXMLDocument.hasOOXMLHeader(finp)) {
-                       finp.close();
+               if(POIXMLDocument.hasOOXMLHeader(inp)) {
+                       inp.close();
                        return createExtractor(Package.open(f.toString()));
                }
                throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
        }
        
-       public POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
+       public static POITextExtractor createExtractor(InputStream inp) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
                // Figure out the kind of stream
                // If clearly doesn't do mark/reset, wrap up
                if(! inp.markSupported()) {
@@ -80,7 +83,7 @@ public class ExtractorFactory {
                throw new IllegalArgumentException("Your InputStream was neither an OLE2 stream, nor an OOXML stream");
        }
        
-       public POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
+       public static POIXMLTextExtractor createExtractor(Package pkg) throws IOException, OpenXML4JException, XmlException {
                PackageRelationshipCollection core = 
                        pkg.getRelationshipsByType(CORE_DOCUMENT_REL);
                if(core.size() != 1) {
@@ -100,14 +103,23 @@ public class ExtractorFactory {
                throw new IllegalArgumentException("No supported documents found in the OOXML package");
        }
        
-       public POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
+       public static POITextExtractor createExtractor(POIFSFileSystem fs) throws IOException {
                // Look for certain entries in the stream, to figure it
                //  out from
                for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
                        Entry entry = (Entry)entries.next();
+                       
+                       System.err.println(entry.getName());
                        if(entry.getName().equals("Workbook")) {
                                return new ExcelExtractor(fs);
                        }
+                       if(entry.getName().equals("WordDocument")) {
+                               return new WordExtractor(fs);
+                       }
+                       if(entry.getName().equals("PowerPoint Document")) {
+                               return new PowerPointExtractor(fs);
+                       }
+                       // TODO - visio
                }
                throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
        }
diff --git a/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java
new file mode 100644 (file)
index 0000000..40f9462
--- /dev/null
@@ -0,0 +1,140 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.extractor;
+
+import java.io.File;
+
+import org.apache.poi.hslf.extractor.PowerPointExtractor;
+import org.apache.poi.hssf.extractor.ExcelExtractor;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
+import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
+import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
+
+import junit.framework.TestCase;
+
+/**
+ * Tests that ExtractorFactory.createExtractor(File) returns the expected extractor type for each sample file.
+ */
+public class TestExtractorFactory extends TestCase {
+       private String excel_dir;
+       private String word_dir;
+       private String powerpoint_dir;
+       
+       private File txt;
+       
+       private File xls;
+       private File xlsx;
+       
+       private File doc;
+       private File docx;
+
+       private File ppt;
+       private File pptx;
+
+       protected void setUp() throws Exception {
+               super.setUp();
+               // Sample-file directories are read from system properties
+               excel_dir = System.getProperty("HSSF.testdata.path");
+               word_dir = System.getProperty("HWPF.testdata.path");
+               powerpoint_dir = System.getProperty("HSLF.testdata.path");
+               // The .txt sample is the unsupported input for the negative case in testFile()
+               txt = new File(excel_dir, "SampleSS.txt");
+               
+               xls = new File(excel_dir, "SampleSS.xls");
+               xlsx = new File(excel_dir, "SampleSS.xlsx");
+               
+               doc = new File(word_dir, "SampleDoc.doc");
+               docx = new File(word_dir, "SampleDoc.docx");
+               
+               ppt = new File(powerpoint_dir, "SampleShow.ppt");
+               pptx = new File(powerpoint_dir, "SampleShow.pptx");
+       }
+
+       public void testFile() throws Exception {
+               // Excel: .xls -> ExcelExtractor, .xlsx -> XSSFExcelExtractor; text must exceed 200 chars
+               assertTrue(
+                               ExtractorFactory.createExtractor(xls)
+                               instanceof ExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(xls).getText().length() > 200
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(xlsx)
+                               instanceof XSSFExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(xlsx).getText().length() > 200
+               );
+               
+               // Word: .doc -> WordExtractor, .docx -> XWPFWordExtractor; text must exceed 120 chars
+               assertTrue(
+                               ExtractorFactory.createExtractor(doc)
+                               instanceof WordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(doc).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(docx)
+                               instanceof XWPFWordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(docx).getText().length() > 120
+               );
+               
+               // PowerPoint: .ppt -> PowerPointExtractor, .pptx -> XSLFPowerPointExtractor; text must exceed 120 chars
+               assertTrue(
+                               ExtractorFactory.createExtractor(ppt)
+                               instanceof PowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(ppt).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(pptx)
+                               instanceof XSLFPowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(pptx).getText().length() > 120
+               );
+               
+               // Visio
+               // TODO - no Visio sample is checked here yet
+               
+               // Plain text: the factory is expected to reject it with IllegalArgumentException
+               try {
+                       ExtractorFactory.createExtractor(txt);
+                       fail();
+               } catch(IllegalArgumentException e) {
+                       // Good - the unsupported file was rejected as expected
+               }
+       }
+       public void testInputStream() throws Exception {
+               // Empty placeholder: nothing is asserted here yet
+       }
+       public void testPOIFS() throws Exception {
+               // Empty placeholder: nothing is asserted here yet
+       }
+       public void testPackage() throws Exception {
+               // Empty placeholder: nothing is asserted here yet
+       }
+}