source.dussan.org Git - poi.git/commitdiff
More ExtractorFactory support and tests
author Nick Burch <nick@apache.org>
Tue, 8 Apr 2008 12:17:18 +0000 (12:17 +0000)
committer Nick Burch <nick@apache.org>
Tue, 8 Apr 2008 12:17:18 +0000 (12:17 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@645872 13f79535-47bb-0310-9956-ffa450edef68

src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java
src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java

index d6c7a1810d7872d32156a2bcee4676c7ae60ae0f..318b68d8f06f407c78e80d9d42c48db728256bc9 100644 (file)
@@ -32,6 +32,7 @@ import org.openxml4j.opc.PackageRelationshipCollection;
 import org.apache.poi.POITextExtractor;
 import org.apache.poi.POIXMLDocument;
 import org.apache.poi.POIXMLTextExtractor;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.hwpf.extractor.WordExtractor;
@@ -109,7 +110,6 @@ public class ExtractorFactory {
                for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) {
                        Entry entry = (Entry)entries.next();
                        
-                       System.err.println(entry.getName());
                        if(entry.getName().equals("Workbook")) {
                                return new ExcelExtractor(fs);
                        }
@@ -119,7 +119,9 @@ public class ExtractorFactory {
                        if(entry.getName().equals("PowerPoint Document")) {
                                return new PowerPointExtractor(fs);
                        }
-                       // TODO - visio
+                       if(entry.getName().equals("VisioDocument")) {
+                               return new VisioTextExtractor(fs);
+                       }
                }
                throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
        }
index 40f9462c58f077eaab6059347d35acd73546b93c..e18b7e3989bb1640309a628e372171d3cd5afe89 100644 (file)
 package org.apache.poi.extractor;
 
 import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
 
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
 import org.apache.poi.hslf.extractor.PowerPointExtractor;
 import org.apache.poi.hssf.extractor.ExcelExtractor;
 import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
 import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
 import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
 import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
@@ -34,6 +38,7 @@ public class TestExtractorFactory extends TestCase {
        private String excel_dir;
        private String word_dir;
        private String powerpoint_dir;
+       private String visio_dir;
        
        private File txt;
        
@@ -45,6 +50,8 @@ public class TestExtractorFactory extends TestCase {
 
        private File ppt;
        private File pptx;
+       
+       private File vsd;
 
        protected void setUp() throws Exception {
                super.setUp();
@@ -52,8 +59,9 @@ public class TestExtractorFactory extends TestCase {
                excel_dir = System.getProperty("HSSF.testdata.path");
                word_dir = System.getProperty("HWPF.testdata.path");
                powerpoint_dir = System.getProperty("HSLF.testdata.path");
+               visio_dir = System.getProperty("HDGF.testdata.path");
                
-               txt = new File(excel_dir, "SampleSS.txt");
+               txt = new File(powerpoint_dir, "SampleShow.txt");
                
                xls = new File(excel_dir, "SampleSS.xls");
                xlsx = new File(excel_dir, "SampleSS.xlsx");
@@ -63,6 +71,8 @@ public class TestExtractorFactory extends TestCase {
                
                ppt = new File(powerpoint_dir, "SampleShow.ppt");
                pptx = new File(powerpoint_dir, "SampleShow.pptx");
+               
+               vsd = new File(visio_dir, "Test_Visio-Some_Random_Text.vsd");
        }
 
        public void testFile() throws Exception {
@@ -118,7 +128,13 @@ public class TestExtractorFactory extends TestCase {
                );
                
                // Visio
-               // TODO
+               assertTrue(
+                               ExtractorFactory.createExtractor(vsd)
+                               instanceof VisioTextExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(vsd).getText().length() > 50
+               );
                
                // Text
                try {
@@ -128,12 +144,123 @@ public class TestExtractorFactory extends TestCase {
                        // Good
                }
        }
+       
        public void testInputStream() throws Exception {
+               // Excel
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xls))
+                               instanceof ExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xlsx))
+                               instanceof XSSFExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200
+               );
+               
+               // Word
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(doc))
+                               instanceof WordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(docx))
+                               instanceof XWPFWordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120
+               );
+               
+               // PowerPoint
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(ppt))
+                               instanceof PowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120
+               );
+               
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(pptx))
+                               instanceof XSLFPowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120
+               );
                
+               // Visio
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(vsd))
+                               instanceof VisioTextExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50
+               );
+               
+               // Text
+               try {
+                       ExtractorFactory.createExtractor(new FileInputStream(txt));
+                       fail();
+               } catch(IllegalArgumentException e) {
+                       // Good
+               }
        }
+       
        public void testPOIFS() throws Exception {
+               // Excel
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls)))
+                               instanceof ExcelExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200
+               );
                
+               // Word
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc)))
+                               instanceof WordExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120
+               );
+               
+               // PowerPoint
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt)))
+                               instanceof PowerPointExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120
+               );
+               
+               // Visio
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd)))
+                               instanceof VisioTextExtractor
+               );
+               assertTrue(
+                               ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50
+               );
+               
+               // Text
+               try {
+                       ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt)));
+                       fail();
+               } catch(IOException e) {
+                       // Good
+               }
        }
+       
        public void testPackage() throws Exception {
                
        }