From 721e5726624988d98c412ec9c3cd72e6c75d07af Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 8 Apr 2008 12:17:18 +0000 Subject: [PATCH] More ExtractorFactory support and tests git-svn-id: https://svn.apache.org/repos/asf/poi/branches/ooxml@645872 13f79535-47bb-0310-9956-ffa450edef68 --- .../poi/extractor/ExtractorFactory.java | 6 +- .../poi/extractor/TestExtractorFactory.java | 131 +++++++++++++++++- 2 files changed, 133 insertions(+), 4 deletions(-) diff --git a/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java index d6c7a1810d..318b68d8f0 100644 --- a/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/scratchpad/src/org/apache/poi/extractor/ExtractorFactory.java @@ -32,6 +32,7 @@ import org.openxml4j.opc.PackageRelationshipCollection; import org.apache.poi.POITextExtractor; import org.apache.poi.POIXMLDocument; import org.apache.poi.POIXMLTextExtractor; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hslf.extractor.PowerPointExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; @@ -109,7 +110,6 @@ public class ExtractorFactory { for(Iterator entries = fs.getRoot().getEntries(); entries.hasNext(); ) { Entry entry = (Entry)entries.next(); - System.err.println(entry.getName()); if(entry.getName().equals("Workbook")) { return new ExcelExtractor(fs); } @@ -119,7 +119,9 @@ public class ExtractorFactory { if(entry.getName().equals("PowerPoint Document")) { return new PowerPointExtractor(fs); } - // TODO - visio + if(entry.getName().equals("VisioDocument")) { + return new VisioTextExtractor(fs); + } } throw new IllegalArgumentException("No supported documents found in the OLE2 stream"); } diff --git a/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 40f9462c58..e18b7e3989 100644 --- a/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/scratchpad/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -17,10 +17,14 @@ package org.apache.poi.extractor; import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hslf.extractor.PowerPointExtractor; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xssf.extractor.XSSFExcelExtractor; import org.apache.poi.xwpf.extractor.XWPFWordExtractor; @@ -34,6 +38,7 @@ public class TestExtractorFactory extends TestCase { private String excel_dir; private String word_dir; private String powerpoint_dir; + private String visio_dir; private File txt; @@ -45,6 +50,8 @@ public class TestExtractorFactory extends TestCase { private File ppt; private File pptx; + + private File vsd; protected void setUp() throws Exception { super.setUp(); @@ -52,8 +59,9 @@ public class TestExtractorFactory extends TestCase { excel_dir = System.getProperty("HSSF.testdata.path"); word_dir = System.getProperty("HWPF.testdata.path"); powerpoint_dir = System.getProperty("HSLF.testdata.path"); + visio_dir = System.getProperty("HDGF.testdata.path"); - txt = new File(excel_dir, "SampleSS.txt"); + txt = new File(powerpoint_dir, "SampleShow.txt"); xls = new File(excel_dir, "SampleSS.xls"); xlsx = new File(excel_dir, "SampleSS.xlsx"); @@ -63,6 +71,8 @@ public class TestExtractorFactory extends TestCase { ppt = new File(powerpoint_dir, "SampleShow.ppt"); pptx = new File(powerpoint_dir, "SampleShow.pptx"); + + vsd = new File(visio_dir, "Test_Visio-Some_Random_Text.vsd"); } public void testFile() throws Exception { @@ -118,7 +128,13 @@ public class TestExtractorFactory extends TestCase { ); // Visio - // TODO + assertTrue( + ExtractorFactory.createExtractor(vsd) + instanceof VisioTextExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(vsd).getText().length() > 50 + ); // Text try { @@ -128,12 +144,123 @@ public class TestExtractorFactory extends TestCase { // Good } } + public void testInputStream() throws Exception { + // Excel + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(xls)) + instanceof ExcelExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(xls)).getText().length() > 200 + ); + + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(xlsx)) + instanceof XSSFExcelExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200 + ); + + // Word + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(doc)) + instanceof WordExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(doc)).getText().length() > 120 + ); + + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(docx)) + instanceof XWPFWordExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(docx)).getText().length() > 120 + ); + + // PowerPoint + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(ppt)) + instanceof PowerPointExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(ppt)).getText().length() > 120 + ); + + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(pptx)) + instanceof XSLFPowerPointExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(pptx)).getText().length() > 120 + ); + // Visio + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(vsd)) + instanceof VisioTextExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50 + ); + + // Text + try { + ExtractorFactory.createExtractor(new FileInputStream(txt)); + fail(); + } catch(IllegalArgumentException e) { + // Good + } } + public void testPOIFS() throws Exception { + // Excel + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))) + instanceof ExcelExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(xls))).getText().length() > 200 + ); + // Word + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))) + instanceof WordExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(doc))).getText().length() > 120 + ); + + // PowerPoint + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))) + instanceof PowerPointExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(ppt))).getText().length() > 120 + ); + + // Visio + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))) + instanceof VisioTextExtractor + ); + assertTrue( + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(vsd))).getText().length() > 50 + ); + + // Text + try { + ExtractorFactory.createExtractor(new POIFSFileSystem(new FileInputStream(txt))); + fail(); + } catch(IOException e) { + // Good + } } + public void testPackage() throws Exception { } -- 2.39.5