]> source.dussan.org Git - poi.git/commitdiff
Adjust POIFileScanner for mass regression testing
authorDominik Stadler <centic@apache.org>
Mon, 16 Mar 2020 19:55:04 +0000 (19:55 +0000)
committerDominik Stadler <centic@apache.org>
Mon, 16 Mar 2020 19:55:04 +0000 (19:55 +0000)
Fix closing file-handles when scanning for files to test
Refactor to make it easier to test
Some more tests

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1875267 13f79535-47bb-0310-9956-ffa450edef68

src/integrationtest/org/apache/poi/POIFileScanner.java

index 0ba20db595fa0a1992072541268e8d90929b1954..6ad0ff34582a1342d27d450e47df47a8e097398e 100644 (file)
@@ -23,10 +23,15 @@ import org.apache.poi.hssf.usermodel.HSSFWorkbook;
 import org.apache.poi.hwpf.HWPFDocument;
 import org.apache.poi.poifs.filesystem.FileMagic;
 import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Workbook;
 import org.apache.poi.ss.usermodel.WorkbookFactory;
 import org.apache.poi.stress.FileHandler;
+import org.apache.poi.stress.XSSFFileHandler;
+import org.apache.poi.util.SuppressForbidden;
 import org.apache.poi.xwpf.usermodel.XWPFDocument;
 import org.apache.tools.ant.DirectoryScanner;
+import org.junit.Ignore;
+import org.junit.Test;
 
 import java.io.File;
 import java.io.FileInputStream;
@@ -37,6 +42,8 @@ import java.util.Collection;
 import java.util.List;
 import java.util.Map;
 
+import static org.junit.Assert.assertEquals;
+
 /**
  * Helper class to scan a folder for files and return a collection of
  * found files together with the matching {@link FileHandler}.
@@ -55,7 +62,6 @@ public class POIFileScanner {
      * @throws IOException If determining the file-type fails
      */
     public static Collection<Map.Entry<String, FileHandler>> scan(File rootDir) throws IOException {
-
         DirectoryScanner scanner = new DirectoryScanner();
         scanner.setBasedir(rootDir);
 
@@ -65,10 +71,11 @@ public class POIFileScanner {
 
         scanner.scan();
 
-        System.out.println("Handling " + scanner.getIncludedFiles().length + " files");
+        String[] includedFiles = scanner.getIncludedFiles();
+        System.out.println("Handling " + includedFiles.length + " files");
 
         List<Map.Entry<String, FileHandler>> files = new ArrayList<>();
-        for(String file : scanner.getIncludedFiles()) {
+        for(String file : includedFiles) {
             // breaks files with slash in their name on Linux:
             // file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise
 
@@ -100,71 +107,99 @@ public class POIFileScanner {
     protected static FileHandler getFileHandler(File rootDir, String file) throws IOException {
         FileHandler fileHandler = TestAllFiles.HANDLERS.get(TestAllFiles.getExtension(file));
         if(fileHandler == null) {
-            File testFile = new File(rootDir, file);
-            FileMagic magic = FileMagic.valueOf(testFile);
-            // if we have a file-type that we can read, but no extension, we try to determine the
-            // file type manually
+            // we could not detect a type of file based on the extension, so we
+            // need to take a close look at the file
+            fileHandler = detectUnnamedFile(rootDir, file);
+        }
+        return fileHandler;
+    }
 
-            switch(magic) {
-                case OLE2: {
-                    try {
-                        try (POIFSFileSystem fs = new POIFSFileSystem(testFile, true)) {
-                            HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
-                        }
+    private static FileHandler detectUnnamedFile(File rootDir, String file) throws IOException {
+        File testFile = new File(rootDir, file);
+
+        // find out if it looks like OLE2 (HSSF, HSLF, HWPF, ...) or OOXML (XSSF, XSLF, XWPF, ...)
+        // and then determine the file type accordingly
+        FileMagic magic = FileMagic.valueOf(testFile);
+        switch (magic) {
+            case OLE2: {
+                try {
+                    try (POIFSFileSystem fs = new POIFSFileSystem(testFile, true)) {
+                        HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
+                    }
 
-                        // we did not get an exception, so it seems this is a HSSFWorkbook
-                        fileHandler = TestAllFiles.HANDLERS.get(".xls");
-                    } catch (IOException | RuntimeException e) {
-                        try {
-                            try (FileInputStream istream = new FileInputStream(testFile)) {
-                                try (HWPFDocument ignored = new HWPFDocument(istream)) {
-                                    // seems to be a valid document
-                                    fileHandler = TestAllFiles.HANDLERS.get(".doc");
-                                }
+                    // we did not get an exception, so it seems this is a HSSFWorkbook
+                    return TestAllFiles.HANDLERS.get(".xls");
+                } catch (IOException | RuntimeException e) {
+                    try {
+                        try (FileInputStream istream = new FileInputStream(testFile)) {
+                            try (HWPFDocument ignored = new HWPFDocument(istream)) {
+                                // seems to be a valid document
+                                return TestAllFiles.HANDLERS.get(".doc");
                             }
-                        } catch (IOException | RuntimeException e2) {
-                            System.out.println("Could not open POIFSFileSystem for OLE2 file " + testFile + ": " + e + " and " + e2);
-                            fileHandler = new TestAllFiles.NullFileHandler();
                         }
+                    } catch (IOException | RuntimeException e2) {
+                        System.out.println("Could not open POIFSFileSystem for OLE2 file " + testFile + ": " + e + " and " + e2);
+                        return new TestAllFiles.NullFileHandler();
                     }
-                    break;
                 }
-                case OOXML: {
-                    try {
-                        WorkbookFactory.create(testFile);
-
+            }
+            case OOXML: {
+                try {
+                    try (Workbook ignored = WorkbookFactory.create(testFile, null, true)) {
                         // seems to be a valid workbook
-                        fileHandler = TestAllFiles.HANDLERS.get(".xlsx");
-                    } catch (IOException | RuntimeException e) {
-                        try {
-                            try (FileInputStream is = new FileInputStream(testFile)) {
-                                try (XWPFDocument ignored = new XWPFDocument(is)) {
-                                    // seems to be a valid document
-                                    fileHandler = TestAllFiles.HANDLERS.get(".docx");
-                                }
+                        return TestAllFiles.HANDLERS.get(".xlsx");
+                    }
+                } catch (IOException | RuntimeException e) {
+                    try {
+                        try (FileInputStream is = new FileInputStream(testFile)) {
+                            try (XWPFDocument ignored = new XWPFDocument(is)) {
+                                // seems to be a valid document
+                                return TestAllFiles.HANDLERS.get(".docx");
                             }
-                        } catch (IOException | RuntimeException e2) {
-                            System.out.println("Could not open POIFSFileSystem for OOXML file " + testFile + ": " + e + " and " + e2);
-                            fileHandler = new TestAllFiles.NullFileHandler();
                         }
+                    } catch (IOException | RuntimeException e2) {
+                        System.out.println("Could not open POIFSFileSystem for OOXML file " + testFile + ": " + e + " and " + e2);
+                        return new TestAllFiles.NullFileHandler();
                     }
-                    break;
                 }
-
-                // do not warn about a few detected file types
-                case RTF:
-                case PDF:
-                case HTML:
-                    fileHandler = new TestAllFiles.NullFileHandler();
-                    break;
             }
 
-            if(fileHandler == null) {
-                System.out.println("Did not get a handler for extension " + TestAllFiles.getExtension(file) +
-                        " of file " + file + ": " + magic);
-                fileHandler = new TestAllFiles.NullFileHandler();
-            }
+            // do not warn about a few detected file types
+            case RTF:
+            case PDF:
+            case HTML:
+                return new TestAllFiles.NullFileHandler();
         }
-        return fileHandler;
+
+        System.out.println("Did not get a handler for extension " + TestAllFiles.getExtension(file) +
+                " of file " + file + ": " + magic);
+        return new TestAllFiles.NullFileHandler();
+    }
+
+    @Ignore
+    @Test
+    @SuppressForbidden("Just an ignored test")
+    public void testInvalidFile() throws IOException, InterruptedException {
+        FileHandler fileHandler = POIFileScanner.getFileHandler(new File("/usbc/CommonCrawl"),
+                "www.bgs.ac.uk_downloads_directdownload.cfm_id=2362&noexcl=true&t=west_20sussex_20-_20building_20stone_20quarries");
+
+        assertEquals(XSSFFileHandler.class, fileHandler.getClass());
+
+        // to show the output from ZipFile() from commons-compress
+        // although I did not find out yet why the ZipFile is not closed here
+        System.gc();
+        Thread.sleep(1000);
+        System.gc();
+        Thread.sleep(1000);
+    }
+
+    @Test
+    public void testDetectUnnamedFile() throws IOException {
+        POIFileScanner.detectUnnamedFile(new File("test-data/spreadsheet"), "49156.xlsx");
+    }
+
+    @Test
+    public void test() throws IOException {
+        POIFileScanner.scan(new File("test-data"));
     }
 }