summaryrefslogtreecommitdiffstats
path: root/src
diff options
context:
space:
mode:
authorDominik Stadler <centic@apache.org>2019-06-05 20:27:54 +0000
committerDominik Stadler <centic@apache.org>2019-06-05 20:27:54 +0000
commitd3b794de7ae6094d03bdd3c363fd6e366e6d0e74 (patch)
tree37d81c1f2a95401e1d1122c4cd4912d3e52361ff /src
parent5e33bd4b29ea4ca3071e7f486589e01e341cc179 (diff)
downloadpoi-d3b794de7ae6094d03bdd3c363fd6e366e6d0e74.tar.gz
poi-d3b794de7ae6094d03bdd3c363fd6e366e6d0e74.zip
Add helper class for mass-regression-tests which scans a directory and provides
the matching FileHandlers for each found file. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1860686 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r--src/integrationtest/org/apache/poi/POIFileScanner.java170
1 files changed, 170 insertions, 0 deletions
diff --git a/src/integrationtest/org/apache/poi/POIFileScanner.java b/src/integrationtest/org/apache/poi/POIFileScanner.java
new file mode 100644
index 0000000000..0ba20db595
--- /dev/null
+++ b/src/integrationtest/org/apache/poi/POIFileScanner.java
@@ -0,0 +1,170 @@
+/*
+ * ====================================================================
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * ====================================================================
+ */
+
+package org.apache.poi;
+
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+import org.apache.poi.stress.FileHandler;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.apache.tools.ant.DirectoryScanner;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.AbstractMap;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.List;
+import java.util.Map;
+
+/**
+ * Helper class to scan a folder for files and return a collection of
+ * found files together with the matching {@link FileHandler}.
+ *
+ * Can also be used to get the appropriate FileHandler for a single file.
+ */
+public class POIFileScanner {
+ /**
+ * Scan a folder for files and return a collection of
+ * found files together with the matching {@link FileHandler}.
+ *
+ * Note: unknown files will be assigned to {@link org.apache.poi.TestAllFiles.NullFileHandler}
+ *
+ * @param rootDir The directory to scan
+ * @return A collection with file-FileHandler pairs which can be used for running tests on that file
+ * @throws IOException If determining the file-type fails
+ */
+ public static Collection<Map.Entry<String, FileHandler>> scan(File rootDir) throws IOException {
+
+ DirectoryScanner scanner = new DirectoryScanner();
+ scanner.setBasedir(rootDir);
+
+ scanner.setExcludes(TestAllFiles.SCAN_EXCLUDES);
+
+ System.out.println("Scanning for files in " + rootDir);
+
+ scanner.scan();
+
+ System.out.println("Handling " + scanner.getIncludedFiles().length + " files");
+
+ List<Map.Entry<String, FileHandler>> files = new ArrayList<>();
+ for(String file : scanner.getIncludedFiles()) {
+ // breaks files with slash in their name on Linux:
+ // file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise
+
+ FileHandler fileHandler = getFileHandler(rootDir, file);
+
+ files.add(new AbstractMap.SimpleImmutableEntry<>(file, fileHandler));
+
+ if(files.size() % 100 == 0) {
+ System.out.print(".");
+ if(files.size() % 100_000 == 0) {
+ System.out.println(file);
+ }
+ }
+ }
+ System.out.println();
+
+ return files;
+ }
+
+ /**
+ * Get the FileHandler for a single file
+ *
+ * @param rootDir The directory where the file resides
+ * @param file The name of the file without directory
+ * @return The matching {@link FileHandler}, A {@link org.apache.poi.TestAllFiles.NullFileHandler}
+ * is returned if no match is found
+ * @throws IOException If determining the file-type fails
+ */
+ protected static FileHandler getFileHandler(File rootDir, String file) throws IOException {
+ FileHandler fileHandler = TestAllFiles.HANDLERS.get(TestAllFiles.getExtension(file));
+ if(fileHandler == null) {
+ File testFile = new File(rootDir, file);
+ FileMagic magic = FileMagic.valueOf(testFile);
+ // if we have a file-type that we can read, but no extension, we try to determine the
+ // file type manually
+
+ switch(magic) {
+ case OLE2: {
+ try {
+ try (POIFSFileSystem fs = new POIFSFileSystem(testFile, true)) {
+ HSSFWorkbook.getWorkbookDirEntryName(fs.getRoot());
+ }
+
+ // we did not get an exception, so it seems this is a HSSFWorkbook
+ fileHandler = TestAllFiles.HANDLERS.get(".xls");
+ } catch (IOException | RuntimeException e) {
+ try {
+ try (FileInputStream istream = new FileInputStream(testFile)) {
+ try (HWPFDocument ignored = new HWPFDocument(istream)) {
+ // seems to be a valid document
+ fileHandler = TestAllFiles.HANDLERS.get(".doc");
+ }
+ }
+ } catch (IOException | RuntimeException e2) {
+ System.out.println("Could not open POIFSFileSystem for OLE2 file " + testFile + ": " + e + " and " + e2);
+ fileHandler = new TestAllFiles.NullFileHandler();
+ }
+ }
+ break;
+ }
+ case OOXML: {
+ try {
+ WorkbookFactory.create(testFile);
+
+ // seems to be a valid workbook
+ fileHandler = TestAllFiles.HANDLERS.get(".xlsx");
+ } catch (IOException | RuntimeException e) {
+ try {
+ try (FileInputStream is = new FileInputStream(testFile)) {
+ try (XWPFDocument ignored = new XWPFDocument(is)) {
+ // seems to be a valid document
+ fileHandler = TestAllFiles.HANDLERS.get(".docx");
+ }
+ }
+ } catch (IOException | RuntimeException e2) {
+ System.out.println("Could not open POIFSFileSystem for OOXML file " + testFile + ": " + e + " and " + e2);
+ fileHandler = new TestAllFiles.NullFileHandler();
+ }
+ }
+ break;
+ }
+
+ // do not warn about a few detected file types
+ case RTF:
+ case PDF:
+ case HTML:
+ fileHandler = new TestAllFiles.NullFileHandler();
+ break;
+ }
+
+ if(fileHandler == null) {
+ System.out.println("Did not get a handler for extension " + TestAllFiles.getExtension(file) +
+ " of file " + file + ": " + magic);
+ fileHandler = new TestAllFiles.NullFileHandler();
+ }
+ }
+ return fileHandler;
+ }
+}