]> source.dussan.org Git - poi.git/commitdiff
Add some more code from the separate integration test project to be able to publish...
author: Dominik Stadler <centic@apache.org>
Wed, 4 Oct 2017 19:54:21 +0000 (19:54 +0000)
committer: Dominik Stadler <centic@apache.org>
Wed, 4 Oct 2017 19:54:21 +0000 (19:54 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1811144 13f79535-47bb-0310-9956-ffa450edef68

src/integrationtest/org/apache/poi/BaseIntegrationTest.java [new file with mode: 0644]
src/integrationtest/org/apache/poi/TestAllFiles.java
src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java [new file with mode: 0644]

diff --git a/src/integrationtest/org/apache/poi/BaseIntegrationTest.java b/src/integrationtest/org/apache/poi/BaseIntegrationTest.java
new file mode 100644 (file)
index 0000000..291618f
--- /dev/null
@@ -0,0 +1,138 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi;
+
+import org.apache.poi.hslf.exceptions.OldPowerPointFormatException;
+import org.apache.poi.hssf.OldExcelFormatException;
+import org.apache.poi.hwpf.OldWordFileFormatException;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+import org.apache.poi.stress.*;
+import org.junit.Assume;
+
+import java.io.*;
+import java.util.zip.ZipException;
+
+import static org.junit.Assert.assertNotNull;
+
+public class BaseIntegrationTest {
+       private final File rootDir;
+       private String file;
+       private FileHandler handler;
+
+       public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
+               this.rootDir = rootDir;
+               this.file = file;
+               this.handler = handler;
+       }
+
+       public void test() throws Exception {
+        assertNotNull("Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file), handler);
+
+        File inputFile = new File(rootDir, file);
+        try {
+            handleFile(inputFile);
+        } catch (OfficeXmlFileException e) {
+               // check if the file-extension is wrong
+               if(!e.getMessage().contains("data appears to be in the Office 2007")) {
+                       throw e;
+               }
+
+               // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
+                       handleWrongExtension(inputFile, e);
+               } catch (OldWordFileFormatException | OldExcelFormatException | OldPowerPointFormatException e) {
+               // at least perform extracting tests on these old files
+        } catch (OldFileFormatException e) {
+            // Not even text extraction is supported for these: handler.handleExtracting(inputFile);
+                       //noinspection ConstantConditions
+                       Assume.assumeFalse("File " + file + " excluded because it is unsupported old Excel format", true);
+        } catch (EncryptedDocumentException e) {
+               // Do not try to read encrypted files
+                       //noinspection ConstantConditions
+                       Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
+        } catch (ZipException e) {
+                       // some files are corrupted
+                       if (e.getMessage().equals("unexpected EOF")) {
+                               //noinspection ConstantConditions
+                               Assume.assumeFalse("File " + file + " excluded because the Zip file is incomplete", true);
+                       }
+
+                       throw e;
+               } catch (IOException e) {
+                       // sometimes binary format has XML-format-extension...
+                       if(e.getMessage().contains("rong file format or file extension for OO XML file")) {
+                               handleWrongExtension(inputFile, e);
+                       } else {
+                               throw e;
+                       }
+        } catch (IllegalArgumentException e) {
+               // ignore errors for documents with incorrect extension
+               String message = e.getMessage();
+                       if(message != null && (message.equals("The document is really a RTF file") ||
+                               message.equals("The document is really a PDF file") ||
+                                       message.equals("The document is really a HTML file"))) {
+                               //noinspection ConstantConditions
+                               Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF file", true);
+                       }
+
+                       if(e.getMessage().equals("The document is really a OOXML file")) {
+                               handleWrongExtension(inputFile, e);
+                       } else {
+                               throw e;
+                       }
+        }
+
+        try {
+               handler.handleExtracting(inputFile);
+               } catch (EncryptedDocumentException e) {
+                       // Do not try to read encrypted files
+                       //noinspection ConstantConditions
+                       Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
+               }
+       }
+
+       void handleWrongExtension(File inputFile, Exception e) throws Exception {
+               // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
+               if (handler instanceof HWPFFileHandler) {
+            handler = TestAllFiles.HANDLERS.get(".docx");
+            handleFile(inputFile);
+        } else if (handler instanceof HSSFFileHandler) {
+            handler = TestAllFiles.HANDLERS.get(".xlsx");
+            handleFile(inputFile);
+        } else if (handler instanceof HSLFFileHandler) {
+                       handler = TestAllFiles.HANDLERS.get(".pptx");
+                       handleFile(inputFile);
+               // and the other way around, use HWPF instead of XWPF and so forth
+               } else if(handler instanceof XWPFFileHandler) {
+                       handler = TestAllFiles.HANDLERS.get(".doc");
+                       handleFile(inputFile);
+               } else if(handler instanceof XSSFFileHandler) {
+                       handler = TestAllFiles.HANDLERS.get(".xls");
+                       handleFile(inputFile);
+               } else if(handler instanceof XSLFFileHandler) {
+                       handler = TestAllFiles.HANDLERS.get(".ppt");
+                       handleFile(inputFile);
+        } else {
+            throw e;
+        }
+       }
+
+       private void handleFile(File inputFile) throws Exception {
+               try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
+                       handler.handleFile(newStream, inputFile.getAbsolutePath());
+               }
+       }
+}
index 37aa464ee17a4a906bd98b6f2ac74d0f4c1335e0..812e6c6b4d9b3f444c3ce34b60e4a23b375c0a24 100644 (file)
@@ -91,13 +91,13 @@ import org.junit.runners.Parameterized.Parameters;
 public class TestAllFiles {
     private static final File ROOT_DIR = new File("test-data");
 
-    static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
+    public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
 
     private static final Map<String,String> FILE_PASSWORD;
 
     
     // map file extensions to the actual mappers
-    static final Map<String, FileHandler> HANDLERS = new HashMap<>();
+    public static final Map<String, FileHandler> HANDLERS = new HashMap<>();
     static {
         // Excel
         HANDLERS.put(".xls", new HSSFFileHandler());
@@ -443,7 +443,7 @@ public class TestAllFiles {
         handler.handleAdditional(inputFile);
     }
 
-    static String getExtension(String file) {
+    public static String getExtension(String file) {
         int pos = file.lastIndexOf('.');
         if(pos == -1 || pos == file.length()-1) {
             return file;
@@ -452,7 +452,7 @@ public class TestAllFiles {
         return file.substring(pos).toLowerCase(Locale.ROOT);
     }
 
-    private static class NullFileHandler implements FileHandler {
+    public static class NullFileHandler implements FileHandler {
         @Override
         public void handleFile(InputStream stream, String path) throws Exception {
         }
diff --git a/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java b/src/integrationtest/org/apache/poi/stress/FileHandlerFactory.java
new file mode 100644 (file)
index 0000000..8be52b3
--- /dev/null
@@ -0,0 +1,120 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+public class FileHandlerFactory {
+    // map from patterns for mimetypes to the FileHandlers that should be able to
+    // work with that file
+    // use a Set<Pair> to have a defined order of applying the matches
+    private static final Map<Pattern, FileHandler> MIME_TYPES = new HashMap<>();
+    static {
+        ////////////////// Word
+
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.document.macroenabled.12"), new XWPFFileHandler());
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.template.macroenabled.12"), new XWPFFileHandler());
+
+        // application/msword
+        MIME_TYPES.put(Pattern.compile(".*msword.*"), new HWPFFileHandler());
+        // application/vnd.ms-word
+        MIME_TYPES.put(Pattern.compile(".*ms-word.*"), new HWPFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.wordprocessingml.document
+        MIME_TYPES.put(Pattern.compile(".*wordprocessingml.*"), new XWPFFileHandler());
+
+        ////////////////// Excel
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.addin.macroEnabled.12"), new XSSFFileHandler());
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.sheet.binary.macroEnabled.12"), new XSSFFileHandler());
+
+        // application/msexcel
+        MIME_TYPES.put(Pattern.compile(".*msexcel.*"), new HSSFFileHandler());
+        // application/vnd.ms-excel
+        MIME_TYPES.put(Pattern.compile(".*ms-excel.*"), new HSSFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+        MIME_TYPES.put(Pattern.compile(".*spreadsheetml.*"), new XSSFFileHandler());
+
+        ////////////////// Powerpoint
+
+        // application/vnd.ms-powerpoint
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint"), new HSLFFileHandler());
+        // application/vnd.ms-officetheme
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-officetheme"), new HSLFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.presentationml.presentation
+        MIME_TYPES.put(Pattern.compile(".*presentationml.*"), new XSLFFileHandler());
+        // application/vnd.ms-powerpoint.presentation.macroenabled.12
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.presentation.macroenabled.12"), new XSLFFileHandler());
+        // application/vnd.ms-powerpoint.slideshow.macroenabled.12
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.slideshow.macroenabled.12"), new XSLFFileHandler());
+
+        ////////////////// Mail/TNEF
+
+        // application/vnd.ms-tnef
+        MIME_TYPES.put(Pattern.compile(".*ms-tnef.*"), new HMEFFileHandler());
+
+        // application/vnd.ms-outlook
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-outlook"), new HSMFFileHandler());
+
+        ////////////////// Visio
+
+        // application/vnd.visio
+        MIME_TYPES.put(Pattern.compile("application/vnd.visio.*"), new HDGFFileHandler());
+
+        // application/vnd.ms-visio.drawing
+        MIME_TYPES.put(Pattern.compile(".*vnd.ms-visio\\."), new XDGFFileHandler());
+
+        //application/vnd.ms-visio.viewer
+        MIME_TYPES.put(Pattern.compile(".*visio.*"), new HDGFFileHandler());
+
+
+        ////////////////// Publisher
+
+        // application/x-mspublisher
+        MIME_TYPES.put(Pattern.compile("application/x-mspublisher"), new HPBFFileHandler());
+
+
+        ////////////////// Others
+
+        // special type used by Tika
+        MIME_TYPES.put(Pattern.compile("application/x-tika-ooxml.*"), new OPCFileHandler());
+        // special type used by Tika
+        MIME_TYPES.put(Pattern.compile("application/x-tika-msoffice.*"), new POIFSFileHandler());
+
+        // application/x-tika-old-excel
+        MIME_TYPES.put(Pattern.compile("application/x-tika-old-excel"), new POIFSFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.drawingml.chart+xml
+        // ?!MIME_TYPES.put(Pattern.compile(".*drawingml.*"), ".dwg");
+
+        // application/vnd.openxmlformats-officedocument.vmlDrawing
+        // ?!MIME_TYPES.put(Pattern.compile(".*vmlDrawing.*"), ".dwg");
+    }
+
+    public static FileHandler getHandler(String mimeType) {
+        for(Map.Entry<Pattern,FileHandler> entry : MIME_TYPES.entrySet()) {
+            if(entry.getKey().matcher(mimeType).matches()) {
+                return entry.getValue();
+            }
+        }
+
+        return null;
+    }
+}