From: Dominik Stadler Date: Fri, 24 Apr 2020 20:58:23 +0000 (+0000) Subject: Re-establish class which is used in mass-regression-testing X-Git-Tag: before_ooxml_3rd_edition~298 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=a415ae13f7769cc894de3cc06f4379073d78d8f5;p=poi.git Re-establish class which is used in mass-regression-testing git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1876946 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/integrationtest/org/apache/poi/BaseIntegrationTest.java b/src/integrationtest/org/apache/poi/BaseIntegrationTest.java new file mode 100644 index 0000000000..10c534eacc --- /dev/null +++ b/src/integrationtest/org/apache/poi/BaseIntegrationTest.java @@ -0,0 +1,175 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */
package org.apache.poi;

import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
import org.apache.poi.stress.FileHandler;
import org.apache.poi.stress.HSLFFileHandler;
import org.apache.poi.stress.HSSFFileHandler;
import org.apache.poi.stress.HWPFFileHandler;
import org.apache.poi.stress.XSLFFileHandler;
import org.apache.poi.stress.XSSFFileHandler;
import org.apache.poi.stress.XWPFFileHandler;
import org.junit.Assume;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipException;

import static org.junit.Assert.assertNotNull;

/**
 * Runs the integration test for a single file; used for mass-regression
 * testing via a separate project.
 *
 * <p>Besides invoking the {@link FileHandler} for the file, this class
 * deals with some known categories of files/exceptions: old file formats,
 * encrypted documents, truncated Zip files and documents whose file
 * extension does not match their actual format. Files which cannot be
 * handled for such a reason are excluded via {@link Assume} instead of
 * being reported as failures.
 */
public class BaseIntegrationTest {
    private final File rootDir;
    private final String file;
    // intentionally not final: replaced by handleWrongOLE2XMLExtension() when
    // the file turns out to need a different handler than its extension suggests
    private FileHandler handler;

    /**
     * @param rootDir the directory against which {@code file} is resolved
     * @param file    the name of the file to test, relative to {@code rootDir}
     * @param handler the handler selected for the file; {@link #test()} fails
     *                with an assertion error if this is {@code null}
     */
    public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
        this.rootDir = rootDir;
        this.file = file;
        this.handler = handler;
    }

    /**
     * Verifies that a handler is available and then runs the test for the
     * configured file.
     *
     * @throws Exception if handling the file fails in a way that is not
     *                   covered by one of the known exclusions
     */
    public void test() throws Exception {
        assertNotNull("Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file), handler);
        testOneFile(new File(rootDir, file));
    }

    /**
     * Handles the given file with the current handler and afterwards runs
     * the extracting-step of the handler on it.
     *
     * <p>Known problems (old/encrypted/truncated/mislabelled files) either
     * cause the file to be excluded via a failed assumption or are retried
     * with a different handler, all other exceptions are propagated.
     *
     * @param inputFile the file to test
     * @throws Exception if handling the file fails unexpectedly
     */
    protected void testOneFile(File inputFile) throws Exception {
        try {
            handleFile(inputFile);
        } catch (OfficeXmlFileException e) {
            // switch XWPF and HWPF and so forth depending on the error message
            handleWrongOLE2XMLExtension(inputFile, e);
        } catch (OldFileFormatException e) {
            // Not even text extraction is supported for these: handler.handleExtracting(inputFile);
            Assume.assumeFalse("File " + file + " excluded because it is unsupported old Excel format", true);
        } catch (EncryptedDocumentException e) {
            // Do not try to read encrypted files
            Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
        } catch (ZipException e) {
            // some files are corrupted
            // the message can be null, compare constant-first so that a
            // ZipException without message is rethrown and not masked by a NPE
            String message = e.getMessage();
            if ("unexpected EOF".equals(message) || "Truncated ZIP file".equals(message)) {
                Assume.assumeFalse("File " + file + " excluded because the Zip file is incomplete", true);
            }

            throw e;
        } catch (IOException e) {
            // ignore some other ways of corrupted files
            String message = e.getMessage();
            if (message != null && message.contains("Truncated ZIP file")) {
                Assume.assumeFalse("File " + file + " excluded because the Zip file is incomplete", true);
            }

            // sometimes binary format has XML-format-extension...
            if (message != null && message.contains("rong file format or file extension for OO XML file")) {
                handleWrongOLE2XMLExtension(inputFile, e);
                return;
            }

            throw e;
        } catch (IllegalArgumentException e) {
            // ignore errors for documents with incorrect extension
            String message = e.getMessage();
            if (isNonOfficeFormat(message)) {
                Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF/HTML file", true);
            }

            if ("The document is really a OOXML file".equals(message)) {
                handleWrongOLE2XMLExtension(inputFile, e);
                return;
            }

            throw e;
        }

        try {
            handler.handleExtracting(inputFile);
        } catch (EncryptedDocumentException e) {
            // Do not try to read encrypted files
            Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
        }
    }

    /**
     * Retries handling of a file with a different handler after the
     * original handler failed because the content does not match the
     * file extension.
     *
     * <p>The replacement handler is stored in this instance, so it is also
     * used for any step that runs afterwards.
     *
     * @param inputFile the file to handle
     * @param e         the exception reported by the original handler, it is
     *                  rethrown if no replacement handler can be determined
     * @throws Exception {@code e} if nothing matched, or whatever the
     *                   replacement handler throws
     */
    void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
        // we sometimes have wrong extensions, so for some exceptions we try to handle it
        // with the correct FileHandler instead
        String message = e.getMessage();

        // ignore some file-types that we do not want to handle here
        Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF/HTML file",
                isNonOfficeFormat(message));

        String extension = findReplacementExtension(message);
        if (extension == null) {
            // nothing matched => throw the exception to the outside
            throw e;
        }

        handler = TestAllFiles.HANDLERS.get(extension);
        handleFile(inputFile);
    }

    /**
     * Checks if the exception message states that the document is actually
     * a RTF, PDF or HTML file.
     */
    private static boolean isNonOfficeFormat(String message) {
        return "The document is really a RTF file".equals(message) ||
                "The document is really a PDF file".equals(message) ||
                "The document is really a HTML file".equals(message);
    }

    /**
     * Determines the file extension whose handler should be tried instead
     * of the current one, or {@code null} if there is no candidate.
     */
    private String findReplacementExtension(String message) {
        // an explicit hint in the message takes precedence
        if ("The document is really a XLS file".equals(message)) {
            return ".xls";
        }
        if ("The document is really a PPT file".equals(message)) {
            return ".ppt";
        }
        if ("The document is really a DOC file".equals(message)) {
            return ".doc";
        }
        if ("The document is really a VSD file".equals(message)) {
            return ".vsd";
        }

        // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
        if (handler instanceof HWPFFileHandler) {
            return ".docx";
        }
        if (handler instanceof HSSFFileHandler) {
            return ".xlsx";
        }
        if (handler instanceof HSLFFileHandler) {
            return ".pptx";
        }

        // and the other way around, use HWPF instead of XWPF and so forth
        if (handler instanceof XWPFFileHandler) {
            return ".doc";
        }
        if (handler instanceof XSSFFileHandler) {
            return ".xls";
        }
        if (handler instanceof XSLFFileHandler) {
            return ".ppt";
        }

        return null;
    }

    /**
     * Opens the file via a buffered stream and passes it to the current
     * handler, the stream is closed afterwards in any case.
     */
    private void handleFile(File inputFile) throws Exception {
        try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
            handler.handleFile(newStream, inputFile.getAbsolutePath());
        }
    }
}