git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1811144 13f79535-47bb-0310-9956-ffa450edef68pull/79/head
@@ -0,0 +1,138 @@ | |||
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
package org.apache.poi; | |||
import org.apache.poi.hslf.exceptions.OldPowerPointFormatException; | |||
import org.apache.poi.hssf.OldExcelFormatException; | |||
import org.apache.poi.hwpf.OldWordFileFormatException; | |||
import org.apache.poi.poifs.filesystem.OfficeXmlFileException; | |||
import org.apache.poi.stress.*; | |||
import org.junit.Assume; | |||
import java.io.*; | |||
import java.util.zip.ZipException; | |||
import static org.junit.Assert.assertNotNull; | |||
public class BaseIntegrationTest { | |||
private final File rootDir; | |||
private String file; | |||
private FileHandler handler; | |||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) { | |||
this.rootDir = rootDir; | |||
this.file = file; | |||
this.handler = handler; | |||
} | |||
public void test() throws Exception { | |||
assertNotNull("Unknown file extension for file: " + file + ": " + TestAllFiles.getExtension(file), handler); | |||
File inputFile = new File(rootDir, file); | |||
try { | |||
handleFile(inputFile); | |||
} catch (OfficeXmlFileException e) { | |||
// check if the file-extension is wrong | |||
if(!e.getMessage().contains("data appears to be in the Office 2007")) { | |||
throw e; | |||
} | |||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension | |||
handleWrongExtension(inputFile, e); | |||
} catch (OldWordFileFormatException | OldExcelFormatException | OldPowerPointFormatException e) { | |||
// at least perform extracting tests on these old files | |||
} catch (OldFileFormatException e) { | |||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile); | |||
//noinspection ConstantConditions | |||
Assume.assumeFalse("File " + file + " excluded because it is unsupported old Excel format", true); | |||
} catch (EncryptedDocumentException e) { | |||
// Do not try to read encrypted files | |||
//noinspection ConstantConditions | |||
Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true); | |||
} catch (ZipException e) { | |||
// some files are corrupted | |||
if (e.getMessage().equals("unexpected EOF")) { | |||
//noinspection ConstantConditions | |||
Assume.assumeFalse("File " + file + " excluded because the Zip file is incomplete", true); | |||
} | |||
throw e; | |||
} catch (IOException e) { | |||
// sometimes binary format has XML-format-extension... | |||
if(e.getMessage().contains("rong file format or file extension for OO XML file")) { | |||
handleWrongExtension(inputFile, e); | |||
} else { | |||
throw e; | |||
} | |||
} catch (IllegalArgumentException e) { | |||
// ignore errors for documents with incorrect extension | |||
String message = e.getMessage(); | |||
if(message != null && (message.equals("The document is really a RTF file") || | |||
message.equals("The document is really a PDF file") || | |||
message.equals("The document is really a HTML file"))) { | |||
//noinspection ConstantConditions | |||
Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF file", true); | |||
} | |||
if(e.getMessage().equals("The document is really a OOXML file")) { | |||
handleWrongExtension(inputFile, e); | |||
} else { | |||
throw e; | |||
} | |||
} | |||
try { | |||
handler.handleExtracting(inputFile); | |||
} catch (EncryptedDocumentException e) { | |||
// Do not try to read encrypted files | |||
//noinspection ConstantConditions | |||
Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true); | |||
} | |||
} | |||
void handleWrongExtension(File inputFile, Exception e) throws Exception { | |||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension | |||
if (handler instanceof HWPFFileHandler) { | |||
handler = TestAllFiles.HANDLERS.get(".docx"); | |||
handleFile(inputFile); | |||
} else if (handler instanceof HSSFFileHandler) { | |||
handler = TestAllFiles.HANDLERS.get(".xlsx"); | |||
handleFile(inputFile); | |||
} else if (handler instanceof HSLFFileHandler) { | |||
handler = TestAllFiles.HANDLERS.get(".pptx"); | |||
handleFile(inputFile); | |||
// and the other way around, use HWPF instead of XWPF and so forth | |||
} else if(handler instanceof XWPFFileHandler) { | |||
handler = TestAllFiles.HANDLERS.get(".doc"); | |||
handleFile(inputFile); | |||
} else if(handler instanceof XSSFFileHandler) { | |||
handler = TestAllFiles.HANDLERS.get(".xls"); | |||
handleFile(inputFile); | |||
} else if(handler instanceof XSLFFileHandler) { | |||
handler = TestAllFiles.HANDLERS.get(".ppt"); | |||
handleFile(inputFile); | |||
} else { | |||
throw e; | |||
} | |||
} | |||
private void handleFile(File inputFile) throws Exception { | |||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) { | |||
handler.handleFile(newStream, inputFile.getAbsolutePath()); | |||
} | |||
} | |||
} |
@@ -91,13 +91,13 @@ import org.junit.runners.Parameterized.Parameters; | |||
public class TestAllFiles { | |||
private static final File ROOT_DIR = new File("test-data"); | |||
static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" }; | |||
public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" }; | |||
private static final Map<String,String> FILE_PASSWORD; | |||
// map file extensions to the actual mappers | |||
static final Map<String, FileHandler> HANDLERS = new HashMap<>(); | |||
public static final Map<String, FileHandler> HANDLERS = new HashMap<>(); | |||
static { | |||
// Excel | |||
HANDLERS.put(".xls", new HSSFFileHandler()); | |||
@@ -443,7 +443,7 @@ public class TestAllFiles { | |||
handler.handleAdditional(inputFile); | |||
} | |||
static String getExtension(String file) { | |||
public static String getExtension(String file) { | |||
int pos = file.lastIndexOf('.'); | |||
if(pos == -1 || pos == file.length()-1) { | |||
return file; | |||
@@ -452,7 +452,7 @@ public class TestAllFiles { | |||
return file.substring(pos).toLowerCase(Locale.ROOT); | |||
} | |||
private static class NullFileHandler implements FileHandler { | |||
public static class NullFileHandler implements FileHandler { | |||
@Override | |||
public void handleFile(InputStream stream, String path) throws Exception { | |||
} |
@@ -0,0 +1,120 @@ | |||
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements. See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License. You may obtain a copy of the License at
       http://www.apache.org/licenses/LICENSE-2.0
   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
package org.apache.poi.stress; | |||
import java.util.HashMap;
import java.util.LinkedHashMap;
import java.util.Map;
import java.util.regex.Pattern;
public class FileHandlerFactory { | |||
// map from patterns for mimetypes to the FileHandlers that should be able to | |||
// work with that file | |||
// use a Set<Pair> to have a defined order of applying the matches | |||
private static final Map<Pattern, FileHandler> MIME_TYPES = new HashMap<>(); | |||
static { | |||
////////////////// Word | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.document.macroenabled.12"), new XWPFFileHandler()); | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.template.macroenabled.12"), new XWPFFileHandler()); | |||
// application/msword | |||
MIME_TYPES.put(Pattern.compile(".*msword.*"), new HWPFFileHandler()); | |||
// application/vnd.ms-word | |||
MIME_TYPES.put(Pattern.compile(".*ms-word.*"), new HWPFFileHandler()); | |||
// application/vnd.openxmlformats-officedocument.wordprocessingml.document | |||
MIME_TYPES.put(Pattern.compile(".*wordprocessingml.*"), new XWPFFileHandler()); | |||
////////////////// Excel | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.addin.macroEnabled.12"), new XSSFFileHandler()); | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.sheet.binary.macroEnabled.12"), new XSSFFileHandler()); | |||
// application/msexcel | |||
MIME_TYPES.put(Pattern.compile(".*msexcel.*"), new HSSFFileHandler()); | |||
// application/vnd.ms-excel | |||
MIME_TYPES.put(Pattern.compile(".*ms-excel.*"), new HSSFFileHandler()); | |||
// application/vnd.openxmlformats-officedocument.spreadsheetml.sheet | |||
MIME_TYPES.put(Pattern.compile(".*spreadsheetml.*"), new XSSFFileHandler()); | |||
////////////////// Powerpoint | |||
// application/vnd.ms-powerpoint | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint"), new HSLFFileHandler()); | |||
// application/vnd.ms-officetheme | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-officetheme"), new HSLFFileHandler()); | |||
// application/vnd.openxmlformats-officedocument.presentationml.presentation | |||
MIME_TYPES.put(Pattern.compile(".*presentationml.*"), new XSLFFileHandler()); | |||
// application/vnd.ms-powerpoint.presentation.macroenabled.12 | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.presentation.macroenabled.12"), new XSLFFileHandler()); | |||
// application/vnd.ms-powerpoint.slideshow.macroenabled.12 | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.slideshow.macroenabled.12"), new XSLFFileHandler()); | |||
////////////////// Mail/TNEF | |||
// application/vnd.ms-tnef | |||
MIME_TYPES.put(Pattern.compile(".*ms-tnef.*"), new HMEFFileHandler()); | |||
// application/vnd.ms-outlook | |||
MIME_TYPES.put(Pattern.compile("application/vnd.ms-outlook"), new HSMFFileHandler()); | |||
////////////////// Visio | |||
// application/vnd.visio | |||
MIME_TYPES.put(Pattern.compile("application/vnd.visio.*"), new HDGFFileHandler()); | |||
// application/vnd.ms-visio.drawing | |||
MIME_TYPES.put(Pattern.compile(".*vnd.ms-visio\\."), new XDGFFileHandler()); | |||
//application/vnd.ms-visio.viewer | |||
MIME_TYPES.put(Pattern.compile(".*visio.*"), new HDGFFileHandler()); | |||
////////////////// Publisher | |||
// application/x-mspublisher | |||
MIME_TYPES.put(Pattern.compile("application/x-mspublisher"), new HPBFFileHandler()); | |||
////////////////// Others | |||
// special type used by Tika | |||
MIME_TYPES.put(Pattern.compile("application/x-tika-ooxml.*"), new OPCFileHandler()); | |||
// special type used by Tika | |||
MIME_TYPES.put(Pattern.compile("application/x-tika-msoffice.*"), new POIFSFileHandler()); | |||
// application/x-tika-old-excel | |||
MIME_TYPES.put(Pattern.compile("application/x-tika-old-excel"), new POIFSFileHandler()); | |||
// application/vnd.openxmlformats-officedocument.drawingml.chart+xml | |||
// ?!MIME_TYPES.put(Pattern.compile(".*drawingml.*"), ".dwg"); | |||
// application/vnd.openxmlformats-officedocument.vmlDrawing | |||
// ?!MIME_TYPES.put(Pattern.compile(".*vmlDrawing.*"), ".dwg"); | |||
} | |||
public static FileHandler getHandler(String mimeType) { | |||
for(Map.Entry<Pattern,FileHandler> entry : MIME_TYPES.entrySet()) { | |||
if(entry.getKey().matcher(mimeType).matches()) { | |||
return entry.getValue(); | |||
} | |||
} | |||
return null; | |||
} | |||
} |