@RunWith(Parameterized.class)
public class TestAllFiles {
private static final File ROOT_DIR = new File("test-data");
+ private static final boolean IGNORE_SCRATCHPAD = Boolean.getBoolean("scratchpad.ignore");
public static final String[] SCAN_EXCLUDES = new String[] { "**/.svn/**", "lost+found", "**/.git/**" };
// map file extensions to the actual mappers
public static final Map<String, FileHandler> HANDLERS = new HashMap<>();
+
static {
// Excel
HANDLERS.put(".xls", new HSSFFileHandler());
HANDLERS.put(".xlsb", new XSSFBFileHandler());
// Word
- HANDLERS.put(".doc", new HWPFFileHandler());
+ HANDLERS.put(".doc", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HWPFFileHandler());
HANDLERS.put(".docx", new XWPFFileHandler());
HANDLERS.put(".dotx", new XWPFFileHandler());
HANDLERS.put(".docm", new XWPFFileHandler());
// OpenXML4J files
- HANDLERS.put(".ooxml", new OPCFileHandler()); // OPCPackage
- HANDLERS.put(".zip", new OPCFileHandler()); // OPCPackage
+ HANDLERS.put(".ooxml", new OPCFileHandler());
+ HANDLERS.put(".zip", new OPCFileHandler());
// Powerpoint
- HANDLERS.put(".ppt", new HSLFFileHandler());
+ HANDLERS.put(".ppt", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSLFFileHandler());
HANDLERS.put(".pptx", new XSLFFileHandler());
HANDLERS.put(".pptm", new XSLFFileHandler());
HANDLERS.put(".ppsm", new XSLFFileHandler());
HANDLERS.put(".potx", new XSLFFileHandler());
// Outlook
- HANDLERS.put(".msg", new HSMFFileHandler());
+ HANDLERS.put(".msg", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HSMFFileHandler());
// Publisher
- HANDLERS.put(".pub", new HPBFFileHandler());
+ HANDLERS.put(".pub", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HPBFFileHandler());
// Visio - binary
- HANDLERS.put(".vsd", new HDGFFileHandler());
+ HANDLERS.put(".vsd", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HDGFFileHandler());
// Visio - ooxml
HANDLERS.put(".vsdm", new XDGFFileHandler());
HANDLERS.put(".adm", new HPSFFileHandler());
// Microsoft TNEF
- HANDLERS.put(".dat", new HMEFFileHandler());
+ HANDLERS.put(".dat", IGNORE_SCRATCHPAD ? new HPSFFileHandler() : new HMEFFileHandler());
// TODO: are these readable by some of the formats?
HANDLERS.put(".wri", new NullFileHandler());
"spreadsheet/54764-2.xlsx", // see TestXSSFBugs.bug54764()
"spreadsheet/54764.xlsx", // see TestXSSFBugs.bug54764()
"poifs/unknown_properties.msg", // POIFS properties corrupted
- "poifs/only-zero-byte-streams.ole2", // No actual contents
+ (IGNORE_SCRATCHPAD ? "" : "poifs/only-zero-byte-streams.ole2"), // No actual contents
"spreadsheet/poc-xmlbomb.xlsx", // contains xml-entity-expansion
"spreadsheet/poc-xmlbomb-empty.xlsx", // contains xml-entity-expansion
"spreadsheet/poc-shared-strings.xlsx", // contains shared-string-entity-expansion
}
}
- // let some file handlers do additional stuff
- handler.handleAdditional(inputFile);
+ try {
+ // let some file handlers do additional stuff
+ handler.handleAdditional(inputFile);
+ } catch (AssumptionViolatedException e) {
+ // file handler ignored this file
+ } catch (Exception e) {
+ if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
+ System.out.println("Failed: " + file);
+ throw new Exception("While handling " + file, e);
+ }
+ }
}
public static String getExtension(String file) {
long modified = file.lastModified();
POITextExtractor extractor = null;
- try {
+ try {
extractor = ExtractorFactory.createExtractor(file);
assertNotNull("Should get a POITextExtractor but had none for file " + file, extractor);
assertNotNull("Should get some text but had none for file " + file, extractor.getText());
-
+
// also try metadata
@SuppressWarnings("resource")
POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor();
assertNotNull(metadataExtractor.getText());
- assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
+ assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName()));
-
+
assertEquals("File should not be modified by extractor", length, file.length());
assertEquals("File should not be modified by extractor", modified, file.lastModified());
-
+
handleExtractingAsStream(file);
-
- if(extractor instanceof POIOLE2TextExtractor) {
+
+ if (extractor instanceof POIOLE2TextExtractor) {
try (HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor) extractor)) {
assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
assertNotNull(hpsfExtractor.getSummaryInformationText());
String msg = "org.apache.poi.EncryptedDocumentException: Export Restrictions in place - please install JCE Unlimited Strength Jurisdiction Policy files";
assumeFalse(msg.equals(e.getMessage()));
throw e;
+ } catch (IllegalStateException e) {
+ if (!e.getMessage().contains("POI Scratchpad jar missing") || !Boolean.getBoolean("scratchpad.ignore")) {
+ throw e;
+ }
} finally {
IOUtils.closeQuietly(extractor);
}