File inputFile = new File(rootDir, file);
try {
- handleFile(inputFile);
+ try {
+ handleFile(inputFile);
+ } catch (IllegalArgumentException e) {
+ handleWrongFileExtension(inputFile, e);
+ return;
+ }
} catch (OfficeXmlFileException e) {
// check if the file-extension is wrong
if(!e.getMessage().contains("data appears to be in the Office 2007")) {
- throw e;
+ // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
+ handleWrongOLE2XMLExtension(inputFile, e);
+ return;
}
- // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
- handleWrongExtension(inputFile, e);
+ throw e;
} catch (OldFileFormatException e) {
if (e.getClass().equals(OldFileFormatException.class)) {
// Not even text extraction is supported for these: handler.handleExtracting(inputFile);
- //noinspection ConstantConditions
Assume.assumeFalse("File " + file + " excluded because it is unsupported old Excel format", true);
}
// otherwise try at least to perform extracting tests on these old files
} catch (EncryptedDocumentException e) {
// Do not try to read encrypted files
- //noinspection ConstantConditions
Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
} catch (ZipException e) {
// some files are corrupted
- if (e.getMessage().equals("unexpected EOF")) {
- //noinspection ConstantConditions
+ if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) {
Assume.assumeFalse("File " + file + " excluded because the Zip file is incomplete", true);
}
} catch (IOException e) {
// sometimes binary format has XML-format-extension...
if(e.getMessage().contains("rong file format or file extension for OO XML file")) {
- handleWrongExtension(inputFile, e);
- } else {
- throw e;
+ handleWrongOLE2XMLExtension(inputFile, e);
+ return;
}
+
+ throw e;
} catch (IllegalArgumentException e) {
// ignore errors for documents with incorrect extension
String message = e.getMessage();
if(message != null && (message.equals("The document is really a RTF file") ||
message.equals("The document is really a PDF file") ||
message.equals("The document is really a HTML file"))) {
- //noinspection ConstantConditions
- Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF file", true);
+ Assume.assumeFalse("File " + file + " excluded because it is actually a PDF/RTF/HTML file", true);
}
- if(e.getMessage().equals("The document is really a OOXML file")) {
- handleWrongExtension(inputFile, e);
- } else {
- throw e;
+ if(message != null && message.equals("The document is really a OOXML file")) {
+ handleWrongOLE2XMLExtension(inputFile, e);
+ return;
}
+
+ throw e;
}
try {
handler.handleExtracting(inputFile);
} catch (EncryptedDocumentException e) {
// Do not try to read encrypted files
- //noinspection ConstantConditions
Assume.assumeFalse("File " + file + " excluded because it is password-encrypted", true);
}
}
- void handleWrongExtension(File inputFile, Exception e) throws Exception {
+ /**
+  * Retries a file with the handler that matches its real content when the
+  * failure message identifies it as an XLS, PPT, DOC or VSD file.
+  *
+  * Replaces the {@code handler} field with the entry registered in
+  * {@code TestAllFiles.HANDLERS} for the detected format and calls
+  * {@code handleFile} again. Returns normally if that retry succeeds, so the
+  * caller can stop processing the file.
+  *
+  * @param inputFile the file that failed with the handler chosen by its extension
+  * @param e the failure raised by the first attempt
+  * @throws Exception the original {@code e} if its message is not one of the
+  *         known wrong-extension messages, or whatever the retry itself throws
+  */
+ private void handleWrongFileExtension(File inputFile, IllegalArgumentException e) throws Exception {
+ // we sometimes have wrong extensions, so for some exceptions we try to handle it
+ // with the correct FileHandler instead
+ String message = e.getMessage();
+ String extension;
+ // constant-first equals keeps the comparison null-safe when there is no message
+ if("The document is really a XLS file".equals(message)) {
+ extension = ".xls";
+ } else if("The document is really a PPT file".equals(message)) {
+ extension = ".ppt";
+ } else if("The document is really a DOC file".equals(message)) {
+ extension = ".doc";
+ } else if("The document is really a VSD file".equals(message)) {
+ extension = ".vsd";
+ } else {
+ // not a known wrong-extension failure: report the original problem unchanged
+ throw e;
+ }
+
+ // only rethrow when nothing matched; a successful retry must not be
+ // turned back into a failure by rethrowing the first exception
+ handler = TestAllFiles.HANDLERS.get(extension);
+ handleFile(inputFile);
+ }
+
+ void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
if (handler instanceof HWPFFileHandler) {
handler = TestAllFiles.HANDLERS.get(".docx");
}
// check for an encrypted .xlsx file - they get OLE2 wrapped
- try {
- directory.getEntry(Decryptor.DEFAULT_POIFS_ENTRY);
+ if(directory.hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
throw new EncryptedDocumentException("The supplied spreadsheet seems to be an Encrypted .xlsx file. " +
"It must be decrypted before use by XSSF, it cannot be used by HSSF");
- } catch (FileNotFoundException e) {
- // fall through
}
// check for previous version of file format
- try {
- directory.getEntry(OLD_WORKBOOK_DIR_ENTRY_NAME);
+ if(directory.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) {
throw new OldExcelFormatException("The supplied spreadsheet seems to be Excel 5.0/7.0 (BIFF5) format. "
+ "POI only supports BIFF8 format (from Excel versions 97/2000/XP/2003)");
- } catch (FileNotFoundException e) {
- // fall through
+ }
+
+ // throw more useful exceptions for known wrong file-extensions
+ if(directory.hasEntry("WordDocument")) {
+ throw new IllegalArgumentException("The document is really a DOC file");
}
throw new IllegalArgumentException("The supplied POIFSFileSystem does not contain a BIFF8 'Workbook' entry. "