git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1890125 13f79535-47bb-0310-9956-ffa450edef68tags/REL_5_1_0
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>(); | public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>(); | ||||
static { | static { | ||||
// password protected files without password | // password protected files without password | ||||
// ... currently none ... | |||||
// ... currently none ... | |||||
// unsupported file-types, no supported OLE2 parts | // unsupported file-types, no supported OLE2 parts | ||||
EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat"); | EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat"); | ||||
/* Did fail for some documents with special XML contents... | /* Did fail for some documents with special XML contents... | ||||
try { | try { | ||||
OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), | OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), | ||||
"/tmp/pretty-" + file.getName() }); | |||||
"/tmp/pretty-" + file.getName() }); | |||||
} catch (ZipException e) { | } catch (ZipException e) { | ||||
// ignore, not a Zip/OOXML file | |||||
// ignore, not a Zip/OOXML file | |||||
}*/ | }*/ | ||||
} | } | ||||
* | * | ||||
*/ | */ | ||||
public class BaseIntegrationTest { | public class BaseIntegrationTest { | ||||
private final File rootDir; | |||||
private final String file; | |||||
private FileHandler handler; | |||||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) { | |||||
this.rootDir = rootDir; | |||||
this.file = file; | |||||
this.handler = handler; | |||||
} | |||||
/** | |||||
* Keep this public so it can be used by the regression-tests | |||||
*/ | |||||
public void test() throws Exception { | |||||
private final File rootDir; | |||||
private final String file; | |||||
private FileHandler handler; | |||||
public BaseIntegrationTest(File rootDir, String file, FileHandler handler) { | |||||
this.rootDir = rootDir; | |||||
this.file = file; | |||||
this.handler = handler; | |||||
} | |||||
/** | |||||
* Keep this public so it can be used by the regression-tests | |||||
*/ | |||||
public void test() throws Exception { | |||||
assertNotNull( handler, "Unknown file extension for file: " + file ); | assertNotNull( handler, "Unknown file extension for file: " + file ); | ||||
testOneFile(new File(rootDir, file)); | |||||
} | |||||
protected void testOneFile(File inputFile) throws Exception { | |||||
try { | |||||
handleFile(inputFile); | |||||
} catch (OfficeXmlFileException e) { | |||||
// switch XWPF and HWPF and so forth depending on the error message | |||||
handleWrongOLE2XMLExtension(inputFile, e); | |||||
} catch (OldFileFormatException e) { | |||||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile); | |||||
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" ); | |||||
} catch (EncryptedDocumentException e) { | |||||
// Do not try to read encrypted files | |||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" ); | |||||
} catch (ZipException e) { | |||||
// some files are corrupted | |||||
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) { | |||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" ); | |||||
} | |||||
throw e; | |||||
} catch (IOException e) { | |||||
// ignore some other ways of corrupted files | |||||
String message = e.getMessage(); | |||||
if(message != null && message.contains("Truncated ZIP file")) { | |||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" ); | |||||
} | |||||
// sometimes binary format has XML-format-extension... | |||||
if(message != null && message.contains("rong file format or file extension for OO XML file")) { | |||||
handleWrongOLE2XMLExtension(inputFile, e); | |||||
return; | |||||
} | |||||
throw e; | |||||
} catch (IllegalArgumentException e) { | |||||
// ignore errors for documents with incorrect extension | |||||
String message = e.getMessage(); | |||||
if(message != null && (message.equals("The document is really a RTF file") || | |||||
message.equals("The document is really a PDF file") || | |||||
message.equals("The document is really a HTML file"))) { | |||||
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); | |||||
} | |||||
if(message != null && message.equals("The document is really a OOXML file")) { | |||||
handleWrongOLE2XMLExtension(inputFile, e); | |||||
return; | |||||
} | |||||
throw e; | |||||
} | |||||
try { | |||||
handler.handleExtracting(inputFile); | |||||
} catch (EncryptedDocumentException e) { | |||||
// Do not try to read encrypted files | |||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" ); | |||||
} | |||||
} | |||||
testOneFile(new File(rootDir, file)); | |||||
} | |||||
protected void testOneFile(File inputFile) throws Exception { | |||||
try { | |||||
handleFile(inputFile); | |||||
} catch (OfficeXmlFileException e) { | |||||
// switch XWPF and HWPF and so forth depending on the error message | |||||
handleWrongOLE2XMLExtension(inputFile, e); | |||||
} catch (OldFileFormatException e) { | |||||
// Not even text extraction is supported for these: handler.handleExtracting(inputFile); | |||||
assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" ); | |||||
} catch (EncryptedDocumentException e) { | |||||
// Do not try to read encrypted files | |||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" ); | |||||
} catch (ZipException e) { | |||||
// some files are corrupted | |||||
if (e.getMessage().equals("unexpected EOF") || e.getMessage().equals("Truncated ZIP file")) { | |||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" ); | |||||
} | |||||
throw e; | |||||
} catch (IOException e) { | |||||
// ignore some other ways of corrupted files | |||||
String message = e.getMessage(); | |||||
if(message != null && message.contains("Truncated ZIP file")) { | |||||
assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" ); | |||||
} | |||||
// sometimes binary format has XML-format-extension... | |||||
if(message != null && message.contains("rong file format or file extension for OO XML file")) { | |||||
handleWrongOLE2XMLExtension(inputFile, e); | |||||
return; | |||||
} | |||||
throw e; | |||||
} catch (IllegalArgumentException e) { | |||||
// ignore errors for documents with incorrect extension | |||||
String message = e.getMessage(); | |||||
if(message != null && (message.equals("The document is really a RTF file") || | |||||
message.equals("The document is really a PDF file") || | |||||
message.equals("The document is really a HTML file"))) { | |||||
assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); | |||||
} | |||||
if(message != null && message.equals("The document is really a OOXML file")) { | |||||
handleWrongOLE2XMLExtension(inputFile, e); | |||||
return; | |||||
} | |||||
throw e; | |||||
} | |||||
try { | |||||
handler.handleExtracting(inputFile); | |||||
} catch (EncryptedDocumentException e) { | |||||
// Do not try to read encrypted files | |||||
assumeFalse( true, "File " + file + " excluded because it is password-encrypted" ); | |||||
} | |||||
} | |||||
void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception { | void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception { | ||||
// we sometimes have wrong extensions, so for some exceptions we try to handle it | |||||
// with the correct FileHandler instead | |||||
String message = e.getMessage(); | |||||
// ignore some file-types that we do not want to handle here | |||||
assumeFalse( message != null && (message.equals("The document is really a RTF file") || | |||||
message.equals("The document is really a PDF file") || | |||||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); | |||||
if(message != null && (message.equals("The document is really a XLS file"))) { | |||||
handler = new HSSFFileHandler(); | |||||
} else if(message != null && (message.equals("The document is really a PPT file"))) { | |||||
handler = new HSLFFileHandler(); | |||||
} else if(message != null && (message.equals("The document is really a DOC file"))) { | |||||
handler = new HWPFFileHandler(); | |||||
} else if(message != null && (message.equals("The document is really a VSD file"))) { | |||||
handler = new HDGFFileHandler(); | |||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension | |||||
} else if (handler instanceof HWPFFileHandler) { | |||||
// we sometimes have wrong extensions, so for some exceptions we try to handle it | |||||
// with the correct FileHandler instead | |||||
String message = e.getMessage(); | |||||
// ignore some file-types that we do not want to handle here | |||||
assumeFalse( message != null && (message.equals("The document is really a RTF file") || | |||||
message.equals("The document is really a PDF file") || | |||||
message.equals("The document is really a HTML file")), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" ); | |||||
if(message != null && (message.equals("The document is really a XLS file"))) { | |||||
handler = new HSSFFileHandler(); | |||||
} else if(message != null && (message.equals("The document is really a PPT file"))) { | |||||
handler = new HSLFFileHandler(); | |||||
} else if(message != null && (message.equals("The document is really a DOC file"))) { | |||||
handler = new HWPFFileHandler(); | |||||
} else if(message != null && (message.equals("The document is really a VSD file"))) { | |||||
handler = new HDGFFileHandler(); | |||||
// use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension | |||||
} else if (handler instanceof HWPFFileHandler) { | |||||
handler = new XWPFFileHandler(); | handler = new XWPFFileHandler(); | ||||
} else if (handler instanceof HSSFFileHandler) { | } else if (handler instanceof HSSFFileHandler) { | ||||
handler = new XSSFFileHandler(); | handler = new XSSFFileHandler(); | ||||
} else if (handler instanceof HSLFFileHandler) { | } else if (handler instanceof HSLFFileHandler) { | ||||
handler = new XSLFFileHandler(); | |||||
// and the other way around, use HWPF instead of XWPF and so forth | |||||
} else if(handler instanceof XWPFFileHandler) { | |||||
handler = new HWPFFileHandler(); | |||||
} else if(handler instanceof XSSFFileHandler) { | |||||
handler = new HSSFFileHandler(); | |||||
} else if(handler instanceof XSLFFileHandler) { | |||||
handler = new HSLFFileHandler(); | |||||
handler = new XSLFFileHandler(); | |||||
// and the other way around, use HWPF instead of XWPF and so forth | |||||
} else if(handler instanceof XWPFFileHandler) { | |||||
handler = new HWPFFileHandler(); | |||||
} else if(handler instanceof XSSFFileHandler) { | |||||
handler = new HSSFFileHandler(); | |||||
} else if(handler instanceof XSLFFileHandler) { | |||||
handler = new HSLFFileHandler(); | |||||
} else { | } else { | ||||
// nothing matched => throw the exception to the outside | |||||
throw e; | |||||
} | |||||
// we found a different handler to try processing again | |||||
handleFile(inputFile); | |||||
} | |||||
private void handleFile(File inputFile) throws Exception { | |||||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) { | |||||
handler.handleFile(newStream, inputFile.getAbsolutePath()); | |||||
} | |||||
} | |||||
// nothing matched => throw the exception to the outside | |||||
throw e; | |||||
} | |||||
// we found a different handler to try processing again | |||||
handleFile(inputFile); | |||||
} | |||||
private void handleFile(File inputFile) throws Exception { | |||||
try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) { | |||||
handler.handleFile(newStream, inputFile.getAbsolutePath()); | |||||
} | |||||
} | |||||
} | } |
* used in the stress testing. | * used in the stress testing. | ||||
*/ | */ | ||||
public interface FileHandler {
    /**
     * The FileHandler receives a stream ready for reading the
     * file and should handle the content that is provided and
     * try to read and interpret the data.
     *
     * Closing is handled by the framework outside this call.
     *
     * @param stream The input stream to read the file from.
     * @param path the path of the file that the stream was opened from
     * @throws Exception If an error happens in the file-specific handler
     */
    void handleFile(InputStream stream, String path) throws Exception;

    /**
     * Ensures that extracting text from the given file
     * is returning some text.
     *
     * @param file the file to extract text from
     * @throws Exception If an error happens during extraction
     */
    void handleExtracting(File file) throws Exception;

    /**
     * Allows to perform some additional work, e.g. run
     * some of the example applications
     *
     * @param file the file to perform the additional work on
     * @throws Exception If an error happens during the additional work
     */
    void handleAdditional(File file) throws Exception;
}
import org.junit.jupiter.api.Test; | import org.junit.jupiter.api.Test; | ||||
class HDGFFileHandler extends POIFSFileHandler { | class HDGFFileHandler extends POIFSFileHandler { | ||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws IOException { | |||||
POIFSFileSystem poifs = new POIFSFileSystem(stream); | |||||
HDGFDiagram diagram = new HDGFDiagram(poifs); | |||||
Stream[] topLevelStreams = diagram.getTopLevelStreams(); | |||||
assertNotNull(topLevelStreams); | |||||
for(Stream str : topLevelStreams) { | |||||
assertTrue(str.getPointer().getLength() >= 0); | |||||
} | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws IOException { | |||||
POIFSFileSystem poifs = new POIFSFileSystem(stream); | |||||
HDGFDiagram diagram = new HDGFDiagram(poifs); | |||||
Stream[] topLevelStreams = diagram.getTopLevelStreams(); | |||||
assertNotNull(topLevelStreams); | |||||
for(Stream str : topLevelStreams) { | |||||
assertTrue(str.getPointer().getLength() >= 0); | |||||
} | |||||
TrailerStream trailerStream = diagram.getTrailerStream(); | |||||
assertNotNull(trailerStream); | |||||
assertTrue(trailerStream.getPointer().getLength() >= 0); | |||||
diagram.close(); | |||||
poifs.close(); | |||||
TrailerStream trailerStream = diagram.getTrailerStream(); | |||||
assertNotNull(trailerStream); | |||||
assertTrue(trailerStream.getPointer().getLength() >= 0); | |||||
diagram.close(); | |||||
poifs.close(); | |||||
// writing is not yet implemented... handlePOIDocument(diagram); | |||||
} | |||||
// writing is not yet implemented... handlePOIDocument(diagram); | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
@Test | @Test | ||||
void test() throws Exception { | |||||
void test() throws Exception { | |||||
File file = new File("test-data/diagram/44501.vsd"); | File file = new File("test-data/diagram/44501.vsd"); | ||||
InputStream stream = new FileInputStream(file); | InputStream stream = new FileInputStream(file); | ||||
try { | |||||
handleFile(stream, file.getPath()); | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
try { | |||||
handleFile(stream, file.getPath()); | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
handleExtracting(file); | |||||
handleExtracting(file); | |||||
stream = new FileInputStream(file); | |||||
try { | |||||
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) { | |||||
assertNotNull(extractor.getText()); | |||||
} | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
} | |||||
stream = new FileInputStream(file); | |||||
try { | |||||
try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) { | |||||
assertNotNull(extractor.getText()); | |||||
} | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
} | |||||
} | } |
class HMEFFileHandler extends AbstractFileHandler { | class HMEFFileHandler extends AbstractFileHandler { | ||||
@Override | |||||
public void handleExtracting(File file) throws Exception { | |||||
FileMagic fm = FileMagic.valueOf(file); | |||||
if (fm == FileMagic.OLE2) { | |||||
super.handleExtracting(file); | |||||
} | |||||
} | |||||
@Override | |||||
public void handleExtracting(File file) throws Exception { | |||||
FileMagic fm = FileMagic.valueOf(file); | |||||
if (fm == FileMagic.OLE2) { | |||||
super.handleExtracting(file); | |||||
} | |||||
} | |||||
@Override | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | public void handleFile(InputStream stream, String path) throws Exception { | ||||
HMEFMessage msg = new HMEFMessage(stream); | |||||
HMEFMessage msg = new HMEFMessage(stream); | |||||
// there are test-files that have no body... | |||||
String[] HTML_BODY = { | |||||
"Testing TNEF Message", "TNEF test message with attachments", "Test" | |||||
}; | |||||
String bodyStr; | |||||
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) { | |||||
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML); | |||||
assertNotNull(bodyHtml); | |||||
bodyStr = new String(bodyHtml.getData(), getEncoding(msg)); | |||||
} else { | |||||
bodyStr = msg.getBody(); | |||||
} | |||||
assertNotNull( bodyStr, "Body is not set" ); | |||||
assertNotNull( msg.getSubject(), "Subject is not set" ); | |||||
} | |||||
// there are test-files that have no body... | |||||
String[] HTML_BODY = { | |||||
"Testing TNEF Message", "TNEF test message with attachments", "Test" | |||||
}; | |||||
String bodyStr; | |||||
if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) { | |||||
MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML); | |||||
assertNotNull(bodyHtml); | |||||
bodyStr = new String(bodyHtml.getData(), getEncoding(msg)); | |||||
} else { | |||||
bodyStr = msg.getBody(); | |||||
} | |||||
assertNotNull( bodyStr, "Body is not set" ); | |||||
assertNotNull( msg.getSubject(), "Subject is not set" ); | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Test | |||||
void test() throws Exception { | |||||
String path = "test-data/hmef/quick-winmail.dat"; | |||||
try (InputStream stream = new FileInputStream(path)) { | |||||
handleFile(stream, path); | |||||
} | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Test | |||||
void test() throws Exception { | |||||
String path = "test-data/hmef/quick-winmail.dat"; | |||||
try (InputStream stream = new FileInputStream(path)) { | |||||
handleFile(stream, path); | |||||
} | |||||
} | |||||
private String getEncoding(HMEFMessage tnefDat) { | |||||
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE); | |||||
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID); | |||||
int codePage = 1252; | |||||
if (oemCP != null) { | |||||
codePage = LittleEndian.getInt(oemCP.getData()); | |||||
} else if (cpId != null) { | |||||
codePage = LittleEndian.getInt(cpId.getData()); | |||||
} | |||||
switch (codePage) { | |||||
// see http://en.wikipedia.org/wiki/Code_page for more | |||||
case 1252: return "Windows-1252"; | |||||
case 20127: return "US-ASCII"; | |||||
default: return "cp"+codePage; | |||||
} | |||||
} | |||||
private String getEncoding(HMEFMessage tnefDat) { | |||||
TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE); | |||||
MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID); | |||||
int codePage = 1252; | |||||
if (oemCP != null) { | |||||
codePage = LittleEndian.getInt(oemCP.getData()); | |||||
} else if (cpId != null) { | |||||
codePage = LittleEndian.getInt(cpId.getData()); | |||||
} | |||||
switch (codePage) { | |||||
// see http://en.wikipedia.org/wiki/Code_page for more | |||||
case 1252: return "Windows-1252"; | |||||
case 20127: return "US-ASCII"; | |||||
default: return "cp"+codePage; | |||||
} | |||||
} | |||||
} | } |
import org.junit.jupiter.api.Test; | import org.junit.jupiter.api.Test; | ||||
class HPBFFileHandler extends POIFSFileHandler { | class HPBFFileHandler extends POIFSFileHandler { | ||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | |||||
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream)); | |||||
assertNotNull(pub.getEscherDelayStm()); | |||||
assertNotNull(pub.getMainContents()); | |||||
assertNotNull(pub.getQuillContents()); | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | |||||
HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream)); | |||||
assertNotNull(pub.getEscherDelayStm()); | |||||
assertNotNull(pub.getMainContents()); | |||||
assertNotNull(pub.getQuillContents()); | |||||
// writing is not yet implemented... handlePOIDocument(pub); | |||||
pub.close(); | |||||
} | |||||
// writing is not yet implemented... handlePOIDocument(pub); | |||||
pub.close(); | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
@Test | @Test | ||||
void test() throws Exception { | |||||
void test() throws Exception { | |||||
File file = new File("test-data/publisher/SampleBrochure.pub"); | File file = new File("test-data/publisher/SampleBrochure.pub"); | ||||
InputStream stream = new FileInputStream(file); | InputStream stream = new FileInputStream(file); | ||||
try { | |||||
handleFile(stream, file.getPath()); | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
try { | |||||
handleFile(stream, file.getPath()); | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
handleExtracting(file); | |||||
handleExtracting(file); | |||||
stream = new FileInputStream(file); | |||||
try { | |||||
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) { | |||||
assertNotNull(extractor.getText()); | |||||
} | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
} | |||||
stream = new FileInputStream(file); | |||||
try { | |||||
try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) { | |||||
assertNotNull(extractor.getText()); | |||||
} | |||||
} finally { | |||||
stream.close(); | |||||
} | |||||
} | |||||
} | } |
@Override | @Override | ||||
public void handleFile(InputStream stream, String path) throws Exception { | public void handleFile(InputStream stream, String path) throws Exception { | ||||
POIFSFileSystem poifs = new POIFSFileSystem(stream); | |||||
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs); | |||||
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation(); | |||||
SummaryInformation si = hpsf.getSummaryInformation(); | |||||
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME); | |||||
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME); | |||||
assertEquals(hasDSI, dsi != null); | |||||
POIFSFileSystem poifs = new POIFSFileSystem(stream); | |||||
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs); | |||||
DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation(); | |||||
SummaryInformation si = hpsf.getSummaryInformation(); | |||||
boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME); | |||||
boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME); | |||||
assertEquals(hasDSI, dsi != null); | |||||
assertEquals(hasSI, si != null); | assertEquals(hasSI, si != null); | ||||
handlePOIDocument(hpsf); | |||||
} | |||||
handlePOIDocument(hpsf); | |||||
} | |||||
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException { | |||||
private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException { | |||||
DirectoryNode root = poifs.getRoot(); | DirectoryNode root = poifs.getRoot(); | ||||
if (!root.hasEntry(streamName)) { | |||||
return false; | |||||
} | |||||
if (!root.hasEntry(streamName)) { | |||||
return false; | |||||
} | |||||
try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) { | try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) { | ||||
return PropertySet.isPropertySetStream(dis); | return PropertySet.isPropertySetStream(dis); | ||||
} | } | ||||
} | |||||
} | |||||
private static File getTempFile() { | |||||
private static File getTempFile() { | |||||
File f = null; | File f = null; | ||||
try { | try { | ||||
f = TempFile.createTempFile("hpsfCopy", "out"); | f = TempFile.createTempFile("hpsfCopy", "out"); | ||||
} | } | ||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
@Test | @Test | ||||
@SuppressWarnings("java:S2699") | @SuppressWarnings("java:S2699") | ||||
void test() throws Exception { | |||||
String path = "test-data/diagram/44501.vsd"; | |||||
void test() throws Exception { | |||||
String path = "test-data/diagram/44501.vsd"; | |||||
try (InputStream stream = new FileInputStream(path)) { | try (InputStream stream = new FileInputStream(path)) { | ||||
handleFile(stream, path); | handleFile(stream, path); | ||||
} | } | ||||
} | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | // a test-case to test this locally without executing the full TestAllFiles | ||||
@Test | @Test |
import org.junit.jupiter.api.Test; | import org.junit.jupiter.api.Test; | ||||
class HSMFFileHandler extends POIFSFileHandler { | class HSMFFileHandler extends POIFSFileHandler { | ||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | |||||
MAPIMessage mapi = new MAPIMessage(stream); | |||||
assertNotNull(mapi.getAttachmentFiles()); | |||||
assertNotNull(mapi.getDisplayBCC()); | |||||
assertNotNull(mapi.getMessageDate()); | |||||
AttachmentChunks[] attachments = mapi.getAttachmentFiles(); | |||||
for(AttachmentChunks attachment : attachments) { | |||||
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory(); | |||||
if(chunkDirectory != null) { | |||||
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage(); | |||||
assertNotNull(attachmentMSG); | |||||
String body = attachmentMSG.getTextBody(); | |||||
assertNotNull(body); | |||||
} | |||||
} | |||||
/* => Writing isn't yet supported... | |||||
// write out the file | |||||
File file = TempFile.createTempFile("StressTest", ".msg"); | |||||
writeToFile(mapi, file); | |||||
MAPIMessage read = new MAPIMessage(file.getAbsolutePath()); | |||||
assertNotNull(read.getAttachmentFiles()); | |||||
assertNotNull(read.getDisplayBCC()); | |||||
assertNotNull(read.getMessageDate()); | |||||
*/ | |||||
// writing is not yet supported... handlePOIDocument(mapi); | |||||
mapi.close(); | |||||
} | |||||
// private void writeToFile(MAPIMessage mapi, File file) | |||||
// throws FileNotFoundException, IOException { | |||||
// OutputStream stream = new FileOutputStream(file); | |||||
// try { | |||||
// mapi.write(stream); | |||||
// } finally { | |||||
// stream.close(); | |||||
// } | |||||
// } | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | |||||
MAPIMessage mapi = new MAPIMessage(stream); | |||||
assertNotNull(mapi.getAttachmentFiles()); | |||||
assertNotNull(mapi.getDisplayBCC()); | |||||
assertNotNull(mapi.getMessageDate()); | |||||
AttachmentChunks[] attachments = mapi.getAttachmentFiles(); | |||||
for(AttachmentChunks attachment : attachments) { | |||||
DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory(); | |||||
if(chunkDirectory != null) { | |||||
MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage(); | |||||
assertNotNull(attachmentMSG); | |||||
String body = attachmentMSG.getTextBody(); | |||||
assertNotNull(body); | |||||
} | |||||
} | |||||
/* => Writing isn't yet supported... | |||||
// write out the file | |||||
File file = TempFile.createTempFile("StressTest", ".msg"); | |||||
writeToFile(mapi, file); | |||||
MAPIMessage read = new MAPIMessage(file.getAbsolutePath()); | |||||
assertNotNull(read.getAttachmentFiles()); | |||||
assertNotNull(read.getDisplayBCC()); | |||||
assertNotNull(read.getMessageDate()); | |||||
*/ | |||||
// writing is not yet supported... handlePOIDocument(mapi); | |||||
mapi.close(); | |||||
} | |||||
// private void writeToFile(MAPIMessage mapi, File file) | |||||
// throws FileNotFoundException, IOException { | |||||
// OutputStream stream = new FileOutputStream(file); | |||||
// try { | |||||
// mapi.write(stream); | |||||
// } finally { | |||||
// stream.close(); | |||||
// } | |||||
// } | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Override | |||||
@Test | @Test | ||||
void test() throws Exception { | |||||
void test() throws Exception { | |||||
File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg"); | File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg"); | ||||
try (InputStream stream = new FileInputStream(file)) { | |||||
handleFile(stream, file.getPath()); | |||||
} | |||||
try (InputStream stream = new FileInputStream(file)) { | |||||
handleFile(stream, file.getPath()); | |||||
} | |||||
handleExtracting(file); | |||||
} | |||||
handleExtracting(file); | |||||
} | |||||
} | } |
import org.junit.jupiter.api.Test; | import org.junit.jupiter.api.Test; | ||||
class HSSFFileHandler extends SpreadsheetHandler { | class HSSFFileHandler extends SpreadsheetHandler { | ||||
// Handler used below to run the generic POIDocument check on the loaded workbook.
private final POIFSFileHandler delegate = new POIFSFileHandler(); | |||||
@Override | |||||
private final POIFSFileHandler delegate = new POIFSFileHandler(); | |||||
@Override | |||||
/**
 * Loads the stream as an HSSFWorkbook and runs, in order: handleWorkbook(wb),
 * delegate.handlePOIDocument(wb), HSSFOptimiser.optimiseCellStyles(wb), a check that
 * every cell of every sheet still returns a non-null cell style, and finally
 * HSSFOptimiser.optimiseFonts(wb).
 *
 * @param stream the input the HSSFWorkbook is constructed from
 * @param path   not used by the visible body
 * @throws Exception whatever any of the called steps throws
 */
// NOTE(review): the field declaration, the annotation and the first statements appear
// twice in this text (identical repeats).
// NOTE(review): no wb.close() call is visible in this method - confirm whether the
// workbook is meant to be left open here.
public void handleFile(InputStream stream, String path) throws Exception { | public void handleFile(InputStream stream, String path) throws Exception { | ||||
HSSFWorkbook wb = new HSSFWorkbook(stream); | |||||
handleWorkbook(wb); | |||||
HSSFWorkbook wb = new HSSFWorkbook(stream); | |||||
handleWorkbook(wb); | |||||
// TODO: some documents fail currently... | |||||
// TODO: some documents fail currently... | |||||
// Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating | // Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating | ||||
// IntersectionPtg. However it is still not capable of parsing it. | // IntersectionPtg. However it is still not capable of parsing it. | ||||
// So FormulaEvalTestData.xls now contains a few formulas that produce errors here. | // So FormulaEvalTestData.xls now contains a few formulas that produce errors here. | ||||
//HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb); | //HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb); | ||||
//evaluator.evaluateAll(); | //evaluator.evaluateAll(); | ||||
delegate.handlePOIDocument(wb); | |||||
// also try to see if some of the Records behave incorrectly | |||||
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb); | |||||
HSSFOptimiser.optimiseCellStyles(wb); | |||||
// after optimising the styles every cell must still resolve to a style
for(Sheet sheet : wb) { | |||||
for (Row row : sheet) { | |||||
for (Cell cell : row) { | |||||
assertNotNull(cell.getCellStyle()); | |||||
} | |||||
} | |||||
} | |||||
HSSFOptimiser.optimiseFonts(wb); | |||||
} | |||||
// Files for which handleAdditional(...) expects BiffViewer.main to fail with a
// RuntimeException. Entries have the form "<parent directory name>/<file name>",
// matching the key that handleAdditional builds from the File it is given.
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>(); | |||||
static { | |||||
// encrypted | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls"); | |||||
// broken files | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls"); | |||||
// TODO: ok to ignore? | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls"); | |||||
} | |||||
/**
 * Runs BiffViewer.main on the given file while System.out is replaced by a
 * NullPrintStream; the previous System.out is restored in the finally block.
 * <p>
 * Outcome handling: if BiffViewer completes, the file must not be listed in
 * EXPECTED_ADDITIONAL_FAILURES (otherwise the assertion fails); an
 * OldExcelFormatException is ignored; any other RuntimeException is rethrown unless
 * the file is listed in EXPECTED_ADDITIONAL_FAILURES.
 *
 * @param file the file to dump; its parent directory name and file name form the lookup key
 * @throws Exception declared by the signature; the visible body only rethrows RuntimeException
 */
// NOTE(review): the assertion message says "Expected Extraction to fail" although the
// step executed here is BiffViewer.main - confirm whether the wording is intentional.
@Override | |||||
public void handleAdditional(File file) throws Exception { | |||||
// redirect stdout as the examples often write lots of text | |||||
PrintStream oldOut = System.out; | |||||
// key format: "<parent directory name>/<file name>"
String fileWithParent = file.getParentFile().getName() + "/" + file.getName(); | |||||
try { | |||||
System.setOut(new NullPrintStream()); | |||||
BiffViewer.main(new String[]{file.getAbsolutePath()}); | |||||
// reaching this line means BiffViewer did not throw
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" ); | |||||
} catch (OldExcelFormatException e) { | |||||
// old excel formats are not supported here | |||||
} catch (RuntimeException e) { | |||||
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) { | |||||
throw e; | |||||
} | |||||
} finally { | |||||
System.setOut(oldOut); | |||||
} | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Test | |||||
void test() throws Exception { | |||||
delegate.handlePOIDocument(wb); | |||||
// also try to see if some of the Records behave incorrectly | |||||
// TODO: still fails on some records... RecordsStresser.handleWorkbook(wb); | |||||
HSSFOptimiser.optimiseCellStyles(wb); | |||||
for(Sheet sheet : wb) { | |||||
for (Row row : sheet) { | |||||
for (Cell cell : row) { | |||||
assertNotNull(cell.getCellStyle()); | |||||
} | |||||
} | |||||
} | |||||
HSSFOptimiser.optimiseFonts(wb); | |||||
} | |||||
// Set of "<parent directory name>/<file name>" keys consulted by handleAdditional(...):
// for these files a RuntimeException from BiffViewer.main is tolerated, and a
// successful run is reported as an assertion failure.
private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>(); | |||||
static { | |||||
// encrypted | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls"); | |||||
// broken files | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls"); | |||||
// TODO: ok to ignore? | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls"); | |||||
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls"); | |||||
} | |||||
/**
 * Feeds the file to BiffViewer.main with System.out silenced (a NullPrintStream is
 * installed for the duration and the original stream is put back in finally).
 * <p>
 * A clean run is only accepted for files that are not in EXPECTED_ADDITIONAL_FAILURES;
 * OldExcelFormatException is swallowed; other RuntimeExceptions propagate unless the
 * file is in EXPECTED_ADDITIONAL_FAILURES.
 *
 * @param file the file whose absolute path is passed to BiffViewer
 * @throws Exception declared by the signature; the visible body only rethrows RuntimeException
 */
// NOTE(review): file.getParentFile() is dereferenced without a null check - this
// assumes callers always pass a File that has a parent component; confirm.
@Override | |||||
public void handleAdditional(File file) throws Exception { | |||||
// redirect stdout as the examples often write lots of text | |||||
PrintStream oldOut = System.out; | |||||
String fileWithParent = file.getParentFile().getName() + "/" + file.getName(); | |||||
try { | |||||
System.setOut(new NullPrintStream()); | |||||
BiffViewer.main(new String[]{file.getAbsolutePath()}); | |||||
// only reached when BiffViewer returned normally
assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" ); | |||||
} catch (OldExcelFormatException e) { | |||||
// old excel formats are not supported here | |||||
} catch (RuntimeException e) { | |||||
// failures are tolerated only for the listed files
if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) { | |||||
throw e; | |||||
} | |||||
} finally { | |||||
System.setOut(oldOut); | |||||
} | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
// Runs handleFile(...), handleExtracting(...) and handleAdditional(...) against the
// single sample workbook named below.
// NOTE(review): the try-with-resources run and the trailing calls appear twice in this
// text; the repeats are textually identical.
@Test | |||||
void test() throws Exception { | |||||
File file = new File("../test-data/spreadsheet/59074.xls"); | File file = new File("../test-data/spreadsheet/59074.xls"); | ||||
try (InputStream stream = new FileInputStream(file)) { | |||||
handleFile(stream, file.getPath()); | |||||
} | |||||
try (InputStream stream = new FileInputStream(file)) { | |||||
handleFile(stream, file.getPath()); | |||||
} | |||||
handleExtracting(file); | |||||
handleExtracting(file); | |||||
handleAdditional(file); | |||||
} | |||||
handleAdditional(file); | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
// Calls only handleExtracting(...) for the sample workbook; the method body contains
// no assertion of its own, which is what the suppressed "java:S2699" warning refers to
// (presumably the Sonar "tests should include assertions" rule - TODO confirm).
@Test | @Test | ||||
@SuppressWarnings("java:S2699") | |||||
@SuppressWarnings("java:S2699") | |||||
void testExtractor() throws Exception { | void testExtractor() throws Exception { | ||||
handleExtracting(new File("../test-data/spreadsheet/59074.xls")); | handleExtracting(new File("../test-data/spreadsheet/59074.xls")); | ||||
} | } |
import org.junit.jupiter.api.Test; | import org.junit.jupiter.api.Test; | ||||
/**
 * File handler extending AbstractFileHandler. In the lines visible here handleFile only
 * checks POIXMLDocumentHandler.isEncrypted(stream) and returns early for encrypted
 * input; no further processing is shown.
 */
// NOTE(review): several lines in this class appear twice in this text (identical
// repeats of the annotation, the test header and the closing lines).
class OPCFileHandler extends AbstractFileHandler { | class OPCFileHandler extends AbstractFileHandler { | ||||
@Override | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | public void handleFile(InputStream stream, String path) throws Exception { | ||||
// ignore password protected files | // ignore password protected files | ||||
if (POIXMLDocumentHandler.isEncrypted(stream)) return; | if (POIXMLDocumentHandler.isEncrypted(stream)) return; | ||||
// text-extraction is not possible currently for these types of files | // text-extraction is not possible currently for these types of files | ||||
} | } | ||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Test | |||||
void test() throws Exception { | |||||
// a test-case to test this locally without executing the full TestAllFiles | |||||
@Test | |||||
void test() throws Exception { | |||||
File file = new File("test-data/diagram/test.vsdx"); | File file = new File("test-data/diagram/test.vsdx"); | ||||
// the file stream is wrapped in a PushbackInputStream with a 100000-byte pushback buffer
try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) { | try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) { | ||||
handleFile(stream, file.getPath()); | handleFile(stream, file.getPath()); | ||||
} | } | ||||
handleExtracting(file); | |||||
} | |||||
handleExtracting(file); | |||||
} | |||||
} | } |
class POIFSFileHandler extends AbstractFileHandler { | class POIFSFileHandler extends AbstractFileHandler { | ||||
/**
 * Opens the stream as a POIFSFileSystem (closed again by try-with-resources) and runs
 * handlePOIFSFileSystem(fs) followed by handleHPSFProperties(fs) on it.
 *
 * @param stream the input the POIFSFileSystem is constructed from
 * @param path   not used by the visible body
 * @throws Exception whatever construction or either check throws
 */
@Override | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | public void handleFile(InputStream stream, String path) throws Exception { | ||||
try (POIFSFileSystem fs = new POIFSFileSystem(stream)) { | try (POIFSFileSystem fs = new POIFSFileSystem(stream)) { | ||||
handlePOIFSFileSystem(fs); | handlePOIFSFileSystem(fs); | ||||
handleHPSFProperties(fs); | handleHPSFProperties(fs); | ||||
} | } | ||||
} | |||||
} | |||||
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException { | |||||
private void handleHPSFProperties(POIFSFileSystem fs) throws IOException { | |||||
try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) { | try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) { | ||||
// can be null | // can be null | ||||
ext.getDocSummaryInformation(); | ext.getDocSummaryInformation(); | ||||
} | } | ||||
// Minimal sanity check: the file system itself and its root entry must be non-null.
// NOTE(review): the two assertions and the closing brace appear twice in this text
// (identical repeats).
private void handlePOIFSFileSystem(POIFSFileSystem fs) { | private void handlePOIFSFileSystem(POIFSFileSystem fs) { | ||||
assertNotNull(fs); | |||||
assertNotNull(fs.getRoot()); | |||||
} | |||||
assertNotNull(fs); | |||||
assertNotNull(fs.getRoot()); | |||||
} | |||||
protected void handlePOIDocument(POIDocument doc) throws Exception { | |||||
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) { | |||||
protected void handlePOIDocument(POIDocument doc) throws Exception { | |||||
try (UnsynchronizedByteArrayOutputStream out = new UnsynchronizedByteArrayOutputStream()) { | |||||
doc.write(out); | doc.write(out); | ||||
try (InputStream in = out.toInputStream(); | try (InputStream in = out.toInputStream(); | ||||
handlePOIFSFileSystem(fs); | handlePOIFSFileSystem(fs); | ||||
} | } | ||||
} | } | ||||
} | |||||
} | |||||
// a test-case to test this locally without executing the full TestAllFiles | // a test-case to test this locally without executing the full TestAllFiles | ||||
@Test | @Test |
import org.apache.xmlbeans.XmlObject; | import org.apache.xmlbeans.XmlObject; | ||||
public final class POIXMLDocumentHandler { | public final class POIXMLDocumentHandler { | ||||
/**
 * Asserts that the document returns non-null values from getAllEmbeddedParts(),
 * getPackage(), getPackagePart(), getProperties() and getRelations().
 *
 * @param doc the document to check
 * @throws Exception whatever any of the accessors throws
 */
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception { | |||||
assertNotNull(doc.getAllEmbeddedParts()); | |||||
assertNotNull(doc.getPackage()); | |||||
assertNotNull(doc.getPackagePart()); | |||||
assertNotNull(doc.getProperties()); | |||||
assertNotNull(doc.getRelations()); | |||||
} | |||||
/**
 * Basic accessor check for a POIXMLDocument: embedded parts, package, package part,
 * properties and relations must each be non-null.
 *
 * @param doc the document whose accessors are exercised
 * @throws Exception whatever any of the accessors throws
 */
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception { | |||||
assertNotNull(doc.getAllEmbeddedParts()); | |||||
assertNotNull(doc.getPackage()); | |||||
assertNotNull(doc.getPackagePart()); | |||||
assertNotNull(doc.getProperties()); | |||||
assertNotNull(doc.getRelations()); | |||||
} | |||||
protected static boolean isEncrypted(InputStream stream) throws IOException { | protected static boolean isEncrypted(InputStream stream) throws IOException { | ||||
if (FileMagic.valueOf(stream) == FileMagic.OLE2) { | if (FileMagic.valueOf(stream) == FileMagic.OLE2) { |
import org.apache.poi.xssf.usermodel.XSSFChartSheet; | import org.apache.poi.xssf.usermodel.XSSFChartSheet; | ||||
public abstract class SpreadsheetHandler extends AbstractFileHandler { | public abstract class SpreadsheetHandler extends AbstractFileHandler { | ||||
/**
 * Round-trip check for a workbook: read content, write to memory, read content again,
 * write a second time, re-open the second output through WorkbookFactory.create, and
 * on the re-opened workbook run readContent, extractEmbedded and modifyContent before
 * closing it.
 *
 * @param wb the workbook under test
 * @throws IOException if writing or re-reading fails
 */
// NOTE(review): every run of statements below appears twice in this text; the repeats
// are textually identical.
// NOTE(review): read.close() is only reached on the normal path - if any step after
// WorkbookFactory.create throws, the re-opened workbook is not closed here.
public void handleWorkbook(Workbook wb) throws IOException { | |||||
// try to access some of the content | |||||
readContent(wb); | |||||
public void handleWorkbook(Workbook wb) throws IOException { | |||||
// try to access some of the content | |||||
readContent(wb); | |||||
// write out the file | |||||
writeToArray(wb); | |||||
// write out the file | |||||
writeToArray(wb); | |||||
// access some more content (we had cases where writing corrupts the data in memory) | |||||
readContent(wb); | |||||
// access some more content (we had cases where writing corrupts the data in memory) | |||||
readContent(wb); | |||||
// write once more | |||||
UnsynchronizedByteArrayOutputStream out = writeToArray(wb); | |||||
// write once more | |||||
UnsynchronizedByteArrayOutputStream out = writeToArray(wb); | |||||
// read in the written file | |||||
Workbook read = WorkbookFactory.create(out.toInputStream()); | |||||
// read in the written file | |||||
Workbook read = WorkbookFactory.create(out.toInputStream()); | |||||
assertNotNull(read); | |||||
assertNotNull(read); | |||||
readContent(read); | |||||
readContent(read); | |||||
extractEmbedded(read); | |||||
extractEmbedded(read); | |||||
modifyContent(read); | |||||
modifyContent(read); | |||||
read.close(); | |||||
} | |||||
read.close(); | |||||
} | |||||
/**
 * Writes the workbook into a newly created UnsynchronizedByteArrayOutputStream.
 *
 * @param wb the workbook to write
 * @return the stream holding the written bytes
 * @throws IOException if wb.write fails
 */
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException { | |||||
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream(); | |||||
wb.write(stream); | |||||
return stream; | |||||
} | |||||
/**
 * Serialises the workbook to an in-memory stream and hands that stream back to the
 * caller (the stream is not closed here).
 *
 * @param wb the workbook to write
 * @return the in-memory stream that received the output of wb.write
 * @throws IOException if wb.write fails
 */
private UnsynchronizedByteArrayOutputStream writeToArray(Workbook wb) throws IOException { | |||||
UnsynchronizedByteArrayOutputStream stream = new UnsynchronizedByteArrayOutputStream(); | |||||
wb.write(stream); | |||||
return stream; | |||||
} | |||||
/**
 * Walks the workbook: for each sheet it checks that the sheet can be looked up by its
 * own name, groups columns 4-5 and collapses/expands that group, and (for sheets with
 * at most 1000 physical rows) asserts that cell.toString() is non-null for every cell.
 * Afterwards getRefersToFormula() is called on every defined name that is not a
 * function name.
 *
 * @param wb the workbook to walk
 */
// NOTE(review): despite the name this method also mutates the sheets (groupColumn /
// setColumnGroupCollapsed).
// NOTE(review): each run of statements appears twice in this text (identical repeats).
private void readContent(Workbook wb) { | |||||
for(int i = 0;i < wb.getNumberOfSheets();i++) { | |||||
Sheet sheet = wb.getSheetAt(i); | |||||
assertNotNull(wb.getSheet(sheet.getSheetName())); | |||||
sheet.groupColumn((short) 4, (short) 5); | |||||
sheet.setColumnGroupCollapsed(4, true); | |||||
sheet.setColumnGroupCollapsed(4, false); | |||||
private void readContent(Workbook wb) { | |||||
for(int i = 0;i < wb.getNumberOfSheets();i++) { | |||||
Sheet sheet = wb.getSheetAt(i); | |||||
assertNotNull(wb.getSheet(sheet.getSheetName())); | |||||
sheet.groupColumn((short) 4, (short) 5); | |||||
sheet.setColumnGroupCollapsed(4, true); | |||||
sheet.setColumnGroupCollapsed(4, false); | |||||
// don't do this for very large sheets as it will take a long time | |||||
if(sheet.getPhysicalNumberOfRows() > 1000) { | |||||
continue; | |||||
} | |||||
// don't do this for very large sheets as it will take a long time | |||||
if(sheet.getPhysicalNumberOfRows() > 1000) { | |||||
continue; | |||||
} | |||||
for(Row row : sheet) { | |||||
for(Cell cell : row) { | |||||
assertNotNull(cell.toString()); | |||||
} | |||||
} | |||||
} | |||||
for(Row row : sheet) { | |||||
for(Cell cell : row) { | |||||
assertNotNull(cell.toString()); | |||||
} | |||||
} | |||||
} | |||||
for (Name name : wb.getAllNames()) { | |||||
// this sometimes caused exceptions | |||||
for (Name name : wb.getAllNames()) { | |||||
// this sometimes caused exceptions | |||||
if(!name.isFunctionName()) { | if(!name.isFunctionName()) { | ||||
name.getRefersToFormula(); | name.getRefersToFormula(); | ||||
} | } | ||||
} | |||||
} | |||||
} | |||||
} | |||||
private void extractEmbedded(Workbook wb) throws IOException { | |||||
private void extractEmbedded(Workbook wb) throws IOException { | |||||
EmbeddedExtractor ee = new EmbeddedExtractor(); | EmbeddedExtractor ee = new EmbeddedExtractor(); | ||||
for (Sheet s : wb) { | for (Sheet s : wb) { | ||||
assertNotNull(ed.getShape()); | assertNotNull(ed.getShape()); | ||||
} | } | ||||
} | } | ||||
} | |||||
/**
 * Tries to clone every sheet of the workbook, iterating from the last sheet index down
 * to 0. Sheets that are instances of XSSFChartSheet are skipped. A RecordFormatException
 * whose cause is a CloneNotSupportedException is ignored, as is a RuntimeException whose
 * message equals one of the four literals listed below; everything else is rethrown.
 *
 * @param wb the workbook whose sheets are cloned
 */
// NOTE(review): the method text appears twice in this span and the two copies are
// interleaved near the end; the repeated lines are textually identical.
private void modifyContent(Workbook wb) { | |||||
/* a number of files fail because of various things: udf, unimplemented functions, ... | |||||
we would need quite a list of excludes and the large regression tests would probably | |||||
take a lot longer to run... | |||||
try { | |||||
// try to re-compute all formulas to find cases where parsing fails | |||||
wb.getCreationHelper().createFormulaEvaluator().evaluateAll(); | |||||
} catch (RuntimeException e) { | |||||
// only allow a specific exception which indicates that an external | |||||
// reference was not found | |||||
if(!e.getMessage().contains("Could not resolve external workbook name")) { | |||||
throw e; | |||||
} | |||||
}*/ | |||||
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) { | |||||
if(wb.getSheetAt(i) instanceof XSSFChartSheet) { | |||||
// clone for chart-sheets is not supported | |||||
continue; | |||||
} | |||||
try { | |||||
wb.cloneSheet(i); | |||||
} catch (RecordFormatException e) { | |||||
if (e.getCause() instanceof CloneNotSupportedException) { | |||||
// ignore me | |||||
continue; | |||||
} | |||||
throw e; | |||||
} catch (RuntimeException e) { | |||||
// known failure messages are matched by exact string equality
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) || | |||||
"CountryRecord not found".equals(e.getMessage()) || | |||||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) || | |||||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) { | |||||
// ignore these here for now | |||||
continue; | |||||
} | |||||
private void modifyContent(Workbook wb) { | |||||
/* a number of files fail because of various things: udf, unimplemented functions, ... | |||||
we would need quite a list of excludes and the large regression tests would probably | |||||
take a lot longer to run... | |||||
try { | |||||
// try to re-compute all formulas to find cases where parsing fails | |||||
wb.getCreationHelper().createFormulaEvaluator().evaluateAll(); | |||||
} catch (RuntimeException e) { | |||||
// only allow a specific exception which indicates that an external | |||||
// reference was not found | |||||
if(!e.getMessage().contains("Could not resolve external workbook name")) { | |||||
throw e; | |||||
} | |||||
}*/ | |||||
for (int i=wb.getNumberOfSheets()-1; i>=0; i--) { | |||||
if(wb.getSheetAt(i) instanceof XSSFChartSheet) { | |||||
// clone for chart-sheets is not supported | |||||
continue; | |||||
} | |||||
try { | |||||
wb.cloneSheet(i); | |||||
} catch (RecordFormatException e) { | |||||
if (e.getCause() instanceof CloneNotSupportedException) { | |||||
// ignore me | |||||
continue; | |||||
} | } | ||||
throw e; | |||||
} | |||||
} | |||||
} | |||||
throw e; | |||||
} catch (RuntimeException e) { | |||||
if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) || | |||||
"CountryRecord not found".equals(e.getMessage()) || | |||||
"CountryRecord or SSTRecord not found".equals(e.getMessage()) || | |||||
"Cannot add more than 65535 shapes".equals(e.getMessage()) ) { | |||||
// ignore these here for now | |||||
continue; | |||||
} | |||||
throw e; | |||||
} | |||||
} | |||||
} | |||||
} | } |
import org.junit.jupiter.api.Test; | import org.junit.jupiter.api.Test; | ||||
class XSLFFileHandler extends SlideShowHandler { | class XSLFFileHandler extends SlideShowHandler { | ||||
/**
 * Opens the stream as an XMLSlideShow and wraps its package in an XSLFSlideShow (both
 * closed by try-with-resources). Asserts that the presentation, the slide master
 * references and the slide references are non-null, then runs
 * POIXMLDocumentHandler.handlePOIXMLDocument and handleSlideShow on the slide show.
 * A POIXMLException is unwrapped: its cause is thrown if present, otherwise the
 * exception itself.
 *
 * @param stream the input the XMLSlideShow is constructed from
 * @param path   not used by the visible body
 * @throws Exception the unwrapped cause, or whatever any step throws
 */
// NOTE(review): getCause() returns a Throwable; the (Exception) cast below would raise
// a ClassCastException if the cause were not an Exception (e.g. an Error).
// NOTE(review): the try block appears twice in this text (identical repeats), each
// followed by an @Override line.
@Override | |||||
@Override | |||||
public void handleFile(InputStream stream, String path) throws Exception { | public void handleFile(InputStream stream, String path) throws Exception { | ||||
try (XMLSlideShow slide = new XMLSlideShow(stream); | |||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) { | |||||
; | |||||
assertNotNull(slideInner.getPresentation()); | |||||
assertNotNull(slideInner.getSlideMasterReferences()); | |||||
assertNotNull(slideInner.getSlideReferences()); | |||||
new POIXMLDocumentHandler().handlePOIXMLDocument(slide); | |||||
handleSlideShow(slide); | |||||
} catch (POIXMLException e) { | |||||
Exception cause = (Exception)e.getCause(); | |||||
throw cause == null ? e : cause; | |||||
} | |||||
} | |||||
@Override | |||||
try (XMLSlideShow slide = new XMLSlideShow(stream); | |||||
XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) { | |||||
; | |||||
assertNotNull(slideInner.getPresentation()); | |||||
assertNotNull(slideInner.getSlideMasterReferences()); | |||||
assertNotNull(slideInner.getSlideReferences()); | |||||
new POIXMLDocumentHandler().handlePOIXMLDocument(slide); | |||||
handleSlideShow(slide); | |||||
} catch (POIXMLException e) { | |||||
Exception cause = (Exception)e.getCause(); | |||||
throw cause == null ? e : cause; | |||||
} | |||||
} | |||||
/**
 * Runs the inherited extraction check and then exercises the slide-show extractor
 * obtained from ExtractorFactory.createExtractor(file): with slides, notes and master
 * enabled getText() must be non-null; with all three disabled getText() must equal "".
 *
 * @param file the file to extract text from
 * @throws Exception whatever the super call, the factory or the extractor throws
 */
// NOTE(review): the result of createExtractor is cast to SlideShowExtractor without a
// type check - this assumes the factory always yields that type for files given to
// this handler; confirm.
// NOTE(review): each run of statements in the try block appears twice in this text
// (identical repeats).
@Override | |||||
public void handleExtracting(File file) throws Exception { | public void handleExtracting(File file) throws Exception { | ||||
super.handleExtracting(file); | super.handleExtracting(file); | ||||
// additionally try the other getText() methods | // additionally try the other getText() methods | ||||
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) { | |||||
assertNotNull(extractor); | |||||
extractor.setSlidesByDefault(true); | |||||
extractor.setNotesByDefault(true); | |||||
extractor.setMasterByDefault(true); | |||||
try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) { | |||||
assertNotNull(extractor); | |||||
extractor.setSlidesByDefault(true); | |||||
extractor.setNotesByDefault(true); | |||||
extractor.setMasterByDefault(true); | |||||
assertNotNull(extractor.getText()); | |||||
assertNotNull(extractor.getText()); | |||||
extractor.setSlidesByDefault(false); | |||||
extractor.setNotesByDefault(false); | |||||
extractor.setMasterByDefault(false); | |||||
extractor.setSlidesByDefault(false); | |||||
extractor.setNotesByDefault(false); | |||||
extractor.setMasterByDefault(false); | |||||
assertEquals("", extractor.getText(), "With all options disabled we should not get text"); | |||||
} | |||||
assertEquals("", extractor.getText(), "With all options disabled we should not get text"); | |||||
} | |||||
} | } | ||||
// a test-case to test this locally without executing the full TestAllFiles | // a test-case to test this locally without executing the full TestAllFiles | ||||
// Runs handleFile(...) and then handleExtracting(...) against the single sample
// presentation named below.
// NOTE(review): the annotation, the method header, the try-with-resources run and the
// closing lines each appear twice in this text (identical repeats).
@Override | |||||
@Override | |||||
@Test | @Test | ||||
void test() throws Exception { | |||||
void test() throws Exception { | |||||
File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx"); | File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx"); | ||||
try (InputStream stream = new FileInputStream(file)) { | |||||
handleFile(stream, file.getPath()); | |||||
} | |||||
try (InputStream stream = new FileInputStream(file)) { | |||||
handleFile(stream, file.getPath()); | |||||
} | |||||
handleExtracting(file); | |||||
} | |||||
handleExtracting(file); | |||||
} | |||||
} | } |