aboutsummaryrefslogtreecommitdiffstats
path: root/integrationtest/src/test/java/org/apache/poi
diff options
context:
space:
mode:
Diffstat (limited to 'integrationtest/src/test/java/org/apache/poi')
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java178
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java165
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java96
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java51
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java120
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java60
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java78
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java91
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java67
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java132
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java94
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java88
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java127
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java72
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java97
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java73
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java65
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java81
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java72
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java168
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java157
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/StressMap.java153
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java201
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java45
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java85
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java96
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java234
-rw-r--r--integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java56
28 files changed, 3002 insertions, 0 deletions
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java
new file mode 100644
index 0000000000..01a4ebc82d
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java
@@ -0,0 +1,178 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.extractor.POIOLE2TextExtractor;
+import org.apache.poi.extractor.POITextExtractor;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
+import org.apache.poi.hssf.extractor.EventBasedExcelExtractor;
+import org.apache.poi.ooxml.POIXMLException;
+import org.apache.poi.ss.extractor.ExcelExtractor;
+import org.apache.poi.util.IOUtils;
+
+/**
+ * Base class with things that can be run for any supported file handler
+ * in the integration tests, mostly text-extraction related at the moment.
+ */
+public abstract class AbstractFileHandler implements FileHandler {
+    /**
+     * Files, given as {@code <parent directory>/<file name>}, for which text
+     * extraction is expected to fail with an {@link IllegalArgumentException}.
+     * If extraction unexpectedly succeeds for one of these files, the check fails.
+     */
+    public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>();
+    static {
+        // password protected files without password
+        // ... currently none ...
+
+        // unsupported file-types, no supported OLE2 parts
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug63955-winmail.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/64322.ole2");
+    }
+
+    /**
+     * Runs the text-extraction checks twice for the given file: first with the
+     * current thread preferring event based extractors, then without. The
+     * preference that was active before the call is restored afterwards, even
+     * when one of the checks fails.
+     *
+     * @param file the file to extract text from
+     * @throws Exception if extraction fails in a way that is not expected
+     */
+    @Override
+    public void handleExtracting(File file) throws Exception {
+        boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
+        try {
+            ExtractorFactory.setThreadPrefersEventExtractors(true);
+            handleExtractingInternal(file);
+
+            ExtractorFactory.setThreadPrefersEventExtractors(false);
+            handleExtractingInternal(file);
+        } finally {
+            ExtractorFactory.setThreadPrefersEventExtractors(before);
+        }
+
+        // Pretty-printing the file via OOXMLPrettyPrint used to be attempted here,
+        // but it did fail for some documents with special XML contents.
+    }
+
+    /**
+     * Creates an extractor for the file and verifies that text, metadata text and
+     * (where applicable) HPSF properties and Excel formula/comment text can be read,
+     * and that the file itself is left untouched by the extraction.
+     *
+     * @param file the file to extract text from
+     * @throws Exception if extraction fails in a way that is not expected
+     */
+    private void handleExtractingInternal(File file) throws Exception {
+        // remember size and timestamp to verify that extracting does not modify the file
+        long length = file.length();
+        long modified = file.lastModified();
+
+        POITextExtractor extractor = null;
+        String fileAndParentName = file.getParentFile().getName() + "/" + file.getName();
+        try {
+            // fix windows absolute paths for exception message tracking
+            String relPath = file.getPath().replaceAll(".*test-data", "test-data").replace('\\', '/');
+            extractor = ExtractorFactory.createExtractor(file);
+            assertNotNull(extractor, "Should get a POITextExtractor but had none for file " + relPath);
+
+            assertNotNull(extractor.getText(), "Should get some text but had none for file " + relPath);
+
+            // also try metadata
+            @SuppressWarnings("resource")
+            POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor();
+            assertNotNull(metadataExtractor.getText());
+
+            // reaching this point means extraction worked, which must not happen for known failures
+            assertFalse(EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName),
+                "Expected Extraction to fail for file " + relPath + " and handler " + this + ", but did not fail!");
+
+            assertEquals(length, file.length(), "File should not be modified by extractor");
+            assertEquals(modified, file.lastModified(), "File should not be modified by extractor");
+
+            handleExtractingAsStream(file);
+
+            if (extractor instanceof POIOLE2TextExtractor) {
+                try (HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor) extractor)) {
+                    assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
+                    assertNotNull(hpsfExtractor.getSummaryInformationText());
+                    assertNotNull(hpsfExtractor.getText());
+                }
+            }
+
+            // test again with including formulas and cell-comments as this caused some bugs
+            if (extractor instanceof ExcelExtractor &&
+                    // comment-extraction and formula extraction are not well supported in event based extraction
+                    !(extractor instanceof EventBasedExcelExtractor)) {
+                ExcelExtractor excelExtractor = (ExcelExtractor) extractor;
+
+                excelExtractor.setFormulasNotResults(true);
+                assertNotNull(extractor.getText());
+
+                excelExtractor.setIncludeCellComments(true);
+                assertNotNull(extractor.getText());
+            }
+        } catch (IOException | POIXMLException e) {
+            // walk down the chain of wrapping IO/XML exceptions; if a different kind of
+            // exception is found underneath, report that one instead of the wrapper
+            Exception prevE = e;
+            Throwable cause;
+            while ((cause = prevE.getCause()) instanceof Exception) {
+                if (cause instanceof IOException || cause instanceof POIXMLException) {
+                    prevE = (Exception) cause;
+                } else {
+                    throw (Exception) cause;
+                }
+            }
+            throw e;
+        } catch (IllegalArgumentException e) {
+            // only tolerated for files that are known to be unsupported
+            if (!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) {
+                throw e;
+            }
+        } catch (EncryptedDocumentException e) {
+            // skip (instead of fail) the test when the JVM lacks the unlimited strength crypto policy
+            String msg = "org.apache.poi.EncryptedDocumentException: Export Restrictions in place - please install JCE Unlimited Strength Jurisdiction Policy files";
+            assumeFalse(msg.equals(e.getMessage()));
+            throw e;
+        } catch (IllegalStateException e) {
+            // the message may be null; never let a NullPointerException hide the original failure
+            String message = e.getMessage();
+            boolean scratchpadMissing = message != null && message.contains("POI Scratchpad jar missing");
+            if (!scratchpadMissing || !Boolean.getBoolean("scratchpad.ignore")) {
+                throw e;
+            }
+        } finally {
+            IOUtils.closeQuietly(extractor);
+        }
+    }
+
+    /**
+     * Verifies that an extractor can also be created from a plain input stream
+     * of the file and that it returns text.
+     *
+     * @param file the file to open as stream
+     * @throws IOException if reading the file or extracting fails
+     */
+    private void handleExtractingAsStream(File file) throws IOException {
+        try (InputStream stream = new FileInputStream(file);
+             POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream)) {
+            assertNotNull(streamExtractor);
+
+            assertNotNull(streamExtractor.getText());
+        }
+    }
+
+    @Override
+    public void handleAdditional(File file) throws Exception {
+        // by default we do nothing here
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java b/integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java
new file mode 100644
index 0000000000..5d63d1f5a1
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java
@@ -0,0 +1,165 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.zip.ZipException;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.OldFileFormatException;
+import org.apache.poi.poifs.filesystem.OfficeXmlFileException;
+
+/**
+ * This class is used for mass-regression testing via a
+ * separate project, this class provides functionality to
+ * run integration tests on one file and handle some
+ * types of files/exceptions, e.g. old file formats.
+ */
+public class BaseIntegrationTest {
+    private final File rootDir;
+    private final String file;
+    // not final: replaced when a file turns out to have the wrong extension
+    private FileHandler handler;
+
+    /**
+     * @param rootDir the directory that {@code file} is resolved against
+     * @param file the path of the file to test, relative to {@code rootDir}
+     * @param handler the handler to start with, may be replaced during the test
+     *      if the file content does not fit the handler
+     */
+    public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
+        this.rootDir = rootDir;
+        this.file = file;
+        this.handler = handler;
+    }
+
+    /**
+     * Keep this public so it can be used by the regression-tests
+     *
+     * @throws Exception if handling the file fails in an unexpected way
+     */
+    public void test() throws Exception {
+        assertNotNull( handler, "Unknown file extension for file: " + file );
+        testOneFile(new File(rootDir, file));
+    }
+
+    /**
+     * Runs the handler on the file and afterwards the text extraction.
+     * Known kinds of unsupported or broken files abort the test via a failed
+     * assumption instead of failing it; files with a mismatching extension
+     * are retried with a different handler.
+     *
+     * @param inputFile the file to test
+     * @throws Exception if handling the file fails in an unexpected way
+     */
+    protected void testOneFile(File inputFile) throws Exception {
+        try {
+            handleFile(inputFile);
+        } catch (OfficeXmlFileException e) {
+            // switch XWPF and HWPF and so forth depending on the error message
+            handleWrongOLE2XMLExtension(inputFile, e);
+        } catch (OldFileFormatException e) {
+            // Not even text extraction is supported for these: handler.handleExtracting(inputFile);
+            assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
+        } catch (EncryptedDocumentException e) {
+            // Do not try to read encrypted files
+            assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
+        } catch (ZipException e) {
+            // some files are corrupted; compare constant-first as the message may be null
+            String message = e.getMessage();
+            if ("unexpected EOF".equals(message) || "Truncated ZIP file".equals(message)) {
+                assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
+            }
+
+            throw e;
+        } catch (IOException e) {
+            // ignore some other ways of corrupted files
+            String message = e.getMessage();
+            if(message != null && message.contains("Truncated ZIP file")) {
+                assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
+            }
+
+            // sometimes binary format has XML-format-extension...
+            if(message != null && message.contains("rong file format or file extension for OO XML file")) {
+                handleWrongOLE2XMLExtension(inputFile, e);
+                return;
+            }
+
+            throw e;
+        } catch (IllegalArgumentException e) {
+            // ignore errors for documents with incorrect extension
+            String message = e.getMessage();
+            if(isNonOfficeFormat(message)) {
+                assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
+            }
+
+            if("The document is really a OOXML file".equals(message)) {
+                handleWrongOLE2XMLExtension(inputFile, e);
+                return;
+            }
+
+            throw e;
+        }
+
+        try {
+            handler.handleExtracting(inputFile);
+        } catch (EncryptedDocumentException e) {
+            // Do not try to read encrypted files
+            assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
+        }
+    }
+
+    /**
+     * Picks a different handler based on the exception message or, failing that,
+     * on the current handler (binary format &lt;-&gt; OOXML format), and handles the
+     * file again with it.
+     *
+     * @param inputFile the file that could not be handled
+     * @param e the exception that was thrown by the current handler, it is re-thrown
+     *      if no alternative handler can be determined
+     * @throws Exception if no alternative handler is found or handling fails again
+     */
+    void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
+        // we sometimes have wrong extensions, so for some exceptions we try to handle it
+        // with the correct FileHandler instead
+        String message = e.getMessage();
+
+        // ignore some file-types that we do not want to handle here
+        assumeFalse( isNonOfficeFormat(message), "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
+
+        if("The document is really a XLS file".equals(message)) {
+            handler = new HSSFFileHandler();
+        } else if("The document is really a PPT file".equals(message)) {
+            handler = new HSLFFileHandler();
+        } else if("The document is really a DOC file".equals(message)) {
+            handler = new HWPFFileHandler();
+        } else if("The document is really a VSD file".equals(message)) {
+            handler = new HDGFFileHandler();
+
+        // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
+        } else if (handler instanceof HWPFFileHandler) {
+            handler = new XWPFFileHandler();
+        } else if (handler instanceof HSSFFileHandler) {
+            handler = new XSSFFileHandler();
+        } else if (handler instanceof HSLFFileHandler) {
+            handler = new XSLFFileHandler();
+
+        // and the other way around, use HWPF instead of XWPF and so forth
+        } else if(handler instanceof XWPFFileHandler) {
+            handler = new HWPFFileHandler();
+        } else if(handler instanceof XSSFFileHandler) {
+            handler = new HSSFFileHandler();
+        } else if(handler instanceof XSLFFileHandler) {
+            handler = new HSLFFileHandler();
+        } else {
+            // nothing matched => throw the exception to the outside
+            throw e;
+        }
+
+        // we found a different handler to try processing again
+        handleFile(inputFile);
+    }
+
+    /**
+     * @param message an exception message, may be {@code null}
+     * @return {@code true} if the message states that the document is an RTF, PDF or HTML file
+     */
+    private static boolean isNonOfficeFormat(String message) {
+        return "The document is really a RTF file".equals(message) ||
+                "The document is really a PDF file".equals(message) ||
+                "The document is really a HTML file".equals(message);
+    }
+
+    /**
+     * Opens the file as buffered stream and passes it to the current handler.
+     */
+    private void handleFile(File inputFile) throws Exception {
+        try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
+            handler.handleFile(newStream, inputFile.getAbsolutePath());
+        }
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java b/integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java
new file mode 100644
index 0000000000..c9241cb0f7
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java
@@ -0,0 +1,96 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.fail;
+
+/**
+ * Describes an exception that is expected for a file: which file, tests and
+ * handlers it applies to, an optional password, and the exception class and
+ * message.
+ */
+public class ExcInfo {
+    /** Value of {@link #getTests()} that marks the entry as ignored. */
+    private static final String IGNORED_TESTS = "IGNORE";
+
+    private String file;
+    private String tests;
+    private String handler;
+    private String password;
+    private Class<? extends Throwable> exClazz;
+    private String exMessage;
+
+    public String getFile() {
+        return file;
+    }
+
+    public void setFile(String file) {
+        this.file = file;
+    }
+
+    public String getTests() {
+        return tests;
+    }
+
+    public void setTests(String tests) {
+        this.tests = tests;
+    }
+
+    public String getHandler() {
+        return handler;
+    }
+
+    public void setHandler(String handler) {
+        this.handler = handler;
+    }
+
+    public String getPassword() {
+        return password;
+    }
+
+    public void setPassword(String password) {
+        this.password = password;
+    }
+
+    public Class<? extends Throwable> getExClazz() {
+        return exClazz;
+    }
+
+    /**
+     * Resolves the given class name and stores it as the expected exception class.
+     * The current test is failed if the class cannot be found or is not a
+     * {@link Throwable}.
+     *
+     * @param exClazz the fully qualified name of the expected exception class
+     */
+    public void setExClazz(String exClazz) {
+        try {
+            // checked narrowing, errors (e.g. assertion failures) are accepted as well as exceptions
+            this.exClazz = Class.forName(exClazz).asSubclass(Throwable.class);
+        } catch (ClassNotFoundException | ClassCastException ex) {
+            fail(ex);
+        }
+    }
+
+    public String getExMessage() {
+        return exMessage;
+    }
+
+    public void setExMessage(String exMessage) {
+        this.exMessage = exMessage;
+    }
+
+    /**
+     * Checks if this entry applies to the given test and handler. Entries
+     * marked as ignored apply to every test name.
+     *
+     * @param testName the name of the test, looked up as substring of the configured tests
+     * @param handler the name of the handler, looked up as substring of the configured handlers
+     * @return {@code true} if this entry applies
+     */
+    public boolean isMatch(String testName, String handler) {
+        return
+            (tests == null || tests.contains(testName) || IGNORED_TESTS.equals(tests)) &&
+            matchesHandler(handler);
+    }
+
+    /**
+     * Like {@link #isMatch(String, String)}, but entries marked as ignored never apply.
+     *
+     * @param testName the name of the test, looked up as substring of the configured tests
+     * @param handler the name of the handler, looked up as substring of the configured handlers
+     * @return {@code true} if this entry applies and is not marked as ignored
+     */
+    public boolean isValid(String testName, String handler) {
+        return
+            !IGNORED_TESTS.equals(tests) &&
+            (tests == null || tests.contains(testName)) &&
+            matchesHandler(handler);
+    }
+
+    /** An entry without configured handler applies to all handlers. */
+    private boolean matchesHandler(String handler) {
+        return this.handler == null || this.handler.contains(handler);
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java
new file mode 100644
index 0000000000..62e5d81ced
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java
@@ -0,0 +1,51 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.File;
+import java.io.InputStream;
+
+/**
+ * Base interface for the various file types that are
+ * used in the stress testing.
+ */
+public interface FileHandler {
+ /**
+ * The FileHandler receives a stream ready for reading the
+ * file and should handle the content that is provided and
+ * try to read and interpret the data.
+ *
+ * Closing is handled by the framework outside this call.
+ *
+ * @param stream The input stream to read the file from.
+ * @param path the path to the file, callers in this package pass
+ * either an absolute or a relative path
+ * @throws Exception If an error happens in the file-specific handler
+ */
+ void handleFile(InputStream stream, String path) throws Exception;
+
+ /**
+ * Ensures that extracting text from the given file
+ * is returning some text.
+ *
+ * @param file The file to extract text from.
+ * @throws Exception If an error happens during text extraction
+ */
+ void handleExtracting(File file) throws Exception;
+
+ /**
+ * Allows to perform some additional work, e.g. run
+ * some of the example applications
+ *
+ * @param file The file to perform the additional work on.
+ * @throws Exception If an error happens during the additional work
+ */
+ void handleAdditional(File file) throws Exception;
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java
new file mode 100644
index 0000000000..8be52b35a6
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java
@@ -0,0 +1,120 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.regex.Pattern;
+
+/**
+ * Looks up the {@link FileHandler} that should be able to work with a file
+ * of a given mime-type.
+ */
+public class FileHandlerFactory {
+    // map from patterns for mimetypes to the FileHandlers that should be able to
+    // work with that file
+    // Several patterns overlap (e.g. the exact macro-enabled types and ".*ms-word.*"),
+    // so a LinkedHashMap is used to apply the matches in the order of registration:
+    // the first pattern that matches the whole mime-type wins, therefore specific
+    // patterns have to be registered before the broad ones
+    private static final Map<Pattern, FileHandler> MIME_TYPES = new LinkedHashMap<>();
+    static {
+        ////////////////// Word
+
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.document.macroenabled.12"), new XWPFFileHandler());
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.template.macroenabled.12"), new XWPFFileHandler());
+
+        // application/msword
+        MIME_TYPES.put(Pattern.compile(".*msword.*"), new HWPFFileHandler());
+        // application/vnd.ms-word
+        MIME_TYPES.put(Pattern.compile(".*ms-word.*"), new HWPFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.wordprocessingml.document
+        MIME_TYPES.put(Pattern.compile(".*wordprocessingml.*"), new XWPFFileHandler());
+
+        ////////////////// Excel
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.addin.macroEnabled.12"), new XSSFFileHandler());
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.sheet.binary.macroEnabled.12"), new XSSFFileHandler());
+
+        // application/msexcel
+        MIME_TYPES.put(Pattern.compile(".*msexcel.*"), new HSSFFileHandler());
+        // application/vnd.ms-excel
+        MIME_TYPES.put(Pattern.compile(".*ms-excel.*"), new HSSFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
+        MIME_TYPES.put(Pattern.compile(".*spreadsheetml.*"), new XSSFFileHandler());
+
+        ////////////////// Powerpoint
+
+        // application/vnd.ms-powerpoint
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint"), new HSLFFileHandler());
+        // application/vnd.ms-officetheme
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-officetheme"), new HSLFFileHandler());
+
+        // application/vnd.openxmlformats-officedocument.presentationml.presentation
+        MIME_TYPES.put(Pattern.compile(".*presentationml.*"), new XSLFFileHandler());
+        // application/vnd.ms-powerpoint.presentation.macroenabled.12
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.presentation.macroenabled.12"), new XSLFFileHandler());
+        // application/vnd.ms-powerpoint.slideshow.macroenabled.12
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.slideshow.macroenabled.12"), new XSLFFileHandler());
+
+        ////////////////// Mail/TNEF
+
+        // application/vnd.ms-tnef
+        MIME_TYPES.put(Pattern.compile(".*ms-tnef.*"), new HMEFFileHandler());
+
+        // application/vnd.ms-outlook
+        MIME_TYPES.put(Pattern.compile("application/vnd.ms-outlook"), new HSMFFileHandler());
+
+        ////////////////// Visio
+
+        // application/vnd.visio
+        MIME_TYPES.put(Pattern.compile("application/vnd.visio.*"), new HDGFFileHandler());
+
+        // application/vnd.ms-visio.drawing
+        // (the pattern needs to cover the whole mime-type as it is applied via Matcher.matches(),
+        // the previous pattern ".*vnd.ms-visio\\." only matched types ending in "vnd.ms-visio.")
+        MIME_TYPES.put(Pattern.compile(".*vnd\\.ms-visio\\.drawing.*"), new XDGFFileHandler());
+
+        //application/vnd.ms-visio.viewer
+        MIME_TYPES.put(Pattern.compile(".*visio.*"), new HDGFFileHandler());
+
+
+        ////////////////// Publisher
+
+        // application/x-mspublisher
+        MIME_TYPES.put(Pattern.compile("application/x-mspublisher"), new HPBFFileHandler());
+
+
+        ////////////////// Others
+
+        // special type used by Tika
+        MIME_TYPES.put(Pattern.compile("application/x-tika-ooxml.*"), new OPCFileHandler());
+        // special type used by Tika
+        MIME_TYPES.put(Pattern.compile("application/x-tika-msoffice.*"), new POIFSFileHandler());
+
+        // application/x-tika-old-excel
+        MIME_TYPES.put(Pattern.compile("application/x-tika-old-excel"), new POIFSFileHandler());
+
+        // no handler is registered for the following types:
+        // application/vnd.openxmlformats-officedocument.drawingml.chart+xml
+        // application/vnd.openxmlformats-officedocument.vmlDrawing
+    }
+
+    /**
+     * Returns the handler registered for the first pattern that matches the
+     * complete mime-type, patterns are tried in the order of registration.
+     *
+     * @param mimeType the mime-type of the file, e.g. {@code application/msword}
+     * @return the matching handler or {@code null} if the mime-type is {@code null}
+     *      or no pattern matches
+     */
+    public static FileHandler getHandler(String mimeType) {
+        if (mimeType == null) {
+            return null;
+        }
+
+        for(Map.Entry<Pattern,FileHandler> entry : MIME_TYPES.entrySet()) {
+            if(entry.getKey().matcher(mimeType).matches()) {
+                return entry.getValue();
+            }
+        }
+
+        return null;
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java
new file mode 100644
index 0000000000..aa3c827f1f
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java
@@ -0,0 +1,60 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.File;
+import java.io.InputStream;
+import java.util.function.Supplier;
+
+/**
+ * The known file handlers, each constant carries a supplier which
+ * creates a new instance of the corresponding {@link FileHandler}.
+ */
+@SuppressWarnings("unused")
+public enum FileHandlerKnown {
+ HDGF(HDGFFileHandler::new),
+ HMEF(HMEFFileHandler::new),
+ HPBF(HPBFFileHandler::new),
+ HPSF(HPSFFileHandler::new),
+ HSLF(HSLFFileHandler::new),
+ HSMF(HSMFFileHandler::new),
+ HSSF(HSSFFileHandler::new),
+ HWPF(HWPFFileHandler::new),
+ OPC(OPCFileHandler::new),
+ POIFS(POIFSFileHandler::new),
+ XDGF(XDGFFileHandler::new),
+ XSLF(XSLFFileHandler::new),
+ XSSFB(XSSFBFileHandler::new),
+ XSSF(XSSFFileHandler::new),
+ XWPF(XWPFFileHandler::new),
+ OWPF(OWPFFileHandler::new),
+ // handler which does nothing in any of its methods
+ NULL(NullFileHandler::new)
+ ;
+
+ /** Creates a new handler instance on each call of {@code get()}. */
+ public final Supplier<FileHandler> fileHandler;
+
+ FileHandlerKnown(Supplier<FileHandler> fileHandler) {
+ this.fileHandler = fileHandler;
+ }
+
+ /** A {@link FileHandler} where all methods are implemented as no-op. */
+ private static class NullFileHandler implements FileHandler {
+ @Override
+ public void handleFile(InputStream stream, String path) {}
+
+ @Override
+ public void handleExtracting(File file) {}
+
+ @Override
+ public void handleAdditional(File file) {}
+ }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java
new file mode 100644
index 0000000000..1d2ca65d4c
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java
@@ -0,0 +1,78 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
+import org.apache.poi.hdgf.streams.Stream;
+import org.apache.poi.hdgf.streams.TrailerStream;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.junit.jupiter.api.Test;
+
+class HDGFFileHandler extends POIFSFileHandler {
+ @Override
+ public void handleFile(InputStream stream, String path) throws IOException {
+ POIFSFileSystem poifs = new POIFSFileSystem(stream);
+ HDGFDiagram diagram = new HDGFDiagram(poifs);
+ Stream[] topLevelStreams = diagram.getTopLevelStreams();
+ assertNotNull(topLevelStreams);
+ for(Stream str : topLevelStreams) {
+ assertTrue(str.getPointer().getLength() >= 0);
+ }
+
+ TrailerStream trailerStream = diagram.getTrailerStream();
+ assertNotNull(trailerStream);
+ assertTrue(trailerStream.getPointer().getLength() >= 0);
+ diagram.close();
+ poifs.close();
+
+ // writing is not yet implemented... handlePOIDocument(diagram);
+ }
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Override
+ @Test
+ void test() throws Exception {
+ File file = new File("test-data/diagram/44501.vsd");
+
+ InputStream stream = new FileInputStream(file);
+ try {
+ handleFile(stream, file.getPath());
+ } finally {
+ stream.close();
+ }
+
+ handleExtracting(file);
+
+ stream = new FileInputStream(file);
+ try {
+ try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) {
+ assertNotNull(extractor.getText());
+ }
+ } finally {
+ stream.close();
+ }
+ }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java
new file mode 100644
index 0000000000..34cf9ee5b9
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java
@@ -0,0 +1,91 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.Arrays;
+
+import org.apache.poi.hmef.HMEFMessage;
+import org.apache.poi.hmef.attribute.MAPIAttribute;
+import org.apache.poi.hmef.attribute.TNEFAttribute;
+import org.apache.poi.hmef.attribute.TNEFProperty;
+import org.apache.poi.hsmf.datatypes.MAPIProperty;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.util.LittleEndian;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Stress-test handler that reads a file as an {@link HMEFMessage} and checks
+ * that a subject and a body can be obtained from it.
+ */
+class HMEFFileHandler extends AbstractFileHandler {
+
+    /**
+     * Delegates to the inherited extraction check only when the file is detected as OLE2;
+     * any other file type is skipped.
+     */
+    @Override
+    public void handleExtracting(File file) throws Exception {
+        if (FileMagic.valueOf(file) != FileMagic.OLE2) {
+            return;
+        }
+        super.handleExtracting(file);
+    }
+
+    /**
+     * Parses the stream as a TNEF message and asserts that body and subject are set.
+     * For a few known subjects the body is taken from the HTML body attribute instead
+     * of {@link HMEFMessage#getBody()}.
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        HMEFMessage message = new HMEFMessage(stream);
+
+        // there are test-files that have no body...
+        String[] htmlBodySubjects = {
+            "Testing TNEF Message", "TNEF test message with attachments", "Test"
+        };
+        boolean useHtmlBody = Arrays.asList(htmlBodySubjects).contains(message.getSubject());
+
+        final String body;
+        if (useHtmlBody) {
+            MAPIAttribute htmlAttribute = message.getMessageMAPIAttribute(MAPIProperty.BODY_HTML);
+            assertNotNull(htmlAttribute);
+            body = new String(htmlAttribute.getData(), getEncoding(message));
+        } else {
+            body = message.getBody();
+        }
+
+        assertNotNull( body, "Body is not set" );
+        assertNotNull( message.getSubject(), "Subject is not set" );
+    }
+
+    // a test-case to test this locally without executing the full TestAllFiles
+    @Test
+    void test() throws Exception {
+        String path = "test-data/hmef/quick-winmail.dat";
+        try (InputStream in = new FileInputStream(path)) {
+            handleFile(in, path);
+        }
+    }
+
+    /**
+     * Picks a charset name for decoding the HTML body: the OEM code page attribute
+     * wins, then the internet code page id, otherwise code page 1252 is assumed.
+     */
+    private String getEncoding(HMEFMessage tnefDat) {
+        TNEFAttribute oemCodePage = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE);
+        MAPIAttribute internetCodePage = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID);
+
+        final int codePage;
+        if (oemCodePage != null) {
+            codePage = LittleEndian.getInt(oemCodePage.getData());
+        } else if (internetCodePage != null) {
+            codePage = LittleEndian.getInt(internetCodePage.getData());
+        } else {
+            codePage = 1252;
+        }
+
+        // see http://en.wikipedia.org/wiki/Code_page for more
+        if (codePage == 1252) {
+            return "Windows-1252";
+        }
+        if (codePage == 20127) {
+            return "US-ASCII";
+        }
+        return "cp"+codePage;
+    }
+
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java
new file mode 100644
index 0000000000..419840d696
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java
@@ -0,0 +1,67 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.poi.hpbf.HPBFDocument;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.junit.jupiter.api.Test;
+
+class HPBFFileHandler extends POIFSFileHandler {
+ @Override
+ public void handleFile(InputStream stream, String path) throws Exception {
+ HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream));
+ assertNotNull(pub.getEscherDelayStm());
+ assertNotNull(pub.getMainContents());
+ assertNotNull(pub.getQuillContents());
+
+ // writing is not yet implemented... handlePOIDocument(pub);
+ pub.close();
+ }
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Override
+ @Test
+ void test() throws Exception {
+ File file = new File("test-data/publisher/SampleBrochure.pub");
+
+ InputStream stream = new FileInputStream(file);
+ try {
+ handleFile(stream, file.getPath());
+ } finally {
+ stream.close();
+ }
+
+ handleExtracting(file);
+
+ stream = new FileInputStream(file);
+ try {
+ try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) {
+ assertNotNull(extractor.getText());
+ }
+ } finally {
+ stream.close();
+ }
+ }
+
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java
new file mode 100644
index 0000000000..088f77ef45
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java
@@ -0,0 +1,132 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.fail;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
+
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.poi.examples.hpsf.CopyCompare;
+import org.apache.poi.hpsf.DocumentSummaryInformation;
+import org.apache.poi.hpsf.HPSFPropertiesOnlyDocument;
+import org.apache.poi.hpsf.PropertySet;
+import org.apache.poi.hpsf.SummaryInformation;
+import org.apache.poi.poifs.filesystem.DirectoryNode;
+import org.apache.poi.poifs.filesystem.DocumentInputStream;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.TempFile;
+import org.junit.jupiter.api.Test;
+
+class HPSFFileHandler extends POIFSFileHandler {
+ private static final String NL = System.getProperty("line.separator");
+
+ private static final ThreadLocal<File> copyOutput = ThreadLocal.withInitial(HPSFFileHandler::getTempFile);
+
+ static final Set<String> EXCLUDES_HANDLE_ADD = unmodifiableHashSet(
+ "spreadsheet/45290.xls",
+ "spreadsheet/46904.xls",
+ "spreadsheet/55982.xls",
+ "spreadsheet/testEXCEL_3.xls",
+ "spreadsheet/testEXCEL_4.xls",
+ "hpsf/Test_Humor-Generation.ppt",
+ "document/word2.doc"
+ );
+
+ private static Set<String> unmodifiableHashSet(String... a) {
+ return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(a)));
+ }
+
+
+ @Override
+ public void handleFile(InputStream stream, String path) throws Exception {
+ POIFSFileSystem poifs = new POIFSFileSystem(stream);
+ HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs);
+ DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation();
+ SummaryInformation si = hpsf.getSummaryInformation();
+ boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME);
+ boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME);
+
+ assertEquals(hasDSI, dsi != null);
+ assertEquals(hasSI, si != null);
+
+ handlePOIDocument(hpsf);
+ }
+
+ private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException {
+ DirectoryNode root = poifs.getRoot();
+ if (!root.hasEntry(streamName)) {
+ return false;
+ }
+ try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) {
+ return PropertySet.isPropertySetStream(dis);
+ }
+ }
+
+ private static File getTempFile() {
+ File f = null;
+ try {
+ f = TempFile.createTempFile("hpsfCopy", "out");
+ } catch (IOException e) {
+ fail(e);
+ }
+ f.deleteOnExit();
+ return f;
+ }
+
+ @Override
+ public void handleAdditional(File file) throws Exception {
+ assumeFalse(EXCLUDES_HANDLE_ADD.contains(file.getParentFile().getName()+"/"+file.getName()));
+
+ ByteArrayOutputStream bos = new ByteArrayOutputStream();
+ PrintStream psNew = new PrintStream(bos, true, "ISO-8859-1");
+ CopyCompare.setOut(psNew);
+ CopyCompare.main(new String[]{file.getAbsolutePath(), copyOutput.get().getAbsolutePath()});
+ assertEquals("Equal" + NL, bos.toString(StandardCharsets.UTF_8.name()));
+ }
+
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Override
+ @Test
+ @SuppressWarnings("java:S2699")
+ void test() throws Exception {
+ String path = "test-data/diagram/44501.vsd";
+ try (InputStream stream = new FileInputStream(path)) {
+ handleFile(stream, path);
+ }
+ }
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ void testExtractor() {
+ File file = new File("test-data/hpsf/TestBug44375.xls");
+ assertDoesNotThrow(() -> handleExtracting(file));
+ }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java
new file mode 100644
index 0000000000..fc82b6612d
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java
@@ -0,0 +1,94 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
+import org.apache.poi.hslf.usermodel.HSLFSlideShow;
+import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Stress-test handler that opens a file as an {@link HSLFSlideShowImpl}, checks its
+ * low-level records and then runs the generic slide show checks on it.
+ */
+class HSLFFileHandler extends SlideShowHandler {
+
+    private static final Logger LOGGER = LogManager.getLogger(HSLFFileHandler.class);
+
+    /**
+     * Parses the stream, asserts that the basic parts and every record are present,
+     * then passes the document to {@code handlePOIDocument} and {@code handleSlideShow}.
+     *
+     * @param stream the raw file content
+     * @param path the path the content came from (not used by this handler)
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        // try-with-resources so the low-level document is released even when a check fails
+        try (HSLFSlideShowImpl slide = new HSLFSlideShowImpl(stream)) {
+            assertNotNull(slide.getCurrentUserAtom());
+            assertNotNull(slide.getEmbeddedObjects());
+            assertNotNull(slide.getUnderlyingBytes());
+            assertNotNull(slide.getPictureData());
+            org.apache.poi.hslf.record.Record[] records = slide.getRecords();
+            assertNotNull(records);
+            for (org.apache.poi.hslf.record.Record record : records) {
+                assertNotNull( record, "Found a record which was null" );
+                assertTrue(record.getRecordType() >= 0);
+            }
+
+            handlePOIDocument(slide);
+
+            // NOTE(review): ss wraps 'slide' and is not closed separately here; this presumes
+            // closing the wrapped document is sufficient - confirm against HSLFSlideShow.close()
+            HSLFSlideShow ss = new HSLFSlideShow(slide);
+            handleSlideShow(ss);
+        }
+    }
+
+    @Test
+    void testOne() throws Exception {
+        testOneFile(new File("test-data/slideshow/54880_chinese.ppt"));
+    }
+
+    // a test-case to test all .ppt files without executing the full TestAllFiles
+    @Override
+    @Test
+    void test() throws Exception {
+        File[] files = new File("test-data/slideshow/").listFiles((dir, name) -> name.endsWith(".ppt"));
+        assertNotNull(files);
+
+        System.out.println("Testing " + files.length + " files");
+
+        for (File file : files) {
+            try {
+                testOneFile(file);
+            } catch (Throwable e) {
+                // failures are only logged so that all remaining files are still processed
+                LOGGER.atWarn().withThrowable(e).log("Failed to handle file {}", file);
+            }
+        }
+    }
+
+    /** Runs the file check followed by the extraction check on a single file. */
+    private void testOneFile(File file) throws Exception {
+        System.out.println(file);
+
+        try (InputStream stream = new FileInputStream(file)) {
+            handleFile(stream, file.getPath());
+        }
+
+        handleExtracting(file);
+    }
+
+    /** Runs {@link #handleFile(InputStream, String)} on the file named by {@code args[0]}. */
+    public static void main(String[] args) throws Exception {
+        try (InputStream stream = new FileInputStream(args[0])) {
+            new HSLFFileHandler().handleFile(stream, args[0]);
+        }
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java
new file mode 100644
index 0000000000..8a21781bc4
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java
@@ -0,0 +1,88 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.datatypes.DirectoryChunk;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Stress-test handler that opens a file as a {@link MAPIMessage} and checks its
+ * basic fields and any embedded message attachments.
+ */
+class HSMFFileHandler extends POIFSFileHandler {
+    /**
+     * Parses the stream as a MAPI message, asserts that attachments, BCC display and
+     * message date are available, and that every attachment stored as a directory
+     * can be read as an embedded message with a text body.
+     *
+     * @param stream the raw file content
+     * @param path the path the content came from (not used by this handler)
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        // try-with-resources so the message is closed even when an assertion fails
+        try (MAPIMessage mapi = new MAPIMessage(stream)) {
+            assertNotNull(mapi.getAttachmentFiles());
+            assertNotNull(mapi.getDisplayBCC());
+            assertNotNull(mapi.getMessageDate());
+
+            AttachmentChunks[] attachments = mapi.getAttachmentFiles();
+
+            for (AttachmentChunks attachment : attachments) {
+
+                DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory();
+                if (chunkDirectory != null) {
+                    MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage();
+                    assertNotNull(attachmentMSG);
+                    String body = attachmentMSG.getTextBody();
+                    assertNotNull(body);
+                }
+            }
+
+            /* => Writing isn't yet supported...
+            // write out the file
+            File file = TempFile.createTempFile("StressTest", ".msg");
+            writeToFile(mapi, file);
+
+            MAPIMessage read = new MAPIMessage(file.getAbsolutePath());
+            assertNotNull(read.getAttachmentFiles());
+            assertNotNull(read.getDisplayBCC());
+            assertNotNull(read.getMessageDate());
+            */
+
+            // writing is not yet supported... handlePOIDocument(mapi);
+        }
+    }
+
+//    private void writeToFile(MAPIMessage mapi, File file)
+//            throws FileNotFoundException, IOException {
+//        OutputStream stream = new FileOutputStream(file);
+//        try {
+//            mapi.write(stream);
+//        } finally {
+//            stream.close();
+//        }
+//    }
+
+    // a test-case to test this locally without executing the full TestAllFiles
+    @Override
+    @Test
+    void test() throws Exception {
+        File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg");
+        try (InputStream stream = new FileInputStream(file)) {
+            handleFile(stream, file.getPath());
+        }
+
+        handleExtracting(file);
+    }
+} \ No newline at end of file
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java
new file mode 100644
index 0000000000..9ee54b4790
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java
@@ -0,0 +1,127 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.HashSet;
+import java.util.Set;
+
+import org.apache.poi.hssf.OldExcelFormatException;
+import org.apache.poi.hssf.dev.BiffViewer;
+import org.apache.poi.hssf.usermodel.HSSFOptimiser;
+import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.util.NullPrintStream;
+import org.junit.jupiter.api.Test;
+
+class HSSFFileHandler extends SpreadsheetHandler {
+ private final POIFSFileHandler delegate = new POIFSFileHandler();
+ @Override
+ public void handleFile(InputStream stream, String path) throws Exception {
+ HSSFWorkbook wb = new HSSFWorkbook(stream);
+ handleWorkbook(wb);
+
+ // TODO: some documents fail currently...
+ // Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating
+ // IntersectionPtg. However it is still not capable of parsing it.
+ // So FormulaEvalTestData.xls now contains a few formulas that produce errors here.
+ //HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb);
+ //evaluator.evaluateAll();
+
+ delegate.handlePOIDocument(wb);
+
+ // also try to see if some of the Records behave incorrectly
+ // TODO: still fails on some records... RecordsStresser.handleWorkbook(wb);
+
+ HSSFOptimiser.optimiseCellStyles(wb);
+ for(Sheet sheet : wb) {
+ for (Row row : sheet) {
+ for (Cell cell : row) {
+ assertNotNull(cell.getCellStyle());
+ }
+ }
+ }
+
+ HSSFOptimiser.optimiseFonts(wb);
+ }
+
+ private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
+ static {
+ // encrypted
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls");
+ // broken files
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls");
+ // TODO: ok to ignore?
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls");
+ }
+
+ @Override
+ public void handleAdditional(File file) throws Exception {
+ // redirect stdout as the examples often write lots of text
+ PrintStream oldOut = System.out;
+ String fileWithParent = file.getParentFile().getName() + "/" + file.getName();
+ try {
+ System.setOut(new NullPrintStream());
+
+ BiffViewer.main(new String[]{file.getAbsolutePath()});
+
+ assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
+ } catch (OldExcelFormatException e) {
+ // old excel formats are not supported here
+ } catch (RuntimeException e) {
+ if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) {
+ throw e;
+ }
+ } finally {
+ System.setOut(oldOut);
+ }
+ }
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ void test() throws Exception {
+ File file = new File("test-data/spreadsheet/49219.xls");
+
+ try (InputStream stream = new FileInputStream(file)) {
+ handleFile(stream, file.getPath());
+ }
+
+ handleExtracting(file);
+
+ handleAdditional(file);
+ }
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ @SuppressWarnings("java:S2699")
+ void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/spreadsheet/BOOK_in_capitals.xls"));
+ }
+} \ No newline at end of file
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java
new file mode 100644
index 0000000000..da07a9b87b
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java
@@ -0,0 +1,72 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.util.List;
+
+import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.hwpf.model.PicturesTable;
+import org.apache.poi.hwpf.usermodel.Picture;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Stress-test handler that opens a file as an {@link HWPFDocument} and checks
+ * that its main tables can be accessed.
+ */
+class HWPFFileHandler extends POIFSFileHandler {
+    /**
+     * Parses the stream as a Word document, asserts that bookmarks, character table,
+     * endnotes and the picture list are available, then hands the document to
+     * {@code handlePOIDocument}.
+     *
+     * @param stream the raw file content
+     * @param path the path the content came from (not used by this handler)
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        // try-with-resources so the document is released even when a check fails
+        try (HWPFDocument doc = new HWPFDocument(stream)) {
+            assertNotNull(doc.getBookmarks());
+            assertNotNull(doc.getCharacterTable());
+            assertNotNull(doc.getEndnotes());
+
+            PicturesTable picturesTable = doc.getPicturesTable();
+            List<Picture> pictures = picturesTable.getAllPictures();
+            assertNotNull(pictures);
+
+            handlePOIDocument(doc);
+        }
+    }
+
+    // a test-case to test this locally without executing the full TestAllFiles
+    @Override
+    @Test
+    @SuppressWarnings("java:S2699")
+    void test() throws Exception {
+        File file = new File("test-data/document/52117.doc");
+
+        try (InputStream stream = new FileInputStream(file)) {
+            handleFile(stream, file.getPath());
+        }
+
+        handleExtracting(file);
+
+        try (FileInputStream stream = new FileInputStream(file);
+             WordExtractor extractor = new WordExtractor(stream)) {
+            assertNotNull(extractor.getText());
+        }
+    }
+
+    @Test
+    void testExtractingOld() {
+        File file = new File("test-data/document/52117.doc");
+        assertDoesNotThrow(() -> handleExtracting(file));
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java b/integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java
new file mode 100644
index 0000000000..7928975f56
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java
@@ -0,0 +1,97 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.IOException;
+import java.lang.management.ManagementFactory;
+import java.lang.reflect.InvocationTargetException;
+import java.lang.reflect.Method;
+
+import com.sun.management.HotSpotDiagnosticMXBean;
+import org.apache.poi.util.SuppressForbidden;
+
+@SuppressForbidden("class only exists for manual tests in XSSFFileHandler")
+public class HeapDump {
+    // Object name under which the HotSpot diagnostic MBean is registered
+    private static final String HOTSPOT_BEAN_NAME =
+        "com.sun.management:type=HotSpotDiagnostic";
+
+    // Proxy for the HotSpot diagnostic MBean, created on first use
+    private static volatile HotSpotDiagnosticMXBean hotspotMBean;
+
+    /**
+     * Writes a heap snapshot of the running JVM to a file. If the class
+     * {@code com.ibm.jvm.Dump} can be loaded its dump facility is used,
+     * otherwise the HotSpot diagnostic MBean.
+     *
+     * @param fileName name of the heap dump file
+     * @param live whether only live objects are dumped; only passed on in the HotSpot case
+     * @throws IOException if the HotSpot MBean proxy cannot be created
+     * @throws RuntimeException wrapping any reflection failure while invoking the dump
+     */
+    public static void dumpHeap(String fileName, boolean live) throws IOException {
+        try {
+            if (!isIbmVm()) {
+                // make sure the diagnostic MBean proxy exists before using it
+                initHotspotMBean();
+                dumpHeapHotSpot(fileName, live);
+            } else {
+                dumpHeapJ9(fileName);
+            }
+        } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | ClassNotFoundException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    // creates the MBean proxy once; the field is checked again under the class lock
+    private static void initHotspotMBean() throws IOException {
+        if (hotspotMBean != null) {
+            return;
+        }
+        synchronized (HeapDump.class) {
+            if (hotspotMBean == null) {
+                hotspotMBean = getHotspotMBean();
+            }
+        }
+    }
+
+    // looks the diagnostic MBean up on the platform MBean server
+    private static HotSpotDiagnosticMXBean getHotspotMBean() throws IOException {
+        return ManagementFactory.newPlatformMXBeanProxy(
+            ManagementFactory.getPlatformMBeanServer(),
+            HOTSPOT_BEAN_NAME,
+            HotSpotDiagnosticMXBean.class);
+    }
+
+    // true when the class com.ibm.jvm.Dump can be loaded
+    private static boolean isIbmVm() {
+        try {
+            Class.forName("com.ibm.jvm.Dump");
+        } catch (ClassNotFoundException e) {
+            return false;
+        }
+        return true;
+    }
+
+    // calls com.ibm.jvm.Dump.heapDumpToFile(fileName) via reflection
+    private static void dumpHeapJ9(String fileName) throws ClassNotFoundException, NoSuchMethodException,
+            InvocationTargetException, IllegalAccessException {
+        Class<?> dumpClass = Class.forName("com.ibm.jvm.Dump");
+        Method toFile = dumpClass.getMethod("heapDumpToFile", String.class);
+        toFile.invoke(dumpClass, fileName);
+    }
+
+    // calls dumpHeap(fileName, live) on the MBean proxy via reflection
+    private static void dumpHeapHotSpot(String fileName, boolean live) throws NoSuchMethodException,
+            InvocationTargetException, IllegalAccessException {
+        Class<?> beanClass = hotspotMBean.getClass();
+        Method dumpMethod = beanClass.getMethod("dumpHeap", String.class, boolean.class);
+        dumpMethod.invoke(hotspotMBean, fileName, live);
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java
new file mode 100644
index 0000000000..c4f5485806
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java
@@ -0,0 +1,73 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+import java.io.PushbackInputStream;
+
+import org.apache.poi.openxml4j.opc.ContentTypes;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.xwpf.usermodel.XWPFRelation;
+import org.junit.jupiter.api.Test;
+
+/**
+ * Stress-test handler that opens a file as an {@link OPCPackage} and checks the
+ * content types of a few well-known parts.
+ */
+class OPCFileHandler extends AbstractFileHandler {
+    /**
+     * Opens the stream as an OPC package and asserts the expected content type for
+     * the core properties, the Word main document and the first Word theme part,
+     * where present. Encrypted files are skipped.
+     *
+     * @param stream the raw file content
+     * @param path the path the content came from (not used by this handler)
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        // ignore password protected files
+        if (POIXMLDocumentHandler.isEncrypted(stream)) {
+            return;
+        }
+
+        OPCPackage p = OPCPackage.open(stream);
+        try {
+            for (PackagePart part : p.getParts()) {
+                String partName = part.getPartName().toString();
+                String contentType = part.getContentType();
+
+                if ("/docProps/core.xml".equals(partName)) {
+                    assertEquals(ContentTypes.CORE_PROPERTIES_PART, contentType);
+                }
+                if ("/word/document.xml".equals(partName)) {
+                    assertTrue( XWPFRelation.DOCUMENT.getContentType().equals(contentType) ||
+                        XWPFRelation.MACRO_DOCUMENT.getContentType().equals(contentType) ||
+                        XWPFRelation.TEMPLATE.getContentType().equals(contentType), "Expected one of " + XWPFRelation.MACRO_DOCUMENT + ", " + XWPFRelation.DOCUMENT + ", " + XWPFRelation.TEMPLATE +
+                        ", but had " + contentType );
+                }
+                if ("/word/theme/theme1.xml".equals(partName)) {
+                    assertEquals(XWPFRelation.THEME.getContentType(), contentType);
+                }
+            }
+        } finally {
+            // nothing was modified: release the package without saving it
+            p.revert();
+        }
+    }
+
+    @Override
+    public void handleExtracting(File file) {
+        // text-extraction is not possible currently for these types of files
+    }
+
+    // a test-case to test this locally without executing the full TestAllFiles
+    @Test
+    void test() throws Exception {
+        File file = new File("test-data/diagram/test.vsdx");
+
+        try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) {
+            handleFile(stream, file.getPath());
+        }
+
+        handleExtracting(file);
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java
new file mode 100644
index 0000000000..7ad20d0585
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java
@@ -0,0 +1,65 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.poi.hwpf.HWPFOldDocument;
+import org.apache.poi.hwpf.extractor.WordExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.junit.jupiter.api.Test;
+
+public class OWPFFileHandler extends POIFSFileHandler {
+    /**
+     * Parses the stream as an old-format Word document and checks that its
+     * font table and character table are available.
+     *
+     * @param stream the raw file content
+     * @param path the path of the file, not used by this handler
+     * @throws Exception if the file cannot be read as OLE2 container or old Word document
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        try (POIFSFileSystem fileSystem = new POIFSFileSystem(stream)) {
+            final HWPFOldDocument oldDoc = new HWPFOldDocument(fileSystem);
+            assertNotNull(oldDoc.getOldFontTable());
+            assertNotNull(oldDoc.getCharacterTable());
+        }
+    }
+
+    // a test-case to test this locally without executing the full TestAllFiles
+    @Override
+    @Test
+    @SuppressWarnings("java:S2699")
+    public void test() throws Exception {
+        final File sample = new File("test-data/document/52117.doc");
+
+        // 1. generic handling of the file content
+        try (InputStream in = new FileInputStream(sample)) {
+            handleFile(in, sample.getPath());
+        }
+
+        // 2. generic text extraction
+        handleExtracting(sample);
+
+        // 3. extraction via the Word specific extractor
+        try (FileInputStream in = new FileInputStream(sample);
+             WordExtractor wordExtractor = new WordExtractor(in)) {
+            assertNotNull(wordExtractor.getText());
+        }
+    }
+
+    /**
+     * Runs only the generic extraction on the sample document and expects it to succeed.
+     */
+    @Test
+    public void testExtractingOld() {
+        final File sample = new File("test-data/document/52117.doc");
+        assertDoesNotThrow(() -> handleExtracting(sample));
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java
new file mode 100644
index 0000000000..b92f91f5f9
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java
@@ -0,0 +1,81 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.POIDocument;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.junit.jupiter.api.Test;
+
+class POIFSFileHandler extends AbstractFileHandler {
+
+    /**
+     * Opens the stream as OLE2 file system and runs the basic file-system and
+     * property-set checks on it.
+     *
+     * @param stream the raw file content
+     * @param path the path of the file, not used by this handler
+     * @throws Exception if the stream cannot be read as OLE2 file system
+     */
+    @Override
+    public void handleFile(InputStream stream, String path) throws Exception {
+        try (POIFSFileSystem fs = new POIFSFileSystem(stream)) {
+            handlePOIFSFileSystem(fs);
+            handleHPSFProperties(fs);
+        }
+    }
+
+    /**
+     * Reads the (document) summary information via the HPSF extractor and
+     * checks that all textual representations are available.
+     */
+    private void handleHPSFProperties(POIFSFileSystem fs) throws IOException {
+        try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) {
+            // can be null
+            ext.getDocSummaryInformation();
+            ext.getSummaryInformation();
+
+            assertNotNull(ext.getDocumentSummaryInformationText());
+            assertNotNull(ext.getSummaryInformationText());
+            assertNotNull(ext.getText());
+        }
+    }
+
+    /**
+     * Checks that the file system and its root directory exist.
+     */
+    private void handlePOIFSFileSystem(POIFSFileSystem fs) {
+        assertNotNull(fs);
+        assertNotNull(fs.getRoot());
+    }
+
+    /**
+     * Writes the document to memory and verifies that the written bytes can be opened
+     * again as OLE2 file system.
+     *
+     * @param doc the document to round-trip
+     * @throws Exception if writing or re-reading fails
+     */
+    protected void handlePOIDocument(POIDocument doc) throws Exception {
+        ByteArrayOutputStream out = new ByteArrayOutputStream();
+        doc.write(out);
+
+        // try-with-resources: the file system has to be closed even if a check below fails
+        try (POIFSFileSystem fs = new POIFSFileSystem(new ByteArrayInputStream(out.toByteArray()))) {
+            handlePOIFSFileSystem(fs);
+        }
+    }
+
+    // a test-case to test this locally without executing the full TestAllFiles
+    @Test
+    void test() throws Exception {
+        File file = new File("test-data/poifs/Notes.ole2");
+
+        try (InputStream stream = new FileInputStream(file)) {
+            handleFile(stream, file.getPath());
+        }
+
+        //handleExtracting(file);
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java
new file mode 100644
index 0000000000..930c904a77
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java
@@ -0,0 +1,72 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.ooxml.POIXMLDocument;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.filesystem.FileMagic;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.xmlbeans.XmlCursor;
+import org.apache.xmlbeans.XmlObject;
+
+public final class POIXMLDocumentHandler {
+    /**
+     * Checks that the generic accessors of the document all return a value.
+     *
+     * @param doc the OOXML document to inspect
+     */
+    protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
+        assertNotNull(doc.getAllEmbeddedParts());
+        assertNotNull(doc.getPackage());
+        assertNotNull(doc.getPackagePart());
+        assertNotNull(doc.getProperties());
+        assertNotNull(doc.getRelations());
+    }
+
+    /**
+     * Tells whether the stream holds an encrypted OOXML document, i.e. an OLE2 container
+     * with the default encryption entry in its root directory.
+     *
+     * @param stream the raw file content
+     * @return {@code true} for an OLE2 container holding the encryption entry,
+     *      {@code false} if the stream is not an OLE2 container at all
+     * @throws IOException if the stream is an OLE2 container without the encryption entry,
+     *      or if reading fails
+     */
+    protected static boolean isEncrypted(InputStream stream) throws IOException {
+        final boolean isOle2 = FileMagic.valueOf(stream) == FileMagic.OLE2;
+        if (!isOle2) {
+            return false;
+        }
+
+        try (POIFSFileSystem container = new POIFSFileSystem(stream)) {
+            final boolean hasEncryptionEntry = container.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY);
+            if (hasEncryptionEntry) {
+                return true;
+            }
+        }
+
+        // an OLE2 file which is not an encrypted package is no OOXML file at all
+        throw new IOException("Wrong file format or file extension for OO XML file");
+    }
+
+    /**
+     * Recurse through the document and convert all elements so they are available in the ooxml-lite jar.
+     * This method only makes sense for hierarchical documents like .docx.
+     * If the document is split up in different parts like in .pptx, each part needs to be provided.
+     *
+     * @param base the entry point
+     */
+    protected static void cursorRecursive(XmlObject base) {
+        final XmlCursor cursor = base.newCursor();
+        try {
+            // visit the first child and then all of its siblings, descending into each of them
+            for (boolean hasNode = cursor.toFirstChild(); hasNode; hasNode = cursor.toNextSibling()) {
+                cursorRecursive(cursor.getObject());
+            }
+        } finally {
+            cursor.dispose();
+        }
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java
new file mode 100644
index 0000000000..9a1defbf21
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java
@@ -0,0 +1,168 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+
+import java.awt.Dimension;
+import java.awt.Graphics2D;
+import java.awt.RenderingHints;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.lang.ref.WeakReference;
+
+import org.apache.poi.sl.draw.Drawable;
+import org.apache.poi.sl.usermodel.GroupShape;
+import org.apache.poi.sl.usermodel.Notes;
+import org.apache.poi.sl.usermodel.PictureData;
+import org.apache.poi.sl.usermodel.Shape;
+import org.apache.poi.sl.usermodel.SimpleShape;
+import org.apache.poi.sl.usermodel.Slide;
+import org.apache.poi.sl.usermodel.SlideShow;
+import org.apache.poi.sl.usermodel.SlideShowFactory;
+import org.apache.poi.sl.usermodel.TextParagraph;
+import org.apache.poi.sl.usermodel.TextRun;
+import org.apache.poi.sl.usermodel.TextShape;
+import org.junit.jupiter.api.Assumptions;
+import org.junit.platform.commons.util.ExceptionUtils;
+
+public abstract class SlideShowHandler extends POIFSFileHandler {
+    /**
+     * Renders all slides, reads the content and pictures, writes the slide show to memory
+     * and reads the content again, both from the original and from the written copy.
+     *
+     * @param ss the slide show to exercise
+     * @throws IOException if writing or re-reading the slide show fails
+     */
+    public void handleSlideShow(SlideShow<?,?> ss) throws IOException {
+        renderSlides(ss);
+
+        readContent(ss);
+        readPictures(ss);
+
+        // write out the file
+        ByteArrayOutputStream out = writeToArray(ss);
+
+        // read the in-memory model once more after it was written
+        readContent(ss);
+
+        // read in the written file
+        try (SlideShow<?, ?> read = SlideShowFactory.create(new ByteArrayInputStream(out.toByteArray()))) {
+            assertNotNull(read);
+            readContent(read);
+        }
+    }
+
+    /**
+     * Serializes the slide show into an in-memory buffer.
+     */
+    private ByteArrayOutputStream writeToArray(SlideShow<?,?> ss) throws IOException {
+        ByteArrayOutputStream stream = new ByteArrayOutputStream();
+        try {
+            ss.write(stream);
+        } finally {
+            stream.close();
+        }
+
+        return stream;
+    }
+
+
+    /**
+     * Walks the shapes of every slide, its notes (if any) and its master sheet.
+     */
+    private void readContent(SlideShow<?,?> ss) {
+        for (Slide<?,?> s : ss.getSlides()) {
+            s.getTitle();
+
+            for (Shape<?,?> shape : s) {
+                readShapes(shape);
+            }
+
+            Notes<?, ?> notes = s.getNotes();
+            if(notes != null) {
+                for (Shape<?, ?> shape : notes) {
+                    readShapes(shape);
+                }
+            }
+
+            for (Shape<?,?> shape : s.getMasterSheet()) {
+                readShapes(shape);
+            }
+        }
+    }
+
+    /**
+     * Reads fill, stroke and text information of the shape, descending into group shapes.
+     */
+    private void readShapes(Shape<?,?> s) {
+        // recursively walk group-shapes
+        if(s instanceof GroupShape) {
+            GroupShape<? extends Shape<?,?>, ?> shapes = (GroupShape<? extends Shape<?,?>, ?>) s;
+            for (Shape<? extends Shape<?,?>, ?> shape : shapes) {
+                readShapes(shape);
+            }
+        }
+
+        if(s instanceof SimpleShape) {
+            SimpleShape<?, ?> simpleShape = (SimpleShape<?, ?>) s;
+
+            simpleShape.getFillColor();
+            simpleShape.getFillStyle();
+            simpleShape.getStrokeStyle();
+            simpleShape.getLineDecoration();
+        }
+
+        readText(s);
+    }
+
+    /**
+     * Reads the raw text of every text run if the shape is a text shape.
+     */
+    private void readText(Shape<?,?> s) {
+        if (s instanceof TextShape) {
+            for (TextParagraph<?,?,?> tp : (TextShape<?,?>)s) {
+                for (TextRun tr : tp) {
+                    tr.getRawText();
+                }
+            }
+        }
+    }
+
+    /**
+     * Checks that every picture reports a non-negative width and height.
+     */
+    private void readPictures(SlideShow<?,?> ss) {
+        for (PictureData pd : ss.getPictureData()) {
+            Dimension dim = pd.getImageDimension();
+            assertTrue( dim.getHeight() >= 0, "Expecting a valid height, but had an image with height: " + dim.getHeight() );
+            assertTrue( dim.getWidth() >= 0, "Expecting a valid width, but had an image with width: " + dim.getWidth() );
+        }
+    }
+
+    /**
+     * Draws every slide into an off-screen image of the page size.
+     */
+    private void renderSlides(SlideShow<?,?> ss) {
+        Dimension pgSize = ss.getPageSize();
+
+        for (Slide<?,?> s : ss.getSlides()) {
+            BufferedImage img = new BufferedImage(pgSize.width, pgSize.height, BufferedImage.TYPE_INT_ARGB);
+            Graphics2D graphics = img.createGraphics();
+
+            // default rendering options
+            graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
+            graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
+            graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
+            graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
+            graphics.setRenderingHint(Drawable.BUFFERED_IMAGE, new WeakReference<>(img));
+
+            try {
+                // draw stuff
+                s.draw(graphics);
+            } catch (ArrayIndexOutOfBoundsException e) {
+                // We saw exceptions with JDK 8 on Windows in the Jenkins CI which
+                // seem to only be triggered by some font (maybe Calibri?!)
+                // We cannot avoid this, so let's try to not make the tests fail in this case
+                if (!"-1".equals(e.getMessage()) ||
+                    !ExceptionUtils.readStackTrace(e).contains("ExtendedTextSourceLabel.getJustificationInfos")) {
+                    throw e;
+                }
+            } finally {
+                // free the graphics context and image also if drawing failed
+                graphics.dispose();
+                img.flush();
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java
new file mode 100644
index 0000000000..dcee603bf2
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java
@@ -0,0 +1,157 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+
+import org.apache.poi.ss.extractor.EmbeddedData;
+import org.apache.poi.ss.extractor.EmbeddedExtractor;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.Name;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+import org.apache.poi.util.RecordFormatException;
+import org.apache.poi.xssf.usermodel.XSSFChartSheet;
+
+public abstract class SpreadsheetHandler extends AbstractFileHandler {
+    /**
+     * Reads content of the workbook, writes it to memory twice (reading content in between),
+     * re-reads the written copy and then runs the content, embedded-data and
+     * modification checks on that copy.
+     *
+     * @param wb the workbook to exercise
+     * @throws IOException if writing or re-reading the workbook fails
+     */
+    public void handleWorkbook(Workbook wb) throws IOException {
+        // try to access some of the content
+        readContent(wb);
+
+        // write out the file
+        writeToArray(wb);
+
+        // access some more content (we had cases where writing corrupts the data in memory)
+        readContent(wb);
+
+        // write once more
+        ByteArrayOutputStream out = writeToArray(wb);
+
+        // read in the written file, try-with-resources closes it also if one of the checks fails
+        try (Workbook read = WorkbookFactory.create(new ByteArrayInputStream(out.toByteArray()))) {
+            assertNotNull(read);
+
+            readContent(read);
+
+            extractEmbedded(read);
+
+            modifyContent(read);
+        }
+    }
+
+    /**
+     * Serializes the workbook into an in-memory buffer.
+     */
+    private ByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
+        ByteArrayOutputStream stream = new ByteArrayOutputStream();
+        try {
+            wb.write(stream);
+        } finally {
+            stream.close();
+        }
+
+        return stream;
+    }
+
+    /**
+     * Touches every sheet (lookup by name, column grouping), the cells of all sheets
+     * with at most 1000 physical rows and the formulas of all non-function names.
+     */
+    private void readContent(Workbook wb) {
+        for(int i = 0;i < wb.getNumberOfSheets();i++) {
+            Sheet sheet = wb.getSheetAt(i);
+            assertNotNull(wb.getSheet(sheet.getSheetName()));
+            sheet.groupColumn((short) 4, (short) 5);
+            sheet.setColumnGroupCollapsed(4, true);
+            sheet.setColumnGroupCollapsed(4, false);
+
+            // don't do this for very large sheets as it will take a long time
+            if(sheet.getPhysicalNumberOfRows() > 1000) {
+                continue;
+            }
+
+            for(Row row : sheet) {
+                for(Cell cell : row) {
+                    assertNotNull(cell.toString());
+                }
+            }
+        }
+
+        for (Name name : wb.getAllNames()) {
+            // this sometimes caused exceptions
+            if(!name.isFunctionName()) {
+                name.getRefersToFormula();
+            }
+        }
+    }
+
+    /**
+     * Extracts the embedded data of every sheet and checks its filename, content and shape.
+     */
+    private void extractEmbedded(Workbook wb) throws IOException {
+        EmbeddedExtractor ee = new EmbeddedExtractor();
+
+        for (Sheet s : wb) {
+            for (EmbeddedData ed : ee.extractAll(s)) {
+                assertNotNull(ed.getFilename());
+                assertNotNull(ed.getEmbeddedData());
+                assertNotNull(ed.getShape());
+            }
+        }
+    }
+
+    /**
+     * Clones every sheet except chart sheets, tolerating a fixed set of known clone failures.
+     */
+    private void modifyContent(Workbook wb) {
+        /* a number of file fail because of various things: udf, unimplemented functions, ...
+        we would need quite a list of excludes and the large regression tests would probably
+        take a lot longer to run...
+        try {
+            // try to re-compute all formulas to find cases where parsing fails
+            wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
+        } catch (RuntimeException e) {
+            // only allow a specific exception which indicates that an external
+            // reference was not found
+            if(!e.getMessage().contains("Could not resolve external workbook name")) {
+                throw e;
+            }
+
+        }*/
+
+        // iterate backwards: clones are appended, so the original indexes stay valid
+        for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
+            if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
+                // clone for chart-sheets is not supported
+                continue;
+            }
+
+            try {
+                wb.cloneSheet(i);
+            } catch (RecordFormatException e) {
+                if (e.getCause() instanceof CloneNotSupportedException) {
+                    // ignore me
+                    continue;
+                }
+                throw e;
+            } catch (RuntimeException e) {
+                if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
+                        "CountryRecord not found".equals(e.getMessage()) ||
+                        "CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
+                        "Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
+                    // ignore these here for now
+                    continue;
+                }
+                throw e;
+            }
+        }
+    }
+} \ No newline at end of file
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/StressMap.java b/integrationtest/src/test/java/org/apache/poi/stress/StressMap.java
new file mode 100644
index 0000000000..f69fbfbf4f
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/StressMap.java
@@ -0,0 +1,153 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.File;
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.function.BiConsumer;
+import java.util.stream.Collectors;
+import java.util.stream.Stream;
+import java.util.stream.StreamSupport;
+
+import org.apache.commons.collections4.MultiValuedMap;
+import org.apache.commons.collections4.multimap.ArrayListValuedHashMap;
+import org.apache.poi.ss.usermodel.Cell;
+import org.apache.poi.ss.usermodel.CellType;
+import org.apache.poi.ss.usermodel.Row;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.ss.usermodel.Workbook;
+import org.apache.poi.ss.usermodel.WorkbookFactory;
+
+public class StressMap {
+    private final MultiValuedMap<String, ExcInfo> exMap = new ArrayListValuedHashMap<>();
+    private final Map<String,String> handlerMap = new LinkedHashMap<>();
+
+
+    /**
+     * Loads the sheets "Exceptions" and "Handlers" of the given workbook file.
+     * A sheet which does not exist leaves the corresponding map untouched.
+     *
+     * @param mapFile the workbook holding both sheets
+     * @throws IOException if the workbook cannot be opened
+     */
+    public void load(File mapFile) throws IOException {
+        try (Workbook wb = WorkbookFactory.create(mapFile)) {
+            readExMap(wb.getSheet("Exceptions"));
+            readHandlerMap(wb.getSheet("Handlers"));
+        }
+    }
+
+    /**
+     * Determines the handlers for a file: the handler of the first map entry whose key is
+     * a suffix of the file name, followed by the secondary handler belonging to it.
+     *
+     * @param file the file name, backslashes are treated like slashes
+     * @return the handlers, empty if no map entry matches
+     */
+    public List<FileHandlerKnown> getHandler(String file) {
+        // ... failures/handlers lookup doesn't work on windows otherwise
+        final String uniFile = file.replace('\\', '/');
+
+        String firstHandler = handlerMap.entrySet().stream()
+            .filter(me -> uniFile.endsWith(me.getKey()))
+            .map(Map.Entry::getValue).findFirst().orElse("NULL");
+
+        return Stream.of(firstHandler, secondHandler(firstHandler))
+            .filter(h -> !"NULL".equals(h))
+            .map(FileHandlerKnown::valueOf)
+            .collect(Collectors.toList());
+    }
+
+    /**
+     * Looks up the first exception entry registered for the file which matches
+     * the test name and handler.
+     *
+     * @param file the file name, backslashes are treated like slashes
+     * @param testName the name of the test
+     * @param handler the handler the test runs with
+     * @return the matching entry or {@code null} if there is none
+     */
+    public ExcInfo getExcInfo(String file, String testName, FileHandlerKnown handler) {
+        // ... failures/handlers lookup doesn't work on windows otherwise
+        final String uniFile = file.replace('\\', '/');
+
+        return exMap.get(uniFile).stream()
+            .filter(e -> e.isMatch(testName, handler.name()))
+            .findFirst().orElse(null);
+    }
+
+    /**
+     * Replaces the handler map by the content of the sheet. The first row is skipped as header,
+     * column 0 holds the key. The handler name is taken from column 2, or from column 1
+     * if the system property "scratchpad.ignore" is set or column 2 holds no string.
+     *
+     * @param sh the sheet to read, {@code null} keeps the current map
+     */
+    public void readHandlerMap(Sheet sh) {
+        if (sh == null) {
+            return;
+        }
+
+        handlerMap.clear();
+
+        final boolean ignoreScratchpad = Boolean.getBoolean("scratchpad.ignore");
+        boolean isFirst = true;
+        for (Row row : sh) {
+            if (isFirst) {
+                isFirst = false;
+                continue;
+            }
+            Cell cell = row.getCell(2);
+            if (ignoreScratchpad || cell == null || cell.getCellType() != CellType.STRING) {
+                cell = row.getCell(1);
+            }
+            handlerMap.put(row.getCell(0).getStringCellValue(), cell.getStringCellValue());
+        }
+    }
+
+
+    /**
+     * Replaces the exception map by the content of the sheet. The first row names the columns,
+     * every following row becomes one {@link ExcInfo} built from its string cells.
+     *
+     * @param sh the sheet to read, {@code null} keeps the current map
+     */
+    public void readExMap(Sheet sh) {
+        if (sh == null) {
+            return;
+        }
+
+        exMap.clear();
+
+        Iterator<Row> iter = sh.iterator();
+        if (!iter.hasNext()) {
+            // a sheet without header row has nothing to offer
+            return;
+        }
+        Map<Integer,BiConsumer<ExcInfo,String>> cols = initCols(iter.next());
+
+        while (iter.hasNext()) {
+            ExcInfo info = new ExcInfo();
+            for (Cell cell : iter.next()) {
+                if (cell.getCellType() == CellType.STRING) {
+                    BiConsumer<ExcInfo,String> setter = cols.get(cell.getColumnIndex());
+                    // columns without a known header are ignored
+                    if (setter != null) {
+                        setter.accept(info, cell.getStringCellValue());
+                    }
+                }
+            }
+            exMap.put(info.getFile(), info);
+        }
+    }
+
+    /**
+     * Maps the column index of every known header cell to the setter filling the matching
+     * {@link ExcInfo} property. Keying by column index keeps header and data cells aligned
+     * even if the header row has gaps, as the row iteration only visits existing cells.
+     */
+    private static Map<Integer,BiConsumer<ExcInfo,String>> initCols(Row row) {
+        Map<String,BiConsumer<ExcInfo,String>> m = new HashMap<>();
+        m.put("File", ExcInfo::setFile);
+        m.put("Tests", ExcInfo::setTests);
+        m.put("Handler", ExcInfo::setHandler);
+        m.put("Password", ExcInfo::setPassword);
+        m.put("Exception Class", ExcInfo::setExClazz);
+        m.put("Exception Message", ExcInfo::setExMessage);
+
+        Map<Integer,BiConsumer<ExcInfo,String>> cols = new HashMap<>();
+        for (Cell cell : row) {
+            BiConsumer<ExcInfo,String> setter = m.get(cell.getStringCellValue());
+            if (setter != null) {
+                cols.put(cell.getColumnIndex(), setter);
+            }
+        }
+        return cols;
+    }
+
+    /**
+     * Names the handler which is run in addition to the given one, or "NULL" if there is none.
+     */
+    private static String secondHandler(String handlerStr) {
+        switch (handlerStr) {
+            case "XSSF":
+            case "XWPF":
+            case "XSLF":
+            case "XDGF":
+                return "OPC";
+            case "HSSF":
+            case "HWPF":
+            case "HSLF":
+            case "HDGF":
+            case "HSMF":
+            case "HPBF":
+            // "HBPF" looks like a transposition of HPBF (the handler class is HPBFFileHandler),
+            // it is kept so that existing map entries using it behave as before
+            case "HBPF":
+                return "HPSF";
+            default:
+                return "NULL";
+        }
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java b/integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java
new file mode 100644
index 0000000000..55777d12ee
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java
@@ -0,0 +1,201 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+
+import static org.junit.jupiter.api.Assertions.assertDoesNotThrow;
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertThrows;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assertions.fail;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Stream;
+
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.tools.ant.DirectoryScanner;
+import org.junit.jupiter.api.function.Executable;
+import org.junit.jupiter.api.parallel.Execution;
+import org.junit.jupiter.api.parallel.ExecutionMode;
+import org.junit.jupiter.params.ParameterizedTest;
+import org.junit.jupiter.params.provider.Arguments;
+import org.junit.jupiter.params.provider.MethodSource;
+import org.opentest4j.AssertionFailedError;
+
+/**
+ * This is an integration test which performs various actions on all stored test-files and tries
+ * to reveal problems which are introduced, but not covered (yet) by unit tests.
+ *
+ * This test looks for any file under the test-data directory and tries to do some useful
+ * processing with it based on its type.
+ *
+ * The test is implemented as a junit {@link ParameterizedTest} test, which leads
+ * to one test-method call for each file (currently around 950 files are handled).
+ *
+ * There is a mapping of extension to implementations of the interface
+ * {@link FileHandler} which defines how the file is loaded and which actions are
+ * tried with the file.
+ *
+ * The test can be expanded by adding more actions to the FileHandlers, this automatically
+ * applies the action to any such file in our test-data repository.
+ *
+ * There is also a list of files that should actually fail.
+ *
+ * Note: It is also a test-failure if a file that is expected to fail now actually works,
+ * i.e. if a bug was fixed in POI itself, the file should be removed from the expected-failures
+ * here as well! This is to ensure that files that should not work really do not work, e.g.
+ * that we do not remove expected sanity checks.
+ */
+// also need to set JVM parameter: -Djunit.jupiter.execution.parallel.enabled=true
+@Execution(ExecutionMode.CONCURRENT)
+public class TestAllFiles {
+    private static final String DEFAULT_TEST_DATA_PATH = "test-data";
+    public static final File ROOT_DIR = new File(System.getProperty("POI.testdata.path", DEFAULT_TEST_DATA_PATH));
+
+    public static final String[] SCAN_EXCLUDES = {
+        "**/.svn/**",
+        "lost+found",
+        "**/.git/**",
+    };
+
+    /**
+     * Builds the test arguments for one kind of test: every file below {@link #ROOT_DIR} is
+     * combined with each of its handlers, together with the password and the expected
+     * exception class and message taken from the stress map (all {@code null} without an entry).
+     * Combinations whose map entry is not valid for the test are left out.
+     *
+     * @param testName the kind of test the arguments are for
+     * @return the arguments file, handler, password, exception class, exception message
+     * @throws IOException if the stress map cannot be loaded
+     */
+    public static Stream<Arguments> allfiles(String testName) throws IOException {
+        StressMap sm = new StressMap();
+        sm.load(new File(ROOT_DIR, "spreadsheet/stress.xls"));
+
+        DirectoryScanner scanner = new DirectoryScanner();
+        scanner.setBasedir(ROOT_DIR);
+        scanner.setExcludes(SCAN_EXCLUDES);
+
+        scanner.scan();
+
+        final List<Arguments> result = new ArrayList<>(100);
+        for (String file : scanner.getIncludedFiles()) {
+            for (FileHandlerKnown handler : sm.getHandler(file)) {
+                ExcInfo info1 = sm.getExcInfo(file, testName, handler);
+                if (info1 == null || info1.isValid(testName, handler.name())) {
+                    result.add(Arguments.of(
+                        file,
+                        handler,
+                        (info1 != null) ? info1.getPassword() : null,
+                        (info1 != null) ? info1.getExClazz() : null,
+                        (info1 != null) ? info1.getExMessage() : null
+                    ));
+                }
+            }
+        }
+
+        return result.stream();
+    }
+
+    public static Stream<Arguments> extractFiles() throws IOException {
+        return allfiles("extract");
+    }
+
+    @ParameterizedTest(name = "#{index} {0} {1}")
+    @MethodSource("extractFiles")
+    void handleExtracting(String file, FileHandlerKnown handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException {
+        System.out.println("Running extractFiles on "+file);
+        FileHandler fileHandler = handler.fileHandler.get();
+        assertNotNull(fileHandler, "Did not find a handler for file " + file);
+        Executable exec = () -> fileHandler.handleExtracting(new File(ROOT_DIR, file));
+        verify(file, exec, exClass, exMessage, password);
+    }
+
+    public static Stream<Arguments> handleFiles() throws IOException {
+        return allfiles("handle");
+    }
+
+    @ParameterizedTest(name = "#{index} {0} {1}")
+    @MethodSource("handleFiles")
+    void handleFile(String file, FileHandlerKnown handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException {
+        System.out.println("Running handleFiles on "+file);
+        FileHandler fileHandler = handler.fileHandler.get();
+        assertNotNull(fileHandler, "Did not find a handler for file " + file);
+        try (InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)), 64 * 1024)) {
+            Executable exec = () -> fileHandler.handleFile(stream, file);
+            verify(file, exec, exClass, exMessage, password);
+        }
+    }
+
+    public static Stream<Arguments> handleAdditionals() throws IOException {
+        return allfiles("additional");
+    }
+
+    @ParameterizedTest(name = "#{index} {0} {1}")
+    @MethodSource("handleAdditionals")
+    void handleAdditional(String file, FileHandlerKnown handler, String password, Class<? extends Throwable> exClass, String exMessage) {
+        System.out.println("Running additionals on "+file);
+        FileHandler fileHandler = handler.fileHandler.get();
+        assertNotNull(fileHandler, "Did not find a handler for file " + file);
+        Executable exec = () -> fileHandler.handleAdditional(new File(ROOT_DIR, file));
+        verify(file, exec, exClass, exMessage, password);
+    }
+
+    /**
+     * Runs the executable and compares the outcome with the expectation.
+     *
+     * @param file the file name, used as prefix of failure messages
+     * @param exec the action to run
+     * @param exClass the expected exception class or {@code null} if the action has to succeed
+     * @param exMessage the expected message: compared for equality for failed assertions,
+     *      otherwise it has to be contained in the actual message
+     * @param password the password to set for the current thread, may be {@code null}
+     */
+    @SuppressWarnings("unchecked")
+    private static void verify(String file, Executable exec, Class<? extends Throwable> exClass, String exMessage, String password) {
+        final String errPrefix = file + " - failed. ";
+        // this also removes the password for non encrypted files
+        Biff8EncryptionKey.setCurrentUserPassword(password);
+        if (exClass != null && AssertionFailedError.class.isAssignableFrom(exClass)) {
+            // assertThrows reports a missing failure or an unexpected exception type on its own,
+            // so the "nothing was thrown" failure is never mistaken for the expected one
+            AssertionFailedError e = assertThrows(AssertionFailedError.class, exec, errPrefix + "Expected failed assertion");
+            String actMsg = pathReplace(e.getMessage());
+            assertEquals(exMessage, actMsg, errPrefix);
+        } else if (exClass != null) {
+            Exception e = assertThrows((Class<? extends Exception>)exClass, exec);
+            String actMsg = pathReplace(e.getMessage());
+            if (NullPointerException.class.isAssignableFrom(exClass)) {
+                // NullPointerExceptions are accepted without a message
+                if (actMsg != null) {
+                    assertTrue(actMsg.contains(exMessage), errPrefix + "Message: "+actMsg+" - didn't contain: "+exMessage);
+                }
+            } else {
+                assertNotNull(actMsg, errPrefix);
+                assertTrue(actMsg.contains(exMessage), errPrefix + "Message: "+actMsg+" - didn't contain: "+exMessage);
+            }
+        } else {
+            assertDoesNotThrow(exec, errPrefix);
+        }
+    }
+
+    /**
+     * Makes a message independent of the machine it was created on: backslashes become slashes
+     * and a leading part of the test-data root path is cut so that the path starts at "test-data".
+     *
+     * @param msg the message, may be {@code null}
+     * @return the adjusted message or {@code null} if the message was {@code null}
+     */
+    private static String pathReplace(String msg) {
+        if (msg == null) return null;
+
+        // Windows path replacement
+        msg = msg.replace('\\', '/');
+
+        // the message was normalized above, so the root path has to be normalized as well
+        final String rootPath = ROOT_DIR.toString().replace('\\', '/');
+
+        // Adjust file paths to remove unwanted file path info.
+        int filePathIndex = msg.indexOf(rootPath);
+        if (filePathIndex >= 0) {
+            // only cut if "test-data" is really part of the root path which was found
+            int testDataDirectoryIndex = msg.indexOf(DEFAULT_TEST_DATA_PATH, filePathIndex);
+            if (testDataDirectoryIndex >= 0 && testDataDirectoryIndex < filePathIndex + rootPath.length()) {
+                msg = msg.substring(0, filePathIndex) + msg.substring(testDataDirectoryIndex);
+            }
+        }
+
+        return msg;
+    }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java
new file mode 100644
index 0000000000..d588e4b496
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java
@@ -0,0 +1,45 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.InputStream;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.xdgf.usermodel.XmlVisioDocument;
+import org.junit.jupiter.api.Test;
+
+ /**
+  * Integration-test handler which opens files as {@link XmlVisioDocument} and runs the
+  * generic {@link POIXMLDocumentHandler} checks on them.
+  */
+ class XDGFFileHandler extends AbstractFileHandler {
+     /**
+      * Opens the stream as Visio document and runs the generic POIXMLDocument checks.
+      * Streams which {@link POIXMLDocumentHandler#isEncrypted(InputStream)} reports as encrypted are skipped.
+      *
+      * @param stream the content of the file to check
+      * @param path the path of the file, not used by this handler
+      */
+     @Override
+     public void handleFile(InputStream stream, String path) throws Exception {
+         // ignore password protected files
+         if (POIXMLDocumentHandler.isEncrypted(stream)) return;
+
+         // close the document again, otherwise the underlying package stays open
+         try (XmlVisioDocument doc = new XmlVisioDocument(stream)) {
+             new POIXMLDocumentHandler().handlePOIXMLDocument(doc);
+         }
+     }
+
+     // a test-case to test this locally without executing the full TestAllFiles
+     @Test
+     @SuppressWarnings("java:S2699")
+     void test() throws Exception {
+         try (OPCPackage pkg = OPCPackage.open("test-data/diagram/test.vsdx", PackageAccess.READ)) {
+             XmlVisioDocument doc = new XmlVisioDocument(pkg);
+             new POIXMLDocumentHandler().handlePOIXMLDocument(doc);
+         }
+     }
+} \ No newline at end of file
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java
new file mode 100644
index 0000000000..7a5aebd4c4
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java
@@ -0,0 +1,85 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertEquals;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.ooxml.POIXMLException;
+import org.apache.poi.sl.extractor.SlideShowExtractor;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFSlideShow;
+import org.junit.jupiter.api.Test;
+
+ /**
+  * Integration-test handler for files which are opened as {@link XMLSlideShow}.
+  */
+ class XSLFFileHandler extends SlideShowHandler {
+     /**
+      * Opens the stream as {@link XMLSlideShow}, checks the low-level {@link XSLFSlideShow} view of the
+      * same package and runs the generic POIXMLDocument and slideshow checks.
+      *
+      * @param stream the content of the file to check
+      * @param path the path of the file, not used by this handler
+      * @throws Exception the cause of a {@link POIXMLException} if it has one which is an
+      *         {@link Exception}, otherwise whatever the checks throw
+      */
+     @Override
+     public void handleFile(InputStream stream, String path) throws Exception {
+         try (XMLSlideShow slide = new XMLSlideShow(stream);
+              XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) {
+             assertNotNull(slideInner.getPresentation());
+             assertNotNull(slideInner.getSlideMasterReferences());
+             assertNotNull(slideInner.getSlideReferences());
+
+             new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
+
+             handleSlideShow(slide);
+         } catch (POIXMLException e) {
+             // Report the wrapped exception if there is one. A cause which is not an Exception
+             // (e.g. an Error) can't be thrown from here, blindly casting it would only
+             // replace the real problem with a ClassCastException - so keep the wrapper then.
+             Throwable cause = e.getCause();
+             throw (cause instanceof Exception) ? (Exception) cause : e;
+         }
+     }
+
+     /**
+      * Runs the extraction checks of the super class and additionally checks the extractor output
+      * with all content types enabled and with all of them disabled.
+      *
+      * @param file the file to extract text from
+      */
+     @Override
+     public void handleExtracting(File file) throws Exception {
+         super.handleExtracting(file);
+
+         // additionally try the other getText() methods
+         try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) {
+             assertNotNull(extractor);
+             extractor.setSlidesByDefault(true);
+             extractor.setNotesByDefault(true);
+             extractor.setMasterByDefault(true);
+
+             assertNotNull(extractor.getText());
+
+             extractor.setSlidesByDefault(false);
+             extractor.setNotesByDefault(false);
+             extractor.setMasterByDefault(false);
+
+             assertEquals("", extractor.getText(), "With all options disabled we should not get text");
+         }
+     }
+
+     // a test-case to test this locally without executing the full TestAllFiles
+     @Override
+     @Test
+     void test() throws Exception {
+         File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx");
+         try (InputStream stream = new FileInputStream(file)) {
+             handleFile(stream, file.getPath());
+         }
+
+         handleExtracting(file);
+     }
+ }
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java
new file mode 100644
index 0000000000..1bc23c49ed
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java
@@ -0,0 +1,96 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.xssf.XLSBUnsupportedException;
+import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+
+ /**
+  * Integration-test handler for .xlsb files: text has to be extractable via the
+  * {@link XSSFBEventBasedExcelExtractor}, while opening the package as {@link XSSFWorkbook}
+  * is allowed to fail with an {@link XLSBUnsupportedException}.
+  */
+ class XSSFBFileHandler extends AbstractFileHandler {
+
+     static {
+         //add expected failures here:
+         AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.add("spreadsheet/protected_passtika.xlsb");
+     }
+
+     /**
+      * Buffers the stream in memory and runs both checks, each on a freshly opened package.
+      *
+      * @param stream the content of the file to check
+      * @param path the path of the file, not used by this handler
+      */
+     @Override
+     public void handleFile(InputStream stream, String path) throws Exception {
+         ByteArrayOutputStream out = new ByteArrayOutputStream();
+         IOUtils.copy(stream, out);
+
+         final byte[] bytes = out.toByteArray();
+         try (OPCPackage opcPackage = OPCPackage.open(new ByteArrayInputStream(bytes))) {
+             testOne(opcPackage);
+         }
+
+         // the package has to be closed here as well, the workbook constructor is expected to fail
+         // and thus nobody else would release it
+         try (OPCPackage opcPackage = OPCPackage.open(new ByteArrayInputStream(bytes))) {
+             testNotHandledByWorkbookException(opcPackage);
+         }
+     }
+
+     /**
+      * Tries to open the package as {@link XSSFWorkbook} and ignores the
+      * {@link XLSBUnsupportedException} which may result from that.
+      *
+      * @param pkg the package to open, it is not closed here if opening the workbook fails
+      */
+     private void testNotHandledByWorkbookException(OPCPackage pkg) throws IOException {
+         try {
+             new XSSFWorkbook(pkg).close();
+         } catch (XLSBUnsupportedException e) {
+             //this is what we'd expect
+             //swallow
+         }
+     }
+
+     /**
+      * Runs the same two checks as {@link #handleFile(InputStream, String)}, but on packages
+      * which are opened read-only directly from the file.
+      *
+      * @param file the file to check
+      */
+     @Override
+     public void handleExtracting(File file) throws Exception {
+         try (OPCPackage pkg = OPCPackage.open(file, PackageAccess.READ)) {
+             testOne(pkg);
+         }
+
+         try (OPCPackage pkg = OPCPackage.open(file, PackageAccess.READ)) {
+             testNotHandledByWorkbookException(pkg);
+         }
+     }
+
+     /**
+      * Extracts the text of the package and fails if there is none.
+      *
+      * @param pkg the package to extract text from, the callers in this class close it afterwards
+      * @throws RuntimeException if the extracted text is empty
+      */
+     private void testOne(OPCPackage pkg) throws Exception {
+         XSSFBEventBasedExcelExtractor ex = new XSSFBEventBasedExcelExtractor(pkg);
+         String txt = ex.getText();
+         if (txt.length() < 1) {
+             throw new RuntimeException("Should have gotten some text.");
+         }
+     }
+
+     @Test
+     void testLocal() throws Exception {
+         File file = new File("test-data/spreadsheet/Simple.xlsb");
+         try (FileInputStream stream = new FileInputStream(file)) {
+             handleFile(stream, file.getPath());
+         }
+         handleExtracting(file);
+     }
+ }
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java
new file mode 100644
index 0000000000..beb3a8edf8
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java
@@ -0,0 +1,234 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.jupiter.api.Assertions.assertFalse;
+import static org.junit.jupiter.api.Assertions.assertNotNull;
+import static org.junit.jupiter.api.Assertions.assertTrue;
+import static org.junit.jupiter.api.Assumptions.assumeFalse;
+
+import java.io.BufferedInputStream;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.PrintStream;
+import java.util.HashSet;
+import java.util.Iterator;
+import java.util.Set;
+
+import javax.xml.transform.TransformerException;
+
+import org.apache.poi.EncryptedDocumentException;
+import org.apache.poi.examples.ss.ExcelComparator;
+import org.apache.poi.examples.xssf.eventusermodel.FromHowTo;
+import org.apache.poi.examples.xssf.eventusermodel.XLSX2CSV;
+import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey;
+import org.apache.poi.ooxml.POIXMLException;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.poifs.crypt.Decryptor;
+import org.apache.poi.poifs.crypt.EncryptionInfo;
+import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.ss.usermodel.Sheet;
+import org.apache.poi.util.IOUtils;
+import org.apache.poi.util.NullPrintStream;
+import org.apache.poi.xssf.eventusermodel.XSSFReader;
+import org.apache.poi.xssf.extractor.XSSFExportToXml;
+import org.apache.poi.xssf.usermodel.XSSFMap;
+import org.apache.poi.xssf.usermodel.XSSFSheet;
+import org.apache.poi.xssf.usermodel.XSSFWorkbook;
+import org.junit.jupiter.api.Test;
+import org.xml.sax.SAXException;
+
+class XSSFFileHandler extends SpreadsheetHandler {
+ @Override
+ public void handleFile(InputStream stream, String path) throws Exception {
+ // ignore password protected files if password is unknown
+ String pass = Biff8EncryptionKey.getCurrentUserPassword();
+ assumeFalse(pass == null && POIXMLDocumentHandler.isEncrypted(stream));
+
+ final XSSFWorkbook wb;
+
+ // make sure the potentially large byte-array is freed up quickly again
+ {
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ IOUtils.copy(stream, out);
+ ByteArrayInputStream bytes = new ByteArrayInputStream(out.toByteArray());
+
+ if (pass != null) {
+ POIFSFileSystem poifs = new POIFSFileSystem(bytes);
+ EncryptionInfo ei = new EncryptionInfo(poifs);
+ Decryptor dec = ei.getDecryptor();
+ try {
+ boolean b = dec.verifyPassword(pass);
+ assertTrue( b, "password mismatch" );
+ } catch (EncryptedDocumentException e) {
+ String msg = "Export Restrictions in place - please install JCE Unlimited Strength Jurisdiction Policy files";
+ assumeFalse(msg.equals(e.getMessage()));
+ throw e;
+ }
+ InputStream is = dec.getDataStream(poifs);
+ out.reset();
+ IOUtils.copy(is, out);
+ is.close();
+ poifs.close();
+ bytes = new ByteArrayInputStream(out.toByteArray());
+ }
+ checkXSSFReader(OPCPackage.open(bytes));
+ bytes.reset();
+ wb = new XSSFWorkbook(bytes);
+ }
+
+ // use the combined handler for HSSF/XSSF
+ handleWorkbook(wb);
+
+ // TODO: some documents fail currently...
+ //XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(wb);
+ //evaluator.evaluateAll();
+
+ // also verify general POIFS-stuff
+ new POIXMLDocumentHandler().handlePOIXMLDocument(wb);
+
+ POIXMLDocumentHandler.cursorRecursive(wb.getCTWorkbook());
+ for (Sheet sh : wb) {
+ POIXMLDocumentHandler.cursorRecursive(((XSSFSheet)sh).getCTWorksheet());
+ }
+
+ // and finally ensure that exporting to XML works
+ exportToXML(wb);
+
+ // this allows to trigger a heap-dump at this point to see which memory is still allocated
+ //HeapDump.dumpHeap("/tmp/poi.hprof", false);
+
+ wb.close();
+ }
+
+
+ private void checkXSSFReader(OPCPackage p) throws IOException, OpenXML4JException {
+ XSSFReader reader = new XSSFReader(p);
+
+ // these can be null...
+ InputStream sharedStringsData = reader.getSharedStringsData();
+ if(sharedStringsData != null) {
+ sharedStringsData.close();
+ }
+ reader.getSharedStringsTable();
+
+ InputStream stylesData = reader.getStylesData();
+ if(stylesData != null) {
+ stylesData.close();
+ }
+ reader.getStylesTable();
+
+ InputStream themesData = reader.getThemesData();
+ if(themesData != null) {
+ themesData.close();
+ }
+
+ assertNotNull(reader.getWorkbookData());
+
+ Iterator<InputStream> sheetsData = reader.getSheetsData();
+ while(sheetsData.hasNext()) {
+ InputStream str = sheetsData.next();
+ str.close();
+ }
+ }
+
+ private void exportToXML(XSSFWorkbook wb) throws SAXException,
+ TransformerException {
+ for (XSSFMap map : wb.getCustomXMLMappings()) {
+ XSSFExportToXml exporter = new XSSFExportToXml(map);
+
+ ByteArrayOutputStream os = new ByteArrayOutputStream();
+ exporter.exportToXML(os, true);
+ }
+ }
+
+ private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>();
+ static {
+ // expected sheet-id not found
+ // EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/52348.xlsx");
+ // EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/59021.xlsx");
+ // zip-bomb
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764-2.xlsx");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb-empty.xlsx");
+ // strict OOXML
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/57914.xlsx");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SampleSS.strict.xlsx");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SimpleStrict.xlsx");
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample.strict.xlsx");
+ // TODO: good to ignore?
+ EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample-beta.xlsx");
+
+ // corrupt/invalid
+ EXPECTED_ADDITIONAL_FAILURES.add("openxml4j/invalid.xlsx");
+ }
+
+ @SuppressWarnings("resource")
+ @Override
+ public void handleAdditional(File file) throws Exception {
+ // redirect stdout as the examples often write lots of text
+ PrintStream oldOut = System.out;
+ String testFile = file.getParentFile().getName() + "/" + file.getName();
+ try {
+ System.setOut(new NullPrintStream());
+ FromHowTo.main(new String[]{file.getAbsolutePath()});
+ XLSX2CSV.main(new String[]{file.getAbsolutePath()});
+ ExcelComparator.main(new String[]{file.getAbsolutePath(), file.getAbsolutePath()});
+
+ assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(testFile), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" );
+
+ } catch (OLE2NotOfficeXmlFileException e) {
+ // we have some files that are not actually OOXML and thus cannot be tested here
+ } catch (IllegalArgumentException | InvalidFormatException | POIXMLException | IOException e) {
+ if(!EXPECTED_ADDITIONAL_FAILURES.contains(testFile)) {
+ throw e;
+ }
+ } finally {
+ System.setOut(oldOut);
+ }
+ }
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ void test() throws Exception {
+ File file = new File("test-data/spreadsheet/ref-56737.xlsx");
+
+ try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) {
+ handleFile(stream, file.getPath());
+ }
+
+ handleExtracting(file);
+ }
+
+ @Test
+ void testExtracting() throws Exception {
+ handleExtracting(new File("test-data/spreadsheet/ref-56737.xlsx"));
+ }
+
+ @Test
+ void testAdditional() throws Exception {
+ handleAdditional(new File("test-data/spreadsheet/poc-xmlbomb.xlsx"));
+ }
+}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java
new file mode 100644
index 0000000000..0970d4fa1a
--- /dev/null
+++ b/integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java
@@ -0,0 +1,56 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import java.io.BufferedInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.InputStream;
+
+import org.apache.poi.ooxml.POIXMLException;
+import org.apache.poi.xwpf.usermodel.XWPFDocument;
+import org.junit.jupiter.api.Test;
+
+ /**
+  * Integration-test handler for files which are opened as {@link XWPFDocument}.
+  */
+ class XWPFFileHandler extends AbstractFileHandler {
+     /**
+      * Opens the stream as Word document and runs the generic POIXMLDocument and cursor checks.
+      * Streams which {@link POIXMLDocumentHandler#isEncrypted(InputStream)} reports as encrypted are skipped.
+      *
+      * @param stream the content of the file to check
+      * @param path the path of the file, not used by this handler
+      * @throws Exception the cause of a {@link POIXMLException} if it has one which is an
+      *         {@link Exception}, otherwise whatever the checks throw
+      */
+     @Override
+     public void handleFile(InputStream stream, String path) throws Exception {
+         // ignore password protected files
+         if (POIXMLDocumentHandler.isEncrypted(stream)) return;
+
+         try (XWPFDocument doc = new XWPFDocument(stream)) {
+
+             new POIXMLDocumentHandler().handlePOIXMLDocument(doc);
+             POIXMLDocumentHandler.cursorRecursive(doc.getDocument());
+         } catch (POIXMLException e) {
+             // Report the wrapped exception if there is one. A cause which is not an Exception
+             // (e.g. an Error) can't be thrown from here, blindly casting it would only
+             // replace the real problem with a ClassCastException - so keep the wrapper then.
+             Throwable cause = e.getCause();
+             throw (cause instanceof Exception) ? (Exception) cause : e;
+         }
+     }
+
+     // a test-case to test this locally without executing the full TestAllFiles
+     @Test
+     @SuppressWarnings("java:S2699")
+     void test() throws Exception {
+         File file = new File("test-data/document/51921-Word-Crash067.docx");
+
+         try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) {
+             handleFile(stream, file.getPath());
+         }
+
+         handleExtracting(file);
+     }
+} \ No newline at end of file