diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2021-03-27 14:03:16 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2021-03-27 14:03:16 +0000 |
commit | 37791e4bdfc706aa5684745594260f243b4be7ee (patch) | |
tree | a8dd8d0976fc478074d52cd3de79e0e6b5e6a33a /integrationtest/src/test/java/org/apache | |
parent | 2bb3839bfe3e3bacff79f8157465633e311239ce (diff) | |
download | poi-37791e4bdfc706aa5684745594260f243b4be7ee.tar.gz poi-37791e4bdfc706aa5684745594260f243b4be7ee.zip |
65206 - Migrate ant / maven to gradle build
update gradle files and project structure along https://github.com/centic9/poi/tree/gradle_build
remove eclipse IDE project files
remove obsolete record generator files
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1888111 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'integrationtest/src/test/java/org/apache')
28 files changed, 3002 insertions, 0 deletions
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java new file mode 100644 index 0000000000..01a4ebc82d --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/AbstractFileHandler.java @@ -0,0 +1,178 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assumptions.assumeFalse; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.Set; + +import org.apache.poi.EncryptedDocumentException; +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.extractor.POIOLE2TextExtractor; +import org.apache.poi.extractor.POITextExtractor; +import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; +import org.apache.poi.hssf.extractor.EventBasedExcelExtractor; +import org.apache.poi.ooxml.POIXMLException; +import org.apache.poi.ss.extractor.ExcelExtractor; +import org.apache.poi.util.IOUtils; + +/** + * Base class with things that can be run for any supported file handler + * in the integration tests, mostly text-extraction related at the moment. + */ +public abstract class AbstractFileHandler implements FileHandler { + public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<>(); + static { + // password protected files without password + // ... currently none ... 
+ + // unsupported file-types, no supported OLE2 parts + EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug63955-winmail.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/64322.ole2"); + } + + @Override + public void handleExtracting(File file) throws Exception { + boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); + try { + ExtractorFactory.setThreadPrefersEventExtractors(true); + handleExtractingInternal(file); + + ExtractorFactory.setThreadPrefersEventExtractors(false); + handleExtractingInternal(file); + } finally { + ExtractorFactory.setThreadPrefersEventExtractors(before); + } + + /* Did fail for some documents with special XML contents... 
+ try { + OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), + "/tmp/pretty-" + file.getName() }); + } catch (ZipException e) { + // ignore, not a Zip/OOXML file + }*/ + } + + private void handleExtractingInternal(File file) throws Exception { + long length = file.length(); + long modified = file.lastModified(); + + POITextExtractor extractor = null; + String fileAndParentName = file.getParentFile().getName() + "/" + file.getName(); + try { + // fix windows absolute paths for exception message tracking + String relPath = file.getPath().replaceAll(".*test-data", "test-data").replace('\\', '/'); + extractor = ExtractorFactory.createExtractor(file); + assertNotNull(extractor, "Should get a POITextExtractor but had none for file " + relPath); + + assertNotNull(extractor.getText(), "Should get some text but had none for file " + relPath); + + // also try metadata + @SuppressWarnings("resource") + POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor(); + assertNotNull(metadataExtractor.getText()); + + assertFalse(EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName), + "Expected Extraction to fail for file " + relPath + " and handler " + this + ", but did not fail!"); + + assertEquals(length, file.length(), "File should not be modified by extractor"); + assertEquals(modified, file.lastModified(), "File should not be modified by extractor"); + + handleExtractingAsStream(file); + + if (extractor instanceof POIOLE2TextExtractor) { + try (HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor) extractor)) { + assertNotNull(hpsfExtractor.getDocumentSummaryInformationText()); + assertNotNull(hpsfExtractor.getSummaryInformationText()); + String text = hpsfExtractor.getText(); + //System.out.println(text); + assertNotNull(text); + } + } + + // test again with including formulas and cell-comments as this caused some bugs + if (extractor instanceof ExcelExtractor && + // comment-extraction and formula extraction are 
not well supported in event based extraction + !(extractor instanceof EventBasedExcelExtractor)) { + ((ExcelExtractor) extractor).setFormulasNotResults(true); + + String text = extractor.getText(); + assertNotNull(text); + // */ + + ((ExcelExtractor) extractor).setIncludeCellComments(true); + + text = extractor.getText(); + assertNotNull(text); + } + } catch (IOException | POIXMLException e) { + Exception prevE = e; + Throwable cause; + while ((cause = prevE.getCause()) instanceof Exception) { + if (cause instanceof IOException || cause instanceof POIXMLException) { + prevE = (Exception)cause; + } else { + throw (Exception)cause; + } + } + throw e; + } catch (IllegalArgumentException e) { + if(!EXPECTED_EXTRACTOR_FAILURES.contains(fileAndParentName)) { + throw e; + } + } catch (EncryptedDocumentException e) { + String msg = "org.apache.poi.EncryptedDocumentException: Export Restrictions in place - please install JCE Unlimited Strength Jurisdiction Policy files"; + assumeFalse(msg.equals(e.getMessage())); + throw e; + } catch (IllegalStateException e) { + if (!e.getMessage().contains("POI Scratchpad jar missing") || !Boolean.getBoolean("scratchpad.ignore")) { + throw e; + } + } finally { + IOUtils.closeQuietly(extractor); + } + } + + private void handleExtractingAsStream(File file) throws IOException { + try (InputStream stream = new FileInputStream(file)) { + try (POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream)) { + assertNotNull(streamExtractor); + + assertNotNull(streamExtractor.getText()); + } + } + } + + @Override + public void handleAdditional(File file) throws Exception { + // by default we do nothing here + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java b/integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java new file mode 100644 index 0000000000..5d63d1f5a1 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/BaseIntegrationTest.java @@ -0,0 
+1,165 @@
/* ====================================================================
   Licensed to the Apache Software Foundation (ASF) under one or more
   contributor license agreements.  See the NOTICE file distributed with
   this work for additional information regarding copyright ownership.
   The ASF licenses this file to You under the Apache License, Version 2.0
   (the "License"); you may not use this file except in compliance with
   the License.  You may obtain a copy of the License at

       http://www.apache.org/licenses/LICENSE-2.0

   Unless required by applicable law or agreed to in writing, software
   distributed under the License is distributed on an "AS IS" BASIS,
   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   See the License for the specific language governing permissions and
   limitations under the License.
==================================================================== */
package org.apache.poi.stress;

import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assumptions.assumeFalse;

import java.io.BufferedInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.util.zip.ZipException;

import org.apache.poi.EncryptedDocumentException;
import org.apache.poi.OldFileFormatException;
import org.apache.poi.poifs.filesystem.OfficeXmlFileException;

/**
 * This class is used for mass-regression testing via a
 * separate project, this class provides functionality to
 * run integration tests on one file and handle some
 * types of files/exceptions, e.g. old file formats.
 */
public class BaseIntegrationTest {
    private final File rootDir;
    private final String file;
    // not final: replaced when the file turns out to need a different handler
    private FileHandler handler;

    /**
     * @param rootDir directory that {@code file} is resolved against
     * @param file    path of the file to test, relative to {@code rootDir}
     * @param handler handler to process the file with; {@link #test()} fails if it is {@code null}
     */
    public BaseIntegrationTest(File rootDir, String file, FileHandler handler) {
        this.rootDir = rootDir;
        this.file = file;
        this.handler = handler;
    }

    /**
     * Keep this public so it can be used by the regression-tests
     */
    public void test() throws Exception {
        assertNotNull( handler, "Unknown file extension for file: " + file );
        testOneFile(new File(rootDir, file));
    }

    /**
     * Processes the file with the current handler and then runs text extraction on it.
     * Known kinds of unsupported or broken files abort the test via a JUnit assumption,
     * files with a mismatching extension are retried with a different handler.
     *
     * @param inputFile the file to process
     * @throws Exception any failure that is not one of the tolerated cases
     */
    protected void testOneFile(File inputFile) throws Exception {
        try {
            handleFile(inputFile);
        } catch (OfficeXmlFileException e) {
            // switch XWPF and HWPF and so forth depending on the error message
            handleWrongOLE2XMLExtension(inputFile, e);
        } catch (OldFileFormatException e) {
            // Not even text extraction is supported for these: handler.handleExtracting(inputFile);
            assumeFalse( true, "File " + file + " excluded because it is an unsupported old format" );
        } catch (EncryptedDocumentException e) {
            // Do not try to read encrypted files
            assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
        } catch (ZipException e) {
            // some files are corrupted
            // constant-first equals: the message may be null and the original exception
            // must then be rethrown instead of being replaced by a NullPointerException
            String message = e.getMessage();
            if ("unexpected EOF".equals(message) || "Truncated ZIP file".equals(message)) {
                assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
            }

            throw e;
        } catch (IOException e) {
            // ignore some other ways of corrupted files
            String message = e.getMessage();
            if(message != null && message.contains("Truncated ZIP file")) {
                assumeFalse( true, "File " + file + " excluded because the Zip file is incomplete" );
            }

            // sometimes binary format has XML-format-extension...
            if(message != null && message.contains("rong file format or file extension for OO XML file")) {
                handleWrongOLE2XMLExtension(inputFile, e);
                return;
            }

            throw e;
        } catch (IllegalArgumentException e) {
            // ignore errors for documents with incorrect extension
            String message = e.getMessage();
            if(isNonOfficeDocument(message)) {
                assumeFalse( true, "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );
            }

            if("The document is really a OOXML file".equals(message)) {
                handleWrongOLE2XMLExtension(inputFile, e);
                return;
            }

            throw e;
        }

        try {
            handler.handleExtracting(inputFile);
        } catch (EncryptedDocumentException e) {
            // Do not try to read encrypted files
            assumeFalse( true, "File " + file + " excluded because it is password-encrypted" );
        }
    }

    /**
     * Picks a different handler based on the exception message or, failing that, by
     * swapping the current handler for its OLE2/OOXML counterpart, and processes the
     * file again with it.
     *
     * @param inputFile the file to process again
     * @param e         the exception raised by the previous attempt
     * @throws Exception {@code e} itself if no alternative handler applies, or whatever
     *                   the new handler throws
     */
    void handleWrongOLE2XMLExtension(File inputFile, Exception e) throws Exception {
        // we sometimes have wrong extensions, so for some exceptions we try to handle it
        // with the correct FileHandler instead
        String message = e.getMessage();

        // ignore some file-types that we do not want to handle here
        assumeFalse( isNonOfficeDocument(message),
                "File " + file + " excluded because it is actually a PDF/RTF/HTML file" );

        if("The document is really a XLS file".equals(message)) {
            handler = new HSSFFileHandler();
        } else if("The document is really a PPT file".equals(message)) {
            handler = new HSLFFileHandler();
        } else if("The document is really a DOC file".equals(message)) {
            handler = new HWPFFileHandler();
        } else if("The document is really a VSD file".equals(message)) {
            handler = new HDGFFileHandler();

        // use XWPF instead of HWPF and XSSF instead of HSSF as the file seems to have the wrong extension
        } else if (handler instanceof HWPFFileHandler) {
            handler = new XWPFFileHandler();
        } else if (handler instanceof HSSFFileHandler) {
            handler = new XSSFFileHandler();
        } else if (handler instanceof HSLFFileHandler) {
            handler = new XSLFFileHandler();

        // and the other way around, use HWPF instead of XWPF and so forth
        } else if(handler instanceof XWPFFileHandler) {
            handler = new HWPFFileHandler();
        } else if(handler instanceof XSSFFileHandler) {
            handler = new HSSFFileHandler();
        } else if(handler instanceof XSLFFileHandler) {
            handler = new HSLFFileHandler();
        } else {
            // nothing matched => throw the exception to the outside
            throw e;
        }

        // we found a different handler to try processing again
        handleFile(inputFile);
    }

    /** Tells whether the message reports that the document is really an RTF, PDF or HTML file. */
    private static boolean isNonOfficeDocument(String message) {
        return "The document is really a RTF file".equals(message) ||
                "The document is really a PDF file".equals(message) ||
                "The document is really a HTML file".equals(message);
    }

    /** Opens the file through a 64 KiB buffered stream and passes it to the current handler. */
    private void handleFile(File inputFile) throws Exception {
        try (InputStream newStream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024)) {
            handler.handleFile(newStream, inputFile.getAbsolutePath());
        }
    }
}
diff --git a/integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java b/integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java new file mode 100644 index 0000000000..c9241cb0f7 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/ExcInfo.java @@ -0,0 +1,96 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License.
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.fail; + +public class ExcInfo { + private static final String IGNORED_TESTS = "IGNORE"; + + private String file; + private String tests; + private String handler; + private String password; + private Class<? extends Throwable> exClazz; + private String exMessage; + + public String getFile() { + return file; + } + + public void setFile(String file) { + this.file = file; + } + + public String getTests() { + return tests; + } + + public void setTests(String tests) { + this.tests = tests; + } + + public String getHandler() { + return handler; + } + + public void setHandler(String handler) { + this.handler = handler; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + public Class<? extends Throwable> getExClazz() { + return exClazz; + } + + @SuppressWarnings("unchecked") + public void setExClazz(String exClazz) { + try { + this.exClazz = (Class<? 
extends Exception>) Class.forName(exClazz); + } catch (ClassNotFoundException ex) { + fail(ex); + } + } + + public String getExMessage() { + return exMessage; + } + + public void setExMessage(String exMessage) { + this.exMessage = exMessage; + } + + public boolean isMatch(String testName, String handler) { + return + (tests == null || tests.contains(testName) || IGNORED_TESTS.equals(tests)) && + (this.handler == null || this.handler.contains(handler)); + } + + public boolean isValid(String testName, String handler) { + return + !IGNORED_TESTS.equals(tests) && + (tests == null || tests.contains(testName)) && + (this.handler == null || this.handler.contains(handler)); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java new file mode 100644 index 0000000000..62e5d81ced --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/FileHandler.java @@ -0,0 +1,51 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import java.io.File; +import java.io.InputStream; + +/** + * Base interface for the various file types that are + * used in the stress testing. + */ +public interface FileHandler { + /** + * The FileHandler receives a stream ready for reading the + * file and should handle the content that is provided and + * try to read and interpret the data. + * + * Closing is handled by the framework outside this call. + * + * @param stream The input stream to read the file from. + * @param path the relative path to the file + * @throws Exception If an error happens in the file-specific handler + */ + void handleFile(InputStream stream, String path) throws Exception; + + /** + * Ensures that extracting text from the given file + * is returning some text. + */ + void handleExtracting(File file) throws Exception; + + /** + * Allows to perform some additional work, e.g. run + * some of the example applications + */ + void handleAdditional(File file) throws Exception; +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java new file mode 100644 index 0000000000..8be52b35a6 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerFactory.java @@ -0,0 +1,120 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import java.util.HashMap; +import java.util.Map; +import java.util.regex.Pattern; + +public class FileHandlerFactory { + // map from patterns for mimetypes to the FileHandlers that should be able to + // work with that file + // use a Set<Pair> to have a defined order of applying the matches + private static final Map<Pattern, FileHandler> MIME_TYPES = new HashMap<>(); + static { + ////////////////// Word + + MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.document.macroenabled.12"), new XWPFFileHandler()); + MIME_TYPES.put(Pattern.compile("application/vnd.ms-word.template.macroenabled.12"), new XWPFFileHandler()); + + // application/msword + MIME_TYPES.put(Pattern.compile(".*msword.*"), new HWPFFileHandler()); + // application/vnd.ms-word + MIME_TYPES.put(Pattern.compile(".*ms-word.*"), new HWPFFileHandler()); + + // application/vnd.openxmlformats-officedocument.wordprocessingml.document + MIME_TYPES.put(Pattern.compile(".*wordprocessingml.*"), new XWPFFileHandler()); + + ////////////////// Excel + MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.addin.macroEnabled.12"), new XSSFFileHandler()); + MIME_TYPES.put(Pattern.compile("application/vnd.ms-excel.sheet.binary.macroEnabled.12"), new XSSFFileHandler()); + + // application/msexcel + MIME_TYPES.put(Pattern.compile(".*msexcel.*"), new HSSFFileHandler()); + // application/vnd.ms-excel + MIME_TYPES.put(Pattern.compile(".*ms-excel.*"), new HSSFFileHandler()); + + // 
application/vnd.openxmlformats-officedocument.spreadsheetml.sheet + MIME_TYPES.put(Pattern.compile(".*spreadsheetml.*"), new XSSFFileHandler()); + + ////////////////// Powerpoint + + // application/vnd.ms-powerpoint + MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint"), new HSLFFileHandler()); + // application/vnd.ms-officetheme + MIME_TYPES.put(Pattern.compile("application/vnd.ms-officetheme"), new HSLFFileHandler()); + + // application/vnd.openxmlformats-officedocument.presentationml.presentation + MIME_TYPES.put(Pattern.compile(".*presentationml.*"), new XSLFFileHandler()); + // application/vnd.ms-powerpoint.presentation.macroenabled.12 + MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.presentation.macroenabled.12"), new XSLFFileHandler()); + // application/vnd.ms-powerpoint.slideshow.macroenabled.12 + MIME_TYPES.put(Pattern.compile("application/vnd.ms-powerpoint.slideshow.macroenabled.12"), new XSLFFileHandler()); + + ////////////////// Mail/TNEF + + // application/vnd.ms-tnef + MIME_TYPES.put(Pattern.compile(".*ms-tnef.*"), new HMEFFileHandler()); + + // application/vnd.ms-outlook + MIME_TYPES.put(Pattern.compile("application/vnd.ms-outlook"), new HSMFFileHandler()); + + ////////////////// Visio + + // application/vnd.visio + MIME_TYPES.put(Pattern.compile("application/vnd.visio.*"), new HDGFFileHandler()); + + // application/vnd.ms-visio.drawing + MIME_TYPES.put(Pattern.compile(".*vnd.ms-visio\\."), new XDGFFileHandler()); + + //application/vnd.ms-visio.viewer + MIME_TYPES.put(Pattern.compile(".*visio.*"), new HDGFFileHandler()); + + + ////////////////// Publisher + + // application/x-mspublisher + MIME_TYPES.put(Pattern.compile("application/x-mspublisher"), new HPBFFileHandler()); + + + ////////////////// Others + + // special type used by Tika + MIME_TYPES.put(Pattern.compile("application/x-tika-ooxml.*"), new OPCFileHandler()); + // special type used by Tika + MIME_TYPES.put(Pattern.compile("application/x-tika-msoffice.*"), new 
POIFSFileHandler()); + + // application/x-tika-old-excel + MIME_TYPES.put(Pattern.compile("application/x-tika-old-excel"), new POIFSFileHandler()); + + // application/vnd.openxmlformats-officedocument.drawingml.chart+xml + // ?!MIME_TYPES.put(Pattern.compile(".*drawingml.*"), ".dwg"); + + // application/vnd.openxmlformats-officedocument.vmlDrawing + // ?!MIME_TYPES.put(Pattern.compile(".*vmlDrawing.*"), ".dwg"); + } + + public static FileHandler getHandler(String mimeType) { + for(Map.Entry<Pattern,FileHandler> entry : MIME_TYPES.entrySet()) { + if(entry.getKey().matcher(mimeType).matches()) { + return entry.getValue(); + } + } + + return null; + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java new file mode 100644 index 0000000000..aa3c827f1f --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/FileHandlerKnown.java @@ -0,0 +1,60 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import java.io.File; +import java.io.InputStream; +import java.util.function.Supplier; + +@SuppressWarnings("unused") +public enum FileHandlerKnown { + HDGF(HDGFFileHandler::new), + HMEF(HMEFFileHandler::new), + HPBF(HPBFFileHandler::new), + HPSF(HPSFFileHandler::new), + HSLF(HSLFFileHandler::new), + HSMF(HSMFFileHandler::new), + HSSF(HSSFFileHandler::new), + HWPF(HWPFFileHandler::new), + OPC(OPCFileHandler::new), + POIFS(POIFSFileHandler::new), + XDGF(XDGFFileHandler::new), + XSLF(XSLFFileHandler::new), + XSSFB(XSSFBFileHandler::new), + XSSF(XSSFFileHandler::new), + XWPF(XWPFFileHandler::new), + OWPF(OWPFFileHandler::new), + NULL(NullFileHandler::new) + ; + + public final Supplier<FileHandler> fileHandler; + + FileHandlerKnown(Supplier<FileHandler> fileHandler) { + this.fileHandler = fileHandler; + } + + private static class NullFileHandler implements FileHandler { + @Override + public void handleFile(InputStream stream, String path) {} + + @Override + public void handleExtracting(File file) {} + + @Override + public void handleAdditional(File file) {} + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java new file mode 100644 index 0000000000..1d2ca65d4c --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HDGFFileHandler.java @@ -0,0 +1,78 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.hdgf.HDGFDiagram; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; +import org.apache.poi.hdgf.streams.Stream; +import org.apache.poi.hdgf.streams.TrailerStream; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.junit.jupiter.api.Test; + +class HDGFFileHandler extends POIFSFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws IOException { + POIFSFileSystem poifs = new POIFSFileSystem(stream); + HDGFDiagram diagram = new HDGFDiagram(poifs); + Stream[] topLevelStreams = diagram.getTopLevelStreams(); + assertNotNull(topLevelStreams); + for(Stream str : topLevelStreams) { + assertTrue(str.getPointer().getLength() >= 0); + } + + TrailerStream trailerStream = diagram.getTrailerStream(); + assertNotNull(trailerStream); + assertTrue(trailerStream.getPointer().getLength() >= 0); + diagram.close(); + poifs.close(); + + // writing is not yet implemented... 
handlePOIDocument(diagram); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + void test() throws Exception { + File file = new File("test-data/diagram/44501.vsd"); + + InputStream stream = new FileInputStream(file); + try { + handleFile(stream, file.getPath()); + } finally { + stream.close(); + } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + try (VisioTextExtractor extractor = new VisioTextExtractor(stream)) { + assertNotNull(extractor.getText()); + } + } finally { + stream.close(); + } + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java new file mode 100644 index 0000000000..34cf9ee5b9 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HMEFFileHandler.java @@ -0,0 +1,91 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.Arrays; + +import org.apache.poi.hmef.HMEFMessage; +import org.apache.poi.hmef.attribute.MAPIAttribute; +import org.apache.poi.hmef.attribute.TNEFAttribute; +import org.apache.poi.hmef.attribute.TNEFProperty; +import org.apache.poi.hsmf.datatypes.MAPIProperty; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.util.LittleEndian; +import org.junit.jupiter.api.Test; + +class HMEFFileHandler extends AbstractFileHandler { + + @Override + public void handleExtracting(File file) throws Exception { + FileMagic fm = FileMagic.valueOf(file); + if (fm == FileMagic.OLE2) { + super.handleExtracting(file); + } + } + + @Override + public void handleFile(InputStream stream, String path) throws Exception { + HMEFMessage msg = new HMEFMessage(stream); + + // there are test-files that have no body... 
+ String[] HTML_BODY = { + "Testing TNEF Message", "TNEF test message with attachments", "Test" + }; + String bodyStr; + if(Arrays.asList(HTML_BODY).contains(msg.getSubject())) { + MAPIAttribute bodyHtml = msg.getMessageMAPIAttribute(MAPIProperty.BODY_HTML); + assertNotNull(bodyHtml); + bodyStr = new String(bodyHtml.getData(), getEncoding(msg)); + } else { + bodyStr = msg.getBody(); + } + assertNotNull( bodyStr, "Body is not set" ); + assertNotNull( msg.getSubject(), "Subject is not set" ); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + void test() throws Exception { + String path = "test-data/hmef/quick-winmail.dat"; + try (InputStream stream = new FileInputStream(path)) { + handleFile(stream, path); + } + } + + private String getEncoding(HMEFMessage tnefDat) { + TNEFAttribute oemCP = tnefDat.getMessageAttribute(TNEFProperty.ID_OEMCODEPAGE); + MAPIAttribute cpId = tnefDat.getMessageMAPIAttribute(MAPIProperty.INTERNET_CPID); + int codePage = 1252; + if (oemCP != null) { + codePage = LittleEndian.getInt(oemCP.getData()); + } else if (cpId != null) { + codePage = LittleEndian.getInt(cpId.getData()); + } + switch (codePage) { + // see http://en.wikipedia.org/wiki/Code_page for more + case 1252: return "Windows-1252"; + case 20127: return "US-ASCII"; + default: return "cp"+codePage; + } + } + +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java new file mode 100644 index 0000000000..419840d696 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HPBFFileHandler.java @@ -0,0 +1,67 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. 
+ The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.poi.hpbf.HPBFDocument; +import org.apache.poi.hpbf.extractor.PublisherTextExtractor; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.junit.jupiter.api.Test; + +class HPBFFileHandler extends POIFSFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + HPBFDocument pub = new HPBFDocument(new POIFSFileSystem(stream)); + assertNotNull(pub.getEscherDelayStm()); + assertNotNull(pub.getMainContents()); + assertNotNull(pub.getQuillContents()); + + // writing is not yet implemented... 
handlePOIDocument(pub); + pub.close(); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + void test() throws Exception { + File file = new File("test-data/publisher/SampleBrochure.pub"); + + InputStream stream = new FileInputStream(file); + try { + handleFile(stream, file.getPath()); + } finally { + stream.close(); + } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + try (PublisherTextExtractor extractor = new PublisherTextExtractor(stream)) { + assertNotNull(extractor.getText()); + } + } finally { + stream.close(); + } + } + +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java new file mode 100644 index 0000000000..088f77ef45 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HPSFFileHandler.java @@ -0,0 +1,132 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.fail; +import static org.junit.jupiter.api.Assumptions.assumeFalse; + +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Collections; +import java.util.HashSet; +import java.util.Set; + +import org.apache.poi.examples.hpsf.CopyCompare; +import org.apache.poi.hpsf.DocumentSummaryInformation; +import org.apache.poi.hpsf.HPSFPropertiesOnlyDocument; +import org.apache.poi.hpsf.PropertySet; +import org.apache.poi.hpsf.SummaryInformation; +import org.apache.poi.poifs.filesystem.DirectoryNode; +import org.apache.poi.poifs.filesystem.DocumentInputStream; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.util.TempFile; +import org.junit.jupiter.api.Test; + +class HPSFFileHandler extends POIFSFileHandler { + private static final String NL = System.getProperty("line.separator"); + + private static final ThreadLocal<File> copyOutput = ThreadLocal.withInitial(HPSFFileHandler::getTempFile); + + static final Set<String> EXCLUDES_HANDLE_ADD = unmodifiableHashSet( + "spreadsheet/45290.xls", + "spreadsheet/46904.xls", + "spreadsheet/55982.xls", + "spreadsheet/testEXCEL_3.xls", + "spreadsheet/testEXCEL_4.xls", + "hpsf/Test_Humor-Generation.ppt", + "document/word2.doc" + ); + + private static Set<String> unmodifiableHashSet(String... 
a) { + return Collections.unmodifiableSet(new HashSet<>(Arrays.asList(a))); + } + + + @Override + public void handleFile(InputStream stream, String path) throws Exception { + POIFSFileSystem poifs = new POIFSFileSystem(stream); + HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(poifs); + DocumentSummaryInformation dsi = hpsf.getDocumentSummaryInformation(); + SummaryInformation si = hpsf.getSummaryInformation(); + boolean hasDSI = hasPropertyStream(poifs, DocumentSummaryInformation.DEFAULT_STREAM_NAME); + boolean hasSI = hasPropertyStream(poifs, SummaryInformation.DEFAULT_STREAM_NAME); + + assertEquals(hasDSI, dsi != null); + assertEquals(hasSI, si != null); + + handlePOIDocument(hpsf); + } + + private static boolean hasPropertyStream(POIFSFileSystem poifs, String streamName) throws IOException { + DirectoryNode root = poifs.getRoot(); + if (!root.hasEntry(streamName)) { + return false; + } + try (DocumentInputStream dis = root.createDocumentInputStream(streamName)) { + return PropertySet.isPropertySetStream(dis); + } + } + + private static File getTempFile() { + File f = null; + try { + f = TempFile.createTempFile("hpsfCopy", "out"); + } catch (IOException e) { + fail(e); + } + f.deleteOnExit(); + return f; + } + + @Override + public void handleAdditional(File file) throws Exception { + assumeFalse(EXCLUDES_HANDLE_ADD.contains(file.getParentFile().getName()+"/"+file.getName())); + + ByteArrayOutputStream bos = new ByteArrayOutputStream(); + PrintStream psNew = new PrintStream(bos, true, "ISO-8859-1"); + CopyCompare.setOut(psNew); + CopyCompare.main(new String[]{file.getAbsolutePath(), copyOutput.get().getAbsolutePath()}); + assertEquals("Equal" + NL, bos.toString(StandardCharsets.UTF_8.name())); + } + + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + @SuppressWarnings("java:S2699") + void test() throws Exception { + String path = "test-data/diagram/44501.vsd"; + try (InputStream stream = new 
FileInputStream(path)) { + handleFile(stream, path); + } + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + void testExtractor() { + File file = new File("test-data/hpsf/TestBug44375.xls"); + assertDoesNotThrow(() -> handleExtracting(file)); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java new file mode 100644 index 0000000000..fc82b6612d --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HSLFFileHandler.java @@ -0,0 +1,94 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; +import org.apache.poi.hslf.usermodel.HSLFSlideShow; +import org.apache.poi.hslf.usermodel.HSLFSlideShowImpl; +import org.junit.jupiter.api.Test; + +class HSLFFileHandler extends SlideShowHandler { + + private static final Logger LOGGER = LogManager.getLogger(HSLFFileHandler.class); + + @Override + public void handleFile(InputStream stream, String path) throws Exception { + HSLFSlideShowImpl slide = new HSLFSlideShowImpl(stream); + assertNotNull(slide.getCurrentUserAtom()); + assertNotNull(slide.getEmbeddedObjects()); + assertNotNull(slide.getUnderlyingBytes()); + assertNotNull(slide.getPictureData()); + org.apache.poi.hslf.record.Record[] records = slide.getRecords(); + assertNotNull(records); + for(org.apache.poi.hslf.record.Record record : records) { + assertNotNull( record, "Found a record which was null" ); + assertTrue(record.getRecordType() >= 0); + } + + handlePOIDocument(slide); + + HSLFSlideShow ss = new HSLFSlideShow(slide); + handleSlideShow(ss); + } + + @Test + void testOne() throws Exception { + testOneFile(new File("test-data/slideshow/54880_chinese.ppt")); + } + + // a test-case to test all .ppt files without executing the full TestAllFiles + @Override + @Test + void test() throws Exception { + File[] files = new File("test-data/slideshow/").listFiles((dir, name) -> name.endsWith(".ppt")); + assertNotNull(files); + + System.out.println("Testing " + files.length + " files"); + + for(File file : files) { + try { + testOneFile(file); + } catch (Throwable e) { + LOGGER.atWarn().withThrowable(e).log("Failed to handle file {}", file); + } + } + } + + private 
void testOneFile(File file) throws Exception { + System.out.println(file); + + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + } + + public static void main(String[] args) throws Exception { + try (InputStream stream = new FileInputStream(args[0])) { + new HSLFFileHandler().handleFile(stream, args[0]); + } + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java new file mode 100644 index 0000000000..8a21781bc4 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HSMFFileHandler.java @@ -0,0 +1,88 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.datatypes.DirectoryChunk; +import org.junit.jupiter.api.Test; + +class HSMFFileHandler extends POIFSFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + MAPIMessage mapi = new MAPIMessage(stream); + assertNotNull(mapi.getAttachmentFiles()); + assertNotNull(mapi.getDisplayBCC()); + assertNotNull(mapi.getMessageDate()); + + AttachmentChunks[] attachments = mapi.getAttachmentFiles(); + + for(AttachmentChunks attachment : attachments) { + + DirectoryChunk chunkDirectory = attachment.getAttachmentDirectory(); + if(chunkDirectory != null) { + MAPIMessage attachmentMSG = chunkDirectory.getAsEmbeddedMessage(); + assertNotNull(attachmentMSG); + String body = attachmentMSG.getTextBody(); + assertNotNull(body); + } + } + + /* => Writing isn't yet supported... + // write out the file + File file = TempFile.createTempFile("StressTest", ".msg"); + writeToFile(mapi, file); + + MAPIMessage read = new MAPIMessage(file.getAbsolutePath()); + assertNotNull(read.getAttachmentFiles()); + assertNotNull(read.getDisplayBCC()); + assertNotNull(read.getMessageDate()); + */ + + // writing is not yet supported... 
handlePOIDocument(mapi); + + mapi.close(); + } + +// private void writeToFile(MAPIMessage mapi, File file) +// throws FileNotFoundException, IOException { +// OutputStream stream = new FileOutputStream(file); +// try { +// mapi.write(stream); +// } finally { +// stream.close(); +// } +// } + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + void test() throws Exception { + File file = new File("test-data/hsmf/logsat.com_signatures_valid.msg"); + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + } +}
\ No newline at end of file diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java new file mode 100644 index 0000000000..9ee54b4790 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HSSFFileHandler.java @@ -0,0 +1,127 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.PrintStream; +import java.util.HashSet; +import java.util.Set; + +import org.apache.poi.hssf.OldExcelFormatException; +import org.apache.poi.hssf.dev.BiffViewer; +import org.apache.poi.hssf.usermodel.HSSFOptimiser; +import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.util.NullPrintStream; +import org.junit.jupiter.api.Test; + +class HSSFFileHandler extends SpreadsheetHandler { + private final POIFSFileHandler delegate = new POIFSFileHandler(); + @Override + public void handleFile(InputStream stream, String path) throws Exception { + HSSFWorkbook wb = new HSSFWorkbook(stream); + handleWorkbook(wb); + + // TODO: some documents fail currently... + // Note - as of Bugzilla 48036 (svn r828244, r828247) POI is capable of evaluating + // IntersectionPtg. However it is still not capable of parsing it. + // So FormulaEvalTestData.xls now contains a few formulas that produce errors here. + //HSSFFormulaEvaluator evaluator = new HSSFFormulaEvaluator(wb); + //evaluator.evaluateAll(); + + delegate.handlePOIDocument(wb); + + // also try to see if some of the Records behave incorrectly + // TODO: still fails on some records... 
RecordsStresser.handleWorkbook(wb); + + HSSFOptimiser.optimiseCellStyles(wb); + for(Sheet sheet : wb) { + for (Row row : sheet) { + for (Cell cell : row) { + assertNotNull(cell.getCellStyle()); + } + } + } + + HSSFOptimiser.optimiseFonts(wb); + } + + private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>(); + static { + // encrypted + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/35897-type4.xls"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/password.xls"); + // broken files + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/43493.xls"); + // TODO: ok to ignore? + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/50833.xls"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/51832.xls"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/XRefCalc.xls"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/61300.xls"); + } + + @Override + public void handleAdditional(File file) throws Exception { + // redirect stdout as the examples often write lots of text + PrintStream oldOut = System.out; + String fileWithParent = file.getParentFile().getName() + "/" + file.getName(); + try { + System.setOut(new NullPrintStream()); + + BiffViewer.main(new String[]{file.getAbsolutePath()}); + + assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" 
); + } catch (OldExcelFormatException e) { + // old excel formats are not supported here + } catch (RuntimeException e) { + if(!EXPECTED_ADDITIONAL_FAILURES.contains(fileWithParent)) { + throw e; + } + } finally { + System.setOut(oldOut); + } + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + void test() throws Exception { + File file = new File("test-data/spreadsheet/49219.xls"); + + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + + handleAdditional(file); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + @SuppressWarnings("java:S2699") + void testExtractor() throws Exception { + handleExtracting(new File("test-data/spreadsheet/BOOK_in_capitals.xls")); + } +}
\ No newline at end of file diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java new file mode 100644 index 0000000000..da07a9b87b --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/HWPFFileHandler.java @@ -0,0 +1,72 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.util.List; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.hwpf.model.PicturesTable; +import org.apache.poi.hwpf.usermodel.Picture; +import org.junit.jupiter.api.Test; + +class HWPFFileHandler extends POIFSFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + HWPFDocument doc = new HWPFDocument(stream); + assertNotNull(doc.getBookmarks()); + assertNotNull(doc.getCharacterTable()); + assertNotNull(doc.getEndnotes()); + + PicturesTable picturesTable = doc.getPicturesTable(); + List<Picture> pictures = picturesTable.getAllPictures(); + assertNotNull(pictures); + + handlePOIDocument(doc); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + @SuppressWarnings("java:S2699") + void test() throws Exception { + File file = new File("test-data/document/52117.doc"); + + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + + try (FileInputStream stream = new FileInputStream(file); + WordExtractor extractor = new WordExtractor(stream)) { + assertNotNull(extractor.getText()); + } + } + + @Test + void testExtractingOld() { + File file = new File("test-data/document/52117.doc"); + assertDoesNotThrow(() -> handleExtracting(file)); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java b/integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java new file mode 100644 index 0000000000..7928975f56 --- /dev/null +++ 
b/integrationtest/src/test/java/org/apache/poi/stress/HeapDump.java @@ -0,0 +1,97 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import java.io.IOException; +import java.lang.management.ManagementFactory; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; + +import com.sun.management.HotSpotDiagnosticMXBean; +import org.apache.poi.util.SuppressForbidden; + +@SuppressForbidden("class only exists for manual tests in XSSFFileHandler") +public class HeapDump { + // This is the name of the HotSpot Diagnostic MBean + private static final String HOTSPOT_BEAN_NAME = + "com.sun.management:type=HotSpotDiagnostic"; + + // field to store the hotspot diagnostic MBean + private static volatile HotSpotDiagnosticMXBean hotspotMBean; + + /** + * Call this method from your application whenever you + * want to dump the heap snapshot into a file. 
+ * + * @param fileName name of the heap dump file + * @param live flag that tells whether to dump + * only the live objects + */ + public static void dumpHeap(String fileName, boolean live) throws IOException { + try { + if (isIbmVm()) { + dumpHeapJ9(fileName); + } else { + + // initialize hotspot diagnostic MBean + initHotspotMBean(); + dumpHeapHotSpot(fileName, live); + } + } catch (NoSuchMethodException | IllegalAccessException | InvocationTargetException | ClassNotFoundException e) { + throw new RuntimeException(e); + } + } + + // initialize the hotspot diagnostic MBean field + private static void initHotspotMBean() throws IOException { + if (hotspotMBean == null) { + synchronized (HeapDump.class) { + if (hotspotMBean == null) { + hotspotMBean = getHotspotMBean(); + } + } + } + } + + // get the hotspot diagnostic MBean from the platform MBean server + private static HotSpotDiagnosticMXBean getHotspotMBean() throws IOException { + return ManagementFactory.newPlatformMXBeanProxy(ManagementFactory.getPlatformMBeanServer(), + HOTSPOT_BEAN_NAME, HotSpotDiagnosticMXBean.class); + } + + private static boolean isIbmVm() { + try { + Class.forName("com.ibm.jvm.Dump"); + return true; + } catch (ClassNotFoundException e) { + return false; + } + } + + private static void dumpHeapJ9(String fileName) throws ClassNotFoundException, NoSuchMethodException, + InvocationTargetException, IllegalAccessException { + Class<?> dump = Class.forName("com.ibm.jvm.Dump"); + Method heapDumpToFile = dump.getMethod("heapDumpToFile", String.class); + heapDumpToFile.invoke(dump, fileName); + } + + private static void dumpHeapHotSpot(String fileName, boolean live) throws NoSuchMethodException, + InvocationTargetException, IllegalAccessException { + Method dumpHeap = hotspotMBean.getClass().getMethod("dumpHeap", String.class, boolean.class); + dumpHeap.invoke(hotspotMBean, fileName, live); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java 
b/integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java new file mode 100644 index 0000000000..c4f5485806 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/OPCFileHandler.java @@ -0,0 +1,73 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; +import java.io.PushbackInputStream; + +import org.apache.poi.openxml4j.opc.ContentTypes; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.xwpf.usermodel.XWPFRelation; +import org.junit.jupiter.api.Test; + +class OPCFileHandler extends AbstractFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + // ignore password protected files + if (POIXMLDocumentHandler.isEncrypted(stream)) return; + + OPCPackage p = OPCPackage.open(stream); + + for (PackagePart part : p.getParts()) { + if (part.getPartName().toString().equals("/docProps/core.xml")) { + assertEquals(ContentTypes.CORE_PROPERTIES_PART, part.getContentType()); + } + if (part.getPartName().toString().equals("/word/document.xml")) { + assertTrue( XWPFRelation.DOCUMENT.getContentType().equals(part.getContentType()) || + XWPFRelation.MACRO_DOCUMENT.getContentType().equals(part.getContentType()) || + XWPFRelation.TEMPLATE.getContentType().equals(part.getContentType()), "Expected one of " + XWPFRelation.MACRO_DOCUMENT + ", " + XWPFRelation.DOCUMENT + ", " + XWPFRelation.TEMPLATE + + ", but had " + part.getContentType() ); + } + if (part.getPartName().toString().equals("/word/theme/theme1.xml")) { + assertEquals(XWPFRelation.THEME.getContentType(), part.getContentType()); + } + } + } + + @Override + public void handleExtracting(File file) { + // text-extraction is not possible currently for these types of files + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + void test() throws Exception { + File file = new File("test-data/diagram/test.vsdx"); + 
+ try (InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java new file mode 100644 index 0000000000..7ad20d0585 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/OWPFFileHandler.java @@ -0,0 +1,65 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.poi.hwpf.HWPFOldDocument; +import org.apache.poi.hwpf.extractor.WordExtractor; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.junit.jupiter.api.Test; + +public class OWPFFileHandler extends POIFSFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + try (POIFSFileSystem poifs = new POIFSFileSystem(stream)) { + HWPFOldDocument doc = new HWPFOldDocument(poifs); + assertNotNull(doc.getOldFontTable()); + assertNotNull(doc.getCharacterTable()); + } + } + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + @SuppressWarnings("java:S2699") + public void test() throws Exception { + File file = new File("test-data/document/52117.doc"); + + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + + try (FileInputStream stream = new FileInputStream(file); + WordExtractor extractor = new WordExtractor(stream)) { + assertNotNull(extractor.getText()); + } + } + + @Test + public void testExtractingOld() { + File file = new File("test-data/document/52117.doc"); + assertDoesNotThrow(() -> handleExtracting(file)); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java new file mode 100644 index 0000000000..b92f91f5f9 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/POIFSFileHandler.java @@ -0,0 +1,81 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation 
(ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.POIDocument; +import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.junit.jupiter.api.Test; + +class POIFSFileHandler extends AbstractFileHandler { + + @Override + public void handleFile(InputStream stream, String path) throws Exception { + try (POIFSFileSystem fs = new POIFSFileSystem(stream)) { + handlePOIFSFileSystem(fs); + handleHPSFProperties(fs); + } + } + + private void handleHPSFProperties(POIFSFileSystem fs) throws IOException { + try (HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs)) { + // can be null + ext.getDocSummaryInformation(); + ext.getSummaryInformation(); + + assertNotNull(ext.getDocumentSummaryInformationText()); + assertNotNull(ext.getSummaryInformationText()); + assertNotNull(ext.getText()); + } + } + + private void handlePOIFSFileSystem(POIFSFileSystem fs) 
{ + assertNotNull(fs); + assertNotNull(fs.getRoot()); + } + + protected void handlePOIDocument(POIDocument doc) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + doc.write(out); + + ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray()); + POIFSFileSystem fs = new POIFSFileSystem(in); + handlePOIFSFileSystem(fs); + fs.close(); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + void test() throws Exception { + File file = new File("test-data/poifs/Notes.ole2"); + + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + //handleExtracting(file); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java new file mode 100644 index 0000000000..930c904a77 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/POIXMLDocumentHandler.java @@ -0,0 +1,72 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.ooxml.POIXMLDocument; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.filesystem.FileMagic; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.xmlbeans.XmlCursor; +import org.apache.xmlbeans.XmlObject; + +public final class POIXMLDocumentHandler { + protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception { + assertNotNull(doc.getAllEmbeddedParts()); + assertNotNull(doc.getPackage()); + assertNotNull(doc.getPackagePart()); + assertNotNull(doc.getProperties()); + assertNotNull(doc.getRelations()); + } + + protected static boolean isEncrypted(InputStream stream) throws IOException { + if (FileMagic.valueOf(stream) == FileMagic.OLE2) { + try (POIFSFileSystem poifs = new POIFSFileSystem(stream)) { + if (poifs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { + return true; + } + } + throw new IOException("Wrong file format or file extension for OO XML file"); + } + return false; + } + + /** + * Recurse through the document and convert all elements so they are available in the ooxml-lite jar. + * This method only makes sense for hierarchical documents like .docx. + * If the document is split up in different parts like in .pptx, each part needs to be provided. 
+ * + * @param base the entry point + */ + protected static void cursorRecursive(XmlObject base) { + XmlCursor cur = base.newCursor(); + try { + if (!cur.toFirstChild()) { + return; + } + do { + cursorRecursive(cur.getObject()); + } while (cur.toNextSibling()); + } finally { + cur.dispose(); + } + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java new file mode 100644 index 0000000000..9a1defbf21 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/SlideShowHandler.java @@ -0,0 +1,168 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
==================================================================== */
package org.apache.poi.stress;

import static org.junit.jupiter.api.Assertions.assertNotNull;
import static org.junit.jupiter.api.Assertions.assertTrue;

import java.awt.Dimension;
import java.awt.Graphics2D;
import java.awt.RenderingHints;
import java.awt.image.BufferedImage;
import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;
import java.lang.ref.WeakReference;

import org.apache.poi.sl.draw.Drawable;
import org.apache.poi.sl.usermodel.GroupShape;
import org.apache.poi.sl.usermodel.Notes;
import org.apache.poi.sl.usermodel.PictureData;
import org.apache.poi.sl.usermodel.Shape;
import org.apache.poi.sl.usermodel.SimpleShape;
import org.apache.poi.sl.usermodel.Slide;
import org.apache.poi.sl.usermodel.SlideShow;
import org.apache.poi.sl.usermodel.SlideShowFactory;
import org.apache.poi.sl.usermodel.TextParagraph;
import org.apache.poi.sl.usermodel.TextRun;
import org.apache.poi.sl.usermodel.TextShape;
import org.junit.jupiter.api.Assumptions;
import org.junit.platform.commons.util.ExceptionUtils;

/**
 * Common checks for slide shows: render all slides, read the content and
 * pictures, write the slide show to memory and read it back in.
 */
public abstract class SlideShowHandler extends POIFSFileHandler {
    /**
     * Runs all slide show checks on the given slide show.
     *
     * @param ss the slide show to check
     * @throws IOException if writing or re-reading the slide show fails
     */
    public void handleSlideShow(SlideShow<?,?> ss) throws IOException {
        renderSlides(ss);

        readContent(ss);
        readPictures(ss);

        // write out the file
        ByteArrayOutputStream out = writeToArray(ss);

        // read again after writing, to check the in-memory state afterwards
        readContent(ss);

        // read in the written file
        try (SlideShow<?, ?> read = SlideShowFactory.create(new ByteArrayInputStream(out.toByteArray()))) {
            assertNotNull(read);
            readContent(read);
        }
    }

    /** Writes the slide show into a byte buffer and returns the buffer. */
    private ByteArrayOutputStream writeToArray(SlideShow<?,?> ss) throws IOException {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        try {
            ss.write(stream);
        } finally {
            stream.close();
        }

        return stream;
    }


    /** Walks all shapes of every slide, its notes (if any) and its master sheet. */
    private void readContent(SlideShow<?,?> ss) {
        for (Slide<?,?> s : ss.getSlides()) {
            s.getTitle();

            for (Shape<?,?> shape : s) {
                readShapes(shape);
            }

            Notes<?, ?> notes = s.getNotes();
            if(notes != null) {
                for (Shape<?, ?> shape : notes) {
                    readShapes(shape);
                }
            }

            for (Shape<?,?> shape : s.getMasterSheet()) {
                readShapes(shape);
            }
        }
    }

    /** Reads fill, stroke and text properties of a shape, descending into groups. */
    private void readShapes(Shape<?,?> s) {
        // recursively walk group-shapes
        if(s instanceof GroupShape) {
            GroupShape<? extends Shape<?,?>, ?> shapes = (GroupShape<? extends Shape<?,?>, ?>) s;
            for (Shape<? extends Shape<?,?>, ?> shape : shapes) {
                readShapes(shape);
            }
        }

        if(s instanceof SimpleShape) {
            SimpleShape<?, ?> simpleShape = (SimpleShape<?, ?>) s;

            simpleShape.getFillColor();
            simpleShape.getFillStyle();
            simpleShape.getStrokeStyle();
            simpleShape.getLineDecoration();
        }

        readText(s);
    }

    /** Reads the raw text of every text run if the shape is a text shape. */
    private void readText(Shape<?,?> s) {
        if (s instanceof TextShape) {
            for (TextParagraph<?,?,?> tp : (TextShape<?,?>)s) {
                for (TextRun tr : tp) {
                    tr.getRawText();
                }
            }
        }
    }

    /** Asserts that every picture reports a non-negative width and height. */
    private void readPictures(SlideShow<?,?> ss) {
        for (PictureData pd : ss.getPictureData()) {
            Dimension dim = pd.getImageDimension();
            assertTrue( dim.getHeight() >= 0, "Expecting a valid height, but had an image with height: " + dim.getHeight() );
            assertTrue( dim.getWidth() >= 0, "Expecting a valid width, but had an image with width: " + dim.getWidth() );
        }
    }

    /** Draws every slide into an off-screen image of the page size. */
    private void renderSlides(SlideShow<?,?> ss) {
        Dimension pgSize = ss.getPageSize();

        for (Slide<?,?> s : ss.getSlides()) {
            BufferedImage img = new BufferedImage(pgSize.width, pgSize.height, BufferedImage.TYPE_INT_ARGB);
            Graphics2D graphics = img.createGraphics();

            try {
                // default rendering options
                graphics.setRenderingHint(RenderingHints.KEY_ANTIALIASING, RenderingHints.VALUE_ANTIALIAS_ON);
                graphics.setRenderingHint(RenderingHints.KEY_RENDERING, RenderingHints.VALUE_RENDER_QUALITY);
                graphics.setRenderingHint(RenderingHints.KEY_INTERPOLATION, RenderingHints.VALUE_INTERPOLATION_BICUBIC);
                graphics.setRenderingHint(RenderingHints.KEY_FRACTIONALMETRICS, RenderingHints.VALUE_FRACTIONALMETRICS_ON);
                graphics.setRenderingHint(Drawable.BUFFERED_IMAGE, new WeakReference<>(img));

                // draw stuff
                s.draw(graphics);
            } catch (ArrayIndexOutOfBoundsException e) {
                // We saw exceptions with JDK 8 on Windows in the Jenkins CI which
                // seem to only be triggered by some font (maybe Calibri?!)
                // We cannot avoid this, so let's try to not make the tests fail in this case
                if (!isKnownFontGlitch(e)) {
                    throw e;
                }
            } finally {
                // release the graphics resources also when drawing failed
                graphics.dispose();
                img.flush();
            }
        }
    }

    /**
     * Returns true only for the exception described in {@link #renderSlides}:
     * message "-1" and a stack trace running through the justification code.
     */
    private static boolean isKnownFontGlitch(ArrayIndexOutOfBoundsException e) {
        return "-1".equals(e.getMessage())
            && ExceptionUtils.readStackTrace(e).contains("ExtendedTextSourceLabel.getJustificationInfos");
    }
}
\ No newline at end of file diff --git a/integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java new file mode 100644 index 0000000000..dcee603bf2 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/SpreadsheetHandler.java @@ -0,0 +1,157 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
==================================================================== */
package org.apache.poi.stress;

import static org.junit.jupiter.api.Assertions.assertNotNull;

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.IOException;

import org.apache.poi.ss.extractor.EmbeddedData;
import org.apache.poi.ss.extractor.EmbeddedExtractor;
import org.apache.poi.ss.usermodel.Cell;
import org.apache.poi.ss.usermodel.Name;
import org.apache.poi.ss.usermodel.Row;
import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
import org.apache.poi.util.RecordFormatException;
import org.apache.poi.xssf.usermodel.XSSFChartSheet;

/**
 * Common checks for workbooks: read the content, write the workbook to
 * memory twice, read it back in and try to modify the re-read copy.
 */
public abstract class SpreadsheetHandler extends AbstractFileHandler {
    /**
     * Runs all workbook checks on the given workbook.
     *
     * @param wb the workbook to check
     * @throws IOException if writing or re-reading the workbook fails
     */
    public void handleWorkbook(Workbook wb) throws IOException {
        // try to access some of the content
        readContent(wb);

        // write out the file
        writeToArray(wb);

        // access some more content (we had cases where writing corrupts the data in memory)
        readContent(wb);

        // write once more
        ByteArrayOutputStream out = writeToArray(wb);

        // read in the written file; try-with-resources closes it also when a check fails
        try (Workbook read = WorkbookFactory.create(new ByteArrayInputStream(out.toByteArray()))) {
            assertNotNull(read);

            readContent(read);

            extractEmbedded(read);

            modifyContent(read);
        }
    }

    /** Writes the workbook into a byte buffer and returns the buffer. */
    private ByteArrayOutputStream writeToArray(Workbook wb) throws IOException {
        ByteArrayOutputStream stream = new ByteArrayOutputStream();
        try {
            wb.write(stream);
        } finally {
            stream.close();
        }

        return stream;
    }

    /**
     * Looks up every sheet by name, groups and collapses/expands some columns,
     * converts the cells of smaller sheets to strings and reads the formula of
     * every defined name which is not a function name.
     */
    private void readContent(Workbook wb) {
        for(int i = 0;i < wb.getNumberOfSheets();i++) {
            Sheet sheet = wb.getSheetAt(i);
            assertNotNull(wb.getSheet(sheet.getSheetName()));
            sheet.groupColumn((short) 4, (short) 5);
            sheet.setColumnGroupCollapsed(4, true);
            sheet.setColumnGroupCollapsed(4, false);

            // don't do this for very large sheets as it will take a long time
            if(sheet.getPhysicalNumberOfRows() > 1000) {
                continue;
            }

            for(Row row : sheet) {
                for(Cell cell : row) {
                    assertNotNull(cell.toString());
                }
            }
        }

        for (Name name : wb.getAllNames()) {
            // this sometimes caused exceptions
            if(!name.isFunctionName()) {
                name.getRefersToFormula();
            }
        }
    }

    /** Extracts all embedded objects of every sheet and checks their basic properties. */
    private void extractEmbedded(Workbook wb) throws IOException {
        EmbeddedExtractor ee = new EmbeddedExtractor();

        for (Sheet s : wb) {
            for (EmbeddedData ed : ee.extractAll(s)) {
                assertNotNull(ed.getFilename());
                assertNotNull(ed.getEmbeddedData());
                assertNotNull(ed.getShape());
            }
        }
    }

    /** Clones every sheet except chart sheets, tolerating a few known failures. */
    private void modifyContent(Workbook wb) {
        /* a number of files fail because of various things: udf, unimplemented functions, ...
        we would need quite a list of excludes and the large regression tests would probably
        take a lot longer to run...
        try {
            // try to re-compute all formulas to find cases where parsing fails
            wb.getCreationHelper().createFormulaEvaluator().evaluateAll();
        } catch (RuntimeException e) {
            // only allow a specific exception which indicates that an external
            // reference was not found
            if(!e.getMessage().contains("Could not resolve external workbook name")) {
                throw e;
            }

        }*/

        // iterate backwards so the clones appended at the end are not visited
        for (int i=wb.getNumberOfSheets()-1; i>=0; i--) {
            if(wb.getSheetAt(i) instanceof XSSFChartSheet) {
                // clone for chart-sheets is not supported
                continue;
            }

            try {
                wb.cloneSheet(i);
            } catch (RecordFormatException e) {
                if (e.getCause() instanceof CloneNotSupportedException) {
                    // ignore me
                    continue;
                }
                throw e;
            } catch (RuntimeException e) {
                if ("Could not find 'internal references' EXTERNALBOOK".equals(e.getMessage()) ||
                        "CountryRecord not found".equals(e.getMessage()) ||
                        "CountryRecord or SSTRecord not found".equals(e.getMessage()) ||
                        "Cannot add more than 65535 shapes".equals(e.getMessage()) ) {
                    // ignore these here for now
                    continue;
                }
                throw e;
            }
        }
    }
}
\ No newline at end of file diff --git a/integrationtest/src/test/java/org/apache/poi/stress/StressMap.java b/integrationtest/src/test/java/org/apache/poi/stress/StressMap.java new file mode 100644 index 0000000000..f69fbfbf4f --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/StressMap.java @@ -0,0 +1,153 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import java.io.File; +import java.io.IOException; +import java.util.HashMap; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.function.BiConsumer; +import java.util.stream.Collectors; +import java.util.stream.Stream; +import java.util.stream.StreamSupport; + +import org.apache.commons.collections4.MultiValuedMap; +import org.apache.commons.collections4.multimap.ArrayListValuedHashMap; +import org.apache.poi.ss.usermodel.Cell; +import org.apache.poi.ss.usermodel.CellType; +import org.apache.poi.ss.usermodel.Row; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.ss.usermodel.Workbook; +import org.apache.poi.ss.usermodel.WorkbookFactory; + +public class StressMap { + private final MultiValuedMap<String, ExcInfo> exMap = new ArrayListValuedHashMap<>(); + private final Map<String,String> handlerMap = new LinkedHashMap<>(); + + + public void load(File mapFile) throws IOException { + try (Workbook wb = WorkbookFactory.create(mapFile)) { + readExMap(wb.getSheet("Exceptions")); + readHandlerMap(wb.getSheet("Handlers")); + } + } + + public List<FileHandlerKnown> getHandler(String file) { + // ... failures/handlers lookup doesn't work on windows otherwise + final String uniFile = file.replace('\\', '/'); + + String firstHandler = handlerMap.entrySet().stream() + .filter(me -> uniFile.endsWith(me.getKey())) + .map(Map.Entry::getValue).findFirst().orElse("NULL"); + + return Stream.of(firstHandler, secondHandler(firstHandler)) + .filter(h -> !"NULL".equals(h)) + .map(FileHandlerKnown::valueOf) + .collect(Collectors.toList()); + } + + public ExcInfo getExcInfo(String file, String testName, FileHandlerKnown handler) { + // ... 
failures/handlers lookup doesn't work on windows otherwise + final String uniFile = file.replace('\\', '/'); + + return exMap.get(uniFile).stream() + .filter(e -> e.isMatch(testName, handler.name())) + .findFirst().orElse(null); + } + + public void readHandlerMap(Sheet sh) { + if (sh == null) { + return; + } + + handlerMap.clear(); + + boolean IGNORE_SCRATCHPAD = Boolean.getBoolean("scratchpad.ignore"); + boolean isFirst = true; + for (Row row : sh) { + if (isFirst) { + isFirst = false; + continue; + } + Cell cell = row.getCell(2); + if (IGNORE_SCRATCHPAD || cell == null || cell.getCellType() != CellType.STRING) { + cell = row.getCell(1); + } + handlerMap.put(row.getCell(0).getStringCellValue(), cell.getStringCellValue()); + } + } + + + public void readExMap(Sheet sh) { + if (sh == null) { + return; + } + + exMap.clear(); + + Iterator<Row> iter = sh.iterator(); + List<BiConsumer<ExcInfo,String>> cols = initCols(iter.next()); + + while (iter.hasNext()) { + ExcInfo info = new ExcInfo(); + for (Cell cell : iter.next()) { + if (cell.getCellType() == CellType.STRING) { + cols.get(cell.getColumnIndex()).accept(info, cell.getStringCellValue()); + } + } + exMap.put(info.getFile(), info); + } + } + + private static List<BiConsumer<ExcInfo,String>> initCols(Row row) { + Map<String,BiConsumer<ExcInfo,String>> m = new HashMap<>(); + m.put("File", ExcInfo::setFile); + m.put("Tests", ExcInfo::setTests); + m.put("Handler", ExcInfo::setHandler); + m.put("Password", ExcInfo::setPassword); + m.put("Exception Class", ExcInfo::setExClazz); + m.put("Exception Message", ExcInfo::setExMessage); + + return StreamSupport + .stream(row.spliterator(), false) + .map(Cell::getStringCellValue) + .map(v -> m.getOrDefault(v, (e,s) -> {})) + .collect(Collectors.toList()); + } + + private static String secondHandler(String handlerStr) { + switch (handlerStr) { + case "XSSF": + case "XWPF": + case "XSLF": + case "XDGF": + return "OPC"; + case "HSSF": + case "HWPF": + case "HSLF": + case "HDGF": + 
case "HSMF": + case "HBPF": + return "HPSF"; + default: + return "NULL"; + } + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java b/integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java new file mode 100644 index 0000000000..55777d12ee --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/TestAllFiles.java @@ -0,0 +1,201 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + + +import static org.junit.jupiter.api.Assertions.assertDoesNotThrow; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assertions.fail; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.ArrayList; +import java.util.List; +import java.util.stream.Stream; + +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.tools.ant.DirectoryScanner; +import org.junit.jupiter.api.function.Executable; +import org.junit.jupiter.api.parallel.Execution; +import org.junit.jupiter.api.parallel.ExecutionMode; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.Arguments; +import org.junit.jupiter.params.provider.MethodSource; +import org.opentest4j.AssertionFailedError; + +/** + * This is an integration test which performs various actions on all stored test-files and tries + * to reveal problems which are introduced, but not covered (yet) by unit tests. + * + * This test looks for any file under the test-data directory and tries to do some useful + * processing with it based on it's type. + * + * The test is implemented as a junit {@link ParameterizedTest} test, which leads + * to one test-method call for each file (currently around 950 files are handled). + * + * There is a a mapping of extension to implementations of the interface + * {@link FileHandler} which defines how the file is loaded and which actions are + * tried with the file. 
+ * + * The test can be expanded by adding more actions to the FileHandlers, this automatically + * applies the action to any such file in our test-data repository. + * + * There is also a list of files that should actually fail. + * + * Note: It is also a test-failure if a file that is expected to fail now actually works, + * i.e. if a bug was fixed in POI itself, the file should be removed from the expected-failures + * here as well! This is to ensure that files that should not work really do not work, e.g. + * that we do not remove expected sanity checks. + */ +// also need to set JVM parameter: -Djunit.jupiter.execution.parallel.enabled=true +@Execution(ExecutionMode.CONCURRENT) +public class TestAllFiles { + private static final String DEFAULT_TEST_DATA_PATH = "test-data"; + public static final File ROOT_DIR = new File(System.getProperty("POI.testdata.path", DEFAULT_TEST_DATA_PATH)); + + public static final String[] SCAN_EXCLUDES = { + "**/.svn/**", + "lost+found", + "**/.git/**", + }; + + public static Stream<Arguments> allfiles(String testName) throws IOException { + StressMap sm = new StressMap(); + sm.load(new File(ROOT_DIR, "spreadsheet/stress.xls")); + + DirectoryScanner scanner = new DirectoryScanner(); + scanner.setBasedir(ROOT_DIR); + scanner.setExcludes(SCAN_EXCLUDES); + + scanner.scan(); + + final List<Arguments> result = new ArrayList<>(100); + for (String file : scanner.getIncludedFiles()) { + for (FileHandlerKnown handler : sm.getHandler(file)) { + ExcInfo info1 = sm.getExcInfo(file, testName, handler); + if (info1 == null || info1.isValid(testName, handler.name())) { + result.add(Arguments.of( + file, + handler, + (info1 != null) ? info1.getPassword() : null, + (info1 != null) ? info1.getExClazz() : null, + (info1 != null) ? 
info1.getExMessage() : null + )); + } + } + } + + return result.stream(); + } + + public static Stream<Arguments> extractFiles() throws IOException { + return allfiles("extract"); + } + + @ParameterizedTest(name = "#{index} {0} {1}") + @MethodSource("extractFiles") + void handleExtracting(String file, FileHandlerKnown handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException { + System.out.println("Running extractFiles on "+file); + FileHandler fileHandler = handler.fileHandler.get(); + assertNotNull(fileHandler, "Did not find a handler for file " + file); + Executable exec = () -> fileHandler.handleExtracting(new File(ROOT_DIR, file)); + verify(file, exec, exClass, exMessage, password); + } + + public static Stream<Arguments> handleFiles() throws IOException { + return allfiles("handle"); + } + + @ParameterizedTest(name = "#{index} {0} {1}") + @MethodSource("handleFiles") + void handleFile(String file, FileHandlerKnown handler, String password, Class<? extends Throwable> exClass, String exMessage) throws IOException { + System.out.println("Running handleFiles on "+file); + FileHandler fileHandler = handler.fileHandler.get(); + assertNotNull(fileHandler, "Did not find a handler for file " + file); + try (InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)), 64 * 1024)) { + Executable exec = () -> fileHandler.handleFile(stream, file); + verify(file, exec, exClass, exMessage, password); + } + } + + public static Stream<Arguments> handleAdditionals() throws IOException { + return allfiles("additional"); + } + + @ParameterizedTest(name = "#{index} {0} {1}") + @MethodSource("handleAdditionals") + void handleAdditional(String file, FileHandlerKnown handler, String password, Class<? 
extends Throwable> exClass, String exMessage) { + System.out.println("Running additionals on "+file); + FileHandler fileHandler = handler.fileHandler.get(); + assertNotNull(fileHandler, "Did not find a handler for file " + file); + Executable exec = () -> fileHandler.handleAdditional(new File(ROOT_DIR, file)); + verify(file, exec, exClass, exMessage, password); + } + + @SuppressWarnings("unchecked") + private static void verify(String file, Executable exec, Class<? extends Throwable> exClass, String exMessage, String password) { + final String errPrefix = file + " - failed. "; + // this also removes the password for non encrypted files + Biff8EncryptionKey.setCurrentUserPassword(password); + if (exClass != null && AssertionFailedError.class.isAssignableFrom(exClass)) { + try { + exec.execute(); + fail(errPrefix + "Expected failed assertion"); + } catch (AssertionFailedError e) { + String actMsg = pathReplace(e.getMessage()); + assertEquals(exMessage, actMsg, errPrefix); + } catch (Throwable e) { + fail(errPrefix + "Unexpected exception", e); + } + } else if (exClass != null) { + Exception e = assertThrows((Class<? extends Exception>)exClass, exec); + String actMsg = pathReplace(e.getMessage()); + if (NullPointerException.class.isAssignableFrom(exClass)) { + if (actMsg != null) { + assertTrue(actMsg.contains(exMessage), errPrefix + "Message: "+actMsg+" - didn't contain: "+exMessage); + } + } else { + assertNotNull(actMsg, errPrefix); + assertTrue(actMsg.contains(exMessage), errPrefix + "Message: "+actMsg+" - didn't contain: "+exMessage); + } + } else { + assertDoesNotThrow(exec, errPrefix); + } + } + + private static String pathReplace(String msg) { + if (msg == null) return null; + + // Windows path replacement + msg = msg.replace('\\', '/'); + + // Adjust file paths to remove unwanted file path info. 
+ int filePathIndex = msg.indexOf(ROOT_DIR.toString()); + if (filePathIndex > 0) { + int testDataDirectoryIndex = msg.indexOf(DEFAULT_TEST_DATA_PATH); + msg = msg.substring(0, filePathIndex) + msg.substring(testDataDirectoryIndex); + } + + return msg; + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java new file mode 100644 index 0000000000..d588e4b496 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/XDGFFileHandler.java @@ -0,0 +1,45 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import java.io.InputStream; + +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.xdgf.usermodel.XmlVisioDocument; +import org.junit.jupiter.api.Test; + +class XDGFFileHandler extends AbstractFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + // ignore password protected files + if (POIXMLDocumentHandler.isEncrypted(stream)) return; + + XmlVisioDocument doc = new XmlVisioDocument(stream); + new POIXMLDocumentHandler().handlePOIXMLDocument(doc); + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + @SuppressWarnings("java:S2699") + void test() throws Exception { + try (OPCPackage pkg = OPCPackage.open("test-data/diagram/test.vsdx", PackageAccess.READ)) { + XmlVisioDocument doc = new XmlVisioDocument(pkg); + new POIXMLDocumentHandler().handlePOIXMLDocument(doc); + } + } +}
\ No newline at end of file diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java new file mode 100644 index 0000000000..7a5aebd4c4 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/XSLFFileHandler.java @@ -0,0 +1,85 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.ooxml.POIXMLException; +import org.apache.poi.sl.extractor.SlideShowExtractor; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFSlideShow; +import org.junit.jupiter.api.Test; + +class XSLFFileHandler extends SlideShowHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + try (XMLSlideShow slide = new XMLSlideShow(stream); + XSLFSlideShow slideInner = new XSLFSlideShow(slide.getPackage())) { + ; + assertNotNull(slideInner.getPresentation()); + assertNotNull(slideInner.getSlideMasterReferences()); + assertNotNull(slideInner.getSlideReferences()); + + new POIXMLDocumentHandler().handlePOIXMLDocument(slide); + + handleSlideShow(slide); + } catch (POIXMLException e) { + Exception cause = (Exception)e.getCause(); + throw cause == null ? 
e : cause; + } + } + + @Override + public void handleExtracting(File file) throws Exception { + super.handleExtracting(file); + + + // additionally try the other getText() methods + try (SlideShowExtractor<?,?> extractor = (SlideShowExtractor<?, ?>) ExtractorFactory.createExtractor(file)) { + assertNotNull(extractor); + extractor.setSlidesByDefault(true); + extractor.setNotesByDefault(true); + extractor.setMasterByDefault(true); + + assertNotNull(extractor.getText()); + + extractor.setSlidesByDefault(false); + extractor.setNotesByDefault(false); + extractor.setMasterByDefault(false); + + assertEquals("", extractor.getText(), "With all options disabled we should not get text"); + } + } + + // a test-case to test this locally without executing the full TestAllFiles + @Override + @Test + void test() throws Exception { + File file = new File("test-data/slideshow/ca.ubc.cs.people_~emhill_presentations_HowWeRefactor.pptx"); + try (InputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java new file mode 100644 index 0000000000..1bc23c49ed --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/XSSFBFileHandler.java @@ -0,0 +1,96 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; + +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.util.IOUtils; +import org.apache.poi.xssf.XLSBUnsupportedException; +import org.apache.poi.xssf.extractor.XSSFBEventBasedExcelExtractor; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.jupiter.api.Test; + +class XSSFBFileHandler extends AbstractFileHandler { + + static { + //add expected failures here: + AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.add("spreadsheet/protected_passtika.xlsb"); + } + + @Override + public void handleFile(InputStream stream, String path) throws Exception { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copy(stream, out); + + final byte[] bytes = out.toByteArray(); + try (OPCPackage opcPackage = OPCPackage.open(new ByteArrayInputStream(bytes))) { + testOne(opcPackage); + } + + testNotHandledByWorkbookException(OPCPackage.open(new ByteArrayInputStream(bytes))); + } + + private void testNotHandledByWorkbookException(OPCPackage pkg) throws IOException { + try { + new XSSFWorkbook(pkg).close(); + } catch (XLSBUnsupportedException e) { + //this is what we'd expect + //swallow + } + } + + @Override + public void handleExtracting(File file) throws Exception { + OPCPackage pkg = 
OPCPackage.open(file, PackageAccess.READ); + try { + testOne(pkg); + } finally { + pkg.close(); + } + + pkg = OPCPackage.open(file, PackageAccess.READ); + try { + testNotHandledByWorkbookException(pkg); + } finally { + pkg.close(); + } + } + + private void testOne(OPCPackage pkg) throws Exception { + XSSFBEventBasedExcelExtractor ex = new XSSFBEventBasedExcelExtractor(pkg); + String txt = ex.getText(); + if (txt.length() < 1) { + throw new RuntimeException("Should have gotten some text."); + } + } + + @Test + void testLocal() throws Exception { + File file = new File("test-data/spreadsheet/Simple.xlsb"); + try (FileInputStream stream = new FileInputStream(file)) { + handleFile(stream, file.getPath()); + } + handleExtracting(file); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java new file mode 100644 index 0000000000..beb3a8edf8 --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/XSSFFileHandler.java @@ -0,0 +1,234 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
+==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertTrue; +import static org.junit.jupiter.api.Assumptions.assumeFalse; + +import java.io.BufferedInputStream; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.PrintStream; +import java.util.HashSet; +import java.util.Iterator; +import java.util.Set; + +import javax.xml.transform.TransformerException; + +import org.apache.poi.EncryptedDocumentException; +import org.apache.poi.examples.ss.ExcelComparator; +import org.apache.poi.examples.xssf.eventusermodel.FromHowTo; +import org.apache.poi.examples.xssf.eventusermodel.XLSX2CSV; +import org.apache.poi.hssf.record.crypto.Biff8EncryptionKey; +import org.apache.poi.ooxml.POIXMLException; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import org.apache.poi.openxml4j.exceptions.OLE2NotOfficeXmlFileException; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.poifs.crypt.Decryptor; +import org.apache.poi.poifs.crypt.EncryptionInfo; +import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.ss.usermodel.Sheet; +import org.apache.poi.util.IOUtils; +import org.apache.poi.util.NullPrintStream; +import org.apache.poi.xssf.eventusermodel.XSSFReader; +import org.apache.poi.xssf.extractor.XSSFExportToXml; +import org.apache.poi.xssf.usermodel.XSSFMap; +import org.apache.poi.xssf.usermodel.XSSFSheet; +import org.apache.poi.xssf.usermodel.XSSFWorkbook; +import org.junit.jupiter.api.Test; +import org.xml.sax.SAXException; + +class XSSFFileHandler extends SpreadsheetHandler { + 
@Override + public void handleFile(InputStream stream, String path) throws Exception { + // ignore password protected files if password is unknown + String pass = Biff8EncryptionKey.getCurrentUserPassword(); + assumeFalse(pass == null && POIXMLDocumentHandler.isEncrypted(stream)); + + final XSSFWorkbook wb; + + // make sure the potentially large byte-array is freed up quickly again + { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + IOUtils.copy(stream, out); + ByteArrayInputStream bytes = new ByteArrayInputStream(out.toByteArray()); + + if (pass != null) { + POIFSFileSystem poifs = new POIFSFileSystem(bytes); + EncryptionInfo ei = new EncryptionInfo(poifs); + Decryptor dec = ei.getDecryptor(); + try { + boolean b = dec.verifyPassword(pass); + assertTrue( b, "password mismatch" ); + } catch (EncryptedDocumentException e) { + String msg = "Export Restrictions in place - please install JCE Unlimited Strength Jurisdiction Policy files"; + assumeFalse(msg.equals(e.getMessage())); + throw e; + } + InputStream is = dec.getDataStream(poifs); + out.reset(); + IOUtils.copy(is, out); + is.close(); + poifs.close(); + bytes = new ByteArrayInputStream(out.toByteArray()); + } + checkXSSFReader(OPCPackage.open(bytes)); + bytes.reset(); + wb = new XSSFWorkbook(bytes); + } + + // use the combined handler for HSSF/XSSF + handleWorkbook(wb); + + // TODO: some documents fail currently... 
+ //XSSFFormulaEvaluator evaluator = new XSSFFormulaEvaluator(wb); + //evaluator.evaluateAll(); + + // also verify general POIFS-stuff + new POIXMLDocumentHandler().handlePOIXMLDocument(wb); + + POIXMLDocumentHandler.cursorRecursive(wb.getCTWorkbook()); + for (Sheet sh : wb) { + POIXMLDocumentHandler.cursorRecursive(((XSSFSheet)sh).getCTWorksheet()); + } + + // and finally ensure that exporting to XML works + exportToXML(wb); + + // this allows to trigger a heap-dump at this point to see which memory is still allocated + //HeapDump.dumpHeap("/tmp/poi.hprof", false); + + wb.close(); + } + + + private void checkXSSFReader(OPCPackage p) throws IOException, OpenXML4JException { + XSSFReader reader = new XSSFReader(p); + + // these can be null... + InputStream sharedStringsData = reader.getSharedStringsData(); + if(sharedStringsData != null) { + sharedStringsData.close(); + } + reader.getSharedStringsTable(); + + InputStream stylesData = reader.getStylesData(); + if(stylesData != null) { + stylesData.close(); + } + reader.getStylesTable(); + + InputStream themesData = reader.getThemesData(); + if(themesData != null) { + themesData.close(); + } + + assertNotNull(reader.getWorkbookData()); + + Iterator<InputStream> sheetsData = reader.getSheetsData(); + while(sheetsData.hasNext()) { + InputStream str = sheetsData.next(); + str.close(); + } + } + + private void exportToXML(XSSFWorkbook wb) throws SAXException, + TransformerException { + for (XSSFMap map : wb.getCustomXMLMappings()) { + XSSFExportToXml exporter = new XSSFExportToXml(map); + + ByteArrayOutputStream os = new ByteArrayOutputStream(); + exporter.exportToXML(os, true); + } + } + + private static final Set<String> EXPECTED_ADDITIONAL_FAILURES = new HashSet<>(); + static { + // expected sheet-id not found + // EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/52348.xlsx"); + // EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/59021.xlsx"); + // zip-bomb + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764.xlsx"); + 
EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/54764-2.xlsx"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb.xlsx"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/poc-xmlbomb-empty.xlsx"); + // strict OOXML + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/57914.xlsx"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SampleSS.strict.xlsx"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/SimpleStrict.xlsx"); + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample.strict.xlsx"); + // TODO: good to ignore? + EXPECTED_ADDITIONAL_FAILURES.add("spreadsheet/sample-beta.xlsx"); + + // corrupt/invalid + EXPECTED_ADDITIONAL_FAILURES.add("openxml4j/invalid.xlsx"); + } + + @SuppressWarnings("resource") + @Override + public void handleAdditional(File file) throws Exception { + // redirect stdout as the examples often write lots of text + PrintStream oldOut = System.out; + String testFile = file.getParentFile().getName() + "/" + file.getName(); + try { + System.setOut(new NullPrintStream()); + FromHowTo.main(new String[]{file.getAbsolutePath()}); + XLSX2CSV.main(new String[]{file.getAbsolutePath()}); + ExcelComparator.main(new String[]{file.getAbsolutePath(), file.getAbsolutePath()}); + + assertFalse( EXPECTED_ADDITIONAL_FAILURES.contains(testFile), "Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!" 
); + + } catch (OLE2NotOfficeXmlFileException e) { + // we have some files that are not actually OOXML and thus cannot be tested here + } catch (IllegalArgumentException | InvalidFormatException | POIXMLException | IOException e) { + if(!EXPECTED_ADDITIONAL_FAILURES.contains(testFile)) { + throw e; + } + } finally { + System.setOut(oldOut); + } + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + void test() throws Exception { + File file = new File("test-data/spreadsheet/ref-56737.xlsx"); + + try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + } + + @Test + void testExtracting() throws Exception { + handleExtracting(new File("test-data/spreadsheet/ref-56737.xlsx")); + } + + @Test + void testAdditional() throws Exception { + handleAdditional(new File("test-data/spreadsheet/poc-xmlbomb.xlsx")); + } +} diff --git a/integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java b/integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java new file mode 100644 index 0000000000..0970d4fa1a --- /dev/null +++ b/integrationtest/src/test/java/org/apache/poi/stress/XWPFFileHandler.java @@ -0,0 +1,56 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. 
You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import java.io.BufferedInputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.InputStream; + +import org.apache.poi.ooxml.POIXMLException; +import org.apache.poi.xwpf.usermodel.XWPFDocument; +import org.junit.jupiter.api.Test; + +class XWPFFileHandler extends AbstractFileHandler { + @Override + public void handleFile(InputStream stream, String path) throws Exception { + // ignore password protected files + if (POIXMLDocumentHandler.isEncrypted(stream)) return; + + try (XWPFDocument doc = new XWPFDocument(stream)) { + + new POIXMLDocumentHandler().handlePOIXMLDocument(doc); + POIXMLDocumentHandler.cursorRecursive(doc.getDocument()); + } catch (POIXMLException e) { + Exception cause = (Exception)e.getCause(); + throw cause == null ? e : cause; + } + } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + @SuppressWarnings("java:S2699") + void test() throws Exception { + File file = new File("test-data/document/51921-Word-Crash067.docx"); + + try (InputStream stream = new BufferedInputStream(new FileInputStream(file))) { + handleFile(stream, file.getPath()); + } + + handleExtracting(file); + } +}
\ No newline at end of file |