aboutsummaryrefslogtreecommitdiffstats
path: root/src/integrationtest
diff options
context:
space:
mode:
author: Andreas Beeker <kiwiwings@apache.org> 2015-07-19 19:00:32 +0000
committer: Andreas Beeker <kiwiwings@apache.org> 2015-07-19 19:00:32 +0000
commit: 89ab6304a47d06a3f16178e6d6c7451474200c8c (patch)
tree: f24892a1d13eb23901d1d2673c46f79ed8f3b560 /src/integrationtest
parent: 9b09cb683ab24180411f033a8ba9ed2d6073ebca (diff)
parent: a27b7d5b2c80b11bc9d0c49170c684f0201b16fe (diff)
download: poi-89ab6304a47d06a3f16178e6d6c7451474200c8c.tar.gz
poi-89ab6304a47d06a3f16178e6d6c7451474200c8c.zip
merge trunk to common sl branch
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/common_sl@1691843 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/integrationtest')
-rw-r--r-- src/integrationtest/build.xml 145
-rw-r--r-- src/integrationtest/org/apache/poi/TestAllFiles.java 398
-rw-r--r-- src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java 143
-rw-r--r-- src/integrationtest/org/apache/poi/stress/FileHandler.java 7
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java 20
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java 2
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java 21
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java 9
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java 15
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java 7
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java 54
-rw-r--r-- src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java 2
-rw-r--r-- src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java 32
-rw-r--r-- src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java 2
-rw-r--r-- src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java 72
-rw-r--r-- src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java 7
-rw-r--r-- src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java 10
17 files changed, 756 insertions, 190 deletions
diff --git a/src/integrationtest/build.xml b/src/integrationtest/build.xml
new file mode 100644
index 0000000000..795788e441
--- /dev/null
+++ b/src/integrationtest/build.xml
@@ -0,0 +1,145 @@
+<?xml version="1.0"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<project name="POI Testbuild" default="run" basedir=".">
+
+ <description>Test-Ant file which verifies that the Apache POI distribution build sources can be compiled successfully.
+
+ Before running this, you should execute the "assemble" target in the main build.xml to have the packaged files
+ created correctly.
+
+ </description>
+
+ <property name="dist" value="../../build/dist"/>
+ <property name="build" value="../../build/distsourcebuild"/>
+
+ <!-- Placeholder target without dependencies or tasks; the other targets of this file depend on it. -->
+ <target name="init" depends="">
+ </target>
+
+ <!-- Default target of this project: runs runSourceBuild and then runCompileTest. -->
+ <target name="run" depends="init,runSourceBuild,runCompileTest"/>
+
+ <!--
+ Unpacks the latest poi-src-*.zip found in ${dist} into a freshly created ${build}
+ directory and invokes the "jar" target of the extracted build to check that the
+ source distribution can be built on its own.
+ -->
+ <target name="runSourceBuild" depends="init">
+ <!-- clean out old stuff in build-dir -->
+ <delete dir="${build}"/>
+ <mkdir dir="${build}"/>
+
+ <!-- select latest built source zip: sort the matching files by date and keep only the last one -->
+ <pathconvert property="srcpackage">
+ <last>
+ <sort>
+ <date xmlns="antlib:org.apache.tools.ant.types.resources.comparators"/>
+ <resources>
+ <fileset dir="${dist}">
+ <include name="poi-src-*.zip" />
+ </fileset>
+ </resources>
+ </sort>
+ </last>
+ </pathconvert>
+
+ <echo message="Found source package at ${srcpackage}"/>
+ <unzip src="${srcpackage}" dest="${build}" failOnEmptyArchive="true"/>
+
+ <!-- look for name of sub-dir, do this dynamically as it changes with every (beta|rc)-release -->
+ <!-- NOTE(review): this presumably relies on the zip containing exactly one top-level directory; confirm against the packaging -->
+ <pathconvert property="dirversion">
+ <dirset dir="${build}">
+ <include name="*" />
+ </dirset>
+ </pathconvert>
+
+ <!-- finally call Ant on the extracted source to check if we can build the packages -->
+ <!-- inheritAll/inheritRefs are disabled so that properties and references of this file are not passed to the nested build -->
+ <echo message="Building in temporary dir ${dirversion}/"/>
+ <ant dir="${dirversion}" target="jar" inheritAll="false" inheritRefs="false" useNativeBasedir="true"/>
+ </target>
+
+    <!--
+        Compiles the examples twice against the jars found in ${dist}:
+        first without the scratchpad jar (skipping the examples listed in "excludes"),
+        then once more with the scratchpad jar added and nothing excluded.
+    -->
+    <target name="runCompileTest" depends="init" description="Verify that we can compile most examples without including excelant or scratchpad jars">
+        <!-- clean out old stuff in build-dir -->
+        <delete dir="${build}"/>
+        <mkdir dir="${build}"/>
+
+        <!-- select latest built jar files without scratchpad.jar -->
+        <pathconvert property="jarpackage">
+            <sort>
+                <resources>
+                    <fileset dir="${dist}">
+                        <include name="poi-3.*.jar" />
+                        <include name="poi-ooxml-3.*.jar" />
+                        <include name="poi-ooxml-schemas-3.*.jar" />
+                        <exclude name="*-javadocs-*" />
+                        <exclude name="*-sources-*" />
+                    </fileset>
+                </resources>
+            </sort>
+        </pathconvert>
+
+        <echo message="Found jar packages at ${jarpackage}"/>
+
+        <!-- third-party libraries which are needed in addition to the POI jars -->
+        <path id="libs">
+            <fileset dir="../../lib">
+                <include name="junit*.jar" />
+            </fileset>
+            <fileset dir="../../ooxml-lib">
+                <include name="ooxml-schemas-*.jar" />
+                <include name="xmlbeans-*.jar" />
+                <exclude name="xmlbeans-2.3.*.jar" />
+            </fileset>
+        </path>
+
+        <echo message="Compiling examples without linking to scratchpad.jar to ensure that only some specific ones require this jar" />
+        <!-- debug has to be a value that Ant evaluates as true (true/yes/on), otherwise no debug information is generated -->
+        <javac srcdir="../examples/src" destdir="${build}"
+            target="1.6"
+            source="1.6"
+            debug="true"
+            encoding="ASCII"
+            fork="yes"
+            includeantruntime="false"
+            excludes="org/apache/poi/hslf/**,org/apache/poi/hsmf/**,**/EmbeddedObjects.java,**/EmeddedObjects.java,**/Word2Forrest.java"
+            classpath="${jarpackage}"
+            classpathref="libs">
+        </javac>
+
+        <!-- select latest built jar files with additionally scratchpad.jar -->
+        <pathconvert property="jarpackagescratchpad">
+            <sort>
+                <resources>
+                    <fileset dir="${dist}">
+                        <include name="poi-3.*.jar" />
+                        <include name="poi-ooxml-3.*.jar" />
+                        <include name="poi-ooxml-schemas-3.*.jar" />
+                        <include name="poi-scratchpad-3.*.jar" />
+                        <exclude name="*-javadocs-*" />
+                        <exclude name="*-sources-*" />
+                    </fileset>
+                </resources>
+            </sort>
+        </pathconvert>
+
+        <echo message="Compiling all examples with the additional scratchpad.jar" />
+        <javac srcdir="../examples/src" destdir="${build}"
+            target="1.6"
+            source="1.6"
+            debug="true"
+            encoding="ASCII"
+            fork="yes"
+            includeantruntime="false"
+            classpath="${jarpackagescratchpad}"
+            classpathref="libs">
+        </javac>
+    </target>
+</project>
diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java
index 4608303537..85b0580841 100644
--- a/src/integrationtest/org/apache/poi/TestAllFiles.java
+++ b/src/integrationtest/org/apache/poi/TestAllFiles.java
@@ -31,6 +31,7 @@ import java.util.List;
import java.util.Map;
import java.util.Set;
+import org.apache.poi.hwpf.OldWordFileFormatException;
import org.apache.poi.stress.*;
import org.apache.tools.ant.DirectoryScanner;
import org.junit.Test;
@@ -65,83 +66,91 @@ import org.junit.runners.Parameterized.Parameters;
*/
@RunWith(Parameterized.class)
public class TestAllFiles {
- private static final File ROOT_DIR = new File("test-data");
+ private static final File ROOT_DIR = new File("test-data");
// map file extensions to the actual mappers
- private static final Map<String, FileHandler> HANDLERS = new HashMap<String, FileHandler>();
- static {
- // Excel
- HANDLERS.put(".xls", new HSSFFileHandler());
- HANDLERS.put(".xlsx", new XSSFFileHandler());
- HANDLERS.put(".xlsm", new XSSFFileHandler());
- HANDLERS.put(".xltx", new XSSFFileHandler());
- HANDLERS.put(".xlsb", new XSSFFileHandler());
-
- // Word
- HANDLERS.put(".doc", new HWPFFileHandler());
- HANDLERS.put(".docx", new XWPFFileHandler());
- HANDLERS.put(".dotx", new XWPFFileHandler());
- HANDLERS.put(".docm", new XWPFFileHandler());
- HANDLERS.put(".ooxml", new XWPFFileHandler()); // OPCPackage
-
- // Powerpoint
- HANDLERS.put(".ppt", new HSLFFileHandler());
- HANDLERS.put(".pptx", new XSLFFileHandler());
- HANDLERS.put(".pptm", new XSLFFileHandler());
- HANDLERS.put(".ppsm", new XSLFFileHandler());
- HANDLERS.put(".ppsx", new XSLFFileHandler());
- HANDLERS.put(".thmx", new XSLFFileHandler());
-
- // Outlook
- HANDLERS.put(".msg", new HSMFFileHandler());
-
- // Publisher
- HANDLERS.put(".pub", new HPBFFileHandler());
-
- // Visio
- HANDLERS.put(".vsd", new HDGFFileHandler());
-
- // POIFS
- HANDLERS.put(".ole2", new POIFSFileHandler());
-
- // Microsoft Admin Template?
- HANDLERS.put(".adm", new HPSFFileHandler());
-
- // Microsoft TNEF
- HANDLERS.put(".dat", new HMEFFileHandler());
-
- // TODO: are these readable by some of the formats?
- HANDLERS.put(".shw", new NullFileHandler());
- HANDLERS.put(".zvi", new NullFileHandler());
- HANDLERS.put(".mpp", new NullFileHandler());
- HANDLERS.put(".qwp", new NullFileHandler());
- HANDLERS.put(".wps", new NullFileHandler());
- HANDLERS.put(".bin", new NullFileHandler());
- HANDLERS.put(".xps", new NullFileHandler());
- HANDLERS.put(".sldprt", new NullFileHandler());
- HANDLERS.put(".mdb", new NullFileHandler());
- HANDLERS.put(".vml", new NullFileHandler());
-
- // ignore some file types, images, other formats, ...
- HANDLERS.put(".txt", new NullFileHandler());
- HANDLERS.put(".pdf", new NullFileHandler());
- HANDLERS.put(".rtf", new NullFileHandler());
- HANDLERS.put(".gif", new NullFileHandler());
- HANDLERS.put(".html", new NullFileHandler());
- HANDLERS.put(".png", new NullFileHandler());
- HANDLERS.put(".wmf", new NullFileHandler());
- HANDLERS.put(".emf", new NullFileHandler());
- HANDLERS.put(".dib", new NullFileHandler());
- HANDLERS.put(".svg", new NullFileHandler());
- HANDLERS.put(".pict", new NullFileHandler());
- HANDLERS.put(".jpg", new NullFileHandler());
- HANDLERS.put(".wav", new NullFileHandler());
- HANDLERS.put(".pfx", new NullFileHandler());
- HANDLERS.put(".xml", new NullFileHandler());
- HANDLERS.put(".csv", new NullFileHandler());
-
- // map some files without extension
- HANDLERS.put("spreadsheet/BigSSTRecord", new NullFileHandler());
+ private static final Map<String, FileHandler> HANDLERS = new HashMap<String, FileHandler>();
+ static {
+ // Excel
+ HANDLERS.put(".xls", new HSSFFileHandler());
+ HANDLERS.put(".xlsx", new XSSFFileHandler());
+ HANDLERS.put(".xlsm", new XSSFFileHandler());
+ HANDLERS.put(".xltx", new XSSFFileHandler());
+ HANDLERS.put(".xlsb", new XSSFFileHandler());
+
+ // Word
+ HANDLERS.put(".doc", new HWPFFileHandler());
+ HANDLERS.put(".docx", new XWPFFileHandler());
+ HANDLERS.put(".dotx", new XWPFFileHandler());
+ HANDLERS.put(".docm", new XWPFFileHandler());
+ HANDLERS.put(".ooxml", new XWPFFileHandler()); // OPCPackage
+
+ // Powerpoint
+ HANDLERS.put(".ppt", new HSLFFileHandler());
+ HANDLERS.put(".pptx", new XSLFFileHandler());
+ HANDLERS.put(".pptm", new XSLFFileHandler());
+ HANDLERS.put(".ppsm", new XSLFFileHandler());
+ HANDLERS.put(".ppsx", new XSLFFileHandler());
+ HANDLERS.put(".thmx", new XSLFFileHandler());
+
+ // Outlook
+ HANDLERS.put(".msg", new HSMFFileHandler());
+
+ // Publisher
+ HANDLERS.put(".pub", new HPBFFileHandler());
+
+ // Visio - binary
+ HANDLERS.put(".vsd", new HDGFFileHandler());
+
+ // Visio - ooxml (currently unsupported)
+ HANDLERS.put(".vsdm", new NullFileHandler());
+ HANDLERS.put(".vsdx", new NullFileHandler());
+ HANDLERS.put(".vssm", new NullFileHandler());
+ HANDLERS.put(".vssx", new NullFileHandler());
+ HANDLERS.put(".vstm", new NullFileHandler());
+ HANDLERS.put(".vstx", new NullFileHandler());
+
+ // POIFS
+ HANDLERS.put(".ole2", new POIFSFileHandler());
+
+ // Microsoft Admin Template?
+ HANDLERS.put(".adm", new HPSFFileHandler());
+
+ // Microsoft TNEF
+ HANDLERS.put(".dat", new HMEFFileHandler());
+
+ // TODO: are these readable by some of the formats?
+ HANDLERS.put(".shw", new NullFileHandler());
+ HANDLERS.put(".zvi", new NullFileHandler());
+ HANDLERS.put(".mpp", new NullFileHandler());
+ HANDLERS.put(".qwp", new NullFileHandler());
+ HANDLERS.put(".wps", new NullFileHandler());
+ HANDLERS.put(".bin", new NullFileHandler());
+ HANDLERS.put(".xps", new NullFileHandler());
+ HANDLERS.put(".sldprt", new NullFileHandler());
+ HANDLERS.put(".mdb", new NullFileHandler());
+ HANDLERS.put(".vml", new NullFileHandler());
+
+ // ignore some file types, images, other formats, ...
+ HANDLERS.put(".txt", new NullFileHandler());
+ HANDLERS.put(".pdf", new NullFileHandler());
+ HANDLERS.put(".rtf", new NullFileHandler());
+ HANDLERS.put(".gif", new NullFileHandler());
+ HANDLERS.put(".html", new NullFileHandler());
+ HANDLERS.put(".png", new NullFileHandler());
+ HANDLERS.put(".wmf", new NullFileHandler());
+ HANDLERS.put(".emf", new NullFileHandler());
+ HANDLERS.put(".dib", new NullFileHandler());
+ HANDLERS.put(".svg", new NullFileHandler());
+ HANDLERS.put(".pict", new NullFileHandler());
+ HANDLERS.put(".jpg", new NullFileHandler());
+ HANDLERS.put(".wav", new NullFileHandler());
+ HANDLERS.put(".pfx", new NullFileHandler());
+ HANDLERS.put(".xml", new NullFileHandler());
+ HANDLERS.put(".csv", new NullFileHandler());
+
+ // map some files without extension
+ HANDLERS.put("spreadsheet/BigSSTRecord", new NullFileHandler());
HANDLERS.put("spreadsheet/BigSSTRecord2", new NullFileHandler());
HANDLERS.put("spreadsheet/BigSSTRecord2CR1", new NullFileHandler());
HANDLERS.put("spreadsheet/BigSSTRecord2CR2", new NullFileHandler());
@@ -151,88 +160,104 @@ public class TestAllFiles {
HANDLERS.put("spreadsheet/BigSSTRecord2CR6", new NullFileHandler());
HANDLERS.put("spreadsheet/BigSSTRecord2CR7", new NullFileHandler());
HANDLERS.put("spreadsheet/BigSSTRecordCR", new NullFileHandler());
- HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler());
- }
-
- private static final Set<String> EXPECTED_FAILURES = new HashSet<String>();
- static {
- // password protected files
- EXPECTED_FAILURES.add("spreadsheet/password.xls");
- EXPECTED_FAILURES.add("spreadsheet/51832.xls");
- EXPECTED_FAILURES.add("document/PasswordProtected.doc");
- EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt");
- EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt");
- EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt");
- EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt");
- //EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx");
- //EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx");
- EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
+ HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler());
+ }
+
+ // Old Word Documents where we can at least extract some text
+ private static final Set<String> OLD_FILES = new HashSet<String>();
+ static {
+ OLD_FILES.add("document/Bug49933.doc");
+ OLD_FILES.add("document/Bug51944.doc");
+ OLD_FILES.add("document/Word6.doc");
+ OLD_FILES.add("document/Word6_sections.doc");
+ OLD_FILES.add("document/Word6_sections2.doc");
+ OLD_FILES.add("document/Word95.doc");
+ OLD_FILES.add("document/word95err.doc");
+ OLD_FILES.add("hpsf/TestMickey.doc");
+ OLD_FILES.add("document/52117.doc");
+ }
+
+ private static final Set<String> EXPECTED_FAILURES = new HashSet<String>();
+ static {
+ // password protected files
+ EXPECTED_FAILURES.add("spreadsheet/password.xls");
+ EXPECTED_FAILURES.add("spreadsheet/protected_passtika.xlsx");
+ EXPECTED_FAILURES.add("spreadsheet/51832.xls");
+ EXPECTED_FAILURES.add("document/PasswordProtected.doc");
+ EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt");
+ EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt");
+ EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt");
+ EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt");
+ //EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx");
+ //EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx");
+ EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls");
EXPECTED_FAILURES.add("spreadsheet/35897-type4.xls");
//EXPECTED_FAILURES.add("poifs/protect.xlsx");
//EXPECTED_FAILURES.add("poifs/protected_sha512.xlsx");
//EXPECTED_FAILURES.add("poifs/extenxls_pwd123.xlsx");
//EXPECTED_FAILURES.add("poifs/protected_agile.docx");
-
- // TODO: fails XMLExportTest, is this ok?
- EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx");
- EXPECTED_FAILURES.add("spreadsheet/55864.xlsx");
-
- // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
- EXPECTED_FAILURES.add("spreadsheet/44958.xls");
- EXPECTED_FAILURES.add("spreadsheet/44958_1.xls");
- EXPECTED_FAILURES.add("spreadsheet/testArraysAndTables.xls");
-
- // TODO: good to ignore?
- EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx");
- EXPECTED_FAILURES.add("spreadsheet/49931.xls");
- EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml");
-
- // This is actually a spreadsheet!
- EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc");
-
- // some files that are broken, Excel 5.0/95, Word 95, ...
- EXPECTED_FAILURES.add("spreadsheet/43493.xls");
- EXPECTED_FAILURES.add("spreadsheet/46904.xls");
- EXPECTED_FAILURES.add("document/56880.doc");
- EXPECTED_FAILURES.add("document/Bug49933.doc");
- EXPECTED_FAILURES.add("document/Bug50955.doc");
- EXPECTED_FAILURES.add("document/Bug51944.doc");
- EXPECTED_FAILURES.add("document/Word6.doc");
- EXPECTED_FAILURES.add("document/Word6_sections.doc");
- EXPECTED_FAILURES.add("document/Word6_sections2.doc");
- EXPECTED_FAILURES.add("document/Word95.doc");
- EXPECTED_FAILURES.add("document/word95err.doc");
- EXPECTED_FAILURES.add("hpsf/TestMickey.doc");
- EXPECTED_FAILURES.add("slideshow/PPT95.ppt");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx");
- EXPECTED_FAILURES.add("openxml4j/OPCCompliance_DerivedPartNameFAIL.docx");
- EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764()
- EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764()
+
+ // TODO: fails XMLExportTest, is this ok?
+ EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx");
+ EXPECTED_FAILURES.add("spreadsheet/55864.xlsx");
+
+ // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()!
+ EXPECTED_FAILURES.add("spreadsheet/44958.xls");
+ EXPECTED_FAILURES.add("spreadsheet/44958_1.xls");
+ EXPECTED_FAILURES.add("spreadsheet/testArraysAndTables.xls");
+
+ // TODO: good to ignore?
+ EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx");
+ EXPECTED_FAILURES.add("spreadsheet/49931.xls");
+ EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml");
+
+ // This is actually a spreadsheet!
+ EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc");
+
+ // some files that are broken, eg Word 95, ...
+ EXPECTED_FAILURES.add("spreadsheet/43493.xls");
+ EXPECTED_FAILURES.add("spreadsheet/46904.xls");
+ EXPECTED_FAILURES.add("document/56880.doc");
+ EXPECTED_FAILURES.add("document/Bug50955.doc");
+ EXPECTED_FAILURES.add("slideshow/PPT95.ppt");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx");
+ EXPECTED_FAILURES.add("openxml4j/OPCCompliance_DerivedPartNameFAIL.docx");
+ EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764()
+ EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764()
EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb");
+ EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted
+ EXPECTED_FAILURES.add("poifs/only-zero-byte-streams.ole2"); // No actual contents
+
+ // old Excel files, which we only support simple text extraction of
EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls");
EXPECTED_FAILURES.add("spreadsheet/testEXCEL_3.xls");
EXPECTED_FAILURES.add("spreadsheet/testEXCEL_4.xls");
EXPECTED_FAILURES.add("spreadsheet/testEXCEL_5.xls");
EXPECTED_FAILURES.add("spreadsheet/testEXCEL_95.xls");
+
+ // OOXML Strict is not yet supported, see bug #57699
+ EXPECTED_FAILURES.add("spreadsheet/SampleSS.strict.xlsx");
+ EXPECTED_FAILURES.add("spreadsheet/SimpleStrict.xlsx");
+ EXPECTED_FAILURES.add("spreadsheet/sample.strict.xlsx");
+
+ // non-TNEF files
+ EXPECTED_FAILURES.add("ddf/Container.dat");
+ EXPECTED_FAILURES.add("ddf/47143.dat");
+ }
- // non-TNEF files
- EXPECTED_FAILURES.add("ddf/Container.dat");
- EXPECTED_FAILURES.add("ddf/47143.dat");
- }
-
@Parameters(name="{index}: {0} using {1}")
public static Iterable<Object[]> files() {
DirectoryScanner scanner = new DirectoryScanner();
scanner.setBasedir(ROOT_DIR);
scanner.setExcludes(new String[] { "**/.svn/**" });
-
+
scanner.scan();
-
+
System.out.println("Handling " + scanner.getIncludedFiles().length + " files");
List<Object[]> files = new ArrayList<Object[]>();
@@ -240,47 +265,72 @@ public class TestAllFiles {
file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise
files.add(new Object[] { file, HANDLERS.get(getExtension(file)) });
}
-
+
return files;
- }
-
+ }
+
@Parameter(value=0)
public String file;
-
+
@Parameter(value=1)
public FileHandler handler;
-
+
@Test
public void testAllFiles() throws Exception {
- assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler);
- InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)),100);
- try {
- handler.handleFile(stream);
-
- assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!",
- EXPECTED_FAILURES.contains(file));
- } catch (Exception e) {
- // check if we expect failure for this file
- if(!EXPECTED_FAILURES.contains(file)) {
- throw new Exception("While handling " + file, e);
- }
- } finally {
- stream.close();
- }
- }
-
- private static String getExtension(String file) {
- int pos = file.lastIndexOf('.');
- if(pos == -1 || pos == file.length()-1) {
- return file;
- }
-
- return file.substring(pos);
- }
-
- private static class NullFileHandler implements FileHandler {
- @Override
+ assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler);
+ File inputFile = new File(ROOT_DIR, file);
+
+ try {
+ InputStream stream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024);
+ try {
+ handler.handleFile(stream);
+
+ assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!",
+ OLD_FILES.contains(file));
+ } finally {
+ stream.close();
+ }
+
+ handler.handleExtracting(inputFile);
+
+ assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!",
+ EXPECTED_FAILURES.contains(file));
+ } catch (OldWordFileFormatException e) {
+ // for old word files we should still support extracting text
+ if(OLD_FILES.contains(file)) {
+ handler.handleExtracting(inputFile);
+ } else {
+ // check if we expect failure for this file
+ if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
+ System.out.println("Failed: " + file);
+ throw new Exception("While handling " + file, e);
+ }
+ }
+ } catch (Exception e) {
+ // check if we expect failure for this file
+ if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
+ System.out.println("Failed: " + file);
+ throw new Exception("While handling " + file, e);
+ }
+ }
+ }
+
+ private static String getExtension(String file) {
+ int pos = file.lastIndexOf('.');
+ if(pos == -1 || pos == file.length()-1) {
+ return file;
+ }
+
+ return file.substring(pos);
+ }
+
+ private static class NullFileHandler implements FileHandler {
+ @Override
public void handleFile(InputStream stream) throws Exception {
- }
- }
+ }
+
+ @Override
+ public void handleExtracting(File file) throws Exception {
+ }
+ }
}
diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
new file mode 100644
index 0000000000..55e2c368f2
--- /dev/null
+++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
@@ -0,0 +1,143 @@
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.zip.ZipException;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.dev.OOXMLPrettyPrint;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.xmlbeans.XmlException;
+
+/**
+ * Base class for {@link FileHandler} implementations which provides a generic
+ * {@link #handleExtracting(File)} built on top of {@link ExtractorFactory}.
+ */
+public abstract class AbstractFileHandler implements FileHandler {
+    /**
+     * Files for which text extraction is expected to fail, each one
+     * stated as "&lt;directory&gt;/&lt;file name&gt;".
+     */
+    public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
+    static {
+        // password protected files
+        EXPECTED_EXTRACTOR_FAILURES.add("document/bug53475-password-is-pass.docx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/extenxls_pwd123.xlsx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/protect.xlsx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_agile.docx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_sha512.xlsx");
+
+        // unsupported file-types, no supported OLE2 parts
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm");
+        EXPECTED_EXTRACTOR_FAILURES.add("hsmf/attachment_msg_pdf.msg");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2");
+        EXPECTED_EXTRACTOR_FAILURES.add("slideshow/testPPT.thmx");
+    }
+
+    /**
+     * Runs the extraction checks twice, first with event based extractors
+     * being preferred and then without this preference. The preference which
+     * was set for the current thread before the call is restored afterwards.
+     *
+     * @param file the file to extract text from
+     * @throws Exception if creating or using one of the extractors fails
+     */
+    @Override
+    public void handleExtracting(File file) throws Exception {
+        boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
+        try {
+            ExtractorFactory.setThreadPrefersEventExtractors(true);
+            handleExtractingInternal(file);
+
+            ExtractorFactory.setThreadPrefersEventExtractors(false);
+            handleExtractingInternal(file);
+        } finally {
+            ExtractorFactory.setThreadPrefersEventExtractors(before);
+        }
+
+        /* Did fail for some documents with special XML contents...
+        try {
+            OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
+                    "/tmp/pretty-" + file.getName() });
+        } catch (ZipException e) {
+            // ignore, not a Zip/OOXML file
+        }*/
+    }
+
+    /**
+     * Builds the "&lt;directory&gt;/&lt;file name&gt;" key under which a file is
+     * listed in {@link #EXPECTED_EXTRACTOR_FAILURES}.
+     *
+     * @param file the file which is currently handled
+     * @return the name of the parent directory and the file name joined by a
+     *      slash, or only the file name if the file has no parent
+     */
+    private static String getFailureKey(File file) {
+        File parent = file.getParentFile();
+        if(parent == null) {
+            return file.getName();
+        }
+
+        return parent.getName() + "/" + file.getName();
+    }
+
+    /**
+     * Extracts text and metadata text via a file based extractor, verifies
+     * that size and modification time of the file are unchanged and then
+     * repeats the text extraction via a stream based extractor.
+     *
+     * @param file the file to extract text from
+     * @throws Exception if extraction fails; an IllegalArgumentException is
+     *      only tolerated for files listed in {@link #EXPECTED_EXTRACTOR_FAILURES}
+     */
+    private void handleExtractingInternal(File file) throws Exception {
+        long length = file.length();
+        long modified = file.lastModified();
+
+        // the set of expected failures holds Strings, so it cannot be queried with the File itself
+        String failureKey = getFailureKey(file);
+
+        POITextExtractor extractor = ExtractorFactory.createExtractor(file);
+
+        // checked outside of the try-block, otherwise close() in the finally-block
+        // would replace the assertion failure with a NullPointerException
+        assertNotNull(extractor);
+        try {
+            assertNotNull(extractor.getText());
+
+            // also try metadata
+            POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor();
+            assertNotNull(metadataExtractor.getText());
+
+            assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
+                    EXPECTED_EXTRACTOR_FAILURES.contains(failureKey));
+
+            assertEquals("File should not be modified by extractor", length, file.length());
+            assertEquals("File should not be modified by extractor", modified, file.lastModified());
+
+            handleExtractingAsStream(file);
+
+            if(extractor instanceof POIOLE2TextExtractor) {
+                HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor);
+                try {
+                    assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
+                    assertNotNull(hpsfExtractor.getSummaryInformationText());
+                    String text = hpsfExtractor.getText();
+                    //System.out.println(text);
+                    assertNotNull(text);
+                } finally {
+                    hpsfExtractor.close();
+                }
+            }
+        } catch (IllegalArgumentException e) {
+            if(!EXPECTED_EXTRACTOR_FAILURES.contains(failureKey)) {
+                throw new Exception("While handling " + file, e);
+            }
+        } finally {
+            extractor.close();
+        }
+    }
+
+    /**
+     * Opens the file as stream and verifies that an extractor created from
+     * this stream returns text.
+     *
+     * @param file the file to extract text from
+     */
+    private void handleExtractingAsStream(File file) throws FileNotFoundException,
+            IOException, InvalidFormatException, OpenXML4JException,
+            XmlException {
+        InputStream stream = new FileInputStream(file);
+        try {
+            POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream);
+
+            // checked outside of the inner try-block so that close() is not called on null
+            assertNotNull(streamExtractor);
+            try {
+                assertNotNull(streamExtractor.getText());
+            } finally {
+                streamExtractor.close();
+            }
+        } finally {
+            stream.close();
+        }
+    }
+}
diff --git a/src/integrationtest/org/apache/poi/stress/FileHandler.java b/src/integrationtest/org/apache/poi/stress/FileHandler.java
index e6f3385f02..ce2991b0bc 100644
--- a/src/integrationtest/org/apache/poi/stress/FileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/FileHandler.java
@@ -16,6 +16,7 @@
==================================================================== */
package org.apache.poi.stress;
+import java.io.File;
import java.io.InputStream;
/**
@@ -34,4 +35,10 @@ public interface FileHandler {
* @throws Exception
*/
void handleFile(InputStream stream) throws Exception;
+
+ /**
+ * Ensures that extracting text from the given file
+ * is returning some text.
+ */
+ void handleExtracting(File file) throws Exception;
}
diff --git a/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java
index 7fac6647a3..b9fe93a668 100644
--- a/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java
@@ -19,10 +19,12 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
import static org.junit.Assert.assertTrue;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.hdgf.HDGFDiagram;
+import org.apache.poi.hdgf.extractor.VisioTextExtractor;
import org.apache.poi.hdgf.streams.Stream;
import org.apache.poi.hdgf.streams.TrailerStream;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
@@ -48,11 +50,27 @@ public class HDGFFileHandler extends POIFSFileHandler {
// a test-case to test this locally without executing the full TestAllFiles
@Test
public void test() throws Exception {
- InputStream stream = new FileInputStream("test-data/diagram/44501.vsd");
+ File file = new File("test-data/diagram/44501.vsd");
+
+ InputStream stream = new FileInputStream(file);
try {
handleFile(stream);
} finally {
stream.close();
}
+
+ handleExtracting(file);
+
+ stream = new FileInputStream(file);
+ try {
+ VisioTextExtractor extractor = new VisioTextExtractor(stream);
+ try {
+ assertNotNull(extractor.getText());
+ } finally {
+ extractor.close();
+ }
+ } finally {
+ stream.close();
+ }
}
}
diff --git a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java
index dfa8750058..9f492bf0ed 100644
--- a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java
@@ -26,7 +26,7 @@ import org.apache.poi.hmef.attribute.MAPIAttribute;
import org.apache.poi.hmef.attribute.MAPIStringAttribute;
import org.junit.Test;
-public class HMEFFileHandler implements FileHandler {
+public class HMEFFileHandler extends AbstractFileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
diff --git a/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java
index 31ad8bc123..a41b6ebadf 100644
--- a/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java
@@ -18,10 +18,12 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.hpbf.HPBFDocument;
+import org.apache.poi.hpbf.extractor.PublisherTextExtractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.Test;
@@ -39,11 +41,28 @@ public class HPBFFileHandler extends POIFSFileHandler {
// a test-case to test this locally without executing the full TestAllFiles
@Test
public void test() throws Exception {
- InputStream stream = new FileInputStream("test-data/publisher/SampleBrochure.pub");
+ File file = new File("test-data/publisher/SampleBrochure.pub");
+
+ InputStream stream = new FileInputStream(file);
try {
handleFile(stream);
} finally {
stream.close();
}
+
+ handleExtracting(file);
+
+ stream = new FileInputStream(file);
+ try {
+ PublisherTextExtractor extractor = new PublisherTextExtractor(stream);
+ try {
+ assertNotNull(extractor.getText());
+ } finally {
+ extractor.close();
+ }
+ } finally {
+ stream.close();
+ }
}
+
}
diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
index b7d846ae62..6a53b2e009 100644
--- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
@@ -18,6 +18,7 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
@@ -25,7 +26,7 @@ import org.apache.poi.hpsf.HPSFPropertiesOnlyDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
import org.junit.Test;
-public class HPSFFileHandler implements FileHandler {
+public class HPSFFileHandler extends AbstractFileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(new POIFSFileSystem(stream));
@@ -43,4 +44,10 @@ public class HPSFFileHandler implements FileHandler {
stream.close();
}
}
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ public void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/hpsf/TestBug44375.xls"));
+ }
}
diff --git a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java
index 9de8b798c5..d68504a04c 100644
--- a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java
@@ -22,6 +22,8 @@ import java.io.FileInputStream;
import java.io.InputStream;
import org.apache.poi.hsmf.MAPIMessage;
+import org.apache.poi.hsmf.datatypes.AttachmentChunks;
+import org.apache.poi.hsmf.datatypes.DirectoryChunk;
import org.junit.Test;
public class HSMFFileHandler extends POIFSFileHandler {
@@ -32,6 +34,19 @@ public class HSMFFileHandler extends POIFSFileHandler {
assertNotNull(mapi.getDisplayBCC());
assertNotNull(mapi.getMessageDate());
+ AttachmentChunks[] attachments = mapi.getAttachmentFiles();
+
+ for(AttachmentChunks attachment : attachments) {
+
+ DirectoryChunk chunkDirectory = attachment.attachmentDirectory;
+ if(chunkDirectory != null) {
+ MAPIMessage attachmentMSG = chunkDirectory.getAsEmbededMessage();
+ assertNotNull(attachmentMSG);
+ String body = attachmentMSG.getTextBody();
+ assertNotNull(body);
+ }
+ }
+
/* => Writing isn't yet supported...
// write out the file
File file = TempFile.createTempFile("StressTest", ".msg");
diff --git a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java
index 19dbd97a0e..dd579c4dba 100644
--- a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java
@@ -16,6 +16,7 @@
==================================================================== */
package org.apache.poi.stress;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
@@ -49,4 +50,10 @@ public class HSSFFileHandler extends SpreadsheetHandler {
stream.close();
}
}
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ public void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/spreadsheet/BOOK_in_capitals.xls"));
+ }
} \ No newline at end of file
diff --git a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java
index 1b6d4646c7..a56ddd2dc6 100644
--- a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java
@@ -18,12 +18,21 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.io.InputStream;
+import java.io.PrintWriter;
+import java.io.StringWriter;
+import org.apache.poi.hdf.extractor.WordDocument;
import org.apache.poi.hwpf.HWPFDocument;
+import org.apache.poi.hwpf.extractor.WordExtractor;
import org.junit.Test;
+@SuppressWarnings("deprecation")
public class HWPFFileHandler extends POIFSFileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
@@ -33,16 +42,57 @@ public class HWPFFileHandler extends POIFSFileHandler {
assertNotNull(doc.getEndnotes());
handlePOIDocument(doc);
+
+ // handleWordDocument() fails for many documents, but the HDF WordDocument it exercises is deprecated anyway...
+ // handleWordDocument(doc);
+ }
+
+ protected void handleWordDocument(HWPFDocument doc) throws IOException {
+ ByteArrayOutputStream outStream = new ByteArrayOutputStream();
+ doc.write(outStream);
+
+ WordDocument wordDoc = new WordDocument(new ByteArrayInputStream(outStream.toByteArray()));
+
+ StringWriter docTextWriter = new StringWriter();
+ PrintWriter out = new PrintWriter(docTextWriter);
+ try {
+ wordDoc.writeAllText(out);
+ } finally {
+ out.close();
+ }
+ docTextWriter.close();
}
// a test-case to test this locally without executing the full TestAllFiles
@Test
public void test() throws Exception {
- InputStream stream = new FileInputStream("test-data/document/HeaderFooterUnicode.doc");
+ File file = new File("test-data/document/52117.doc");
+
+ InputStream stream = new FileInputStream(file);
try {
handleFile(stream);
} finally {
stream.close();
}
+
+ handleExtracting(file);
+
+ stream = new FileInputStream(file);
+ try {
+ WordExtractor extractor = new WordExtractor(stream);
+ try {
+ assertNotNull(extractor.getText());
+ } finally {
+ extractor.close();
+ }
+ } finally {
+ stream.close();
+ }
+ }
+
+ @Test
+ public void testExtractingOld() throws Exception {
+ File file = new File("test-data/document/52117.doc");
+ handleExtracting(file);
}
-} \ No newline at end of file
+}
diff --git a/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java b/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java
index 31deac7106..5c4a36e3ca 100644
--- a/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java
@@ -25,7 +25,7 @@ import java.io.InputStream;
import org.apache.poi.POIDocument;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-public class POIFSFileHandler implements FileHandler {
+public class POIFSFileHandler extends AbstractFileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
diff --git a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java
index 103bb9be7e..7b0821dcc0 100644
--- a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java
@@ -20,9 +20,17 @@ import static org.junit.Assert.assertNotNull;
import java.io.IOException;
import java.io.InputStream;
+import java.util.List;
import org.apache.poi.POIXMLDocument;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.openxml4j.opc.PackageAccess;
+import org.apache.poi.openxml4j.opc.PackagePart;
+import org.apache.poi.poifs.crypt.Decryptor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.junit.Ignore;
+import org.junit.Test;
public final class POIXMLDocumentHandler {
protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception {
@@ -36,11 +44,33 @@ public final class POIXMLDocumentHandler {
protected static boolean isEncrypted(InputStream stream) throws IOException {
if (POIFSFileSystem.hasPOIFSHeader(stream)) {
POIFSFileSystem poifs = new POIFSFileSystem(stream);
- if (poifs.getRoot().hasEntry("EncryptedPackage")) {
+ if (poifs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) {
return true;
}
throw new IOException("wrong file format or file extension for OO XML file");
}
return false;
}
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Ignore("POIXMLDocument cannot handle this Visio file currently...")
+ @Test
+ public void test() throws Exception {
+ OPCPackage pkg = OPCPackage.open("test-data/diagram/test.vsdx", PackageAccess.READ);
+ try {
+ handlePOIXMLDocument(new TestPOIXMLDocument(pkg));
+ } finally {
+ pkg.close();
+ }
+ }
+
+ private final static class TestPOIXMLDocument extends POIXMLDocument {
+ public TestPOIXMLDocument(OPCPackage pkg) {
+ super(pkg);
+ }
+
+ public List<PackagePart> getAllEmbedds() throws OpenXML4JException {
+ return null;
+ }
+ }
}
diff --git a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java
index aad703ce98..f12bbd2de5 100644
--- a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java
@@ -30,7 +30,7 @@ import org.apache.poi.ss.usermodel.Sheet;
import org.apache.poi.ss.usermodel.Workbook;
import org.apache.poi.ss.usermodel.WorkbookFactory;
-public abstract class SpreadsheetHandler implements FileHandler {
+public abstract class SpreadsheetHandler extends AbstractFileHandler {
public void handleWorkbook(Workbook wb, String extension) throws IOException {
// try to access some of the content
readContent(wb);
diff --git a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
index 3464218fd9..2669238be1 100644
--- a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java
@@ -18,35 +18,97 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
+import java.awt.Dimension;
+import java.awt.Graphics2D;
+import java.awt.image.BufferedImage;
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.FileInputStream;
+import java.io.IOException;
import java.io.InputStream;
import org.apache.poi.openxml4j.opc.OPCPackage;
+import org.apache.poi.sl.draw.DrawFactory;
+import org.apache.poi.sl.draw.Drawable;
import org.apache.poi.xslf.XSLFSlideShow;
+import org.apache.poi.xslf.usermodel.XMLSlideShow;
+import org.apache.poi.xslf.usermodel.XSLFNotes;
+import org.apache.poi.xslf.usermodel.XSLFShape;
+import org.apache.poi.xslf.usermodel.XSLFSlide;
+import org.apache.poi.xslf.usermodel.XSLFTextParagraph;
+import org.apache.poi.xslf.usermodel.XSLFTextShape;
import org.junit.Test;
-public class XSLFFileHandler implements FileHandler {
+public class XSLFFileHandler extends AbstractFileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
- // ignore password protected files
- if (POIXMLDocumentHandler.isEncrypted(stream)) return;
-
XSLFSlideShow slide = new XSLFSlideShow(OPCPackage.open(stream));
assertNotNull(slide.getPresentation());
assertNotNull(slide.getSlideMasterReferences());
assertNotNull(slide.getSlideReferences());
new POIXMLDocumentHandler().handlePOIXMLDocument(slide);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ try {
+ slide.write(out);
+ } finally {
+ out.close();
+ }
+
+ createBitmaps(out);
}
+ private void createBitmaps(ByteArrayOutputStream out) throws IOException {
+ XMLSlideShow ppt = new XMLSlideShow(new ByteArrayInputStream(out.toByteArray()));
+ Dimension pgsize = ppt.getPageSize();
+ for (XSLFSlide xmlSlide : ppt.getSlides()) {
+// System.out.println("slide-" + (i + 1));
+// System.out.println("" + xmlSlide[i].getTitle());
+
+ BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB);
+ Graphics2D graphics = img.createGraphics();
+
+ // render the slide into the off-screen image
+ xmlSlide.draw(graphics);
+
+ // Also try to read notes
+ XSLFNotes notes = xmlSlide.getNotes();
+ if(notes != null) {
+ for (XSLFShape note : notes) {
+ DrawFactory df = DrawFactory.getInstance(graphics);
+ Drawable d = df.getDrawable(note);
+ d.draw(graphics);
+
+ if (note instanceof XSLFTextShape) {
+ XSLFTextShape txShape = (XSLFTextShape) note;
+ for (XSLFTextParagraph xslfParagraph : txShape.getTextParagraphs()) {
+ xslfParagraph.getText();
+ }
+ }
+ }
+ }
+ }
+
+ ppt.close();
+ }
+
// a test-case to test this locally without executing the full TestAllFiles
@Test
public void test() throws Exception {
- InputStream stream = new FileInputStream("test-data/slideshow/testPPT.pptx");
+ InputStream stream = new FileInputStream("test-data/slideshow/SampleShow.pptx");
try {
handleFile(stream);
} finally {
stream.close();
}
}
+
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ public void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/slideshow/testPPT.thmx"));
+ }
} \ No newline at end of file
diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
index 54a386ea00..a268ed4658 100644
--- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
@@ -17,6 +17,7 @@
package org.apache.poi.stress;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
@@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler {
stream.close();
}
}
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ public void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/spreadsheet/56278.xlsx"));
+ }
} \ No newline at end of file
diff --git a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java
index a96d46da31..c097dc9f71 100644
--- a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java
@@ -16,13 +16,15 @@
==================================================================== */
package org.apache.poi.stress;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
+import java.io.PushbackInputStream;
import org.apache.poi.xwpf.usermodel.XWPFDocument;
import org.junit.Test;
-public class XWPFFileHandler implements FileHandler {
+public class XWPFFileHandler extends AbstractFileHandler {
@Override
public void handleFile(InputStream stream) throws Exception {
// ignore password protected files
@@ -36,12 +38,16 @@ public class XWPFFileHandler implements FileHandler {
// a test-case to test this locally without executing the full TestAllFiles
@Test
public void test() throws Exception {
- InputStream stream = new FileInputStream("test-data/document/footnotes.docx");
+ File file = new File("test-data/document/51921-Word-Crash067.docx");
+
+ InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000);
try {
handleFile(stream);
} finally {
stream.close();
}
+
+ handleExtracting(file);
}
} \ No newline at end of file