diff options
author | Andreas Beeker <kiwiwings@apache.org> | 2015-07-19 19:00:32 +0000 |
---|---|---|
committer | Andreas Beeker <kiwiwings@apache.org> | 2015-07-19 19:00:32 +0000 |
commit | 89ab6304a47d06a3f16178e6d6c7451474200c8c (patch) | |
tree | f24892a1d13eb23901d1d2673c46f79ed8f3b560 /src/integrationtest | |
parent | 9b09cb683ab24180411f033a8ba9ed2d6073ebca (diff) | |
parent | a27b7d5b2c80b11bc9d0c49170c684f0201b16fe (diff) | |
download | poi-89ab6304a47d06a3f16178e6d6c7451474200c8c.tar.gz poi-89ab6304a47d06a3f16178e6d6c7451474200c8c.zip |
merge trunk to common sl branch
git-svn-id: https://svn.apache.org/repos/asf/poi/branches/common_sl@1691843 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src/integrationtest')
17 files changed, 756 insertions, 190 deletions
diff --git a/src/integrationtest/build.xml b/src/integrationtest/build.xml new file mode 100644 index 0000000000..795788e441 --- /dev/null +++ b/src/integrationtest/build.xml @@ -0,0 +1,145 @@ +<?xml version="1.0"?> +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + +http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> +<project name="POI Testbuild" default="run" basedir="."> + + <description>Test-Ant file which verifies that the Apache POI distribution build sources can be compiled successfully. + + Before running this, you should execute the "assemble" target in the main build.xml to have the packaged files + created correctly. 
+ + </description> + + <property name="dist" value="../../build/dist"/> + <property name="build" value="../../build/distsourcebuild"/> + + <target name="init" depends=""> + </target> + + <target name="run" depends="init,runSourceBuild,runCompileTest"/> + + <target name="runSourceBuild" depends="init"> + <!-- clean out old stuff in build-dir --> + <delete dir="${build}"/> + <mkdir dir="${build}"/> + + <!-- select latest built source zip --> + <pathconvert property="srcpackage"> + <last> + <sort> + <date xmlns="antlib:org.apache.tools.ant.types.resources.comparators"/> + <resources> + <fileset dir="${dist}"> + <include name="poi-src-*.zip" /> + </fileset> + </resources> + </sort> + </last> + </pathconvert> + + <echo message="Found source package at ${srcpackage}"/> + <unzip src="${srcpackage}" dest="${build}" failOnEmptyArchive="true"/> + + <!-- look for name of sub-dir, do this dynamically as it changes with every (beta|rc)-release --> + <pathconvert property="dirversion"> + <dirset dir="${build}"> + <include name="*" /> + </dirset> + </pathconvert> + + <!-- finally call Ant on the extracted source to check if we can build the packages --> + <echo message="Building in temporary dir ${dirversion}/"/> + <ant dir="${dirversion}" target="jar" inheritAll="false" inheritRefs="false" useNativeBasedir="true"/> + </target> + + <target name="runCompileTest" depends="init" description="Verify that we can compile most examples without including excelant or scratchpad jars"> + <!-- clean out old stuff in build-dir --> + <delete dir="${build}"/> + <mkdir dir="${build}"/> + + <!-- select latest built jar files without scratchpad.jar --> + <pathconvert property="jarpackage"> + <sort> + <resources> + <fileset dir="${dist}"> + <include name="poi-3.*.jar" /> + <include name="poi-ooxml-3.*.jar" /> + <include name="poi-ooxml-schemas-3.*.jar" /> + <exclude name="*-javadocs-*" /> + <exclude name="*-sources-*" /> + </fileset> + </resources> + </sort> + </pathconvert> + + <echo 
message="Found jar packages at ${jarpackage}"/> + + <path id="libs"> + <fileset dir="../../lib"> + <include name="junit*.jar" /> + </fileset> + <fileset dir="../../ooxml-lib"> + <include name="ooxml-schemas-*.jar" /> + <include name="xmlbeans-*.jar" /> + <exclude name="xmlbeans-2.3.*.jar" /> + </fileset> + </path> + + <echo message="Compiling examples without linking to scratchpad.jar to ensure that only some specific ones require this jar" /> + <javac srcdir="../examples/src" destdir="${build}" + target="1.6" + source="1.6" + debug="trye" + encoding="ASCII" + fork="yes" + includeantruntime="false" + excludes="org/apache/poi/hslf/**,org/apache/poi/hsmf/**,**/EmbeddedObjects.java,**/EmeddedObjects.java,**/Word2Forrest.java" + classpath="${jarpackage}" + classpathref="libs"> + </javac> + + <!-- select latest built jar files with additionally scratchpad.jar --> + <pathconvert property="jarpackagescratchpad"> + <sort> + <resources> + <fileset dir="${dist}"> + <include name="poi-3.*.jar" /> + <include name="poi-ooxml-3.*.jar" /> + <include name="poi-ooxml-schemas-3.*.jar" /> + <include name="poi-scratchpad-3.*.jar" /> + <exclude name="*-javadocs-*" /> + <exclude name="*-sources-*" /> + </fileset> + </resources> + </sort> + </pathconvert> + + <echo message="Compiling all examples with the additinal scratchpad.jar" /> + <javac srcdir="../examples/src" destdir="${build}" + target="1.6" + source="1.6" + debug="trye" + encoding="ASCII" + fork="yes" + includeantruntime="false" + classpath="${jarpackagescratchpad}" + classpathref="libs"> + </javac> + </target> +</project> diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 4608303537..85b0580841 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import 
org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.stress.*; import org.apache.tools.ant.DirectoryScanner; import org.junit.Test; @@ -65,83 +66,91 @@ import org.junit.runners.Parameterized.Parameters; */ @RunWith(Parameterized.class) public class TestAllFiles { - private static final File ROOT_DIR = new File("test-data"); + private static final File ROOT_DIR = new File("test-data"); // map file extensions to the actual mappers - private static final Map<String, FileHandler> HANDLERS = new HashMap<String, FileHandler>(); - static { - // Excel - HANDLERS.put(".xls", new HSSFFileHandler()); - HANDLERS.put(".xlsx", new XSSFFileHandler()); - HANDLERS.put(".xlsm", new XSSFFileHandler()); - HANDLERS.put(".xltx", new XSSFFileHandler()); - HANDLERS.put(".xlsb", new XSSFFileHandler()); - - // Word - HANDLERS.put(".doc", new HWPFFileHandler()); - HANDLERS.put(".docx", new XWPFFileHandler()); - HANDLERS.put(".dotx", new XWPFFileHandler()); - HANDLERS.put(".docm", new XWPFFileHandler()); - HANDLERS.put(".ooxml", new XWPFFileHandler()); // OPCPackage - - // Powerpoint - HANDLERS.put(".ppt", new HSLFFileHandler()); - HANDLERS.put(".pptx", new XSLFFileHandler()); - HANDLERS.put(".pptm", new XSLFFileHandler()); - HANDLERS.put(".ppsm", new XSLFFileHandler()); - HANDLERS.put(".ppsx", new XSLFFileHandler()); - HANDLERS.put(".thmx", new XSLFFileHandler()); - - // Outlook - HANDLERS.put(".msg", new HSMFFileHandler()); - - // Publisher - HANDLERS.put(".pub", new HPBFFileHandler()); - - // Visio - HANDLERS.put(".vsd", new HDGFFileHandler()); - - // POIFS - HANDLERS.put(".ole2", new POIFSFileHandler()); - - // Microsoft Admin Template? - HANDLERS.put(".adm", new HPSFFileHandler()); - - // Microsoft TNEF - HANDLERS.put(".dat", new HMEFFileHandler()); - - // TODO: are these readable by some of the formats? 
- HANDLERS.put(".shw", new NullFileHandler()); - HANDLERS.put(".zvi", new NullFileHandler()); - HANDLERS.put(".mpp", new NullFileHandler()); - HANDLERS.put(".qwp", new NullFileHandler()); - HANDLERS.put(".wps", new NullFileHandler()); - HANDLERS.put(".bin", new NullFileHandler()); - HANDLERS.put(".xps", new NullFileHandler()); - HANDLERS.put(".sldprt", new NullFileHandler()); - HANDLERS.put(".mdb", new NullFileHandler()); - HANDLERS.put(".vml", new NullFileHandler()); - - // ignore some file types, images, other formats, ... - HANDLERS.put(".txt", new NullFileHandler()); - HANDLERS.put(".pdf", new NullFileHandler()); - HANDLERS.put(".rtf", new NullFileHandler()); - HANDLERS.put(".gif", new NullFileHandler()); - HANDLERS.put(".html", new NullFileHandler()); - HANDLERS.put(".png", new NullFileHandler()); - HANDLERS.put(".wmf", new NullFileHandler()); - HANDLERS.put(".emf", new NullFileHandler()); - HANDLERS.put(".dib", new NullFileHandler()); - HANDLERS.put(".svg", new NullFileHandler()); - HANDLERS.put(".pict", new NullFileHandler()); - HANDLERS.put(".jpg", new NullFileHandler()); - HANDLERS.put(".wav", new NullFileHandler()); - HANDLERS.put(".pfx", new NullFileHandler()); - HANDLERS.put(".xml", new NullFileHandler()); - HANDLERS.put(".csv", new NullFileHandler()); - - // map some files without extension - HANDLERS.put("spreadsheet/BigSSTRecord", new NullFileHandler()); + private static final Map<String, FileHandler> HANDLERS = new HashMap<String, FileHandler>(); + static { + // Excel + HANDLERS.put(".xls", new HSSFFileHandler()); + HANDLERS.put(".xlsx", new XSSFFileHandler()); + HANDLERS.put(".xlsm", new XSSFFileHandler()); + HANDLERS.put(".xltx", new XSSFFileHandler()); + HANDLERS.put(".xlsb", new XSSFFileHandler()); + + // Word + HANDLERS.put(".doc", new HWPFFileHandler()); + HANDLERS.put(".docx", new XWPFFileHandler()); + HANDLERS.put(".dotx", new XWPFFileHandler()); + HANDLERS.put(".docm", new XWPFFileHandler()); + HANDLERS.put(".ooxml", new XWPFFileHandler()); 
// OPCPackage + + // Powerpoint + HANDLERS.put(".ppt", new HSLFFileHandler()); + HANDLERS.put(".pptx", new XSLFFileHandler()); + HANDLERS.put(".pptm", new XSLFFileHandler()); + HANDLERS.put(".ppsm", new XSLFFileHandler()); + HANDLERS.put(".ppsx", new XSLFFileHandler()); + HANDLERS.put(".thmx", new XSLFFileHandler()); + + // Outlook + HANDLERS.put(".msg", new HSMFFileHandler()); + + // Publisher + HANDLERS.put(".pub", new HPBFFileHandler()); + + // Visio - binary + HANDLERS.put(".vsd", new HDGFFileHandler()); + + // Visio - ooxml (currently unsupported) + HANDLERS.put(".vsdm", new NullFileHandler()); + HANDLERS.put(".vsdx", new NullFileHandler()); + HANDLERS.put(".vssm", new NullFileHandler()); + HANDLERS.put(".vssx", new NullFileHandler()); + HANDLERS.put(".vstm", new NullFileHandler()); + HANDLERS.put(".vstx", new NullFileHandler()); + + // POIFS + HANDLERS.put(".ole2", new POIFSFileHandler()); + + // Microsoft Admin Template? + HANDLERS.put(".adm", new HPSFFileHandler()); + + // Microsoft TNEF + HANDLERS.put(".dat", new HMEFFileHandler()); + + // TODO: are these readable by some of the formats? + HANDLERS.put(".shw", new NullFileHandler()); + HANDLERS.put(".zvi", new NullFileHandler()); + HANDLERS.put(".mpp", new NullFileHandler()); + HANDLERS.put(".qwp", new NullFileHandler()); + HANDLERS.put(".wps", new NullFileHandler()); + HANDLERS.put(".bin", new NullFileHandler()); + HANDLERS.put(".xps", new NullFileHandler()); + HANDLERS.put(".sldprt", new NullFileHandler()); + HANDLERS.put(".mdb", new NullFileHandler()); + HANDLERS.put(".vml", new NullFileHandler()); + + // ignore some file types, images, other formats, ... 
+ HANDLERS.put(".txt", new NullFileHandler()); + HANDLERS.put(".pdf", new NullFileHandler()); + HANDLERS.put(".rtf", new NullFileHandler()); + HANDLERS.put(".gif", new NullFileHandler()); + HANDLERS.put(".html", new NullFileHandler()); + HANDLERS.put(".png", new NullFileHandler()); + HANDLERS.put(".wmf", new NullFileHandler()); + HANDLERS.put(".emf", new NullFileHandler()); + HANDLERS.put(".dib", new NullFileHandler()); + HANDLERS.put(".svg", new NullFileHandler()); + HANDLERS.put(".pict", new NullFileHandler()); + HANDLERS.put(".jpg", new NullFileHandler()); + HANDLERS.put(".wav", new NullFileHandler()); + HANDLERS.put(".pfx", new NullFileHandler()); + HANDLERS.put(".xml", new NullFileHandler()); + HANDLERS.put(".csv", new NullFileHandler()); + + // map some files without extension + HANDLERS.put("spreadsheet/BigSSTRecord", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2CR1", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2CR2", new NullFileHandler()); @@ -151,88 +160,104 @@ public class TestAllFiles { HANDLERS.put("spreadsheet/BigSSTRecord2CR6", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2CR7", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecordCR", new NullFileHandler()); - HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler()); - } - - private static final Set<String> EXPECTED_FAILURES = new HashSet<String>(); - static { - // password protected files - EXPECTED_FAILURES.add("spreadsheet/password.xls"); - EXPECTED_FAILURES.add("spreadsheet/51832.xls"); - EXPECTED_FAILURES.add("document/PasswordProtected.doc"); - EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt"); - EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt"); - EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt"); - EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt"); - 
//EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx"); - //EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx"); - EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); + HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler()); + } + + // Old Word Documents where we can at least extract some text + private static final Set<String> OLD_FILES = new HashSet<String>(); + static { + OLD_FILES.add("document/Bug49933.doc"); + OLD_FILES.add("document/Bug51944.doc"); + OLD_FILES.add("document/Word6.doc"); + OLD_FILES.add("document/Word6_sections.doc"); + OLD_FILES.add("document/Word6_sections2.doc"); + OLD_FILES.add("document/Word95.doc"); + OLD_FILES.add("document/word95err.doc"); + OLD_FILES.add("hpsf/TestMickey.doc"); + OLD_FILES.add("document/52117.doc"); + } + + private static final Set<String> EXPECTED_FAILURES = new HashSet<String>(); + static { + // password protected files + EXPECTED_FAILURES.add("spreadsheet/password.xls"); + EXPECTED_FAILURES.add("spreadsheet/protected_passtika.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/51832.xls"); + EXPECTED_FAILURES.add("document/PasswordProtected.doc"); + EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt"); + EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt"); + EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt"); + EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt"); + //EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx"); + //EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx"); + EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); EXPECTED_FAILURES.add("spreadsheet/35897-type4.xls"); //EXPECTED_FAILURES.add("poifs/protect.xlsx"); //EXPECTED_FAILURES.add("poifs/protected_sha512.xlsx"); //EXPECTED_FAILURES.add("poifs/extenxls_pwd123.xlsx"); //EXPECTED_FAILURES.add("poifs/protected_agile.docx"); - - // TODO: fails XMLExportTest, is this ok? 
- EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx"); - EXPECTED_FAILURES.add("spreadsheet/55864.xlsx"); - - // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()! - EXPECTED_FAILURES.add("spreadsheet/44958.xls"); - EXPECTED_FAILURES.add("spreadsheet/44958_1.xls"); - EXPECTED_FAILURES.add("spreadsheet/testArraysAndTables.xls"); - - // TODO: good to ignore? - EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx"); - EXPECTED_FAILURES.add("spreadsheet/49931.xls"); - EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml"); - - // This is actually a spreadsheet! - EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc"); - - // some files that are broken, Excel 5.0/95, Word 95, ... - EXPECTED_FAILURES.add("spreadsheet/43493.xls"); - EXPECTED_FAILURES.add("spreadsheet/46904.xls"); - EXPECTED_FAILURES.add("document/56880.doc"); - EXPECTED_FAILURES.add("document/Bug49933.doc"); - EXPECTED_FAILURES.add("document/Bug50955.doc"); - EXPECTED_FAILURES.add("document/Bug51944.doc"); - EXPECTED_FAILURES.add("document/Word6.doc"); - EXPECTED_FAILURES.add("document/Word6_sections.doc"); - EXPECTED_FAILURES.add("document/Word6_sections2.doc"); - EXPECTED_FAILURES.add("document/Word95.doc"); - EXPECTED_FAILURES.add("document/word95err.doc"); - EXPECTED_FAILURES.add("hpsf/TestMickey.doc"); - EXPECTED_FAILURES.add("slideshow/PPT95.ppt"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx"); - 
EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_DerivedPartNameFAIL.docx"); - EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764() - EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764() + + // TODO: fails XMLExportTest, is this ok? + EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/55864.xlsx"); + + // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()! + EXPECTED_FAILURES.add("spreadsheet/44958.xls"); + EXPECTED_FAILURES.add("spreadsheet/44958_1.xls"); + EXPECTED_FAILURES.add("spreadsheet/testArraysAndTables.xls"); + + // TODO: good to ignore? + EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/49931.xls"); + EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml"); + + // This is actually a spreadsheet! + EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc"); + + // some files that are broken, eg Word 95, ... 
+ EXPECTED_FAILURES.add("spreadsheet/43493.xls"); + EXPECTED_FAILURES.add("spreadsheet/46904.xls"); + EXPECTED_FAILURES.add("document/56880.doc"); + EXPECTED_FAILURES.add("document/Bug50955.doc"); + EXPECTED_FAILURES.add("slideshow/PPT95.ppt"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_DerivedPartNameFAIL.docx"); + EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764() + EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764() EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb"); + EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted + EXPECTED_FAILURES.add("poifs/only-zero-byte-streams.ole2"); // No actual contents + + // old Excel files, which we only support simple text extraction of EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_3.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_4.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_5.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_95.xls"); + + // OOXML Strict is not yet supported, see bug #57699 + EXPECTED_FAILURES.add("spreadsheet/SampleSS.strict.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/SimpleStrict.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/sample.strict.xlsx"); + + // non-TNEF files + 
EXPECTED_FAILURES.add("ddf/Container.dat"); + EXPECTED_FAILURES.add("ddf/47143.dat"); + } - // non-TNEF files - EXPECTED_FAILURES.add("ddf/Container.dat"); - EXPECTED_FAILURES.add("ddf/47143.dat"); - } - @Parameters(name="{index}: {0} using {1}") public static Iterable<Object[]> files() { DirectoryScanner scanner = new DirectoryScanner(); scanner.setBasedir(ROOT_DIR); scanner.setExcludes(new String[] { "**/.svn/**" }); - + scanner.scan(); - + System.out.println("Handling " + scanner.getIncludedFiles().length + " files"); List<Object[]> files = new ArrayList<Object[]>(); @@ -240,47 +265,72 @@ public class TestAllFiles { file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise files.add(new Object[] { file, HANDLERS.get(getExtension(file)) }); } - + return files; - } - + } + @Parameter(value=0) public String file; - + @Parameter(value=1) public FileHandler handler; - + @Test public void testAllFiles() throws Exception { - assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler); - InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)),100); - try { - handler.handleFile(stream); - - assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", - EXPECTED_FAILURES.contains(file)); - } catch (Exception e) { - // check if we expect failure for this file - if(!EXPECTED_FAILURES.contains(file)) { - throw new Exception("While handling " + file, e); - } - } finally { - stream.close(); - } - } - - private static String getExtension(String file) { - int pos = file.lastIndexOf('.'); - if(pos == -1 || pos == file.length()-1) { - return file; - } - - return file.substring(pos); - } - - private static class NullFileHandler implements FileHandler { - @Override + assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler); + File inputFile = new File(ROOT_DIR, file); + + try { + InputStream stream 
= new BufferedInputStream(new FileInputStream(inputFile), 64*1024); + try { + handler.handleFile(stream); + + assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", + OLD_FILES.contains(file)); + } finally { + stream.close(); + } + + handler.handleExtracting(inputFile); + + assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", + EXPECTED_FAILURES.contains(file)); + } catch (OldWordFileFormatException e) { + // for old word files we should still support extracting text + if(OLD_FILES.contains(file)) { + handler.handleExtracting(inputFile); + } else { + // check if we expect failure for this file + if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + System.out.println("Failed: " + file); + throw new Exception("While handling " + file, e); + } + } + } catch (Exception e) { + // check if we expect failure for this file + if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + System.out.println("Failed: " + file); + throw new Exception("While handling " + file, e); + } + } + } + + private static String getExtension(String file) { + int pos = file.lastIndexOf('.'); + if(pos == -1 || pos == file.length()-1) { + return file; + } + + return file.substring(pos); + } + + private static class NullFileHandler implements FileHandler { + @Override public void handleFile(InputStream stream) throws Exception { - } - } + } + + @Override + public void handleExtracting(File file) throws Exception { + } + } } diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java new file mode 100644 index 0000000000..55e2c368f2 --- /dev/null +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -0,0 +1,143 @@ +/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.stress;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.zip.ZipException;
+
+import org.apache.poi.POIOLE2TextExtractor;
+import org.apache.poi.POITextExtractor;
+import org.apache.poi.dev.OOXMLPrettyPrint;
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.xmlbeans.XmlException;
+
+/**
+ * Base class for {@link FileHandler} implementations which adds a generic
+ * text-extraction check on top of the format specific file handling.
+ */
+public abstract class AbstractFileHandler implements FileHandler {
+    /**
+     * Files for which the extraction check is expected to fail, each one
+     * named as <code>directory/filename</code> with a forward slash.
+     */
+    public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
+    static {
+        // password protected files
+        EXPECTED_EXTRACTOR_FAILURES.add("document/bug53475-password-is-pass.docx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/extenxls_pwd123.xlsx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/protect.xlsx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_agile.docx");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_sha512.xlsx");
+
+        // unsupported file-types, no supported OLE2 parts
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat");
+        EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm");
+        EXPECTED_EXTRACTOR_FAILURES.add("hsmf/attachment_msg_pdf.msg");
+        EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2");
+        EXPECTED_EXTRACTOR_FAILURES.add("slideshow/testPPT.thmx");
+    }
+
+    /**
+     * Runs the extraction checks for the given file twice: first with the
+     * thread preferring event based extractors, then with that preference
+     * switched off. The preference that was active before the call is
+     * restored afterwards, even if one of the checks fails.
+     *
+     * @param file the file to extract text from
+     * @throws Exception if extraction fails
+     */
+    @Override
+    public void handleExtracting(File file) throws Exception {
+        boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
+        try {
+            ExtractorFactory.setThreadPrefersEventExtractors(true);
+            handleExtractingInternal(file);
+
+            ExtractorFactory.setThreadPrefersEventExtractors(false);
+            handleExtractingInternal(file);
+        } finally {
+            ExtractorFactory.setThreadPrefersEventExtractors(before);
+        }
+
+        /* Did fail for some documents with special XML contents...
+        try {
+            OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
+                    "/tmp/pretty-" + file.getName() });
+        } catch (ZipException e) {
+            // ignore, not a Zip/OOXML file
+        }*/
+    }
+
+    /**
+     * Builds the key under which a file is looked up in
+     * {@link #EXPECTED_EXTRACTOR_FAILURES}: the name of the directory that
+     * contains the file, a forward slash, and the file name. A file without
+     * a parent directory is represented by its plain name.
+     */
+    private static String relativeName(File file) {
+        File parent = file.getParentFile();
+        if (parent == null) {
+            return file.getName();
+        }
+        return parent.getName() + "/" + file.getName();
+    }
+
+    /**
+     * Creates an extractor for the file and verifies that text, metadata text
+     * and (for OLE2 based extractors) HPSF property text are all non-null,
+     * and that the file size and modification time are unchanged afterwards.
+     *
+     * @throws Exception if extraction fails; an IllegalArgumentException
+     *      raised by the checks is swallowed only for files listed in
+     *      {@link #EXPECTED_EXTRACTOR_FAILURES}
+     */
+    private void handleExtractingInternal(File file) throws Exception {
+        long length = file.length();
+        long modified = file.lastModified();
+
+        // the set holds Strings, so it has to be queried with the
+        // "directory/filename" key; querying it with the File itself
+        // can never match
+        String key = relativeName(file);
+
+        POITextExtractor extractor = ExtractorFactory.createExtractor(file);
+        // checked before entering the try-block, otherwise close() in the
+        // finally-block would hide the assertion behind a NullPointerException
+        assertNotNull(extractor);
+        try {
+            assertNotNull(extractor.getText());
+
+            // also try metadata
+            POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor();
+            assertNotNull(metadataExtractor.getText());
+
+            assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
+                    EXPECTED_EXTRACTOR_FAILURES.contains(key));
+
+            assertEquals("File should not be modified by extractor", length, file.length());
+            assertEquals("File should not be modified by extractor", modified, file.lastModified());
+
+            handleExtractingAsStream(file);
+
+            if(extractor instanceof POIOLE2TextExtractor) {
+                HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor);
+                try {
+                    assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
+                    assertNotNull(hpsfExtractor.getSummaryInformationText());
+                    String text = hpsfExtractor.getText();
+                    //System.out.println(text);
+                    assertNotNull(text);
+                } finally {
+                    hpsfExtractor.close();
+                }
+            }
+        } catch (IllegalArgumentException e) {
+            if(!EXPECTED_EXTRACTOR_FAILURES.contains(key)) {
+                throw new Exception("While handling " + file, e);
+            }
+        } finally {
+            extractor.close();
+        }
+    }
+
+    /**
+     * Repeats the basic extraction check with an extractor that is created
+     * from an InputStream on the file instead of from the File itself.
+     */
+    private void handleExtractingAsStream(File file) throws FileNotFoundException,
+            IOException, InvalidFormatException, OpenXML4JException,
+            XmlException {
+        InputStream stream = new FileInputStream(file);
+        try {
+            POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream);
+            // checked before entering the try-block, otherwise close() in the
+            // finally-block would hide the assertion behind a NullPointerException
+            assertNotNull(streamExtractor);
+            try {
+                assertNotNull(streamExtractor.getText());
+            } finally {
+                streamExtractor.close();
+            }
+        } finally {
+            stream.close();
+        }
+    }
+}
diff --git a/src/integrationtest/org/apache/poi/stress/FileHandler.java b/src/integrationtest/org/apache/poi/stress/FileHandler.java index e6f3385f02..ce2991b0bc 100644 --- a/src/integrationtest/org/apache/poi/stress/FileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/FileHandler.java @@ -16,6 +16,7 @@ ==================================================================== */ package org.apache.poi.stress; +import java.io.File; import java.io.InputStream; /** @@ -34,4 +35,10 @@ public interface FileHandler { * @throws Exception */ void handleFile(InputStream stream) throws Exception; + + /** + * Ensures that extracting text from the given file + * is returning some text. + */ + void handleExtracting(File file) throws Exception; } diff --git a/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java index 7fac6647a3..b9fe93a668 100644 --- a/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java @@ -19,10 +19,12 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.hdgf.HDGFDiagram; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hdgf.streams.Stream; import org.apache.poi.hdgf.streams.TrailerStream; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -48,11 +50,27 @@ public class HDGFFileHandler extends POIFSFileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/diagram/44501.vsd"); + File file = new File("test-data/diagram/44501.vsd"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); + + 
stream = new FileInputStream(file); + try { + VisioTextExtractor extractor = new VisioTextExtractor(stream); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } finally { + stream.close(); + } } } diff --git a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java index dfa8750058..9f492bf0ed 100644 --- a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java @@ -26,7 +26,7 @@ import org.apache.poi.hmef.attribute.MAPIAttribute; import org.apache.poi.hmef.attribute.MAPIStringAttribute; import org.junit.Test; -public class HMEFFileHandler implements FileHandler { +public class HMEFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { diff --git a/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java index 31ad8bc123..a41b6ebadf 100644 --- a/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java @@ -18,10 +18,12 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.hpbf.HPBFDocument; +import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.junit.Test; @@ -39,11 +41,28 @@ public class HPBFFileHandler extends POIFSFileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/publisher/SampleBrochure.pub"); + File file = new File("test-data/publisher/SampleBrochure.pub"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally 
{ stream.close(); } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + PublisherTextExtractor extractor = new PublisherTextExtractor(stream); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } finally { + stream.close(); + } } + } diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java index b7d846ae62..6a53b2e009 100644 --- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java @@ -18,6 +18,7 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -25,7 +26,7 @@ import org.apache.poi.hpsf.HPSFPropertiesOnlyDocument; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.junit.Test; -public class HPSFFileHandler implements FileHandler { +public class HPSFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(new POIFSFileSystem(stream)); @@ -43,4 +44,10 @@ public class HPSFFileHandler implements FileHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/hpsf/TestBug44375.xls")); + } } diff --git a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java index 9de8b798c5..d68504a04c 100644 --- a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java @@ -22,6 +22,8 @@ import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.hsmf.MAPIMessage; +import 
org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.datatypes.DirectoryChunk; import org.junit.Test; public class HSMFFileHandler extends POIFSFileHandler { @@ -32,6 +34,19 @@ public class HSMFFileHandler extends POIFSFileHandler { assertNotNull(mapi.getDisplayBCC()); assertNotNull(mapi.getMessageDate()); + AttachmentChunks[] attachments = mapi.getAttachmentFiles(); + + for(AttachmentChunks attachment : attachments) { + + DirectoryChunk chunkDirectory = attachment.attachmentDirectory; + if(chunkDirectory != null) { + MAPIMessage attachmentMSG = chunkDirectory.getAsEmbededMessage(); + assertNotNull(attachmentMSG); + String body = attachmentMSG.getTextBody(); + assertNotNull(body); + } + } + /* => Writing isn't yet supported... // write out the file File file = TempFile.createTempFile("StressTest", ".msg"); diff --git a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java index 19dbd97a0e..dd579c4dba 100644 --- a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java @@ -16,6 +16,7 @@ ==================================================================== */ package org.apache.poi.stress; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -49,4 +50,10 @@ public class HSSFFileHandler extends SpreadsheetHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/spreadsheet/BOOK_in_capitals.xls")); + } }
\ No newline at end of file diff --git a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java index 1b6d4646c7..a56ddd2dc6 100644 --- a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java @@ -18,12 +18,21 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; +import java.io.IOException; import java.io.InputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import org.apache.poi.hdf.extractor.WordDocument; import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.extractor.WordExtractor; import org.junit.Test; +@SuppressWarnings("deprecation") public class HWPFFileHandler extends POIFSFileHandler { @Override public void handleFile(InputStream stream) throws Exception { @@ -33,16 +42,57 @@ public class HWPFFileHandler extends POIFSFileHandler { assertNotNull(doc.getEndnotes()); handlePOIDocument(doc); + + // fails for many documents, but is deprecated anyway... 
+ // handleWordDocument(doc); + } + + protected void handleWordDocument(HWPFDocument doc) throws IOException { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + doc.write(outStream); + + WordDocument wordDoc = new WordDocument(new ByteArrayInputStream(outStream.toByteArray())); + + StringWriter docTextWriter = new StringWriter(); + PrintWriter out = new PrintWriter(docTextWriter); + try { + wordDoc.writeAllText(out); + } finally { + out.close(); + } + docTextWriter.close(); } // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/document/HeaderFooterUnicode.doc"); + File file = new File("test-data/document/52117.doc"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + WordExtractor extractor = new WordExtractor(stream); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } finally { + stream.close(); + } + } + + @Test + public void testExtractingOld() throws Exception { + File file = new File("test-data/document/52117.doc"); + handleExtracting(file); } -}
\ No newline at end of file +} diff --git a/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java b/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java index 31deac7106..5c4a36e3ca 100644 --- a/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java @@ -25,7 +25,7 @@ import java.io.InputStream; import org.apache.poi.POIDocument; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -public class POIFSFileHandler implements FileHandler { +public class POIFSFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { diff --git a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java index 103bb9be7e..7b0821dcc0 100644 --- a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java +++ b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java @@ -20,9 +20,17 @@ import static org.junit.Assert.assertNotNull; import java.io.IOException; import java.io.InputStream; +import java.util.List; import org.apache.poi.POIXMLDocument; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.junit.Ignore; +import org.junit.Test; public final class POIXMLDocumentHandler { protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception { @@ -36,11 +44,33 @@ public final class POIXMLDocumentHandler { protected static boolean isEncrypted(InputStream stream) throws IOException { if (POIFSFileSystem.hasPOIFSHeader(stream)) { POIFSFileSystem poifs = new POIFSFileSystem(stream); - if (poifs.getRoot().hasEntry("EncryptedPackage")) { + if 
(poifs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { return true; } throw new IOException("wrong file format or file extension for OO XML file"); } return false; } + + // a test-case to test this locally without executing the full TestAllFiles + @Ignore("POIXMLDocument cannot handle this Visio file currently...") + @Test + public void test() throws Exception { + OPCPackage pkg = OPCPackage.open("test-data/diagram/test.vsdx", PackageAccess.READ); + try { + handlePOIXMLDocument(new TestPOIXMLDocument(pkg)); + } finally { + pkg.close(); + } + } + + private final static class TestPOIXMLDocument extends POIXMLDocument { + public TestPOIXMLDocument(OPCPackage pkg) { + super(pkg); + } + + public List<PackagePart> getAllEmbedds() throws OpenXML4JException { + return null; + } + } } diff --git a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java index aad703ce98..f12bbd2de5 100644 --- a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java +++ b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java @@ -30,7 +30,7 @@ import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; -public abstract class SpreadsheetHandler implements FileHandler { +public abstract class SpreadsheetHandler extends AbstractFileHandler { public void handleWorkbook(Workbook wb, String extension) throws IOException { // try to access some of the content readContent(wb); diff --git a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java index 3464218fd9..2669238be1 100644 --- a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java @@ -18,35 +18,97 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.awt.Dimension; +import 
java.awt.Graphics2D; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; +import java.io.IOException; import java.io.InputStream; import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.sl.draw.DrawFactory; +import org.apache.poi.sl.draw.Drawable; import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFNotes; +import org.apache.poi.xslf.usermodel.XSLFShape; +import org.apache.poi.xslf.usermodel.XSLFSlide; +import org.apache.poi.xslf.usermodel.XSLFTextParagraph; +import org.apache.poi.xslf.usermodel.XSLFTextShape; import org.junit.Test; -public class XSLFFileHandler implements FileHandler { +public class XSLFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { - // ignore password protected files - if (POIXMLDocumentHandler.isEncrypted(stream)) return; - XSLFSlideShow slide = new XSLFSlideShow(OPCPackage.open(stream)); assertNotNull(slide.getPresentation()); assertNotNull(slide.getSlideMasterReferences()); assertNotNull(slide.getSlideReferences()); new POIXMLDocumentHandler().handlePOIXMLDocument(slide); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try { + slide.write(out); + } finally { + out.close(); + } + + createBitmaps(out); } + private void createBitmaps(ByteArrayOutputStream out) throws IOException { + XMLSlideShow ppt = new XMLSlideShow(new ByteArrayInputStream(out.toByteArray())); + Dimension pgsize = ppt.getPageSize(); + for (XSLFSlide xmlSlide : ppt.getSlides()) { +// System.out.println("slide-" + (i + 1)); +// System.out.println("" + xmlSlide[i].getTitle()); + + BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB); + Graphics2D graphics = img.createGraphics(); + + // draw stuff + xmlSlide.draw(graphics); + + // Also try to read 
notes + XSLFNotes notes = xmlSlide.getNotes(); + if(notes != null) { + for (XSLFShape note : notes) { + DrawFactory df = DrawFactory.getInstance(graphics); + Drawable d = df.getDrawable(note); + d.draw(graphics); + + if (note instanceof XSLFTextShape) { + XSLFTextShape txShape = (XSLFTextShape) note; + for (XSLFTextParagraph xslfParagraph : txShape.getTextParagraphs()) { + xslfParagraph.getText(); + } + } + } + } + } + + ppt.close(); + } + // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/slideshow/testPPT.pptx"); + InputStream stream = new FileInputStream("test-data/slideshow/SampleShow.pptx"); try { handleFile(stream); } finally { stream.close(); } } + + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/slideshow/testPPT.thmx")); + } }
\ No newline at end of file diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java index 54a386ea00..a268ed4658 100644 --- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java @@ -17,6 +17,7 @@ package org.apache.poi.stress; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/spreadsheet/56278.xlsx")); + } }
\ No newline at end of file diff --git a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java index a96d46da31..c097dc9f71 100644 --- a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java @@ -16,13 +16,15 @@ ==================================================================== */ package org.apache.poi.stress; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; +import java.io.PushbackInputStream; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.junit.Test; -public class XWPFFileHandler implements FileHandler { +public class XWPFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { // ignore password protected files @@ -36,12 +38,16 @@ public class XWPFFileHandler implements FileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/document/footnotes.docx"); + File file = new File("test-data/document/51921-Word-Crash067.docx"); + + InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); } }
\ No newline at end of file |