diff options
Diffstat (limited to 'src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java')
-rw-r--r-- | src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java | 284 |
1 files changed, 142 insertions, 142 deletions
diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index 6bfeee2e71..9b49cf7718 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -1,142 +1,142 @@ -/* ====================================================================
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
-==================================================================== */
-package org.apache.poi.stress;
-
-import static org.junit.Assert.assertEquals;
-import static org.junit.Assert.assertFalse;
-import static org.junit.Assert.assertNotNull;
-
-import java.io.File;
-import java.io.FileInputStream;
-import java.io.IOException;
-import java.io.InputStream;
-import java.util.HashSet;
-import java.util.Set;
-
-import org.apache.poi.POIOLE2TextExtractor;
-import org.apache.poi.POITextExtractor;
-import org.apache.poi.extractor.ExtractorFactory;
-import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor;
-import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
-import org.apache.xmlbeans.XmlException;
-
-public abstract class AbstractFileHandler implements FileHandler {
- public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
- static {
- // password protected files
- EXPECTED_EXTRACTOR_FAILURES.add("document/bug53475-password-is-pass.docx");
- EXPECTED_EXTRACTOR_FAILURES.add("poifs/extenxls_pwd123.xlsx");
- EXPECTED_EXTRACTOR_FAILURES.add("poifs/protect.xlsx");
- EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_agile.docx");
- EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_sha512.xlsx");
-
- // unsupported file-types, no supported OLE2 parts
- EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat");
- EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat");
- EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat");
- EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat");
- EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm");
- EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2");
- }
-
- @Override
- public void handleExtracting(File file) throws Exception {
- boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
- try {
- ExtractorFactory.setThreadPrefersEventExtractors(true);
- handleExtractingInternal(file);
-
- ExtractorFactory.setThreadPrefersEventExtractors(false);
- handleExtractingInternal(file);
- } finally {
- ExtractorFactory.setThreadPrefersEventExtractors(before);
- }
-
- /* Did fail for some documents with special XML contents...
- try {
- OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(),
- "/tmp/pretty-" + file.getName() });
- } catch (ZipException e) {
- // ignore, not a Zip/OOXML file
- }*/
- }
-
- private void handleExtractingInternal(File file) throws Exception {
- long length = file.length();
- long modified = file.lastModified();
-
- POITextExtractor extractor = ExtractorFactory.createExtractor(file);
- try {
- assertNotNull(extractor);
-
- assertNotNull(extractor.getText());
-
- // also try metadata
- @SuppressWarnings("resource")
- POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor();
- assertNotNull(metadataExtractor.getText());
-
- assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
- EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName()));
-
- assertEquals("File should not be modified by extractor", length, file.length());
- assertEquals("File should not be modified by extractor", modified, file.lastModified());
-
- handleExtractingAsStream(file);
-
- if(extractor instanceof POIOLE2TextExtractor) {
- HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor);
- try {
- assertNotNull(hpsfExtractor.getDocumentSummaryInformationText());
- assertNotNull(hpsfExtractor.getSummaryInformationText());
- String text = hpsfExtractor.getText();
- //System.out.println(text);
- assertNotNull(text);
- } finally {
- hpsfExtractor.close();
- }
- }
- } catch (IllegalArgumentException e) {
- if(!EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) {
- throw e;
- }
- } finally {
- extractor.close();
- }
- }
-
- private void handleExtractingAsStream(File file) throws IOException, OpenXML4JException, XmlException {
- InputStream stream = new FileInputStream(file);
- try {
- POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream);
- try {
- assertNotNull(streamExtractor);
-
- assertNotNull(streamExtractor.getText());
- } finally {
- streamExtractor.close();
- }
- } finally {
- stream.close();
- }
- }
-
- @Override
- public void handleAdditional(File file) throws Exception {
- // by default we do nothing here
- }
-}
+/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.stress; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.io.FileInputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashSet; +import java.util.Set; + +import org.apache.poi.POIOLE2TextExtractor; +import org.apache.poi.POITextExtractor; +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.xmlbeans.XmlException; + +public abstract class AbstractFileHandler implements FileHandler { + public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>(); + static { + // password protected files + EXPECTED_EXTRACTOR_FAILURES.add("document/bug53475-password-is-pass.docx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/extenxls_pwd123.xlsx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/protect.xlsx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_agile.docx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_sha512.xlsx"); + + // unsupported file-types, no supported OLE2 parts + EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2"); + } + + @Override + public void handleExtracting(File file) throws Exception { + boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); + try { + ExtractorFactory.setThreadPrefersEventExtractors(true); + handleExtractingInternal(file); + + ExtractorFactory.setThreadPrefersEventExtractors(false); + handleExtractingInternal(file); + } finally { + ExtractorFactory.setThreadPrefersEventExtractors(before); + } + + /* Did fail for some documents with special XML contents... + try { + OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), + "/tmp/pretty-" + file.getName() }); + } catch (ZipException e) { + // ignore, not a Zip/OOXML file + }*/ + } + + private void handleExtractingInternal(File file) throws Exception { + long length = file.length(); + long modified = file.lastModified(); + + POITextExtractor extractor = ExtractorFactory.createExtractor(file); + try { + assertNotNull(extractor); + + assertNotNull(extractor.getText()); + + // also try metadata + @SuppressWarnings("resource") + POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor(); + assertNotNull(metadataExtractor.getText()); + + assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", + EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())); + + assertEquals("File should not be modified by extractor", length, file.length()); + assertEquals("File should not be modified by extractor", modified, file.lastModified()); + + handleExtractingAsStream(file); + + if(extractor instanceof POIOLE2TextExtractor) { + HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor); + try { + assertNotNull(hpsfExtractor.getDocumentSummaryInformationText()); + assertNotNull(hpsfExtractor.getSummaryInformationText()); + String text = hpsfExtractor.getText(); + //System.out.println(text); + assertNotNull(text); + } finally { + hpsfExtractor.close(); + } + } + } catch (IllegalArgumentException e) { + if(!EXPECTED_EXTRACTOR_FAILURES.contains(file.getParentFile().getName() + "/" + file.getName())) { + throw e; + } + } finally { + extractor.close(); + } + } + + private void handleExtractingAsStream(File file) throws IOException, OpenXML4JException, XmlException { + InputStream stream = new FileInputStream(file); + try { + POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream); + try { + assertNotNull(streamExtractor); + + assertNotNull(streamExtractor.getText()); + } finally { + streamExtractor.close(); + } + } finally { + stream.close(); + } + } + + @Override + public void handleAdditional(File file) throws Exception { + // by default we do nothing here + } +} |