From 76307fe94bb6a3555967372ff7879a6adcb0f0f7 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 27 Feb 2015 09:59:14 +0000 Subject: * Add text-extraction verification to integration-tests via a new abstract base FileHandler * Fix NullPointerException found in some documents when running against the test-data * Add support for extracting text from Dir-Entries WORKBOOK and BOOK to support some old/strangely formatted XLS files. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662652 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/TestAllFiles.java | 36 +++++++++----- .../org/apache/poi/stress/AbstractFileHandler.java | 55 ++++++++++++++++++++++ .../org/apache/poi/stress/FileHandler.java | 7 +++ .../org/apache/poi/stress/HMEFFileHandler.java | 2 +- .../org/apache/poi/stress/HPSFFileHandler.java | 2 +- .../org/apache/poi/stress/HSSFFileHandler.java | 7 +++ .../org/apache/poi/stress/POIFSFileHandler.java | 2 +- .../org/apache/poi/stress/SpreadsheetHandler.java | 2 +- .../org/apache/poi/stress/XSLFFileHandler.java | 2 +- .../org/apache/poi/stress/XWPFFileHandler.java | 2 +- 10 files changed, 98 insertions(+), 19 deletions(-) create mode 100644 src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 4608303537..d0439b40fd 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -253,20 +253,26 @@ public class TestAllFiles { @Test public void testAllFiles() throws Exception { assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler); - InputStream stream = new BufferedInputStream(new FileInputStream(new File(ROOT_DIR, file)),100); + File inputFile = new File(ROOT_DIR, file); + try { - handler.handleFile(stream); - - assertFalse("Expected to fail for file " + file + " and 
handler " + handler + ", but did not fail!", - EXPECTED_FAILURES.contains(file)); - } catch (Exception e) { - // check if we expect failure for this file - if(!EXPECTED_FAILURES.contains(file)) { - throw new Exception("While handling " + file, e); - } - } finally { - stream.close(); - } + InputStream stream = new BufferedInputStream(new FileInputStream(inputFile),100); + try { + handler.handleFile(stream); + + assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", + EXPECTED_FAILURES.contains(file)); + } finally { + stream.close(); + } + + handler.handleExtracting(inputFile); + } catch (Exception e) { + // check if we expect failure for this file + if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + throw new Exception("While handling " + file, e); + } + } } private static String getExtension(String file) { @@ -282,5 +288,9 @@ public class TestAllFiles { @Override public void handleFile(InputStream stream) throws Exception { } + + @Override + public void handleExtracting(File file) throws Exception { + } } } diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java new file mode 100644 index 0000000000..85ebb1b451 --- /dev/null +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -0,0 +1,55 @@ +package org.apache.poi.stress; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; + +import java.io.File; +import java.util.HashSet; +import java.util.Set; + +import org.apache.poi.POITextExtractor; +import org.apache.poi.extractor.ExtractorFactory; + +public abstract class AbstractFileHandler implements FileHandler { + public static final Set EXPECTED_EXTRACTOR_FAILURES = new HashSet(); + static { + // password protected files + EXPECTED_EXTRACTOR_FAILURES.add("document/bug53475-password-is-pass.docx"); + 
EXPECTED_EXTRACTOR_FAILURES.add("poifs/extenxls_pwd123.xlsx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/protect.xlsx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_agile.docx"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/protected_sha512.xlsx"); + + // unsupported file-types, no supported OLE2 parts + EXPECTED_EXTRACTOR_FAILURES.add("hmef/quick-winmail.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/winmail-sample1.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-simple.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hmef/bug52400-winmail-with-attachments.dat"); + EXPECTED_EXTRACTOR_FAILURES.add("hpsf/Test0313rur.adm"); + EXPECTED_EXTRACTOR_FAILURES.add("hsmf/attachment_msg_pdf.msg"); + EXPECTED_EXTRACTOR_FAILURES.add("poifs/Notes.ole2"); + EXPECTED_EXTRACTOR_FAILURES.add("slideshow/testPPT.thmx"); + } + + public void handleExtracting(File file) throws Exception { + POITextExtractor extractor = ExtractorFactory.createExtractor(file); + try { + assertNotNull(extractor); + + assertNotNull(extractor.getText()); + + // also try metadata + POITextExtractor metadataExtractor = extractor.getMetadataTextExtractor(); + assertNotNull(metadataExtractor.getText()); + + assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", + EXPECTED_EXTRACTOR_FAILURES.contains(file)); + } catch (IllegalArgumentException e) { + if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + throw new Exception("While handling " + file, e); + } + } finally { + extractor.close(); + } + } +} diff --git a/src/integrationtest/org/apache/poi/stress/FileHandler.java b/src/integrationtest/org/apache/poi/stress/FileHandler.java index e6f3385f02..ce2991b0bc 100644 --- a/src/integrationtest/org/apache/poi/stress/FileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/FileHandler.java @@ -16,6 +16,7 @@ ==================================================================== */ package org.apache.poi.stress; +import java.io.File; import 
java.io.InputStream; /** @@ -34,4 +35,10 @@ public interface FileHandler { * @throws Exception */ void handleFile(InputStream stream) throws Exception; + + /** + * Ensures that extracting text from the given file + * is returning some text. + */ + void handleExtracting(File file) throws Exception; } diff --git a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java index dfa8750058..9f492bf0ed 100644 --- a/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HMEFFileHandler.java @@ -26,7 +26,7 @@ import org.apache.poi.hmef.attribute.MAPIAttribute; import org.apache.poi.hmef.attribute.MAPIStringAttribute; import org.junit.Test; -public class HMEFFileHandler implements FileHandler { +public class HMEFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java index b7d846ae62..477ee859cb 100644 --- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java @@ -25,7 +25,7 @@ import org.apache.poi.hpsf.HPSFPropertiesOnlyDocument; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.junit.Test; -public class HPSFFileHandler implements FileHandler { +public class HPSFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { HPSFPropertiesOnlyDocument hpsf = new HPSFPropertiesOnlyDocument(new POIFSFileSystem(stream)); diff --git a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java index 19dbd97a0e..dd579c4dba 100644 --- a/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java +++ 
b/src/integrationtest/org/apache/poi/stress/HSSFFileHandler.java @@ -16,6 +16,7 @@ ==================================================================== */ package org.apache.poi.stress; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -49,4 +50,10 @@ public class HSSFFileHandler extends SpreadsheetHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/spreadsheet/BOOK_in_capitals.xls")); + } } \ No newline at end of file diff --git a/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java b/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java index 31deac7106..5c4a36e3ca 100644 --- a/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/POIFSFileHandler.java @@ -25,7 +25,7 @@ import java.io.InputStream; import org.apache.poi.POIDocument; import org.apache.poi.poifs.filesystem.POIFSFileSystem; -public class POIFSFileHandler implements FileHandler { +public class POIFSFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { diff --git a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java index aad703ce98..f12bbd2de5 100644 --- a/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java +++ b/src/integrationtest/org/apache/poi/stress/SpreadsheetHandler.java @@ -30,7 +30,7 @@ import org.apache.poi.ss.usermodel.Sheet; import org.apache.poi.ss.usermodel.Workbook; import org.apache.poi.ss.usermodel.WorkbookFactory; -public abstract class SpreadsheetHandler implements FileHandler { +public abstract class SpreadsheetHandler extends AbstractFileHandler { public void handleWorkbook(Workbook wb, String extension) throws IOException { // try to access some of the content 
readContent(wb); diff --git a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java index 3464218fd9..e6cbb184b2 100644 --- a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java @@ -25,7 +25,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xslf.XSLFSlideShow; import org.junit.Test; -public class XSLFFileHandler implements FileHandler { +public class XSLFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { // ignore password protected files diff --git a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java index a96d46da31..47c18d8aa0 100644 --- a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java @@ -22,7 +22,7 @@ import java.io.InputStream; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.junit.Test; -public class XWPFFileHandler implements FileHandler { +public class XWPFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { // ignore password protected files -- cgit v1.2.3 From 27c6da8286197840c0dbe909abb767a25fb5a28e Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 27 Feb 2015 11:33:36 +0000 Subject: Add missing javadoc-header git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662661 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/AbstractFileHandler.java | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index 85ebb1b451..d7e303e4a8 100644 --- 
a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -1,3 +1,19 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ package org.apache.poi.stress; import static org.junit.Assert.assertFalse; -- cgit v1.2.3 From a3e087268a6ff9d5b90d3d334593d56693e400b8 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 27 Feb 2015 14:58:41 +0000 Subject: * Verify some more Text-Extraction features as part of integration tests, fix some NullPointerExceptions that showed up now because the event-based extraction does not have a Document available * Also handle a XLSX which does not have row-numbers in the sheet-xml. 
Excel can read it so it makes sense to also allow to read it in the XSSFSheetXMLHandler * Remove some Eclipse warnings in test-code git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662691 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/AbstractFileHandler.java | 47 ++++++ .../org/apache/poi/stress/HPSFFileHandler.java | 7 + .../org/apache/poi/stress/XSSFFileHandler.java | 7 + .../hpsf/extractor/HPSFPropertiesExtractor.java | 8 + .../poi/hssf/eventusermodel/HSSFEventFactory.java | 24 ++- .../apache/poi/POIXMLPropertiesTextExtractor.java | 23 ++- .../org/apache/poi/extractor/ExtractorFactory.java | 6 +- .../xssf/eventusermodel/XSSFSheetXMLHandler.java | 13 +- .../TestXSSFEventBasedExcelExtractor.java | 23 ++- .../extractor/TestHPSFPropertiesExtractor.java | 165 ++++++++++++++------- .../hssf/eventusermodel/TestHSSFEventFactory.java | 16 +- .../poi/hssf/extractor/TestExcelExtractor.java | 151 +++++++++++-------- 12 files changed, 355 insertions(+), 135 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index d7e303e4a8..8a27e6d0e9 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -16,15 +16,23 @@ ==================================================================== */ package org.apache.poi.stress; +import static org.junit.Assert.assertEquals; import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertNotNull; import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; import java.util.HashSet; import java.util.Set; import org.apache.poi.POITextExtractor; import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.openxml4j.exceptions.InvalidFormatException; +import 
org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.xmlbeans.XmlException; public abstract class AbstractFileHandler implements FileHandler { public static final Set EXPECTED_EXTRACTOR_FAILURES = new HashSet(); @@ -48,6 +56,22 @@ public abstract class AbstractFileHandler implements FileHandler { } public void handleExtracting(File file) throws Exception { + boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); + try { + ExtractorFactory.setThreadPrefersEventExtractors(true); + handleExtractingInternal(file); + + ExtractorFactory.setThreadPrefersEventExtractors(false); + handleExtractingInternal(file); + } finally { + ExtractorFactory.setThreadPrefersEventExtractors(before); + } + } + + private void handleExtractingInternal(File file) throws Exception { + long length = file.length(); + long modified = file.lastModified(); + POITextExtractor extractor = ExtractorFactory.createExtractor(file); try { assertNotNull(extractor); @@ -60,6 +84,11 @@ public abstract class AbstractFileHandler implements FileHandler { assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", EXPECTED_EXTRACTOR_FAILURES.contains(file)); + + assertEquals("File should not be modified by extractor", length, file.length()); + assertEquals("File should not be modified by extractor", modified, file.lastModified()); + + handleExtractingAsStream(file); } catch (IllegalArgumentException e) { if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) { throw new Exception("While handling " + file, e); @@ -68,4 +97,22 @@ public abstract class AbstractFileHandler implements FileHandler { extractor.close(); } } + + private void handleExtractingAsStream(File file) throws FileNotFoundException, + IOException, InvalidFormatException, OpenXML4JException, + XmlException { + InputStream stream = new FileInputStream(file); + try { + POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream); + try { + 
assertNotNull(streamExtractor); + + assertNotNull(streamExtractor.getText()); + } finally { + streamExtractor.close(); + } + } finally { + stream.close(); + } + } } diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java index 477ee859cb..6a53b2e009 100644 --- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java @@ -18,6 +18,7 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -43,4 +44,10 @@ public class HPSFFileHandler extends AbstractFileHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/hpsf/TestBug44375.xls")); + } } diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java index 54a386ea00..a268ed4658 100644 --- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java @@ -17,6 +17,7 @@ package org.apache.poi.stress; import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; @@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler { stream.close(); } } + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/spreadsheet/56278.xlsx")); + } } \ No newline at end of file diff --git a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java index ce5301ac60..1a0db03897 100644 --- 
a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java +++ b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java @@ -57,6 +57,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor { } public String getDocumentSummaryInformationText() { + if(document == null) { // event based extractor does not have a document + return ""; + } + DocumentSummaryInformation dsi = document.getDocumentSummaryInformation(); StringBuffer text = new StringBuffer(); @@ -78,6 +82,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor { return text.toString(); } public String getSummaryInformationText() { + if(document == null) { // event based extractor does not have a document + return ""; + } + SummaryInformation si = document.getSummaryInformation(); // Just normal properties diff --git a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java index 45ab8d8131..4d0b894e01 100644 --- a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java +++ b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java @@ -19,6 +19,7 @@ package org.apache.poi.hssf.eventusermodel; import java.io.InputStream; import java.io.IOException; +import java.util.Set; import org.apache.poi.hssf.eventusermodel.HSSFUserException; import org.apache.poi.hssf.record.*; @@ -56,11 +57,24 @@ public class HSSFEventFactory { * @param req an Instance of HSSFRequest which has your registered listeners * @param dir a DirectoryNode containing your workbook */ - public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { - InputStream in = dir.createDocumentInputStream("Workbook"); - - processEvents(req, in); - } + public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { + // some old documents have "WORKBOOK" or "BOOK" + final String name; + Set entryNames = dir.getEntryNames(); + if 
(entryNames.contains("Workbook")) { + name = "Workbook"; + } else if (entryNames.contains("WORKBOOK")) { + name = "WORKBOOK"; + } else if (entryNames.contains("BOOK")) { + name = "BOOK"; + } else { + name = "Workbook"; + } + + InputStream in = dir.createDocumentInputStream(name); + + processEvents(req, in); + } /** * Processes a file into essentially record events. diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java index ce576439f2..8a35a34e4c 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java @@ -67,9 +67,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { * Returns the core document properties, eg author */ public String getCorePropertiesText() { + POIXMLDocument document = getDocument(); + if(document == null) { // event based extractor does not have a document + return ""; + } + StringBuffer text = new StringBuffer(); - PackagePropertiesPart props = - getDocument().getProperties().getCoreProperties().getUnderlyingProperties(); + PackagePropertiesPart props = + document.getProperties().getCoreProperties().getUnderlyingProperties(); appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); @@ -99,9 +104,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { * application */ public String getExtendedPropertiesText() { + POIXMLDocument document = getDocument(); + if(document == null) { // event based extractor does not have a document + return ""; + } + StringBuffer text = new StringBuffer(); org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties - props = getDocument().getProperties().getExtendedProperties().getUnderlyingProperties(); + props = 
document.getProperties().getExtendedProperties().getUnderlyingProperties(); appendIfPresent(text, "Application", props.getApplication()); appendIfPresent(text, "AppVersion", props.getAppVersion()); @@ -127,9 +137,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { */ @SuppressWarnings("deprecation") public String getCustomPropertiesText() { + POIXMLDocument document = getDocument(); + if(document == null) { // event based extractor does not have a document + return ""; + } + StringBuilder text = new StringBuilder(); org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties - props = getDocument().getProperties().getCustomProperties().getUnderlyingProperties(); + props = document.getProperties().getCustomProperties().getUnderlyingProperties(); for(CTProperty property : props.getPropertyArray()) { String val = "(not implemented!)"; diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index a0b6b5db17..60a0f51810 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -265,10 +265,10 @@ public class ExtractorFactory { /** * Returns an array of text extractors, one for each of - * the embeded documents in the file (if there are any). - * If there are no embeded documents, you'll get back an + * the embedded documents in the file (if there are any). + * If there are no embedded documents, you'll get back an * empty array. Otherwise, you'll get one open - * {@link POITextExtractor} for each embeded file. + * {@link POITextExtractor} for each embedded file. 
*/ public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { // All the embded directories we spotted diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java index 7677426876..227441859e 100644 --- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java +++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java @@ -96,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { private String formatString; private final DataFormatter formatter; private int rowNum; + private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well private String cellRef; private boolean formulasNotResults; @@ -240,7 +241,12 @@ public class XSSFSheetXMLHandler extends DefaultHandler { headerFooter.setLength(0); } else if("row".equals(name)) { - rowNum = Integer.parseInt(attributes.getValue("r")) - 1; + String rowNumStr = attributes.getValue("r"); + if(rowNumStr != null) { + rowNum = Integer.parseInt(rowNumStr) - 1; + } else { + rowNum = nextRowNum; + } output.startRow(rowNum); } // c => cell @@ -343,7 +349,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { case NUMBER: String n = value.toString(); - if (this.formatString != null) + if (this.formatString != null && n.length() > 0) thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); else thisStr = n; @@ -370,6 +376,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler { // Finish up the row output.endRow(rowNum); + + // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well + nextRowNum = rowNum + 1; } else if ("sheetData".equals(name)) { // Handle any "missing" cells which had comments 
attached checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java index e48787be0c..b56b3791f0 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java @@ -23,6 +23,7 @@ import java.util.regex.Pattern; import junit.framework.TestCase; import org.apache.poi.POITextExtractor; +import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.xssf.XSSFTestDataSamples; @@ -155,7 +156,6 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { POITextExtractor[] extractors = new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; for (int i = 0; i < extractors.length; i++) { - @SuppressWarnings("resource") POITextExtractor extractor = extractors[i]; String text = extractor.getText().replaceAll("[\r\t]", ""); @@ -316,4 +316,25 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { fixture.close(); } } + + public void testFile56278_normal() throws Exception { + // first with normal Text Extractor + POIXMLTextExtractor extractor = new XSSFExcelExtractor( + XSSFTestDataSamples.openSampleWorkbook("56278.xlsx")); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } + + public void testFile56278_event() throws Exception { + // then with event based one + POIXMLTextExtractor extractor = getExtractor("56278.xlsx"); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } } diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java index 
22238d75f1..c6ad03db2d 100644 --- a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java +++ b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java @@ -22,10 +22,12 @@ import java.io.IOException; import junit.framework.TestCase; import org.apache.poi.POIDataSamples; +import org.apache.poi.POITextExtractor; import org.apache.poi.hpsf.Thumbnail; import org.apache.poi.hssf.HSSFTestDataSamples; import org.apache.poi.hssf.extractor.ExcelExtractor; import org.apache.poi.hssf.usermodel.HSSFWorkbook; +import org.apache.poi.hwpf.extractor.Word6Extractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public final class TestHPSFPropertiesExtractor extends TestCase { @@ -34,45 +36,53 @@ public final class TestHPSFPropertiesExtractor extends TestCase { public void testNormalProperties() throws Exception { POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); - ext.getText(); - - // Check each bit in turn - String sinfText = ext.getSummaryInformationText(); - String dinfText = ext.getDocumentSummaryInformationText(); - - assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1); - assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1); - assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); - assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); - - // Now overall - String text = ext.getText(); - assertTrue(text.indexOf("TEMPLATE = Normal") > -1); - assertTrue(text.indexOf("SUBJECT = sample subject") > -1); - assertTrue(text.indexOf("MANAGER = sample manager") > -1); - assertTrue(text.indexOf("COMPANY = sample company") > -1); + try { + ext.getText(); + + // Check each bit in turn + String sinfText = ext.getSummaryInformationText(); + String dinfText = ext.getDocumentSummaryInformationText(); + + assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1); + assertTrue(sinfText.indexOf("SUBJECT = sample 
subject") > -1); + assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); + assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); + + // Now overall + String text = ext.getText(); + assertTrue(text.indexOf("TEMPLATE = Normal") > -1); + assertTrue(text.indexOf("SUBJECT = sample subject") > -1); + assertTrue(text.indexOf("MANAGER = sample manager") > -1); + assertTrue(text.indexOf("COMPANY = sample company") > -1); + } finally { + ext.close(); + } } public void testNormalUnicodeProperties() throws Exception { POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls")); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); - ext.getText(); - - // Check each bit in turn - String sinfText = ext.getSummaryInformationText(); - String dinfText = ext.getDocumentSummaryInformationText(); - - assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1); - assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); - assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); - assertTrue(dinfText.indexOf("SCALE = false") > -1); - - // Now overall - String text = ext.getText(); - assertTrue(text.indexOf("AUTHOR = marshall") > -1); - assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); - assertTrue(text.indexOf("COMPANY = Schreiner") > -1); - assertTrue(text.indexOf("SCALE = false") > -1); + try { + ext.getText(); + + // Check each bit in turn + String sinfText = ext.getSummaryInformationText(); + String dinfText = ext.getDocumentSummaryInformationText(); + + assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1); + assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); + assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); + assertTrue(dinfText.indexOf("SCALE = false") > -1); + + // Now overall + String text = ext.getText(); + assertTrue(text.indexOf("AUTHOR = marshall") > -1); + assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); + assertTrue(text.indexOf("COMPANY = Schreiner") > -1); + 
assertTrue(text.indexOf("SCALE = false") > -1); + } finally { + ext.close(); + } } public void testCustomProperties() throws Exception { @@ -80,18 +90,21 @@ public final class TestHPSFPropertiesExtractor extends TestCase { _samples.openResourceAsStream("TestMickey.doc") ); HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); - - // Custom properties are part of the document info stream - String dinfText = ext.getDocumentSummaryInformationText(); - assertTrue(dinfText.indexOf("Client = sample client") > -1); - assertTrue(dinfText.indexOf("Division = sample division") > -1); - - String text = ext.getText(); - assertTrue(text.indexOf("Client = sample client") > -1); - assertTrue(text.indexOf("Division = sample division") > -1); + try { + // Custom properties are part of the document info stream + String dinfText = ext.getDocumentSummaryInformationText(); + assertTrue(dinfText.indexOf("Client = sample client") > -1); + assertTrue(dinfText.indexOf("Division = sample division") > -1); + + String text = ext.getText(); + assertTrue(text.indexOf("Client = sample client") > -1); + assertTrue(text.indexOf("Division = sample division") > -1); + } finally { + ext.close(); + } } - public void testConstructors() { + public void testConstructors() throws IOException { POIFSFileSystem fs; HSSFWorkbook wb; try { @@ -102,9 +115,29 @@ public final class TestHPSFPropertiesExtractor extends TestCase { } ExcelExtractor excelExt = new ExcelExtractor(wb); - String fsText = (new HPSFPropertiesExtractor(fs)).getText(); - String hwText = (new HPSFPropertiesExtractor(wb)).getText(); - String eeText = (new HPSFPropertiesExtractor(excelExt)).getText(); + final String fsText; + HPSFPropertiesExtractor fsExt = new HPSFPropertiesExtractor(fs); + try { + fsText = fsExt.getText(); + } finally { + fsExt.close(); + } + + final String hwText; + HPSFPropertiesExtractor hwExt = new HPSFPropertiesExtractor(wb); + try { + hwText = hwExt.getText(); + } finally { + hwExt.close(); + } + + final 
String eeText; + HPSFPropertiesExtractor eeExt = new HPSFPropertiesExtractor(excelExt); + try { + eeText = eeExt.getText(); + } finally { + eeExt.close(); + } assertEquals(fsText, hwText); assertEquals(fsText, eeText); @@ -113,13 +146,17 @@ public final class TestHPSFPropertiesExtractor extends TestCase { assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1); } - public void test42726() { - HPSFPropertiesExtractor ex = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); - String txt = ex.getText(); - assertTrue(txt.indexOf("PID_AUTHOR") != -1); - assertTrue(txt.indexOf("PID_EDITTIME") != -1); - assertTrue(txt.indexOf("PID_REVNUMBER") != -1); - assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); + public void test42726() throws IOException { + HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); + try { + String txt = ext.getText(); + assertTrue(txt.indexOf("PID_AUTHOR") != -1); + assertTrue(txt.indexOf("PID_EDITTIME") != -1); + assertTrue(txt.indexOf("PID_REVNUMBER") != -1); + assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); + } finally { + ext.close(); + } } public void testThumbnail() throws Exception { @@ -131,4 +168,24 @@ public final class TestHPSFPropertiesExtractor extends TestCase { assertNotNull(thumbnail.getThumbnailAsWMF()); wb.close(); } + + public void testExtractorFromWord6Extractor() throws Exception { + POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); + Word6Extractor wExt = new Word6Extractor(fs); + try { + POITextExtractor ext = wExt.getMetadataTextExtractor(); + try { + // Now overall + String text = ext.getText(); + assertTrue(text.indexOf("TEMPLATE = Normal") > -1); + assertTrue(text.indexOf("SUBJECT = sample subject") > -1); + assertTrue(text.indexOf("MANAGER = sample manager") > -1); + assertTrue(text.indexOf("COMPANY = sample company") > -1); + } finally { + ext.close(); + } + } finally { + wExt.close(); + } + } } 
diff --git a/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java b/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java index 9766331085..ff76cfa19c 100644 --- a/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java +++ b/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java @@ -107,8 +107,6 @@ public final class TestHSSFEventFactory extends TestCase { POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls")); HSSFEventFactory factory = new HSSFEventFactory(); factory.processWorkbookEvents(req, fs); - - assertTrue("no errors while processing the file", true); } private static class MockHSSFListener implements HSSFListener { @@ -125,4 +123,18 @@ public final class TestHSSFEventFactory extends TestCase { records.add(record); } } + + public void testWithDifferentWorkbookName() throws Exception { + HSSFRequest req = new HSSFRequest(); + MockHSSFListener mockListen = new MockHSSFListener(); + req.addListenerForAllRecords(mockListen); + + POIFSFileSystem fs = new POIFSFileSystem(openSample("BOOK_in_capitals.xls")); + HSSFEventFactory factory = new HSSFEventFactory(); + factory.processWorkbookEvents(req, fs); + + fs = new POIFSFileSystem(openSample("WORKBOOK_in_capitals.xls")); + factory = new HSSFEventFactory(); + factory.processWorkbookEvents(req, fs); + } } diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index de82210291..f7584ff11b 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -46,15 +46,18 @@ public final class TestExcelExtractor extends TestCase { } - public void testSimple() { - + public void testSimple() throws IOException { ExcelExtractor extractor = createExtractor("Simple.xls"); - assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); - - 
// Now turn off sheet names - extractor.setIncludeSheetNames(false); - assertEquals("replaceMe\n", extractor.getText()); + try { + assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); + + // Now turn off sheet names + extractor.setIncludeSheetNames(false); + assertEquals("replaceMe\n", extractor.getText()); + } finally { + extractor.close(); + } } public void testNumericFormula() { @@ -126,45 +129,47 @@ public final class TestExcelExtractor extends TestCase { public void testEventExtractor() throws Exception { - EventBasedExcelExtractor extractor; - // First up, a simple file with string // based formulas in it - extractor = new EventBasedExcelExtractor( + EventBasedExcelExtractor extractor = new EventBasedExcelExtractor( new POIFSFileSystem( HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls") ) ); - extractor.setIncludeSheetNames(true); - - String text = extractor.getText(); - assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); - - extractor.setIncludeSheetNames(false); - extractor.setFormulasNotResults(true); - - text = extractor.getText(); - assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); - - - // Now, a slightly longer file with numeric formulas - extractor = new EventBasedExcelExtractor( - new POIFSFileSystem( - HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") - ) - ); - extractor.setIncludeSheetNames(false); - extractor.setFormulasNotResults(true); - - text = extractor.getText(); - assertEquals( - "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + - "2000\t2\n" + - "3000\t3\n" + - "4000\t4\n" + - "5000\t5\n", - text - ); + try { + extractor.setIncludeSheetNames(true); + + String text = extractor.getText(); + assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); + + extractor.setIncludeSheetNames(false); + extractor.setFormulasNotResults(true); + + text = extractor.getText(); + 
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); + + + // Now, a slightly longer file with numeric formulas + extractor = new EventBasedExcelExtractor( + new POIFSFileSystem( + HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") + ) + ); + extractor.setIncludeSheetNames(false); + extractor.setFormulasNotResults(true); + + text = extractor.getText(); + assertEquals( + "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + + "2000\t2\n" + + "3000\t3\n" + + "4000\t4\n" + + "5000\t5\n", + text + ); + } finally { + extractor.close(); + } } public void testWithComments() { @@ -272,15 +277,22 @@ public final class TestExcelExtractor extends TestCase { HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); ExcelExtractor exA = new ExcelExtractor(wbA); - ExcelExtractor exB = new ExcelExtractor(wbB); - - assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", - exA.getText()); - assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); - - assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", - exB.getText()); - assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + try { + ExcelExtractor exB = new ExcelExtractor(wbB); + try { + assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", + exA.getText()); + assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); + + assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", + exB.getText()); + assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + } finally { + exB.close(); + } + } finally { + exA.close(); + } } /** @@ -299,21 +311,32 @@ public final class TestExcelExtractor extends TestCase { HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); ExcelExtractor exA = new ExcelExtractor(wbA); - ExcelExtractor exB = new ExcelExtractor(wbB); - - assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", - exA.getText()); - 
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); - - assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", - exB.getText()); - assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); - - // And the base file too - ExcelExtractor ex = new ExcelExtractor(fs); - assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", - ex.getText()); - assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); + try { + ExcelExtractor exB = new ExcelExtractor(wbB); + try { + assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", + exA.getText()); + assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); + + assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", + exB.getText()); + assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); + + // And the base file too + ExcelExtractor ex = new ExcelExtractor(fs); + try { + assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", + ex.getText()); + assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); + } finally { + ex.close(); + } + } finally { + exB.close(); + } + } finally { + exA.close(); + } } /** -- cgit v1.2.3 From 6eeb0a7c19287ad149d13e3b6741a5233273aac5 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 1 Mar 2015 17:50:16 +0000 Subject: Add missing close and handle theme-pptx in ExtractorFactory. Add creating slide-bitmaps to PPTX integration test. 
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1663137 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/XSLFFileHandler.java | 66 ++++++++++++++++++++-- .../org/apache/poi/extractor/ExtractorFactory.java | 9 +++ 2 files changed, 71 insertions(+), 4 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java index e6cbb184b2..b734c4e4bc 100644 --- a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java @@ -18,35 +18,93 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.awt.Dimension; +import java.awt.Graphics2D; +import java.awt.image.BufferedImage; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; +import java.io.IOException; import java.io.InputStream; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.xslf.XSLFSlideShow; +import org.apache.poi.xslf.usermodel.XMLSlideShow; +import org.apache.poi.xslf.usermodel.XSLFNotes; +import org.apache.poi.xslf.usermodel.XSLFShape; +import org.apache.poi.xslf.usermodel.XSLFSlide; +import org.apache.poi.xslf.usermodel.XSLFTextParagraph; +import org.apache.poi.xslf.usermodel.XSLFTextShape; import org.junit.Test; public class XSLFFileHandler extends AbstractFileHandler { @Override public void handleFile(InputStream stream) throws Exception { - // ignore password protected files - if (POIXMLDocumentHandler.isEncrypted(stream)) return; - XSLFSlideShow slide = new XSLFSlideShow(OPCPackage.open(stream)); assertNotNull(slide.getPresentation()); assertNotNull(slide.getSlideMasterReferences()); assertNotNull(slide.getSlideReferences()); new POIXMLDocumentHandler().handlePOIXMLDocument(slide); + + ByteArrayOutputStream out = new ByteArrayOutputStream(); + try { + 
slide.write(out); + } finally { + out.close(); + } + + createBitmaps(out); } + private void createBitmaps(ByteArrayOutputStream out) throws IOException { + XMLSlideShow ppt = new XMLSlideShow(new ByteArrayInputStream(out.toByteArray())); + Dimension pgsize = ppt.getPageSize(); + XSLFSlide[] xmlSlide = ppt.getSlides(); + int slideSize = xmlSlide.length; + for (int i = 0; i < slideSize; i++) { +// System.out.println("slide-" + (i + 1)); +// System.out.println("" + xmlSlide[i].getTitle()); + + BufferedImage img = new BufferedImage(pgsize.width, pgsize.height, BufferedImage.TYPE_INT_RGB); + Graphics2D graphics = img.createGraphics(); + + // draw stuff + xmlSlide[i].draw(graphics); + + // Also try to read notes + XSLFNotes notes = xmlSlide[i].getNotes(); + if(notes != null) { + for (XSLFShape note : notes) { + note.draw(graphics); + + if (note instanceof XSLFTextShape) { + XSLFTextShape txShape = (XSLFTextShape) note; + for (XSLFTextParagraph xslfParagraph : txShape.getTextParagraphs()) { + xslfParagraph.getText(); + } + } + } + } + } + } + // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/slideshow/testPPT.pptx"); + InputStream stream = new FileInputStream("test-data/slideshow/pptx2svg.pptx"); try { handleFile(stream); } finally { stream.close(); } } + + + // a test-case to test this locally without executing the full TestAllFiles + @Test + public void testExtractor() throws Exception { + handleExtracting(new File("test-data/slideshow/testPPT.thmx")); + } } \ No newline at end of file diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 60a0f51810..46cd2cd386 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -51,6 +51,7 @@ import 
org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.Entry; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.apache.poi.xslf.XSLFSlideShow; import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor; import org.apache.poi.xslf.usermodel.XSLFRelation; import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor; @@ -190,6 +191,14 @@ public class ExtractorFactory { } } + // special handling for SlideShow-Theme-files, + if(XSLFRelation.THEME_MANAGER.getContentType().equals(corePart.getContentType())) { + return new XSLFPowerPointExtractor(new XSLFSlideShow(pkg)); + } + + // ensure that we close the package again if there is an error opening it, however + // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor! + pkg.revert(); throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+corePart.getContentType()+")"); } -- cgit v1.2.3 From 8f8886986a0775e7c4f0616f1f40530039cca1df Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 11 Mar 2015 19:07:13 +0000 Subject: Fix inconsistent indents git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665970 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/TestAllFiles.java | 344 ++++++++++----------- 1 file changed, 172 insertions(+), 172 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index d0439b40fd..f9782400d6 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -65,83 +65,83 @@ import org.junit.runners.Parameterized.Parameters; */ @RunWith(Parameterized.class) public class TestAllFiles { - private static final File ROOT_DIR = new File("test-data"); + private static final File ROOT_DIR = new 
File("test-data"); // map file extensions to the actual mappers - private static final Map HANDLERS = new HashMap(); - static { - // Excel - HANDLERS.put(".xls", new HSSFFileHandler()); - HANDLERS.put(".xlsx", new XSSFFileHandler()); - HANDLERS.put(".xlsm", new XSSFFileHandler()); - HANDLERS.put(".xltx", new XSSFFileHandler()); - HANDLERS.put(".xlsb", new XSSFFileHandler()); - - // Word - HANDLERS.put(".doc", new HWPFFileHandler()); - HANDLERS.put(".docx", new XWPFFileHandler()); - HANDLERS.put(".dotx", new XWPFFileHandler()); - HANDLERS.put(".docm", new XWPFFileHandler()); - HANDLERS.put(".ooxml", new XWPFFileHandler()); // OPCPackage - - // Powerpoint - HANDLERS.put(".ppt", new HSLFFileHandler()); - HANDLERS.put(".pptx", new XSLFFileHandler()); - HANDLERS.put(".pptm", new XSLFFileHandler()); - HANDLERS.put(".ppsm", new XSLFFileHandler()); - HANDLERS.put(".ppsx", new XSLFFileHandler()); - HANDLERS.put(".thmx", new XSLFFileHandler()); - - // Outlook - HANDLERS.put(".msg", new HSMFFileHandler()); - - // Publisher - HANDLERS.put(".pub", new HPBFFileHandler()); - - // Visio - HANDLERS.put(".vsd", new HDGFFileHandler()); - - // POIFS - HANDLERS.put(".ole2", new POIFSFileHandler()); - - // Microsoft Admin Template? - HANDLERS.put(".adm", new HPSFFileHandler()); - - // Microsoft TNEF - HANDLERS.put(".dat", new HMEFFileHandler()); - - // TODO: are these readable by some of the formats? - HANDLERS.put(".shw", new NullFileHandler()); - HANDLERS.put(".zvi", new NullFileHandler()); - HANDLERS.put(".mpp", new NullFileHandler()); - HANDLERS.put(".qwp", new NullFileHandler()); - HANDLERS.put(".wps", new NullFileHandler()); - HANDLERS.put(".bin", new NullFileHandler()); - HANDLERS.put(".xps", new NullFileHandler()); - HANDLERS.put(".sldprt", new NullFileHandler()); - HANDLERS.put(".mdb", new NullFileHandler()); - HANDLERS.put(".vml", new NullFileHandler()); - - // ignore some file types, images, other formats, ... 
- HANDLERS.put(".txt", new NullFileHandler()); - HANDLERS.put(".pdf", new NullFileHandler()); - HANDLERS.put(".rtf", new NullFileHandler()); - HANDLERS.put(".gif", new NullFileHandler()); - HANDLERS.put(".html", new NullFileHandler()); - HANDLERS.put(".png", new NullFileHandler()); - HANDLERS.put(".wmf", new NullFileHandler()); - HANDLERS.put(".emf", new NullFileHandler()); - HANDLERS.put(".dib", new NullFileHandler()); - HANDLERS.put(".svg", new NullFileHandler()); - HANDLERS.put(".pict", new NullFileHandler()); - HANDLERS.put(".jpg", new NullFileHandler()); - HANDLERS.put(".wav", new NullFileHandler()); - HANDLERS.put(".pfx", new NullFileHandler()); - HANDLERS.put(".xml", new NullFileHandler()); - HANDLERS.put(".csv", new NullFileHandler()); - - // map some files without extension - HANDLERS.put("spreadsheet/BigSSTRecord", new NullFileHandler()); + private static final Map HANDLERS = new HashMap(); + static { + // Excel + HANDLERS.put(".xls", new HSSFFileHandler()); + HANDLERS.put(".xlsx", new XSSFFileHandler()); + HANDLERS.put(".xlsm", new XSSFFileHandler()); + HANDLERS.put(".xltx", new XSSFFileHandler()); + HANDLERS.put(".xlsb", new XSSFFileHandler()); + + // Word + HANDLERS.put(".doc", new HWPFFileHandler()); + HANDLERS.put(".docx", new XWPFFileHandler()); + HANDLERS.put(".dotx", new XWPFFileHandler()); + HANDLERS.put(".docm", new XWPFFileHandler()); + HANDLERS.put(".ooxml", new XWPFFileHandler()); // OPCPackage + + // Powerpoint + HANDLERS.put(".ppt", new HSLFFileHandler()); + HANDLERS.put(".pptx", new XSLFFileHandler()); + HANDLERS.put(".pptm", new XSLFFileHandler()); + HANDLERS.put(".ppsm", new XSLFFileHandler()); + HANDLERS.put(".ppsx", new XSLFFileHandler()); + HANDLERS.put(".thmx", new XSLFFileHandler()); + + // Outlook + HANDLERS.put(".msg", new HSMFFileHandler()); + + // Publisher + HANDLERS.put(".pub", new HPBFFileHandler()); + + // Visio + HANDLERS.put(".vsd", new HDGFFileHandler()); + + // POIFS + HANDLERS.put(".ole2", new POIFSFileHandler()); + + 
// Microsoft Admin Template? + HANDLERS.put(".adm", new HPSFFileHandler()); + + // Microsoft TNEF + HANDLERS.put(".dat", new HMEFFileHandler()); + + // TODO: are these readable by some of the formats? + HANDLERS.put(".shw", new NullFileHandler()); + HANDLERS.put(".zvi", new NullFileHandler()); + HANDLERS.put(".mpp", new NullFileHandler()); + HANDLERS.put(".qwp", new NullFileHandler()); + HANDLERS.put(".wps", new NullFileHandler()); + HANDLERS.put(".bin", new NullFileHandler()); + HANDLERS.put(".xps", new NullFileHandler()); + HANDLERS.put(".sldprt", new NullFileHandler()); + HANDLERS.put(".mdb", new NullFileHandler()); + HANDLERS.put(".vml", new NullFileHandler()); + + // ignore some file types, images, other formats, ... + HANDLERS.put(".txt", new NullFileHandler()); + HANDLERS.put(".pdf", new NullFileHandler()); + HANDLERS.put(".rtf", new NullFileHandler()); + HANDLERS.put(".gif", new NullFileHandler()); + HANDLERS.put(".html", new NullFileHandler()); + HANDLERS.put(".png", new NullFileHandler()); + HANDLERS.put(".wmf", new NullFileHandler()); + HANDLERS.put(".emf", new NullFileHandler()); + HANDLERS.put(".dib", new NullFileHandler()); + HANDLERS.put(".svg", new NullFileHandler()); + HANDLERS.put(".pict", new NullFileHandler()); + HANDLERS.put(".jpg", new NullFileHandler()); + HANDLERS.put(".wav", new NullFileHandler()); + HANDLERS.put(".pfx", new NullFileHandler()); + HANDLERS.put(".xml", new NullFileHandler()); + HANDLERS.put(".csv", new NullFileHandler()); + + // map some files without extension + HANDLERS.put("spreadsheet/BigSSTRecord", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2CR1", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2CR2", new NullFileHandler()); @@ -151,68 +151,68 @@ public class TestAllFiles { HANDLERS.put("spreadsheet/BigSSTRecord2CR6", new NullFileHandler()); HANDLERS.put("spreadsheet/BigSSTRecord2CR7", new NullFileHandler()); 
HANDLERS.put("spreadsheet/BigSSTRecordCR", new NullFileHandler()); - HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler()); - } - - private static final Set EXPECTED_FAILURES = new HashSet(); - static { - // password protected files - EXPECTED_FAILURES.add("spreadsheet/password.xls"); - EXPECTED_FAILURES.add("spreadsheet/51832.xls"); - EXPECTED_FAILURES.add("document/PasswordProtected.doc"); - EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt"); - EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt"); - EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt"); - EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt"); - //EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx"); - //EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx"); - EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); + HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler()); + } + + private static final Set EXPECTED_FAILURES = new HashSet(); + static { + // password protected files + EXPECTED_FAILURES.add("spreadsheet/password.xls"); + EXPECTED_FAILURES.add("spreadsheet/51832.xls"); + EXPECTED_FAILURES.add("document/PasswordProtected.doc"); + EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt"); + EXPECTED_FAILURES.add("slideshow/Password_Protected-56-hello.ppt"); + EXPECTED_FAILURES.add("slideshow/Password_Protected-np-hello.ppt"); + EXPECTED_FAILURES.add("slideshow/cryptoapi-proc2356.ppt"); + //EXPECTED_FAILURES.add("document/bug53475-password-is-pass.docx"); + //EXPECTED_FAILURES.add("document/bug53475-password-is-solrcell.docx"); + EXPECTED_FAILURES.add("spreadsheet/xor-encryption-abc.xls"); EXPECTED_FAILURES.add("spreadsheet/35897-type4.xls"); //EXPECTED_FAILURES.add("poifs/protect.xlsx"); //EXPECTED_FAILURES.add("poifs/protected_sha512.xlsx"); //EXPECTED_FAILURES.add("poifs/extenxls_pwd123.xlsx"); //EXPECTED_FAILURES.add("poifs/protected_agile.docx"); - - // TODO: fails 
XMLExportTest, is this ok? - EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx"); - EXPECTED_FAILURES.add("spreadsheet/55864.xlsx"); - - // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()! - EXPECTED_FAILURES.add("spreadsheet/44958.xls"); - EXPECTED_FAILURES.add("spreadsheet/44958_1.xls"); - EXPECTED_FAILURES.add("spreadsheet/testArraysAndTables.xls"); - - // TODO: good to ignore? - EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx"); - EXPECTED_FAILURES.add("spreadsheet/49931.xls"); - EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml"); - - // This is actually a spreadsheet! - EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc"); - - // some files that are broken, Excel 5.0/95, Word 95, ... - EXPECTED_FAILURES.add("spreadsheet/43493.xls"); - EXPECTED_FAILURES.add("spreadsheet/46904.xls"); - EXPECTED_FAILURES.add("document/56880.doc"); - EXPECTED_FAILURES.add("document/Bug49933.doc"); - EXPECTED_FAILURES.add("document/Bug50955.doc"); - EXPECTED_FAILURES.add("document/Bug51944.doc"); - EXPECTED_FAILURES.add("document/Word6.doc"); - EXPECTED_FAILURES.add("document/Word6_sections.doc"); - EXPECTED_FAILURES.add("document/Word6_sections2.doc"); - EXPECTED_FAILURES.add("document/Word95.doc"); - EXPECTED_FAILURES.add("document/word95err.doc"); - EXPECTED_FAILURES.add("hpsf/TestMickey.doc"); - EXPECTED_FAILURES.add("slideshow/PPT95.ppt"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx"); - 
EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx"); - EXPECTED_FAILURES.add("openxml4j/OPCCompliance_DerivedPartNameFAIL.docx"); - EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764() - EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764() + + // TODO: fails XMLExportTest, is this ok? + EXPECTED_FAILURES.add("spreadsheet/CustomXMLMapping-singleattributenamespace.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/55864.xlsx"); + + // TODO: these fail now with some NPE/file read error because we now try to compute every value via Cell.toString()! + EXPECTED_FAILURES.add("spreadsheet/44958.xls"); + EXPECTED_FAILURES.add("spreadsheet/44958_1.xls"); + EXPECTED_FAILURES.add("spreadsheet/testArraysAndTables.xls"); + + // TODO: good to ignore? + EXPECTED_FAILURES.add("spreadsheet/sample-beta.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/49931.xls"); + EXPECTED_FAILURES.add("openxml4j/ContentTypeHasParameters.ooxml"); + + // This is actually a spreadsheet! + EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc"); + + // some files that are broken, Excel 5.0/95, Word 95, ... 
+ EXPECTED_FAILURES.add("spreadsheet/43493.xls"); + EXPECTED_FAILURES.add("spreadsheet/46904.xls"); + EXPECTED_FAILURES.add("document/56880.doc"); + EXPECTED_FAILURES.add("document/Bug49933.doc"); + EXPECTED_FAILURES.add("document/Bug50955.doc"); + EXPECTED_FAILURES.add("document/Bug51944.doc"); + EXPECTED_FAILURES.add("document/Word6.doc"); + EXPECTED_FAILURES.add("document/Word6_sections.doc"); + EXPECTED_FAILURES.add("document/Word6_sections2.doc"); + EXPECTED_FAILURES.add("document/Word95.doc"); + EXPECTED_FAILURES.add("document/word95err.doc"); + EXPECTED_FAILURES.add("hpsf/TestMickey.doc"); + EXPECTED_FAILURES.add("slideshow/PPT95.ppt"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_NotPresentFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_LimitedXSITypeAttribute_PresentWithUnauthorizedValueFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_OnlyOneCorePropertiesPartFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_UnauthorizedXMLLangAttributeFAIL.docx"); + EXPECTED_FAILURES.add("openxml4j/OPCCompliance_DerivedPartNameFAIL.docx"); + EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764() + EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764() EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_3.xls"); @@ -220,19 +220,19 @@ public class TestAllFiles { EXPECTED_FAILURES.add("spreadsheet/testEXCEL_5.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_95.xls"); - // non-TNEF files - EXPECTED_FAILURES.add("ddf/Container.dat"); - EXPECTED_FAILURES.add("ddf/47143.dat"); - } - + // non-TNEF files 
+ EXPECTED_FAILURES.add("ddf/Container.dat"); + EXPECTED_FAILURES.add("ddf/47143.dat"); + } + @Parameters(name="{index}: {0} using {1}") public static Iterable files() { DirectoryScanner scanner = new DirectoryScanner(); scanner.setBasedir(ROOT_DIR); scanner.setExcludes(new String[] { "**/.svn/**" }); - + scanner.scan(); - + System.out.println("Handling " + scanner.getIncludedFiles().length + " files"); List files = new ArrayList(); @@ -240,31 +240,31 @@ public class TestAllFiles { file = file.replace('\\', '/'); // ... failures/handlers lookup doesn't work on windows otherwise files.add(new Object[] { file, HANDLERS.get(getExtension(file)) }); } - + return files; - } - + } + @Parameter(value=0) public String file; - + @Parameter(value=1) public FileHandler handler; - + @Test public void testAllFiles() throws Exception { - assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler); - File inputFile = new File(ROOT_DIR, file); - - try { + assertNotNull("Unknown file extension for file: " + file + ": " + getExtension(file), handler); + File inputFile = new File(ROOT_DIR, file); + + try { InputStream stream = new BufferedInputStream(new FileInputStream(inputFile),100); - try { - handler.handleFile(stream); - - assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", - EXPECTED_FAILURES.contains(file)); - } finally { - stream.close(); - } + try { + handler.handleFile(stream); + + assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", + EXPECTED_FAILURES.contains(file)); + } finally { + stream.close(); + } handler.handleExtracting(inputFile); } catch (Exception e) { @@ -273,24 +273,24 @@ public class TestAllFiles { throw new Exception("While handling " + file, e); } } - } - - private static String getExtension(String file) { - int pos = file.lastIndexOf('.'); - if(pos == -1 || pos == file.length()-1) { - return file; - } - - return 
file.substring(pos); - } - - private static class NullFileHandler implements FileHandler { - @Override + } + + private static String getExtension(String file) { + int pos = file.lastIndexOf('.'); + if(pos == -1 || pos == file.length()-1) { + return file; + } + + return file.substring(pos); + } + + private static class NullFileHandler implements FileHandler { + @Override public void handleFile(InputStream stream) throws Exception { - } + } - @Override + @Override public void handleExtracting(File file) throws Exception { } - } + } } -- cgit v1.2.3 From ac92165d20b95fee5e78ed93e861857606f515b4 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 11 Mar 2015 19:08:59 +0000 Subject: No support as yet for the OOXML visio files, so have these ignored git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665971 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/TestAllFiles.java | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index f9782400d6..25e837b927 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -98,8 +98,16 @@ public class TestAllFiles { // Publisher HANDLERS.put(".pub", new HPBFFileHandler()); - // Visio + // Visio - binary HANDLERS.put(".vsd", new HDGFFileHandler()); + + // Visio - ooxml (currently unsupported) + HANDLERS.put(".vsdm", new NullFileHandler()); + HANDLERS.put(".vsdx", new NullFileHandler()); + HANDLERS.put(".vssm", new NullFileHandler()); + HANDLERS.put(".vssx", new NullFileHandler()); + HANDLERS.put(".vstm", new NullFileHandler()); + HANDLERS.put(".vstx", new NullFileHandler()); // POIFS HANDLERS.put(".ole2", new POIFSFileHandler()); -- cgit v1.2.3 From 44d37a24ade3758fda609083bdcec67f8a4a2d50 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Wed, 11 Mar 2015 19:53:26 
+0000 Subject: Add helper test to verify that vsdx cannot even be loaded by POIXMLDocument currently. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665984 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/stress/POIXMLDocumentHandler.java | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java index 103bb9be7e..c720272352 100644 --- a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java +++ b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java @@ -20,9 +20,16 @@ import static org.junit.Assert.assertNotNull; import java.io.IOException; import java.io.InputStream; +import java.util.List; import org.apache.poi.POIXMLDocument; +import org.apache.poi.openxml4j.exceptions.OpenXML4JException; +import org.apache.poi.openxml4j.opc.OPCPackage; +import org.apache.poi.openxml4j.opc.PackageAccess; +import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.poifs.filesystem.POIFSFileSystem; +import org.junit.Ignore; +import org.junit.Test; public final class POIXMLDocumentHandler { protected void handlePOIXMLDocument(POIXMLDocument doc) throws Exception { @@ -43,4 +50,26 @@ public final class POIXMLDocumentHandler { } return false; } + + // a test-case to test this locally without executing the full TestAllFiles + @Ignore("POIXMLDocument cannot handle this Visio file currently...") + @Test + public void test() throws Exception { + OPCPackage pkg = OPCPackage.open("test-data/diagram/test.vsdx", PackageAccess.READ); + try { + handlePOIXMLDocument(new TestPOIXMLDocument(pkg)); + } finally { + pkg.close(); + } + } + + private final static class TestPOIXMLDocument extends POIXMLDocument { + public TestPOIXMLDocument(OPCPackage pkg) { + super(pkg); + } + + public List getAllEmbedds() throws OpenXML4JException { + return null; + } + } } 
-- cgit v1.2.3 From 1f3f88a299fe43c84626764de825bb95e0d2783a Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Fri, 13 Mar 2015 13:03:53 +0000 Subject: Exclude the OOXML strict files for now, not yet supported, see bug #57699 git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1666433 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/TestAllFiles.java | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 25e837b927..e8de685f9c 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -227,6 +227,11 @@ public class TestAllFiles { EXPECTED_FAILURES.add("spreadsheet/testEXCEL_4.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_5.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_95.xls"); + + // OOXML Strict is not yet supported, see bug #57699 + EXPECTED_FAILURES.add("spreadsheet/SampleSS.strict.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/SimpleStrict.xlsx"); + EXPECTED_FAILURES.add("spreadsheet/sample.strict.xlsx"); // non-TNEF files EXPECTED_FAILURES.add("ddf/Container.dat"); -- cgit v1.2.3 From 0850e7d846a71cbd90f919f841917a1cfedad006 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 22 Mar 2015 13:33:43 +0000 Subject: Bug 47304: use fixed encoding when extracting text in WordDocument git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1668367 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/HWPFFileHandler.java | 48 ++++++++++++++++++++- .../org/apache/poi/hdf/extractor/WordDocument.java | 2 +- .../apache/poi/hdf/extractor/TestWordDocument.java | 36 ++++++++++++++++ test-data/document/47304.doc | Bin 0 -> 22016 bytes 4 files changed, 84 insertions(+), 2 deletions(-) create mode 100644 test-data/document/47304.doc (limited to 'src/integrationtest') diff --git 
a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java index 1b6d4646c7..5f24337fbb 100644 --- a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java @@ -18,12 +18,21 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.File; import java.io.FileInputStream; +import java.io.IOException; import java.io.InputStream; +import java.io.PrintWriter; +import java.io.StringWriter; +import org.apache.poi.hdf.extractor.WordDocument; import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.extractor.WordExtractor; import org.junit.Test; +@SuppressWarnings("deprecation") public class HWPFFileHandler extends POIFSFileHandler { @Override public void handleFile(InputStream stream) throws Exception { @@ -33,16 +42,53 @@ public class HWPFFileHandler extends POIFSFileHandler { assertNotNull(doc.getEndnotes()); handlePOIDocument(doc); + + // fails for many documents, but is deprecated anyway... 
+ // handleWordDocument(doc); + } + + protected void handleWordDocument(HWPFDocument doc) throws IOException { + ByteArrayOutputStream outStream = new ByteArrayOutputStream(); + doc.write(outStream); + + WordDocument wordDoc = new WordDocument(new ByteArrayInputStream(outStream.toByteArray())); + + StringWriter docTextWriter = new StringWriter(); + PrintWriter out = new PrintWriter(docTextWriter); + try { + wordDoc.writeAllText(out); + } finally { + out.close(); + } + docTextWriter.close(); } + + // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/document/HeaderFooterUnicode.doc"); + File file = new File("test-data/document/47304.doc"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + WordExtractor extractor = new WordExtractor(stream); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } finally { + stream.close(); + } } } \ No newline at end of file diff --git a/src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java b/src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java index 929de311b7..ff53300321 100644 --- a/src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java +++ b/src/scratchpad/src/org/apache/poi/hdf/extractor/WordDocument.java @@ -177,7 +177,7 @@ public final class WordDocument { } else { - String sText = new String(_header, start, end-start); + String sText = new String(_header, start, end-start, "windows-1252"); out.write(sText); } } diff --git a/src/scratchpad/testcases/org/apache/poi/hdf/extractor/TestWordDocument.java b/src/scratchpad/testcases/org/apache/poi/hdf/extractor/TestWordDocument.java index 1cf29f4376..f0941674ff 100644 --- a/src/scratchpad/testcases/org/apache/poi/hdf/extractor/TestWordDocument.java +++ 
b/src/scratchpad/testcases/org/apache/poi/hdf/extractor/TestWordDocument.java @@ -17,6 +17,15 @@ package org.apache.poi.hdf.extractor; +import static org.junit.Assert.*; + +import java.io.IOException; +import java.io.PrintWriter; +import java.io.StringWriter; + +import org.apache.poi.hwpf.HWPFDocument; +import org.apache.poi.hwpf.HWPFTestDataSamples; +import org.apache.poi.hwpf.extractor.WordExtractor; import org.junit.Test; @@ -31,4 +40,31 @@ public class TestWordDocument { //WordDocument.main(new String[] {"test-data/document/Word6.doc", "/tmp/test.doc"}); WordDocument.main(new String[] {"test-data/document/53446.doc", "/tmp/test.doc"}); } + + @SuppressWarnings("deprecation") + @Test + public void test47304() throws IOException { + HWPFDocument doc = HWPFTestDataSamples.openSampleFile("47304.doc"); + assertNotNull(doc); + + WordExtractor extractor = new WordExtractor(doc); + String text = extractor.getText(); + //System.out.println(text); + assertTrue("Had: " + text, text.contains("Just a \u201Ctest\u201D")); + extractor.close(); + + WordDocument wordDoc = new WordDocument("test-data/document/47304.doc"); + + StringWriter docTextWriter = new StringWriter(); + PrintWriter out = new PrintWriter(docTextWriter); + try { + wordDoc.writeAllText(out); + } finally { + out.close(); + } + docTextWriter.close(); + + //System.out.println(docTextWriter.toString()); + assertTrue("Had: " + docTextWriter.toString(), docTextWriter.toString().contains("Just a \u201Ctest\u201D")); + } } diff --git a/test-data/document/47304.doc b/test-data/document/47304.doc new file mode 100644 index 0000000000..d59d8d7ee1 Binary files /dev/null and b/test-data/document/47304.doc differ -- cgit v1.2.3 From d6f8268e561ad2418e8282cea8c1c7a31306d673 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 22 Mar 2015 13:42:06 +0000 Subject: Add more test-files from bug 44501, all seem to work fine now git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1668369 13f79535-47bb-0310-9956-ffa450edef68 
--- .../org/apache/poi/stress/HDGFFileHandler.java | 20 +++++++++++++++++++- test-data/diagram/44501a.vsd | Bin 0 -> 74752 bytes test-data/diagram/44501b.vsd | Bin 0 -> 87040 bytes test-data/diagram/44501c.vsd | Bin 0 -> 22016 bytes test-data/diagram/44501d.vsd | Bin 0 -> 87040 bytes test-data/diagram/44501e.vsd | Bin 0 -> 209920 bytes 6 files changed, 19 insertions(+), 1 deletion(-) create mode 100755 test-data/diagram/44501a.vsd create mode 100755 test-data/diagram/44501b.vsd create mode 100644 test-data/diagram/44501c.vsd create mode 100755 test-data/diagram/44501d.vsd create mode 100644 test-data/diagram/44501e.vsd (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java index 7fac6647a3..b9fe93a668 100644 --- a/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HDGFFileHandler.java @@ -19,10 +19,12 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; import static org.junit.Assert.assertTrue; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.hdgf.HDGFDiagram; +import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hdgf.streams.Stream; import org.apache.poi.hdgf.streams.TrailerStream; import org.apache.poi.poifs.filesystem.POIFSFileSystem; @@ -48,11 +50,27 @@ public class HDGFFileHandler extends POIFSFileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/diagram/44501.vsd"); + File file = new File("test-data/diagram/44501.vsd"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + VisioTextExtractor extractor = new 
VisioTextExtractor(stream); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } finally { + stream.close(); + } } } diff --git a/test-data/diagram/44501a.vsd b/test-data/diagram/44501a.vsd new file mode 100755 index 0000000000..7d9a3cefc6 Binary files /dev/null and b/test-data/diagram/44501a.vsd differ diff --git a/test-data/diagram/44501b.vsd b/test-data/diagram/44501b.vsd new file mode 100755 index 0000000000..c8bd7a190d Binary files /dev/null and b/test-data/diagram/44501b.vsd differ diff --git a/test-data/diagram/44501c.vsd b/test-data/diagram/44501c.vsd new file mode 100644 index 0000000000..51de23a325 Binary files /dev/null and b/test-data/diagram/44501c.vsd differ diff --git a/test-data/diagram/44501d.vsd b/test-data/diagram/44501d.vsd new file mode 100755 index 0000000000..2c1632ebae Binary files /dev/null and b/test-data/diagram/44501d.vsd differ diff --git a/test-data/diagram/44501e.vsd b/test-data/diagram/44501e.vsd new file mode 100644 index 0000000000..3ca5552304 Binary files /dev/null and b/test-data/diagram/44501e.vsd differ -- cgit v1.2.3 From a4b104d68e4db5856d6da098646760e59de6d8f6 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 22 Mar 2015 21:47:19 +0000 Subject: Add test-document to verify that bug 51921 is fixed already git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1668482 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/HPBFFileHandler.java | 21 ++++++++++++++++++++- .../org/apache/poi/stress/HWPFFileHandler.java | 4 ++-- .../org/apache/poi/stress/XWPFFileHandler.java | 8 +++++++- test-data/document/51921-Word-Crash067.doc | Bin 0 -> 56832 bytes test-data/document/51921-Word-Crash067.docx | Bin 0 -> 20006 bytes 5 files changed, 29 insertions(+), 4 deletions(-) create mode 100644 test-data/document/51921-Word-Crash067.doc create mode 100644 test-data/document/51921-Word-Crash067.docx (limited to 'src/integrationtest') diff --git 
a/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java index 31ad8bc123..a41b6ebadf 100644 --- a/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HPBFFileHandler.java @@ -18,10 +18,12 @@ package org.apache.poi.stress; import static org.junit.Assert.assertNotNull; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.hpbf.HPBFDocument; +import org.apache.poi.hpbf.extractor.PublisherTextExtractor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.junit.Test; @@ -39,11 +41,28 @@ public class HPBFFileHandler extends POIFSFileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/publisher/SampleBrochure.pub"); + File file = new File("test-data/publisher/SampleBrochure.pub"); + + InputStream stream = new FileInputStream(file); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); + + stream = new FileInputStream(file); + try { + PublisherTextExtractor extractor = new PublisherTextExtractor(stream); + try { + assertNotNull(extractor.getText()); + } finally { + extractor.close(); + } + } finally { + stream.close(); + } } + } diff --git a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java index 5f24337fbb..3a223674cd 100644 --- a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java @@ -68,7 +68,7 @@ public class HWPFFileHandler extends POIFSFileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - File file = new File("test-data/document/47304.doc"); + File file = new 
File("test-data/document/51921-Word-Crash067.doc"); InputStream stream = new FileInputStream(file); try { @@ -91,4 +91,4 @@ public class HWPFFileHandler extends POIFSFileHandler { stream.close(); } } -} \ No newline at end of file +} diff --git a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java index 47c18d8aa0..c097dc9f71 100644 --- a/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XWPFFileHandler.java @@ -16,8 +16,10 @@ ==================================================================== */ package org.apache.poi.stress; +import java.io.File; import java.io.FileInputStream; import java.io.InputStream; +import java.io.PushbackInputStream; import org.apache.poi.xwpf.usermodel.XWPFDocument; import org.junit.Test; @@ -36,12 +38,16 @@ public class XWPFFileHandler extends AbstractFileHandler { // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/document/footnotes.docx"); + File file = new File("test-data/document/51921-Word-Crash067.docx"); + + InputStream stream = new PushbackInputStream(new FileInputStream(file), 100000); try { handleFile(stream); } finally { stream.close(); } + + handleExtracting(file); } } \ No newline at end of file diff --git a/test-data/document/51921-Word-Crash067.doc b/test-data/document/51921-Word-Crash067.doc new file mode 100644 index 0000000000..dc8bfff278 Binary files /dev/null and b/test-data/document/51921-Word-Crash067.doc differ diff --git a/test-data/document/51921-Word-Crash067.docx b/test-data/document/51921-Word-Crash067.docx new file mode 100644 index 0000000000..12c27c85e4 Binary files /dev/null and b/test-data/document/51921-Word-Crash067.docx differ -- cgit v1.2.3 From b39c87b884892b37b71b2dde3984d0bd24b0a6ab Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 
22 Mar 2015 21:47:44 +0000 Subject: Integration tests: Expect exception for old word documents and still run the text extraction for them. Also add executing HPSFPropertiesExtractor where possible git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1668483 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/TestAllFiles.java | 37 ++++++++++++++++----- .../org/apache/poi/stress/AbstractFileHandler.java | 15 +++++++++ .../org/apache/poi/stress/HWPFFileHandler.java | 10 ++++-- test-data/document/52117.doc | Bin 0 -> 7168 bytes 4 files changed, 50 insertions(+), 12 deletions(-) create mode 100644 test-data/document/52117.doc (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index e8de685f9c..8a66024f7b 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -31,6 +31,7 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.poi.hwpf.OldWordFileFormatException; import org.apache.poi.stress.*; import org.apache.tools.ant.DirectoryScanner; import org.junit.Test; @@ -162,6 +163,20 @@ public class TestAllFiles { HANDLERS.put("spreadsheet/test_properties1", new NullFileHandler()); } + // Old Word Documents where we can at least extract some text + private static final Set OLD_FILES = new HashSet(); + static { + OLD_FILES.add("document/Bug49933.doc"); + OLD_FILES.add("document/Bug51944.doc"); + OLD_FILES.add("document/Word6.doc"); + OLD_FILES.add("document/Word6_sections.doc"); + OLD_FILES.add("document/Word6_sections2.doc"); + OLD_FILES.add("document/Word95.doc"); + OLD_FILES.add("document/word95err.doc"); + OLD_FILES.add("hpsf/TestMickey.doc"); + OLD_FILES.add("document/52117.doc"); + } + private static final Set EXPECTED_FAILURES = new HashSet(); static { // password protected files @@ -202,15 +217,7 @@ public class TestAllFiles { 
EXPECTED_FAILURES.add("spreadsheet/43493.xls"); EXPECTED_FAILURES.add("spreadsheet/46904.xls"); EXPECTED_FAILURES.add("document/56880.doc"); - EXPECTED_FAILURES.add("document/Bug49933.doc"); EXPECTED_FAILURES.add("document/Bug50955.doc"); - EXPECTED_FAILURES.add("document/Bug51944.doc"); - EXPECTED_FAILURES.add("document/Word6.doc"); - EXPECTED_FAILURES.add("document/Word6_sections.doc"); - EXPECTED_FAILURES.add("document/Word6_sections2.doc"); - EXPECTED_FAILURES.add("document/Word95.doc"); - EXPECTED_FAILURES.add("document/word95err.doc"); - EXPECTED_FAILURES.add("hpsf/TestMickey.doc"); EXPECTED_FAILURES.add("slideshow/PPT95.ppt"); EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DCTermsNamespaceLimitedUseFAIL.docx"); EXPECTED_FAILURES.add("openxml4j/OPCCompliance_CoreProperties_DoNotUseCompatibilityMarkupFAIL.docx"); @@ -269,17 +276,29 @@ public class TestAllFiles { File inputFile = new File(ROOT_DIR, file); try { - InputStream stream = new BufferedInputStream(new FileInputStream(inputFile),100); + InputStream stream = new BufferedInputStream(new FileInputStream(inputFile), 64*1024); try { handler.handleFile(stream); assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", EXPECTED_FAILURES.contains(file)); + assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", + OLD_FILES.contains(file)); } finally { stream.close(); } handler.handleExtracting(inputFile); + } catch (OldWordFileFormatException e) { + // for old word files we should still support extracting text + if(OLD_FILES.contains(file)) { + handler.handleExtracting(inputFile); + } else { + // check if we expect failure for this file + if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + throw new Exception("While handling " + file, e); + } + } } catch (Exception e) { // check if we expect failure for this file if(!EXPECTED_FAILURES.contains(file) && 
!AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index 8a27e6d0e9..8819083771 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -28,8 +28,10 @@ import java.io.InputStream; import java.util.HashSet; import java.util.Set; +import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.xmlbeans.XmlException; @@ -89,6 +91,19 @@ public abstract class AbstractFileHandler implements FileHandler { assertEquals("File should not be modified by extractor", modified, file.lastModified()); handleExtractingAsStream(file); + + if(extractor instanceof POIOLE2TextExtractor) { + HPSFPropertiesExtractor hpsfExtractor = new HPSFPropertiesExtractor((POIOLE2TextExtractor)extractor); + try { + assertNotNull(hpsfExtractor.getDocumentSummaryInformationText()); + assertNotNull(hpsfExtractor.getSummaryInformationText()); + String text = hpsfExtractor.getText(); + //System.out.println(text); + assertNotNull(text); + } finally { + hpsfExtractor.close(); + } + } } catch (IllegalArgumentException e) { if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) { throw new Exception("While handling " + file, e); diff --git a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java index 3a223674cd..a56ddd2dc6 100644 --- a/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HWPFFileHandler.java @@ -63,12 +63,10 @@ public class 
HWPFFileHandler extends POIFSFileHandler { docTextWriter.close(); } - - // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - File file = new File("test-data/document/51921-Word-Crash067.doc"); + File file = new File("test-data/document/52117.doc"); InputStream stream = new FileInputStream(file); try { @@ -91,4 +89,10 @@ public class HWPFFileHandler extends POIFSFileHandler { stream.close(); } } + + @Test + public void testExtractingOld() throws Exception { + File file = new File("test-data/document/52117.doc"); + handleExtracting(file); + } } diff --git a/test-data/document/52117.doc b/test-data/document/52117.doc new file mode 100644 index 0000000000..4f966c01c5 Binary files /dev/null and b/test-data/document/52117.doc differ -- cgit v1.2.3 From dd875c9c040ac0e42326bb3c5b76a1d6748908ed Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 22 Mar 2015 21:48:27 +0000 Subject: Integration test: Do a few more things with the MapiMessage git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1668485 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/HSMFFileHandler.java | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java index 9de8b798c5..d68504a04c 100644 --- a/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/HSMFFileHandler.java @@ -22,6 +22,8 @@ import java.io.FileInputStream; import java.io.InputStream; import org.apache.poi.hsmf.MAPIMessage; +import org.apache.poi.hsmf.datatypes.AttachmentChunks; +import org.apache.poi.hsmf.datatypes.DirectoryChunk; import org.junit.Test; public class HSMFFileHandler extends POIFSFileHandler { @@ -32,6 +34,19 @@ public class HSMFFileHandler extends POIFSFileHandler { assertNotNull(mapi.getDisplayBCC()); 
assertNotNull(mapi.getMessageDate()); + AttachmentChunks[] attachments = mapi.getAttachmentFiles(); + + for(AttachmentChunks attachment : attachments) { + + DirectoryChunk chunkDirectory = attachment.attachmentDirectory; + if(chunkDirectory != null) { + MAPIMessage attachmentMSG = chunkDirectory.getAsEmbededMessage(); + assertNotNull(attachmentMSG); + String body = attachmentMSG.getTextBody(); + assertNotNull(body); + } + } + /* => Writing isn't yet supported... // write out the file File file = TempFile.createTempFile("StressTest", ".msg"); -- cgit v1.2.3 From 563cab96816fb36879b2f3df4fe2e0ce17457e58 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Fri, 3 Apr 2015 15:54:59 +0000 Subject: XMLPrettyPrint: Don't try to pretty-print non-XML files and print out which file from the ooxml-file fails to parse git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1671095 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/AbstractFileHandler.java | 10 ++++++++++ .../java/org/apache/poi/dev/OOXMLPrettyPrint.java | 22 ++++++++++++++-------- 2 files changed, 24 insertions(+), 8 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java index 8819083771..55e2c368f2 100644 --- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java @@ -27,9 +27,11 @@ import java.io.IOException; import java.io.InputStream; import java.util.HashSet; import java.util.Set; +import java.util.zip.ZipException; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; +import org.apache.poi.dev.OOXMLPrettyPrint; import org.apache.poi.extractor.ExtractorFactory; import org.apache.poi.hpsf.extractor.HPSFPropertiesExtractor; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; @@ -68,6 +70,14 @@ public abstract class 
AbstractFileHandler implements FileHandler { } finally { ExtractorFactory.setThreadPrefersEventExtractors(before); } + + /* Did fail for some documents with special XML contents... + try { + OOXMLPrettyPrint.main(new String[] { file.getAbsolutePath(), + "/tmp/pretty-" + file.getName() }); + } catch (ZipException e) { + // ignore, not a Zip/OOXML file + }*/ } private void handleExtractingInternal(File file) throws Exception { diff --git a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java index aab8112875..48341d0c9a 100644 --- a/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java +++ b/src/ooxml/java/org/apache/poi/dev/OOXMLPrettyPrint.java @@ -18,7 +18,6 @@ package org.apache.poi.dev; import java.io.BufferedOutputStream; import java.io.File; -import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.OutputStream; @@ -40,9 +39,9 @@ import javax.xml.transform.TransformerFactory; import javax.xml.transform.dom.DOMSource; import javax.xml.transform.stream.StreamResult; +import org.apache.poi.util.IOUtils; import org.w3c.dom.Document; import org.xml.sax.InputSource; -import org.xml.sax.SAXException; /** * Reads a zipped OOXML file and produces a copy with the included @@ -80,8 +79,7 @@ public class OOXMLPrettyPrint { } private static void handleFile(File file, File outFile) throws ZipException, - IOException, FileNotFoundException, SAXException, - TransformerException, ParserConfigurationException { + IOException, TransformerException, ParserConfigurationException { System.out.println("Reading zip-file " + file + " and writing pretty-printed XML to " + outFile); ZipFile zipFile = new ZipFile(file); @@ -99,15 +97,23 @@ public class OOXMLPrettyPrint { } } - private void handle(ZipFile file, ZipOutputStream out) throws SAXException, IOException, TransformerException { + private void handle(ZipFile file, ZipOutputStream out) throws IOException, 
TransformerException { Enumeration entries = file.entries(); while(entries.hasMoreElements()) { ZipEntry entry = entries.nextElement(); - out.putNextEntry(new ZipEntry(entry.getName())); + String name = entry.getName(); + out.putNextEntry(new ZipEntry(name)); try { - Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); - pretty(document, out, 2); + if(name.endsWith(".xml") || name.endsWith(".rels")) { + Document document = documentBuilder.parse(new InputSource(file.getInputStream(entry))); + pretty(document, out, 2); + } else { + System.out.println("Not pretty-printing non-XML file " + name); + IOUtils.copy(file.getInputStream(entry), out); + } + } catch (Exception e) { + throw new IOException("While handling entry " + name, e); } finally { out.closeEntry(); } -- cgit v1.2.3 From ee6a6d6584f3ad56043cfbeaa10f677a560e6ae6 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Thu, 23 Apr 2015 21:09:42 +0000 Subject: Add another file exclude, and have the test print out which files failed to make it quicker to spot in the jenkins failure email git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1675719 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/TestAllFiles.java | 3 +++ 1 file changed, 3 insertions(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 8a66024f7b..701e504e1b 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -234,6 +234,7 @@ public class TestAllFiles { EXPECTED_FAILURES.add("spreadsheet/testEXCEL_4.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_5.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_95.xls"); + EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted // OOXML Strict is not yet supported, see bug #57699 
EXPECTED_FAILURES.add("spreadsheet/SampleSS.strict.xlsx"); @@ -296,12 +297,14 @@ public class TestAllFiles { } else { // check if we expect failure for this file if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + System.out.println("Failed: " + file); throw new Exception("While handling " + file, e); } } } catch (Exception e) { // check if we expect failure for this file if(!EXPECTED_FAILURES.contains(file) && !AbstractFileHandler.EXPECTED_EXTRACTOR_FAILURES.contains(file)) { + System.out.println("Failed: " + file); throw new Exception("While handling " + file, e); } } -- cgit v1.2.3 From 25cf50433d03d33e78db2c3df03f1a3df6e6dfaf Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Wed, 29 Apr 2015 19:47:35 +0000 Subject: Use a constant for the name of the OOXML encrypted package node git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1676838 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java | 3 ++- src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java | 3 ++- src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java | 4 +++- src/java/org/apache/poi/poifs/crypt/DataSpaceMapUtils.java | 2 +- src/java/org/apache/poi/poifs/crypt/Decryptor.java | 1 + src/java/org/apache/poi/poifs/crypt/Encryptor.java | 1 + src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java | 4 ++-- src/java/org/apache/poi/poifs/crypt/standard/StandardDecryptor.java | 2 +- src/java/org/apache/poi/poifs/crypt/standard/StandardEncryptor.java | 2 +- src/ooxml/java/org/apache/poi/poifs/crypt/agile/AgileDecryptor.java | 2 +- src/ooxml/testcases/org/apache/poi/poifs/crypt/TestEncryptor.java | 4 ++-- 11 files changed, 17 insertions(+), 11 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java index c720272352..7b0821dcc0 
100644 --- a/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java +++ b/src/integrationtest/org/apache/poi/stress/POIXMLDocumentHandler.java @@ -27,6 +27,7 @@ import org.apache.poi.openxml4j.exceptions.OpenXML4JException; import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.openxml4j.opc.PackagePart; +import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.filesystem.POIFSFileSystem; import org.junit.Ignore; import org.junit.Test; @@ -43,7 +44,7 @@ public final class POIXMLDocumentHandler { protected static boolean isEncrypted(InputStream stream) throws IOException { if (POIFSFileSystem.hasPOIFSHeader(stream)) { POIFSFileSystem poifs = new POIFSFileSystem(stream); - if (poifs.getRoot().hasEntry("EncryptedPackage")) { + if (poifs.getRoot().hasEntry(Decryptor.DEFAULT_POIFS_ENTRY)) { return true; } throw new IOException("wrong file format or file extension for OO XML file"); diff --git a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java index 503ed64d6c..aee2d5444a 100644 --- a/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java +++ b/src/java/org/apache/poi/hssf/usermodel/HSSFWorkbook.java @@ -65,6 +65,7 @@ import org.apache.poi.hssf.record.UnknownRecord; import org.apache.poi.hssf.record.aggregates.RecordAggregate.RecordVisitor; import org.apache.poi.hssf.record.common.UnicodeString; import org.apache.poi.hssf.util.CellReference; +import org.apache.poi.poifs.crypt.Decryptor; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.EntryUtils; @@ -248,7 +249,7 @@ public final class HSSFWorkbook extends POIDocument implements org.apache.poi.ss // check for an encrypted .xlsx file - they get OLE2 wrapped try { - directory.getEntry("EncryptedPackage"); + directory.getEntry(Decryptor.DEFAULT_POIFS_ENTRY); throw 
new EncryptedDocumentException("The supplied spreadsheet seems to be an Encrypted .xlsx file. " + "It must be decrypted before use by XSSF, it cannot be used by HSSF"); } catch (FileNotFoundException e) { diff --git a/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java b/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java index 8a2bf00454..f663c7e089 100644 --- a/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java +++ b/src/java/org/apache/poi/poifs/crypt/ChunkedCipherOutputStream.java @@ -16,6 +16,8 @@ ==================================================================== */ package org.apache.poi.poifs.crypt; +import static org.apache.poi.poifs.crypt.Decryptor.DEFAULT_POIFS_ENTRY; + import java.io.File; import java.io.FileInputStream; import java.io.FileOutputStream; @@ -133,7 +135,7 @@ public abstract class ChunkedCipherOutputStream extends FilterOutputStream { int oleStreamSize = (int)(fileOut.length()+LittleEndianConsts.LONG_SIZE); calculateChecksum(fileOut, oleStreamSize); - dir.createDocument("EncryptedPackage", oleStreamSize, new EncryptedPackageWriter()); + dir.createDocument(DEFAULT_POIFS_ENTRY, oleStreamSize, new EncryptedPackageWriter()); createEncryptionInfoEntry(dir, fileOut); } catch (GeneralSecurityException e) { throw new IOException(e); diff --git a/src/java/org/apache/poi/poifs/crypt/DataSpaceMapUtils.java b/src/java/org/apache/poi/poifs/crypt/DataSpaceMapUtils.java index 0c80c6c2b3..f6477fb9b5 100644 --- a/src/java/org/apache/poi/poifs/crypt/DataSpaceMapUtils.java +++ b/src/java/org/apache/poi/poifs/crypt/DataSpaceMapUtils.java @@ -36,7 +36,7 @@ public class DataSpaceMapUtils { public static void addDefaultDataSpace(DirectoryEntry dir) throws IOException { DataSpaceMapEntry dsme = new DataSpaceMapEntry( new int[]{ 0 } - , new String[]{ "EncryptedPackage" } + , new String[]{ Decryptor.DEFAULT_POIFS_ENTRY } , "StrongEncryptionDataSpace" ); DataSpaceMap dsm = new DataSpaceMap(new 
DataSpaceMapEntry[]{dsme}); diff --git a/src/java/org/apache/poi/poifs/crypt/Decryptor.java b/src/java/org/apache/poi/poifs/crypt/Decryptor.java index af449290e8..d584346f9c 100644 --- a/src/java/org/apache/poi/poifs/crypt/Decryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/Decryptor.java @@ -29,6 +29,7 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem; public abstract class Decryptor { public static final String DEFAULT_PASSWORD="VelvetSweatshop"; + public static final String DEFAULT_POIFS_ENTRY="EncryptedPackage"; protected final EncryptionInfoBuilder builder; private SecretKey secretKey; diff --git a/src/java/org/apache/poi/poifs/crypt/Encryptor.java b/src/java/org/apache/poi/poifs/crypt/Encryptor.java index abfd693306..4c1b51258f 100644 --- a/src/java/org/apache/poi/poifs/crypt/Encryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/Encryptor.java @@ -27,6 +27,7 @@ import org.apache.poi.poifs.filesystem.NPOIFSFileSystem; import org.apache.poi.poifs.filesystem.POIFSFileSystem; public abstract class Encryptor { + protected static final String DEFAULT_POIFS_ENTRY = Decryptor.DEFAULT_POIFS_ENTRY; private SecretKey secretKey; /** diff --git a/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java b/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java index 40eab54e3a..a2d3d6f0e1 100644 --- a/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/binaryrc4/BinaryRC4Decryptor.java @@ -118,7 +118,7 @@ public class BinaryRC4Decryptor extends Decryptor { public InputStream getDataStream(DirectoryNode dir) throws IOException, GeneralSecurityException { - DocumentInputStream dis = dir.createDocumentInputStream("EncryptedPackage"); + DocumentInputStream dis = dir.createDocumentInputStream(DEFAULT_POIFS_ENTRY); _length = dis.readLong(); BinaryRC4CipherInputStream cipherStream = new BinaryRC4CipherInputStream(dis, _length); return cipherStream; @@ -131,4 +131,4 @@ public 
class BinaryRC4Decryptor extends Decryptor { return _length; } -} \ No newline at end of file +} diff --git a/src/java/org/apache/poi/poifs/crypt/standard/StandardDecryptor.java b/src/java/org/apache/poi/poifs/crypt/standard/StandardDecryptor.java index 2b2c75b520..1d6ddd398e 100644 --- a/src/java/org/apache/poi/poifs/crypt/standard/StandardDecryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/standard/StandardDecryptor.java @@ -123,7 +123,7 @@ public class StandardDecryptor extends Decryptor { } public InputStream getDataStream(DirectoryNode dir) throws IOException { - DocumentInputStream dis = dir.createDocumentInputStream("EncryptedPackage"); + DocumentInputStream dis = dir.createDocumentInputStream(DEFAULT_POIFS_ENTRY); _length = dis.readLong(); diff --git a/src/java/org/apache/poi/poifs/crypt/standard/StandardEncryptor.java b/src/java/org/apache/poi/poifs/crypt/standard/StandardEncryptor.java index 7049c715de..ae6304fbb5 100644 --- a/src/java/org/apache/poi/poifs/crypt/standard/StandardEncryptor.java +++ b/src/java/org/apache/poi/poifs/crypt/standard/StandardEncryptor.java @@ -166,7 +166,7 @@ public class StandardEncryptor extends Encryptor { void writeToPOIFS() throws IOException { int oleStreamSize = (int)(fileOut.length()+LittleEndianConsts.LONG_SIZE); - dir.createDocument("EncryptedPackage", oleStreamSize, this); + dir.createDocument(DEFAULT_POIFS_ENTRY, oleStreamSize, this); // TODO: any properties??? 
} diff --git a/src/ooxml/java/org/apache/poi/poifs/crypt/agile/AgileDecryptor.java b/src/ooxml/java/org/apache/poi/poifs/crypt/agile/AgileDecryptor.java index 05499685f5..3af78128c8 100644 --- a/src/ooxml/java/org/apache/poi/poifs/crypt/agile/AgileDecryptor.java +++ b/src/ooxml/java/org/apache/poi/poifs/crypt/agile/AgileDecryptor.java @@ -279,7 +279,7 @@ public class AgileDecryptor extends Decryptor { } public InputStream getDataStream(DirectoryNode dir) throws IOException, GeneralSecurityException { - DocumentInputStream dis = dir.createDocumentInputStream("EncryptedPackage"); + DocumentInputStream dis = dir.createDocumentInputStream(DEFAULT_POIFS_ENTRY); _length = dis.readLong(); ChunkedCipherInputStream cipherStream = new AgileCipherInputStream(dis, _length); diff --git a/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestEncryptor.java b/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestEncryptor.java index 2cd9889bd2..fd494ba267 100644 --- a/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestEncryptor.java +++ b/src/ooxml/testcases/org/apache/poi/poifs/crypt/TestEncryptor.java @@ -111,7 +111,7 @@ public class TestEncryptor { long decPackLenExpected = decExpected.getLength(); assertEquals(decPackLenExpected, payloadExpected.length); - is = nfs.getRoot().createDocumentInputStream("EncryptedPackage"); + is = nfs.getRoot().createDocumentInputStream(Decryptor.DEFAULT_POIFS_ENTRY); is = new BoundedInputStream(is, is.available()-16); // ignore padding block byte encPackExpected[] = IOUtils.toByteArray(is); is.close(); @@ -163,7 +163,7 @@ public class TestEncryptor { long decPackLenActual = decActual.getLength(); - is = nfs.getRoot().createDocumentInputStream("EncryptedPackage"); + is = nfs.getRoot().createDocumentInputStream(Decryptor.DEFAULT_POIFS_ENTRY); is = new BoundedInputStream(is, is.available()-16); // ignore padding block byte encPackActual[] = IOUtils.toByteArray(is); is.close(); -- cgit v1.2.3 From 6d5e376c884db604eb04eae6c4b534f823151c1e Mon Sep 17 
00:00:00 2001 From: Nick Burch Date: Wed, 29 Apr 2015 22:07:43 +0000 Subject: Skip the new password protected file, and re-org a little bit some other failures git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1676874 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/TestAllFiles.java | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 701e504e1b..b63684dda0 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -181,6 +181,7 @@ public class TestAllFiles { static { // password protected files EXPECTED_FAILURES.add("spreadsheet/password.xls"); + EXPECTED_FAILURES.add("spreadsheet/protected_passtika.xlsx"); EXPECTED_FAILURES.add("spreadsheet/51832.xls"); EXPECTED_FAILURES.add("document/PasswordProtected.doc"); EXPECTED_FAILURES.add("slideshow/Password_Protected-hello.ppt"); @@ -213,7 +214,7 @@ public class TestAllFiles { // This is actually a spreadsheet! EXPECTED_FAILURES.add("hpsf/TestRobert_Flaherty.doc"); - // some files that are broken, Excel 5.0/95, Word 95, ... + // some files that are broken, eg Word 95, ... 
EXPECTED_FAILURES.add("spreadsheet/43493.xls"); EXPECTED_FAILURES.add("spreadsheet/46904.xls"); EXPECTED_FAILURES.add("document/56880.doc"); @@ -229,12 +230,14 @@ public class TestAllFiles { EXPECTED_FAILURES.add("spreadsheet/54764-2.xlsx"); // see TestXSSFBugs.bug54764() EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764() EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb"); + EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted + + // old Excel files, which we only support simple text extraction of EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_3.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_4.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_5.xls"); EXPECTED_FAILURES.add("spreadsheet/testEXCEL_95.xls"); - EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted // OOXML Strict is not yet supported, see bug #57699 EXPECTED_FAILURES.add("spreadsheet/SampleSS.strict.xlsx"); -- cgit v1.2.3 From 5a925d115d5ca02221b6411b16c56c273c45bd69 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Thu, 30 Apr 2015 05:50:49 +0000 Subject: Adjust test to handle a file correctly which works for normal handling but fails expectedly in text-extraction testing. 
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1676902 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/TestAllFiles.java | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index b63684dda0..96b00fa9df 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -284,8 +284,6 @@ public class TestAllFiles { try { handler.handleFile(stream); - assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", - EXPECTED_FAILURES.contains(file)); assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", OLD_FILES.contains(file)); } finally { @@ -293,6 +291,9 @@ public class TestAllFiles { } handler.handleExtracting(inputFile); + + assertFalse("Expected to fail for file " + file + " and handler " + handler + ", but did not fail!", + EXPECTED_FAILURES.contains(file)); } catch (OldWordFileFormatException e) { // for old word files we should still support extracting text if(OLD_FILES.contains(file)) { -- cgit v1.2.3 From 2d71e80930e4b3effc33580c8aff5734c5eab181 Mon Sep 17 00:00:00 2001 From: Nick Burch Date: Tue, 26 May 2015 18:38:34 +0000 Subject: Add ignore - file has no content streams git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1681809 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/org/apache/poi/TestAllFiles.java | 1 + 1 file changed, 1 insertion(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/TestAllFiles.java b/src/integrationtest/org/apache/poi/TestAllFiles.java index 96b00fa9df..85b0580841 100644 --- a/src/integrationtest/org/apache/poi/TestAllFiles.java +++ b/src/integrationtest/org/apache/poi/TestAllFiles.java @@ -231,6 +231,7 @@ public class TestAllFiles { 
EXPECTED_FAILURES.add("spreadsheet/54764.xlsx"); // see TestXSSFBugs.bug54764() EXPECTED_FAILURES.add("spreadsheet/Simple.xlsb"); EXPECTED_FAILURES.add("poifs/unknown_properties.msg"); // POIFS properties corrupted + EXPECTED_FAILURES.add("poifs/only-zero-byte-streams.ole2"); // No actual contents // old Excel files, which we only support simple text extraction of EXPECTED_FAILURES.add("spreadsheet/testEXCEL_2.xls"); -- cgit v1.2.3 From b051a10087bc5c82120d59eee17c4da461d9f00c Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 31 May 2015 20:11:28 +0000 Subject: Bug 57904: Add full source build from dist-packages to CI builds git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1682786 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/build.xml | 45 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 src/integrationtest/build.xml (limited to 'src/integrationtest') diff --git a/src/integrationtest/build.xml b/src/integrationtest/build.xml new file mode 100644 index 0000000000..1a5e940473 --- /dev/null +++ b/src/integrationtest/build.xml @@ -0,0 +1,45 @@ + + + + Test-Ant file which verifies that the Apache POI distribution build sources can be compiled successfully. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- cgit v1.2.3 From e5c1a36de91909b8f6b938f6f0c9d9f260368d48 Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Sun, 31 May 2015 21:02:39 +0000 Subject: Add missing license header git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1682794 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/build.xml | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'src/integrationtest') diff --git a/src/integrationtest/build.xml b/src/integrationtest/build.xml index 1a5e940473..21d327a12d 100644 --- a/src/integrationtest/build.xml +++ b/src/integrationtest/build.xml @@ -1,4 +1,22 @@ + Test-Ant file which verifies that the Apache POI distribution build sources can be compiled successfully. -- cgit v1.2.3 From e8a5994956e222f4eb147aa7af95b25fcbaeee8f Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Tue, 16 Jun 2015 05:42:36 +0000 Subject: Bug 57963: Add a task to the integration-test build which verifies that most examples can be built without scratchpad.jar git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1685705 13f79535-47bb-0310-9956-ffa450edef68 --- src/integrationtest/build.xml | 88 +++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 85 insertions(+), 3 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/build.xml b/src/integrationtest/build.xml index 21d327a12d..795788e441 100644 --- a/src/integrationtest/build.xml +++ b/src/integrationtest/build.xml @@ -19,7 +19,12 @@ under the License. --> - Test-Ant file which verifies that the Apache POI distribution build sources can be compiled successfully. + Test-Ant file which verifies that the Apache POI distribution build sources can be compiled successfully. + + Before running this, you should execute the "assemble" target in the main build.xml to have the packaged files + created correctly. + + @@ -27,12 +32,14 @@ under the License. 
- + + + - + @@ -60,4 +67,79 @@ under the License. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + -- cgit v1.2.3 From 5845a41b78b0f01735cf7ada45bd044ab4f90e5e Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Tue, 7 Jul 2015 07:44:11 +0000 Subject: Add some missing close(), reduce output in unit tests and remove some other Eclipse warnings git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1689590 13f79535-47bb-0310-9956-ffa450edef68 --- .../org/apache/poi/stress/XSLFFileHandler.java | 4 ++- .../hsmf/extractor/TestOutlookTextExtractor.java | 41 +++++++++++++++------- .../apache/poi/hssf/dev/BaseXLSIteratingTest.java | 2 +- .../org/apache/poi/hssf/usermodel/TestBugs.java | 1 - 4 files changed, 33 insertions(+), 15 deletions(-) (limited to 'src/integrationtest') diff --git a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java index b734c4e4bc..b6f5f7cffe 100644 --- a/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java +++ b/src/integrationtest/org/apache/poi/stress/XSLFFileHandler.java @@ -88,12 +88,14 @@ public class XSLFFileHandler extends AbstractFileHandler { } } } + + ppt.close(); } // a test-case to test this locally without executing the full TestAllFiles @Test public void test() throws Exception { - InputStream stream = new FileInputStream("test-data/slideshow/pptx2svg.pptx"); + InputStream stream = new FileInputStream("test-data/slideshow/SampleShow.pptx"); try { handleFile(stream); } finally { diff --git a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java index 3d276ce64a..adbb966a84 100644 --- a/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java +++ b/src/scratchpad/testcases/org/apache/poi/hsmf/extractor/TestOutlookTextExtractor.java @@ -60,6 
+60,8 @@ public final class TestOutlookTextExtractor extends POITestCase { String dateText = f.format(cal.getTime()); assertContains(text, "Date: " + dateText + "\n"); assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); } public void testSimple() throws Exception { @@ -77,21 +79,28 @@ public final class TestOutlookTextExtractor extends POITestCase { assertContains(text, "Subject: test message\n"); assertContains(text, "Date: Fri, 6 Jul 2007 05:27:17 +0000\n"); assertContains(text, "This is a test message."); + + ext.close(); } public void testConstructors() throws Exception { - String inp = (new OutlookTextExtactor(new FileInputStream( - samples.getFile("simple_test_msg.msg") - )).getText()); - String poifs = (new OutlookTextExtactor(new POIFSFileSystem(new FileInputStream( - samples.getFile("simple_test_msg.msg") - ))).getText()); - String mapi = (new OutlookTextExtactor(new MAPIMessage(new FileInputStream( - samples.getFile("simple_test_msg.msg") - ))).getText()); - - assertEquals(inp, poifs); - assertEquals(inp, mapi); + OutlookTextExtactor ext = new OutlookTextExtactor(new FileInputStream( + samples.getFile("simple_test_msg.msg"))); + String inp = ext.getText(); + ext.close(); + + ext = new OutlookTextExtactor(new POIFSFileSystem(new FileInputStream( + samples.getFile("simple_test_msg.msg")))); + String poifs = ext.getText(); + ext.close(); + + ext = new OutlookTextExtactor(new MAPIMessage(new FileInputStream( + samples.getFile("simple_test_msg.msg")))); + String mapi = ext.getText(); + ext.close(); + + assertEquals(inp, poifs); + assertEquals(inp, mapi); } /** @@ -128,6 +137,8 @@ public final class TestOutlookTextExtractor extends POITestCase { assertContains(text, "Subject: This is a test message please ignore\n"); assertContains(text, "Date:"); assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); } } @@ -164,6 +175,8 @@ public final class TestOutlookTextExtractor extends POITestCase { 
assertContains(text, "Subject: This is a test message please ignore\n"); assertContains(text, "Date: Mon, 11 Jan 2010 16:2"); // Exact times differ slightly assertContains(text, "The quick brown fox jumps over the lazy dog"); + + ext.close(); } } @@ -192,6 +205,8 @@ public final class TestOutlookTextExtractor extends POITestCase { // Embeded bits are checked in // TestExtractorFactory + + ext.close(); } public void testEncodings() throws Exception { @@ -209,5 +224,7 @@ public final class TestOutlookTextExtractor extends POITestCase { // And check some chinese bits assertContains(text, "(\u5f35\u6bd3\u502b)"); assertContains(text, "( MSG \u683c\u5f0f\u6e2c\u8a66 )"); + + ext.close(); } } diff --git a/src/testcases/org/apache/poi/hssf/dev/BaseXLSIteratingTest.java b/src/testcases/org/apache/poi/hssf/dev/BaseXLSIteratingTest.java index 521b9ad679..0adb2cf984 100644 --- a/src/testcases/org/apache/poi/hssf/dev/BaseXLSIteratingTest.java +++ b/src/testcases/org/apache/poi/hssf/dev/BaseXLSIteratingTest.java @@ -79,11 +79,11 @@ public abstract class BaseXLSIteratingTest { try { runOneFile(dir, file, failed); } catch (Exception e) { - System.out.println("Failed: " + file); if(SILENT_EXCLUDED.contains(file)) { continue; } + System.out.println("Failed: " + file); e.printStackTrace(); // try to read it in HSSFWorkbook to quickly fail if we cannot read the file there at all and thus probably can use SILENT_EXCLUDED instead diff --git a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java index 3fd83b4cf5..5223bb5b06 100644 --- a/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java +++ b/src/testcases/org/apache/poi/hssf/usermodel/TestBugs.java @@ -32,7 +32,6 @@ import java.io.FileNotFoundException; import java.io.FileOutputStream; import java.io.IOException; import java.io.InputStream; -import java.io.OutputStream; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; -- cgit v1.2.3