about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author Dominik Stadler <centic@apache.org> 2015-02-27 14:58:41 +0000
committer Dominik Stadler <centic@apache.org> 2015-02-27 14:58:41 +0000
commit a3e087268a6ff9d5b90d3d334593d56693e400b8 (patch)
tree 8f46d441d3321425accdcf5a5a99f598544957f1
parent 27c6da8286197840c0dbe909abb767a25fb5a28e (diff)
download poi-a3e087268a6ff9d5b90d3d334593d56693e400b8.tar.gz
poi-a3e087268a6ff9d5b90d3d334593d56693e400b8.zip
* Verify some more text-extraction features as part of the integration tests, and fix some NullPointerExceptions that showed up now because the event-based extraction does not have a Document available
* Also handle an XLSX file which does not have row numbers in the sheet XML. Excel can read it, so it makes sense to also allow reading it in the XSSFSheetXMLHandler
* Remove some Eclipse warnings in test code
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662691 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java 47
-rw-r--r-- src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java 7
-rw-r--r-- src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java 7
-rw-r--r-- src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java 8
-rw-r--r-- src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java 24
-rw-r--r-- src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java 23
-rw-r--r-- src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java 6
-rw-r--r-- src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java 13
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java 23
-rw-r--r-- src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java 165
-rw-r--r-- src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java 16
-rw-r--r-- src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java 151
12 files changed, 355 insertions, 135 deletions
diff --git a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
index d7e303e4a8..8a27e6d0e9 100644
--- a/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/AbstractFileHandler.java
@@ -16,15 +16,23 @@
==================================================================== */
package org.apache.poi.stress;
+import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertNotNull;
import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileNotFoundException;
+import java.io.IOException;
+import java.io.InputStream;
import java.util.HashSet;
import java.util.Set;
import org.apache.poi.POITextExtractor;
import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
+import org.apache.poi.openxml4j.exceptions.OpenXML4JException;
+import org.apache.xmlbeans.XmlException;
public abstract class AbstractFileHandler implements FileHandler {
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>();
@@ -48,6 +56,22 @@ public abstract class AbstractFileHandler implements FileHandler {
}
public void handleExtracting(File file) throws Exception {
+ boolean before = ExtractorFactory.getThreadPrefersEventExtractors();
+ try {
+ ExtractorFactory.setThreadPrefersEventExtractors(true);
+ handleExtractingInternal(file);
+
+ ExtractorFactory.setThreadPrefersEventExtractors(false);
+ handleExtractingInternal(file);
+ } finally {
+ ExtractorFactory.setThreadPrefersEventExtractors(before);
+ }
+ }
+
+ private void handleExtractingInternal(File file) throws Exception {
+ long length = file.length();
+ long modified = file.lastModified();
+
POITextExtractor extractor = ExtractorFactory.createExtractor(file);
try {
assertNotNull(extractor);
@@ -60,6 +84,11 @@ public abstract class AbstractFileHandler implements FileHandler {
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!",
EXPECTED_EXTRACTOR_FAILURES.contains(file));
+
+ assertEquals("File should not be modified by extractor", length, file.length());
+ assertEquals("File should not be modified by extractor", modified, file.lastModified());
+
+ handleExtractingAsStream(file);
} catch (IllegalArgumentException e) {
if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) {
throw new Exception("While handling " + file, e);
@@ -68,4 +97,22 @@ public abstract class AbstractFileHandler implements FileHandler {
extractor.close();
}
}
+
+ private void handleExtractingAsStream(File file) throws FileNotFoundException,
+ IOException, InvalidFormatException, OpenXML4JException,
+ XmlException {
+ InputStream stream = new FileInputStream(file);
+ try {
+ POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream);
+ try {
+ assertNotNull(streamExtractor);
+
+ assertNotNull(streamExtractor.getText());
+ } finally {
+ streamExtractor.close();
+ }
+ } finally {
+ stream.close();
+ }
+ }
}
diff --git a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
index 477ee859cb..6a53b2e009 100644
--- a/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/HPSFFileHandler.java
@@ -18,6 +18,7 @@ package org.apache.poi.stress;
import static org.junit.Assert.assertNotNull;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
@@ -43,4 +44,10 @@ public class HPSFFileHandler extends AbstractFileHandler {
stream.close();
}
}
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ public void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/hpsf/TestBug44375.xls"));
+ }
}
diff --git a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
index 54a386ea00..a268ed4658 100644
--- a/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
+++ b/src/integrationtest/org/apache/poi/stress/XSSFFileHandler.java
@@ -17,6 +17,7 @@
package org.apache.poi.stress;
import java.io.ByteArrayOutputStream;
+import java.io.File;
import java.io.FileInputStream;
import java.io.InputStream;
@@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler {
stream.close();
}
}
+
+ // a test-case to test this locally without executing the full TestAllFiles
+ @Test
+ public void testExtractor() throws Exception {
+ handleExtracting(new File("test-data/spreadsheet/56278.xlsx"));
+ }
} \ No newline at end of file
diff --git a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
index ce5301ac60..1a0db03897 100644
--- a/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
+++ b/src/java/org/apache/poi/hpsf/extractor/HPSFPropertiesExtractor.java
@@ -57,6 +57,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor {
}
public String getDocumentSummaryInformationText() {
+ if(document == null) { // event based extractor does not have a document
+ return "";
+ }
+
DocumentSummaryInformation dsi = document.getDocumentSummaryInformation();
StringBuffer text = new StringBuffer();
@@ -78,6 +82,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor {
return text.toString();
}
public String getSummaryInformationText() {
+ if(document == null) { // event based extractor does not have a document
+ return "";
+ }
+
SummaryInformation si = document.getSummaryInformation();
// Just normal properties
diff --git a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
index 45ab8d8131..4d0b894e01 100644
--- a/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
+++ b/src/java/org/apache/poi/hssf/eventusermodel/HSSFEventFactory.java
@@ -19,6 +19,7 @@ package org.apache.poi.hssf.eventusermodel;
import java.io.InputStream;
import java.io.IOException;
+import java.util.Set;
import org.apache.poi.hssf.eventusermodel.HSSFUserException;
import org.apache.poi.hssf.record.*;
@@ -56,11 +57,24 @@ public class HSSFEventFactory {
* @param req an Instance of HSSFRequest which has your registered listeners
* @param dir a DirectoryNode containing your workbook
*/
- public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
- InputStream in = dir.createDocumentInputStream("Workbook");
-
- processEvents(req, in);
- }
+ public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException {
+ // some old documents have "WORKBOOK" or "BOOK"
+ final String name;
+ Set<String> entryNames = dir.getEntryNames();
+ if (entryNames.contains("Workbook")) {
+ name = "Workbook";
+ } else if (entryNames.contains("WORKBOOK")) {
+ name = "WORKBOOK";
+ } else if (entryNames.contains("BOOK")) {
+ name = "BOOK";
+ } else {
+ name = "Workbook";
+ }
+
+ InputStream in = dir.createDocumentInputStream(name);
+
+ processEvents(req, in);
+ }
/**
* Processes a file into essentially record events.
diff --git a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
index ce576439f2..8a35a34e4c 100644
--- a/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
+++ b/src/ooxml/java/org/apache/poi/POIXMLPropertiesTextExtractor.java
@@ -67,9 +67,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
* Returns the core document properties, eg author
*/
public String getCorePropertiesText() {
+ POIXMLDocument document = getDocument();
+ if(document == null) { // event based extractor does not have a document
+ return "";
+ }
+
StringBuffer text = new StringBuffer();
- PackagePropertiesPart props =
- getDocument().getProperties().getCoreProperties().getUnderlyingProperties();
+ PackagePropertiesPart props =
+ document.getProperties().getCoreProperties().getUnderlyingProperties();
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
appendIfPresent(text, "Category", props.getCategoryProperty().getValue());
@@ -99,9 +104,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
* application
*/
public String getExtendedPropertiesText() {
+ POIXMLDocument document = getDocument();
+ if(document == null) { // event based extractor does not have a document
+ return "";
+ }
+
StringBuffer text = new StringBuffer();
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties
- props = getDocument().getProperties().getExtendedProperties().getUnderlyingProperties();
+ props = document.getProperties().getExtendedProperties().getUnderlyingProperties();
appendIfPresent(text, "Application", props.getApplication());
appendIfPresent(text, "AppVersion", props.getAppVersion());
@@ -127,9 +137,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor {
*/
@SuppressWarnings("deprecation")
public String getCustomPropertiesText() {
+ POIXMLDocument document = getDocument();
+ if(document == null) { // event based extractor does not have a document
+ return "";
+ }
+
StringBuilder text = new StringBuilder();
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties
- props = getDocument().getProperties().getCustomProperties().getUnderlyingProperties();
+ props = document.getProperties().getCustomProperties().getUnderlyingProperties();
for(CTProperty property : props.getPropertyArray()) {
String val = "(not implemented!)";
diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
index a0b6b5db17..60a0f51810 100644
--- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
+++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
@@ -265,10 +265,10 @@ public class ExtractorFactory {
/**
* Returns an array of text extractors, one for each of
- * the embeded documents in the file (if there are any).
- * If there are no embeded documents, you'll get back an
+ * the embedded documents in the file (if there are any).
+ * If there are no embedded documents, you'll get back an
* empty array. Otherwise, you'll get one open
- * {@link POITextExtractor} for each embeded file.
+ * {@link POITextExtractor} for each embedded file.
*/
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
// All the embded directories we spotted
diff --git a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
index 7677426876..227441859e 100644
--- a/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
+++ b/src/ooxml/java/org/apache/poi/xssf/eventusermodel/XSSFSheetXMLHandler.java
@@ -96,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
private String formatString;
private final DataFormatter formatter;
private int rowNum;
+ private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well
private String cellRef;
private boolean formulasNotResults;
@@ -240,7 +241,12 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
headerFooter.setLength(0);
}
else if("row".equals(name)) {
- rowNum = Integer.parseInt(attributes.getValue("r")) - 1;
+ String rowNumStr = attributes.getValue("r");
+ if(rowNumStr != null) {
+ rowNum = Integer.parseInt(rowNumStr) - 1;
+ } else {
+ rowNum = nextRowNum;
+ }
output.startRow(rowNum);
}
// c => cell
@@ -343,7 +349,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
case NUMBER:
String n = value.toString();
- if (this.formatString != null)
+ if (this.formatString != null && n.length() > 0)
thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString);
else
thisStr = n;
@@ -370,6 +376,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler {
// Finish up the row
output.endRow(rowNum);
+
+ // some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well
+ nextRowNum = rowNum + 1;
} else if ("sheetData".equals(name)) {
// Handle any "missing" cells which had comments attached
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA);
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
index e48787be0c..b56b3791f0 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
@@ -23,6 +23,7 @@ import java.util.regex.Pattern;
import junit.framework.TestCase;
import org.apache.poi.POITextExtractor;
+import org.apache.poi.POIXMLTextExtractor;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.xssf.XSSFTestDataSamples;
@@ -155,7 +156,6 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
POITextExtractor[] extractors =
new POITextExtractor[] { ooxmlExtractor, ole2Extractor };
for (int i = 0; i < extractors.length; i++) {
- @SuppressWarnings("resource")
POITextExtractor extractor = extractors[i];
String text = extractor.getText().replaceAll("[\r\t]", "");
@@ -316,4 +316,25 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase {
fixture.close();
}
}
+
+ public void testFile56278_normal() throws Exception {
+ // first with normal Text Extractor
+ POIXMLTextExtractor extractor = new XSSFExcelExtractor(
+ XSSFTestDataSamples.openSampleWorkbook("56278.xlsx"));
+ try {
+ assertNotNull(extractor.getText());
+ } finally {
+ extractor.close();
+ }
+ }
+
+ public void testFile56278_event() throws Exception {
+ // then with event based one
+ POIXMLTextExtractor extractor = getExtractor("56278.xlsx");
+ try {
+ assertNotNull(extractor.getText());
+ } finally {
+ extractor.close();
+ }
+ }
}
diff --git a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
index 22238d75f1..c6ad03db2d 100644
--- a/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
+++ b/src/testcases/org/apache/poi/hpsf/extractor/TestHPSFPropertiesExtractor.java
@@ -22,10 +22,12 @@ import java.io.IOException;
import junit.framework.TestCase;
import org.apache.poi.POIDataSamples;
+import org.apache.poi.POITextExtractor;
import org.apache.poi.hpsf.Thumbnail;
import org.apache.poi.hssf.HSSFTestDataSamples;
import org.apache.poi.hssf.extractor.ExcelExtractor;
import org.apache.poi.hssf.usermodel.HSSFWorkbook;
+import org.apache.poi.hwpf.extractor.Word6Extractor;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
public final class TestHPSFPropertiesExtractor extends TestCase {
@@ -34,45 +36,53 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
public void testNormalProperties() throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
- ext.getText();
-
- // Check each bit in turn
- String sinfText = ext.getSummaryInformationText();
- String dinfText = ext.getDocumentSummaryInformationText();
-
- assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
- assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
- assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
- assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
-
- // Now overall
- String text = ext.getText();
- assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
- assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
- assertTrue(text.indexOf("MANAGER = sample manager") > -1);
- assertTrue(text.indexOf("COMPANY = sample company") > -1);
+ try {
+ ext.getText();
+
+ // Check each bit in turn
+ String sinfText = ext.getSummaryInformationText();
+ String dinfText = ext.getDocumentSummaryInformationText();
+
+ assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1);
+ assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1);
+ assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1);
+ assertTrue(dinfText.indexOf("COMPANY = sample company") > -1);
+
+ // Now overall
+ String text = ext.getText();
+ assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
+ assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
+ assertTrue(text.indexOf("MANAGER = sample manager") > -1);
+ assertTrue(text.indexOf("COMPANY = sample company") > -1);
+ } finally {
+ ext.close();
+ }
}
public void testNormalUnicodeProperties() throws Exception {
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls"));
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
- ext.getText();
-
- // Check each bit in turn
- String sinfText = ext.getSummaryInformationText();
- String dinfText = ext.getDocumentSummaryInformationText();
-
- assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
- assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
- assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
- assertTrue(dinfText.indexOf("SCALE = false") > -1);
-
- // Now overall
- String text = ext.getText();
- assertTrue(text.indexOf("AUTHOR = marshall") > -1);
- assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
- assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
- assertTrue(text.indexOf("SCALE = false") > -1);
+ try {
+ ext.getText();
+
+ // Check each bit in turn
+ String sinfText = ext.getSummaryInformationText();
+ String dinfText = ext.getDocumentSummaryInformationText();
+
+ assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1);
+ assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1);
+ assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1);
+ assertTrue(dinfText.indexOf("SCALE = false") > -1);
+
+ // Now overall
+ String text = ext.getText();
+ assertTrue(text.indexOf("AUTHOR = marshall") > -1);
+ assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1);
+ assertTrue(text.indexOf("COMPANY = Schreiner") > -1);
+ assertTrue(text.indexOf("SCALE = false") > -1);
+ } finally {
+ ext.close();
+ }
}
public void testCustomProperties() throws Exception {
@@ -80,18 +90,21 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
_samples.openResourceAsStream("TestMickey.doc")
);
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs);
-
- // Custom properties are part of the document info stream
- String dinfText = ext.getDocumentSummaryInformationText();
- assertTrue(dinfText.indexOf("Client = sample client") > -1);
- assertTrue(dinfText.indexOf("Division = sample division") > -1);
-
- String text = ext.getText();
- assertTrue(text.indexOf("Client = sample client") > -1);
- assertTrue(text.indexOf("Division = sample division") > -1);
+ try {
+ // Custom properties are part of the document info stream
+ String dinfText = ext.getDocumentSummaryInformationText();
+ assertTrue(dinfText.indexOf("Client = sample client") > -1);
+ assertTrue(dinfText.indexOf("Division = sample division") > -1);
+
+ String text = ext.getText();
+ assertTrue(text.indexOf("Client = sample client") > -1);
+ assertTrue(text.indexOf("Division = sample division") > -1);
+ } finally {
+ ext.close();
+ }
}
- public void testConstructors() {
+ public void testConstructors() throws IOException {
POIFSFileSystem fs;
HSSFWorkbook wb;
try {
@@ -102,9 +115,29 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
}
ExcelExtractor excelExt = new ExcelExtractor(wb);
- String fsText = (new HPSFPropertiesExtractor(fs)).getText();
- String hwText = (new HPSFPropertiesExtractor(wb)).getText();
- String eeText = (new HPSFPropertiesExtractor(excelExt)).getText();
+ final String fsText;
+ HPSFPropertiesExtractor fsExt = new HPSFPropertiesExtractor(fs);
+ try {
+ fsText = fsExt.getText();
+ } finally {
+ fsExt.close();
+ }
+
+ final String hwText;
+ HPSFPropertiesExtractor hwExt = new HPSFPropertiesExtractor(wb);
+ try {
+ hwText = hwExt.getText();
+ } finally {
+ hwExt.close();
+ }
+
+ final String eeText;
+ HPSFPropertiesExtractor eeExt = new HPSFPropertiesExtractor(excelExt);
+ try {
+ eeText = eeExt.getText();
+ } finally {
+ eeExt.close();
+ }
assertEquals(fsText, hwText);
assertEquals(fsText, eeText);
@@ -113,13 +146,17 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1);
}
- public void test42726() {
- HPSFPropertiesExtractor ex = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls"));
- String txt = ex.getText();
- assertTrue(txt.indexOf("PID_AUTHOR") != -1);
- assertTrue(txt.indexOf("PID_EDITTIME") != -1);
- assertTrue(txt.indexOf("PID_REVNUMBER") != -1);
- assertTrue(txt.indexOf("PID_THUMBNAIL") != -1);
+ public void test42726() throws IOException {
+ HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls"));
+ try {
+ String txt = ext.getText();
+ assertTrue(txt.indexOf("PID_AUTHOR") != -1);
+ assertTrue(txt.indexOf("PID_EDITTIME") != -1);
+ assertTrue(txt.indexOf("PID_REVNUMBER") != -1);
+ assertTrue(txt.indexOf("PID_THUMBNAIL") != -1);
+ } finally {
+ ext.close();
+ }
}
public void testThumbnail() throws Exception {
@@ -131,4 +168,24 @@ public final class TestHPSFPropertiesExtractor extends TestCase {
assertNotNull(thumbnail.getThumbnailAsWMF());
wb.close();
}
+
+ public void testExtractorFromWord6Extractor() throws Exception {
+ POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc"));
+ Word6Extractor wExt = new Word6Extractor(fs);
+ try {
+ POITextExtractor ext = wExt.getMetadataTextExtractor();
+ try {
+ // Now overall
+ String text = ext.getText();
+ assertTrue(text.indexOf("TEMPLATE = Normal") > -1);
+ assertTrue(text.indexOf("SUBJECT = sample subject") > -1);
+ assertTrue(text.indexOf("MANAGER = sample manager") > -1);
+ assertTrue(text.indexOf("COMPANY = sample company") > -1);
+ } finally {
+ ext.close();
+ }
+ } finally {
+ wExt.close();
+ }
+ }
}
diff --git a/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java b/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java
index 9766331085..ff76cfa19c 100644
--- a/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java
+++ b/src/testcases/org/apache/poi/hssf/eventusermodel/TestHSSFEventFactory.java
@@ -107,8 +107,6 @@ public final class TestHSSFEventFactory extends TestCase {
POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls"));
HSSFEventFactory factory = new HSSFEventFactory();
factory.processWorkbookEvents(req, fs);
-
- assertTrue("no errors while processing the file", true);
}
private static class MockHSSFListener implements HSSFListener {
@@ -125,4 +123,18 @@ public final class TestHSSFEventFactory extends TestCase {
records.add(record);
}
}
+
+ public void testWithDifferentWorkbookName() throws Exception {
+ HSSFRequest req = new HSSFRequest();
+ MockHSSFListener mockListen = new MockHSSFListener();
+ req.addListenerForAllRecords(mockListen);
+
+ POIFSFileSystem fs = new POIFSFileSystem(openSample("BOOK_in_capitals.xls"));
+ HSSFEventFactory factory = new HSSFEventFactory();
+ factory.processWorkbookEvents(req, fs);
+
+ fs = new POIFSFileSystem(openSample("WORKBOOK_in_capitals.xls"));
+ factory = new HSSFEventFactory();
+ factory.processWorkbookEvents(req, fs);
+ }
}
diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
index de82210291..f7584ff11b 100644
--- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
+++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java
@@ -46,15 +46,18 @@ public final class TestExcelExtractor extends TestCase {
}
- public void testSimple() {
-
+ public void testSimple() throws IOException {
ExcelExtractor extractor = createExtractor("Simple.xls");
- assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
-
- // Now turn off sheet names
- extractor.setIncludeSheetNames(false);
- assertEquals("replaceMe\n", extractor.getText());
+ try {
+ assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText());
+
+ // Now turn off sheet names
+ extractor.setIncludeSheetNames(false);
+ assertEquals("replaceMe\n", extractor.getText());
+ } finally {
+ extractor.close();
+ }
}
public void testNumericFormula() {
@@ -126,45 +129,47 @@ public final class TestExcelExtractor extends TestCase {
public void testEventExtractor() throws Exception {
- EventBasedExcelExtractor extractor;
-
// First up, a simple file with string
// based formulas in it
- extractor = new EventBasedExcelExtractor(
+ EventBasedExcelExtractor extractor = new EventBasedExcelExtractor(
new POIFSFileSystem(
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls")
)
);
- extractor.setIncludeSheetNames(true);
-
- String text = extractor.getText();
- assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
-
- extractor.setIncludeSheetNames(false);
- extractor.setFormulasNotResults(true);
-
- text = extractor.getText();
- assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
-
-
- // Now, a slightly longer file with numeric formulas
- extractor = new EventBasedExcelExtractor(
- new POIFSFileSystem(
- HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
- )
- );
- extractor.setIncludeSheetNames(false);
- extractor.setFormulasNotResults(true);
-
- text = extractor.getText();
- assertEquals(
- "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
- "2000\t2\n" +
- "3000\t3\n" +
- "4000\t4\n" +
- "5000\t5\n",
- text
- );
+ try {
+ extractor.setIncludeSheetNames(true);
+
+ String text = extractor.getText();
+ assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text);
+
+ extractor.setIncludeSheetNames(false);
+ extractor.setFormulasNotResults(true);
+
+ text = extractor.getText();
+ assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text);
+
+
+ // Now, a slightly longer file with numeric formulas
+ extractor = new EventBasedExcelExtractor(
+ new POIFSFileSystem(
+ HSSFTestDataSamples.openSampleFileStream("sumifformula.xls")
+ )
+ );
+ extractor.setIncludeSheetNames(false);
+ extractor.setFormulasNotResults(true);
+
+ text = extractor.getText();
+ assertEquals(
+ "1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" +
+ "2000\t2\n" +
+ "3000\t3\n" +
+ "4000\t4\n" +
+ "5000\t5\n",
+ text
+ );
+ } finally {
+ extractor.close();
+ }
}
public void testWithComments() {
@@ -272,15 +277,22 @@ public final class TestExcelExtractor extends TestCase {
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
ExcelExtractor exA = new ExcelExtractor(wbA);
- ExcelExtractor exB = new ExcelExtractor(wbB);
-
- assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
- exA.getText());
- assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
-
- assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
- exB.getText());
- assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
+ try {
+ ExcelExtractor exB = new ExcelExtractor(wbB);
+ try {
+ assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
+ exA.getText());
+ assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
+
+ assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
+ exB.getText());
+ assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
+ } finally {
+ exB.close();
+ }
+ } finally {
+ exA.close();
+ }
}
/**
@@ -299,21 +311,32 @@ public final class TestExcelExtractor extends TestCase {
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true);
ExcelExtractor exA = new ExcelExtractor(wbA);
- ExcelExtractor exB = new ExcelExtractor(wbB);
-
- assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
- exA.getText());
- assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
-
- assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
- exB.getText());
- assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
-
- // And the base file too
- ExcelExtractor ex = new ExcelExtractor(fs);
- assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
- ex.getText());
- assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
+ try {
+ ExcelExtractor exB = new ExcelExtractor(wbB);
+ try {
+ assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n",
+ exA.getText());
+ assertEquals("Sample Excel", exA.getSummaryInformation().getTitle());
+
+ assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n",
+ exB.getText());
+ assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle());
+
+ // And the base file too
+ ExcelExtractor ex = new ExcelExtractor(fs);
+ try {
+ assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n",
+ ex.getText());
+ assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle());
+ } finally {
+ ex.close();
+ }
+ } finally {
+ exB.close();
+ }
+ } finally {
+ exA.close();
+ }
}
/**