* Also handle an XLSX file that does not have row numbers in the sheet XML. Excel can read such files, so it makes sense for XSSFSheetXMLHandler to read them as well.
* Remove some Eclipse warnings in test code.
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1662691 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_12_FINAL
@@ -16,15 +16,23 @@ | |||
==================================================================== */ | |||
package org.apache.poi.stress; | |||
import static org.junit.Assert.assertEquals; | |||
import static org.junit.Assert.assertFalse; | |||
import static org.junit.Assert.assertNotNull; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.FileNotFoundException; | |||
import java.io.IOException; | |||
import java.io.InputStream; | |||
import java.util.HashSet; | |||
import java.util.Set; | |||
import org.apache.poi.POITextExtractor; | |||
import org.apache.poi.extractor.ExtractorFactory; | |||
import org.apache.poi.openxml4j.exceptions.InvalidFormatException; | |||
import org.apache.poi.openxml4j.exceptions.OpenXML4JException; | |||
import org.apache.xmlbeans.XmlException; | |||
public abstract class AbstractFileHandler implements FileHandler { | |||
public static final Set<String> EXPECTED_EXTRACTOR_FAILURES = new HashSet<String>(); | |||
@@ -48,6 +56,22 @@ public abstract class AbstractFileHandler implements FileHandler { | |||
} | |||
// Runs the extraction check twice for the same file: first with the
// thread-level "prefer event extractors" flag set to true, then with it set
// to false. The value the flag had on entry is captured up front and put
// back in the finally block, so it is restored even when a run throws.
public void handleExtracting(File file) throws Exception { | |||
boolean before = ExtractorFactory.getThreadPrefersEventExtractors(); | |||
try { | |||
ExtractorFactory.setThreadPrefersEventExtractors(true); | |||
handleExtractingInternal(file); | |||
ExtractorFactory.setThreadPrefersEventExtractors(false); | |||
handleExtractingInternal(file); | |||
} finally { | |||
ExtractorFactory.setThreadPrefersEventExtractors(before); | |||
} | |||
} | |||
private void handleExtractingInternal(File file) throws Exception { | |||
long length = file.length(); | |||
long modified = file.lastModified(); | |||
POITextExtractor extractor = ExtractorFactory.createExtractor(file); | |||
try { | |||
assertNotNull(extractor); | |||
@@ -60,6 +84,11 @@ public abstract class AbstractFileHandler implements FileHandler { | |||
assertFalse("Expected Extraction to fail for file " + file + " and handler " + this + ", but did not fail!", | |||
EXPECTED_EXTRACTOR_FAILURES.contains(file)); | |||
assertEquals("File should not be modified by extractor", length, file.length()); | |||
assertEquals("File should not be modified by extractor", modified, file.lastModified()); | |||
handleExtractingAsStream(file); | |||
} catch (IllegalArgumentException e) { | |||
if(!EXPECTED_EXTRACTOR_FAILURES.contains(file)) { | |||
throw new Exception("While handling " + file, e); | |||
@@ -68,4 +97,22 @@ public abstract class AbstractFileHandler implements FileHandler { | |||
extractor.close(); | |||
} | |||
} | |||
// Repeats the extraction through an InputStream instead of a File: opens the
// file, asks ExtractorFactory for an extractor on that stream, and asserts
// that both the extractor and the text it returns are non-null.
// The extractor is closed first and the stream afterwards, each in its own
// finally block.
private void handleExtractingAsStream(File file) throws FileNotFoundException, | |||
IOException, InvalidFormatException, OpenXML4JException, | |||
XmlException { | |||
InputStream stream = new FileInputStream(file); | |||
try { | |||
POITextExtractor streamExtractor = ExtractorFactory.createExtractor(stream); | |||
try { | |||
assertNotNull(streamExtractor); | |||
assertNotNull(streamExtractor.getText()); | |||
} finally { | |||
streamExtractor.close(); | |||
} | |||
} finally { | |||
stream.close(); | |||
} | |||
} | |||
} |
@@ -18,6 +18,7 @@ package org.apache.poi.stress; | |||
import static org.junit.Assert.assertNotNull; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.InputStream; | |||
@@ -43,4 +44,10 @@ public class HPSFFileHandler extends AbstractFileHandler { | |||
stream.close(); | |||
} | |||
} | |||
// Convenience test case: runs handleExtracting on one fixed sample file
// (test-data/hpsf/TestBug44375.xls) so this handler can be checked locally
// without executing the full TestAllFiles.
@Test | |||
public void testExtractor() throws Exception { | |||
handleExtracting(new File("test-data/hpsf/TestBug44375.xls")); | |||
} | |||
} |
@@ -17,6 +17,7 @@ | |||
package org.apache.poi.stress; | |||
import java.io.ByteArrayOutputStream; | |||
import java.io.File; | |||
import java.io.FileInputStream; | |||
import java.io.InputStream; | |||
@@ -71,4 +72,10 @@ public class XSSFFileHandler extends SpreadsheetHandler { | |||
stream.close(); | |||
} | |||
} | |||
// Convenience test case: runs handleExtracting on one fixed sample file
// (test-data/spreadsheet/56278.xlsx) so this handler can be checked locally
// without executing the full TestAllFiles.
@Test | |||
public void testExtractor() throws Exception { | |||
handleExtracting(new File("test-data/spreadsheet/56278.xlsx")); | |||
} | |||
} |
@@ -57,6 +57,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor { | |||
} | |||
public String getDocumentSummaryInformationText() { | |||
if(document == null) { // event based extractor does not have a document | |||
return ""; | |||
} | |||
DocumentSummaryInformation dsi = document.getDocumentSummaryInformation(); | |||
StringBuffer text = new StringBuffer(); | |||
@@ -78,6 +82,10 @@ public class HPSFPropertiesExtractor extends POITextExtractor { | |||
return text.toString(); | |||
} | |||
public String getSummaryInformationText() { | |||
if(document == null) { // event based extractor does not have a document | |||
return ""; | |||
} | |||
SummaryInformation si = document.getSummaryInformation(); | |||
// Just normal properties |
@@ -19,6 +19,7 @@ package org.apache.poi.hssf.eventusermodel; | |||
import java.io.InputStream; | |||
import java.io.IOException; | |||
import java.util.Set; | |||
import org.apache.poi.hssf.eventusermodel.HSSFUserException; | |||
import org.apache.poi.hssf.record.*; | |||
@@ -56,11 +57,24 @@ public class HSSFEventFactory { | |||
* @param req an Instance of HSSFRequest which has your registered listeners | |||
* @param dir a DirectoryNode containing your workbook | |||
*/ | |||
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { | |||
InputStream in = dir.createDocumentInputStream("Workbook"); | |||
processEvents(req, in); | |||
} | |||
public void processWorkbookEvents(HSSFRequest req, DirectoryNode dir) throws IOException { | |||
// some old documents have "WORKBOOK" or "BOOK" | |||
final String name; | |||
Set<String> entryNames = dir.getEntryNames(); | |||
if (entryNames.contains("Workbook")) { | |||
name = "Workbook"; | |||
} else if (entryNames.contains("WORKBOOK")) { | |||
name = "WORKBOOK"; | |||
} else if (entryNames.contains("BOOK")) { | |||
name = "BOOK"; | |||
} else { | |||
name = "Workbook"; | |||
} | |||
InputStream in = dir.createDocumentInputStream(name); | |||
processEvents(req, in); | |||
} | |||
/** | |||
* Processes a file into essentially record events. |
@@ -67,9 +67,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { | |||
* Returns the core document properties, eg author | |||
*/ | |||
public String getCorePropertiesText() { | |||
POIXMLDocument document = getDocument(); | |||
if(document == null) { // event based extractor does not have a document | |||
return ""; | |||
} | |||
StringBuffer text = new StringBuffer(); | |||
PackagePropertiesPart props = | |||
getDocument().getProperties().getCoreProperties().getUnderlyingProperties(); | |||
PackagePropertiesPart props = | |||
document.getProperties().getCoreProperties().getUnderlyingProperties(); | |||
appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); | |||
appendIfPresent(text, "Category", props.getCategoryProperty().getValue()); | |||
@@ -99,9 +104,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { | |||
* application | |||
*/ | |||
public String getExtendedPropertiesText() { | |||
POIXMLDocument document = getDocument(); | |||
if(document == null) { // event based extractor does not have a document | |||
return ""; | |||
} | |||
StringBuffer text = new StringBuffer(); | |||
org.openxmlformats.schemas.officeDocument.x2006.extendedProperties.CTProperties | |||
props = getDocument().getProperties().getExtendedProperties().getUnderlyingProperties(); | |||
props = document.getProperties().getExtendedProperties().getUnderlyingProperties(); | |||
appendIfPresent(text, "Application", props.getApplication()); | |||
appendIfPresent(text, "AppVersion", props.getAppVersion()); | |||
@@ -127,9 +137,14 @@ public class POIXMLPropertiesTextExtractor extends POIXMLTextExtractor { | |||
*/ | |||
@SuppressWarnings("deprecation") | |||
public String getCustomPropertiesText() { | |||
POIXMLDocument document = getDocument(); | |||
if(document == null) { // event based extractor does not have a document | |||
return ""; | |||
} | |||
StringBuilder text = new StringBuilder(); | |||
org.openxmlformats.schemas.officeDocument.x2006.customProperties.CTProperties | |||
props = getDocument().getProperties().getCustomProperties().getUnderlyingProperties(); | |||
props = document.getProperties().getCustomProperties().getUnderlyingProperties(); | |||
for(CTProperty property : props.getPropertyArray()) { | |||
String val = "(not implemented!)"; |
@@ -265,10 +265,10 @@ public class ExtractorFactory { | |||
/** | |||
* Returns an array of text extractors, one for each of | |||
* the embeded documents in the file (if there are any). | |||
* If there are no embeded documents, you'll get back an | |||
* the embedded documents in the file (if there are any). | |||
* If there are no embedded documents, you'll get back an | |||
* empty array. Otherwise, you'll get one open | |||
* {@link POITextExtractor} for each embeded file. | |||
* {@link POITextExtractor} for each embedded file. | |||
*/ | |||
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, InvalidFormatException, OpenXML4JException, XmlException { | |||
// All the embded directories we spotted |
@@ -96,6 +96,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { | |||
private String formatString; | |||
private final DataFormatter formatter; | |||
private int rowNum; | |||
private int nextRowNum; // some sheets do not have rowNums, Excel can read them so we should try to handle them correctly as well | |||
private String cellRef; | |||
private boolean formulasNotResults; | |||
@@ -240,7 +241,12 @@ public class XSSFSheetXMLHandler extends DefaultHandler { | |||
headerFooter.setLength(0); | |||
} | |||
else if("row".equals(name)) { | |||
rowNum = Integer.parseInt(attributes.getValue("r")) - 1; | |||
String rowNumStr = attributes.getValue("r"); | |||
if(rowNumStr != null) { | |||
rowNum = Integer.parseInt(rowNumStr) - 1; | |||
} else { | |||
rowNum = nextRowNum; | |||
} | |||
output.startRow(rowNum); | |||
} | |||
// c => cell | |||
@@ -343,7 +349,7 @@ public class XSSFSheetXMLHandler extends DefaultHandler { | |||
case NUMBER: | |||
String n = value.toString(); | |||
if (this.formatString != null) | |||
if (this.formatString != null && n.length() > 0) | |||
thisStr = formatter.formatRawCellContents(Double.parseDouble(n), this.formatIndex, this.formatString); | |||
else | |||
thisStr = n; | |||
@@ -370,6 +376,9 @@ public class XSSFSheetXMLHandler extends DefaultHandler { | |||
// Finish up the row | |||
output.endRow(rowNum); | |||
// some sheets do not have rowNum set in the XML, Excel can read them so we should try to read them as well | |||
nextRowNum = rowNum + 1; | |||
} else if ("sheetData".equals(name)) { | |||
// Handle any "missing" cells which had comments attached | |||
checkForEmptyCellComments(EmptyCellCommentsCheckType.END_OF_SHEET_DATA); |
@@ -23,6 +23,7 @@ import java.util.regex.Pattern; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.POITextExtractor; | |||
import org.apache.poi.POIXMLTextExtractor; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.hssf.extractor.ExcelExtractor; | |||
import org.apache.poi.xssf.XSSFTestDataSamples; | |||
@@ -155,7 +156,6 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
POITextExtractor[] extractors = | |||
new POITextExtractor[] { ooxmlExtractor, ole2Extractor }; | |||
for (int i = 0; i < extractors.length; i++) { | |||
@SuppressWarnings("resource") | |||
POITextExtractor extractor = extractors[i]; | |||
String text = extractor.getText().replaceAll("[\r\t]", ""); | |||
@@ -316,4 +316,25 @@ public class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
fixture.close(); | |||
} | |||
} | |||
// Smoke test for sample workbook 56278.xlsx using XSSFExcelExtractor on a
// fully opened workbook. It only asserts that getText() returns non-null;
// the extracted content itself is not checked. The extractor is always closed.
public void testFile56278_normal() throws Exception { | |||
// first with normal Text Extractor | |||
POIXMLTextExtractor extractor = new XSSFExcelExtractor( | |||
XSSFTestDataSamples.openSampleWorkbook("56278.xlsx")); | |||
try { | |||
assertNotNull(extractor.getText()); | |||
} finally { | |||
extractor.close(); | |||
} | |||
} | |||
// Same smoke test for 56278.xlsx, but with the extractor obtained from
// getExtractor(...) (helper not shown in this hunk; presumably it builds the
// event-based extractor this test class targets). It only asserts that
// getText() returns non-null. The extractor is always closed.
public void testFile56278_event() throws Exception { | |||
// then with event based one | |||
POIXMLTextExtractor extractor = getExtractor("56278.xlsx"); | |||
try { | |||
assertNotNull(extractor.getText()); | |||
} finally { | |||
extractor.close(); | |||
} | |||
} | |||
} |
@@ -22,10 +22,12 @@ import java.io.IOException; | |||
import junit.framework.TestCase; | |||
import org.apache.poi.POIDataSamples; | |||
import org.apache.poi.POITextExtractor; | |||
import org.apache.poi.hpsf.Thumbnail; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
import org.apache.poi.hssf.extractor.ExcelExtractor; | |||
import org.apache.poi.hssf.usermodel.HSSFWorkbook; | |||
import org.apache.poi.hwpf.extractor.Word6Extractor; | |||
import org.apache.poi.poifs.filesystem.POIFSFileSystem; | |||
public final class TestHPSFPropertiesExtractor extends TestCase { | |||
@@ -34,45 +36,53 @@ public final class TestHPSFPropertiesExtractor extends TestCase { | |||
public void testNormalProperties() throws Exception { | |||
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); | |||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); | |||
ext.getText(); | |||
// Check each bit in turn | |||
String sinfText = ext.getSummaryInformationText(); | |||
String dinfText = ext.getDocumentSummaryInformationText(); | |||
assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1); | |||
assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1); | |||
assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); | |||
assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); | |||
// Now overall | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("TEMPLATE = Normal") > -1); | |||
assertTrue(text.indexOf("SUBJECT = sample subject") > -1); | |||
assertTrue(text.indexOf("MANAGER = sample manager") > -1); | |||
assertTrue(text.indexOf("COMPANY = sample company") > -1); | |||
try { | |||
ext.getText(); | |||
// Check each bit in turn | |||
String sinfText = ext.getSummaryInformationText(); | |||
String dinfText = ext.getDocumentSummaryInformationText(); | |||
assertTrue(sinfText.indexOf("TEMPLATE = Normal") > -1); | |||
assertTrue(sinfText.indexOf("SUBJECT = sample subject") > -1); | |||
assertTrue(dinfText.indexOf("MANAGER = sample manager") > -1); | |||
assertTrue(dinfText.indexOf("COMPANY = sample company") > -1); | |||
// Now overall | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("TEMPLATE = Normal") > -1); | |||
assertTrue(text.indexOf("SUBJECT = sample subject") > -1); | |||
assertTrue(text.indexOf("MANAGER = sample manager") > -1); | |||
assertTrue(text.indexOf("COMPANY = sample company") > -1); | |||
} finally { | |||
ext.close(); | |||
} | |||
} | |||
public void testNormalUnicodeProperties() throws Exception { | |||
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestUnicode.xls")); | |||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); | |||
ext.getText(); | |||
// Check each bit in turn | |||
String sinfText = ext.getSummaryInformationText(); | |||
String dinfText = ext.getDocumentSummaryInformationText(); | |||
assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1); | |||
assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); | |||
assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); | |||
assertTrue(dinfText.indexOf("SCALE = false") > -1); | |||
// Now overall | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("AUTHOR = marshall") > -1); | |||
assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); | |||
assertTrue(text.indexOf("COMPANY = Schreiner") > -1); | |||
assertTrue(text.indexOf("SCALE = false") > -1); | |||
try { | |||
ext.getText(); | |||
// Check each bit in turn | |||
String sinfText = ext.getSummaryInformationText(); | |||
String dinfText = ext.getDocumentSummaryInformationText(); | |||
assertTrue(sinfText.indexOf("AUTHOR = marshall") > -1); | |||
assertTrue(sinfText.indexOf("TITLE = Titel: \u00c4h") > -1); | |||
assertTrue(dinfText.indexOf("COMPANY = Schreiner") > -1); | |||
assertTrue(dinfText.indexOf("SCALE = false") > -1); | |||
// Now overall | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("AUTHOR = marshall") > -1); | |||
assertTrue(text.indexOf("TITLE = Titel: \u00c4h") > -1); | |||
assertTrue(text.indexOf("COMPANY = Schreiner") > -1); | |||
assertTrue(text.indexOf("SCALE = false") > -1); | |||
} finally { | |||
ext.close(); | |||
} | |||
} | |||
public void testCustomProperties() throws Exception { | |||
@@ -80,18 +90,21 @@ public final class TestHPSFPropertiesExtractor extends TestCase { | |||
_samples.openResourceAsStream("TestMickey.doc") | |||
); | |||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(fs); | |||
// Custom properties are part of the document info stream | |||
String dinfText = ext.getDocumentSummaryInformationText(); | |||
assertTrue(dinfText.indexOf("Client = sample client") > -1); | |||
assertTrue(dinfText.indexOf("Division = sample division") > -1); | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("Client = sample client") > -1); | |||
assertTrue(text.indexOf("Division = sample division") > -1); | |||
try { | |||
// Custom properties are part of the document info stream | |||
String dinfText = ext.getDocumentSummaryInformationText(); | |||
assertTrue(dinfText.indexOf("Client = sample client") > -1); | |||
assertTrue(dinfText.indexOf("Division = sample division") > -1); | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("Client = sample client") > -1); | |||
assertTrue(text.indexOf("Division = sample division") > -1); | |||
} finally { | |||
ext.close(); | |||
} | |||
} | |||
public void testConstructors() { | |||
public void testConstructors() throws IOException { | |||
POIFSFileSystem fs; | |||
HSSFWorkbook wb; | |||
try { | |||
@@ -102,9 +115,29 @@ public final class TestHPSFPropertiesExtractor extends TestCase { | |||
} | |||
ExcelExtractor excelExt = new ExcelExtractor(wb); | |||
String fsText = (new HPSFPropertiesExtractor(fs)).getText(); | |||
String hwText = (new HPSFPropertiesExtractor(wb)).getText(); | |||
String eeText = (new HPSFPropertiesExtractor(excelExt)).getText(); | |||
final String fsText; | |||
HPSFPropertiesExtractor fsExt = new HPSFPropertiesExtractor(fs); | |||
try { | |||
fsText = fsExt.getText(); | |||
} finally { | |||
fsExt.close(); | |||
} | |||
final String hwText; | |||
HPSFPropertiesExtractor hwExt = new HPSFPropertiesExtractor(wb); | |||
try { | |||
hwText = hwExt.getText(); | |||
} finally { | |||
hwExt.close(); | |||
} | |||
final String eeText; | |||
HPSFPropertiesExtractor eeExt = new HPSFPropertiesExtractor(excelExt); | |||
try { | |||
eeText = eeExt.getText(); | |||
} finally { | |||
eeExt.close(); | |||
} | |||
assertEquals(fsText, hwText); | |||
assertEquals(fsText, eeText); | |||
@@ -113,13 +146,17 @@ public final class TestHPSFPropertiesExtractor extends TestCase { | |||
assertTrue(fsText.indexOf("TITLE = Titel: \u00c4h") > -1); | |||
} | |||
public void test42726() { | |||
HPSFPropertiesExtractor ex = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); | |||
String txt = ex.getText(); | |||
assertTrue(txt.indexOf("PID_AUTHOR") != -1); | |||
assertTrue(txt.indexOf("PID_EDITTIME") != -1); | |||
assertTrue(txt.indexOf("PID_REVNUMBER") != -1); | |||
assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); | |||
public void test42726() throws IOException { | |||
HPSFPropertiesExtractor ext = new HPSFPropertiesExtractor(HSSFTestDataSamples.openSampleWorkbook("42726.xls")); | |||
try { | |||
String txt = ext.getText(); | |||
assertTrue(txt.indexOf("PID_AUTHOR") != -1); | |||
assertTrue(txt.indexOf("PID_EDITTIME") != -1); | |||
assertTrue(txt.indexOf("PID_REVNUMBER") != -1); | |||
assertTrue(txt.indexOf("PID_THUMBNAIL") != -1); | |||
} finally { | |||
ext.close(); | |||
} | |||
} | |||
public void testThumbnail() throws Exception { | |||
@@ -131,4 +168,24 @@ public final class TestHPSFPropertiesExtractor extends TestCase { | |||
assertNotNull(thumbnail.getThumbnailAsWMF()); | |||
wb.close(); | |||
} | |||
// Opens TestMickey.doc with a Word6Extractor, asks it for its metadata text
// extractor and checks that the metadata text contains the expected
// TEMPLATE, SUBJECT, MANAGER and COMPANY entries.
// The metadata extractor is closed first, then the Word6Extractor, each in
// its own finally block.
public void testExtractorFromWord6Extractor() throws Exception { | |||
POIFSFileSystem fs = new POIFSFileSystem(_samples.openResourceAsStream("TestMickey.doc")); | |||
Word6Extractor wExt = new Word6Extractor(fs); | |||
try { | |||
POITextExtractor ext = wExt.getMetadataTextExtractor(); | |||
try { | |||
// Now overall | |||
String text = ext.getText(); | |||
assertTrue(text.indexOf("TEMPLATE = Normal") > -1); | |||
assertTrue(text.indexOf("SUBJECT = sample subject") > -1); | |||
assertTrue(text.indexOf("MANAGER = sample manager") > -1); | |||
assertTrue(text.indexOf("COMPANY = sample company") > -1); | |||
} finally { | |||
ext.close(); | |||
} | |||
} finally { | |||
wExt.close(); | |||
} | |||
} | |||
} |
@@ -107,8 +107,6 @@ public final class TestHSSFEventFactory extends TestCase { | |||
POIFSFileSystem fs = new POIFSFileSystem(openSample("42844.xls")); | |||
HSSFEventFactory factory = new HSSFEventFactory(); | |||
factory.processWorkbookEvents(req, fs); | |||
assertTrue("no errors while processing the file", true); | |||
} | |||
private static class MockHSSFListener implements HSSFListener { | |||
@@ -125,4 +123,18 @@ public final class TestHSSFEventFactory extends TestCase { | |||
records.add(record); | |||
} | |||
} | |||
// Feeds two sample files, BOOK_in_capitals.xls and WORKBOOK_in_capitals.xls,
// through HSSFEventFactory.processWorkbookEvents with a listener registered
// for all records. The same request and listener are reused for both files.
// Nothing is asserted about the records received; the test passes as long as
// neither call throws.
public void testWithDifferentWorkbookName() throws Exception { | |||
HSSFRequest req = new HSSFRequest(); | |||
MockHSSFListener mockListen = new MockHSSFListener(); | |||
req.addListenerForAllRecords(mockListen); | |||
POIFSFileSystem fs = new POIFSFileSystem(openSample("BOOK_in_capitals.xls")); | |||
HSSFEventFactory factory = new HSSFEventFactory(); | |||
factory.processWorkbookEvents(req, fs); | |||
fs = new POIFSFileSystem(openSample("WORKBOOK_in_capitals.xls")); | |||
factory = new HSSFEventFactory(); | |||
factory.processWorkbookEvents(req, fs); | |||
} | |||
} |
@@ -46,15 +46,18 @@ public final class TestExcelExtractor extends TestCase { | |||
} | |||
public void testSimple() { | |||
public void testSimple() throws IOException { | |||
ExcelExtractor extractor = createExtractor("Simple.xls"); | |||
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); | |||
// Now turn off sheet names | |||
extractor.setIncludeSheetNames(false); | |||
assertEquals("replaceMe\n", extractor.getText()); | |||
try { | |||
assertEquals("Sheet1\nreplaceMe\nSheet2\nSheet3\n", extractor.getText()); | |||
// Now turn off sheet names | |||
extractor.setIncludeSheetNames(false); | |||
assertEquals("replaceMe\n", extractor.getText()); | |||
} finally { | |||
extractor.close(); | |||
} | |||
} | |||
public void testNumericFormula() { | |||
@@ -126,45 +129,47 @@ public final class TestExcelExtractor extends TestCase { | |||
public void testEventExtractor() throws Exception { | |||
EventBasedExcelExtractor extractor; | |||
// First up, a simple file with string | |||
// based formulas in it | |||
extractor = new EventBasedExcelExtractor( | |||
EventBasedExcelExtractor extractor = new EventBasedExcelExtractor( | |||
new POIFSFileSystem( | |||
HSSFTestDataSamples.openSampleFileStream("SimpleWithFormula.xls") | |||
) | |||
); | |||
extractor.setIncludeSheetNames(true); | |||
String text = extractor.getText(); | |||
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); | |||
extractor.setIncludeSheetNames(false); | |||
extractor.setFormulasNotResults(true); | |||
text = extractor.getText(); | |||
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); | |||
// Now, a slightly longer file with numeric formulas | |||
extractor = new EventBasedExcelExtractor( | |||
new POIFSFileSystem( | |||
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") | |||
) | |||
); | |||
extractor.setIncludeSheetNames(false); | |||
extractor.setFormulasNotResults(true); | |||
text = extractor.getText(); | |||
assertEquals( | |||
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + | |||
"2000\t2\n" + | |||
"3000\t3\n" + | |||
"4000\t4\n" + | |||
"5000\t5\n", | |||
text | |||
); | |||
try { | |||
extractor.setIncludeSheetNames(true); | |||
String text = extractor.getText(); | |||
assertEquals("Sheet1\nreplaceme\nreplaceme\nreplacemereplaceme\nSheet2\nSheet3\n", text); | |||
extractor.setIncludeSheetNames(false); | |||
extractor.setFormulasNotResults(true); | |||
text = extractor.getText(); | |||
assertEquals("replaceme\nreplaceme\nCONCATENATE(A1,A2)\n", text); | |||
// Now, a slightly longer file with numeric formulas | |||
extractor = new EventBasedExcelExtractor( | |||
new POIFSFileSystem( | |||
HSSFTestDataSamples.openSampleFileStream("sumifformula.xls") | |||
) | |||
); | |||
extractor.setIncludeSheetNames(false); | |||
extractor.setFormulasNotResults(true); | |||
text = extractor.getText(); | |||
assertEquals( | |||
"1000\t1\tSUMIF(A1:A5,\">4000\",B1:B5)\n" + | |||
"2000\t2\n" + | |||
"3000\t3\n" + | |||
"4000\t4\n" + | |||
"5000\t5\n", | |||
text | |||
); | |||
} finally { | |||
extractor.close(); | |||
} | |||
} | |||
public void testWithComments() { | |||
@@ -272,15 +277,22 @@ public final class TestExcelExtractor extends TestCase { | |||
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); | |||
ExcelExtractor exA = new ExcelExtractor(wbA); | |||
ExcelExtractor exB = new ExcelExtractor(wbB); | |||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", | |||
exA.getText()); | |||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); | |||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", | |||
exB.getText()); | |||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); | |||
try { | |||
ExcelExtractor exB = new ExcelExtractor(wbB); | |||
try { | |||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", | |||
exA.getText()); | |||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); | |||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", | |||
exB.getText()); | |||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); | |||
} finally { | |||
exB.close(); | |||
} | |||
} finally { | |||
exA.close(); | |||
} | |||
} | |||
/** | |||
@@ -299,21 +311,32 @@ public final class TestExcelExtractor extends TestCase { | |||
HSSFWorkbook wbB = new HSSFWorkbook(dirB, fs, true); | |||
ExcelExtractor exA = new ExcelExtractor(wbA); | |||
ExcelExtractor exB = new ExcelExtractor(wbB); | |||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", | |||
exA.getText()); | |||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); | |||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", | |||
exB.getText()); | |||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); | |||
// And the base file too | |||
ExcelExtractor ex = new ExcelExtractor(fs); | |||
assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", | |||
ex.getText()); | |||
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); | |||
try { | |||
ExcelExtractor exB = new ExcelExtractor(wbB); | |||
try { | |||
assertEquals("Sheet1\nTest excel file\nThis is the first file\nSheet2\nSheet3\n", | |||
exA.getText()); | |||
assertEquals("Sample Excel", exA.getSummaryInformation().getTitle()); | |||
assertEquals("Sheet1\nAnother excel file\nThis is the second file\nSheet2\nSheet3\n", | |||
exB.getText()); | |||
assertEquals("Sample Excel 2", exB.getSummaryInformation().getTitle()); | |||
// And the base file too | |||
ExcelExtractor ex = new ExcelExtractor(fs); | |||
try { | |||
assertEquals("Sheet1\nI have lots of embeded files in me\nSheet2\nSheet3\n", | |||
ex.getText()); | |||
assertEquals("Excel With Embeded", ex.getSummaryInformation().getTitle()); | |||
} finally { | |||
ex.close(); | |||
} | |||
} finally { | |||
exB.close(); | |||
} | |||
} finally { | |||
exA.close(); | |||
} | |||
} | |||
/** |