Implement close() where necessary so resources are closed. Add close() to tests and run existing unit tests also against the Extractor that is built via the Factory. Also add a small test-suite to quickly execute all extractor-related tests. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1493669 13f79535-47bb-0310-9956-ffa450edef68 tags/3.10-beta1
@@ -16,6 +16,9 @@ | |||
==================================================================== */ | |||
package org.apache.poi; | |||
import java.io.Closeable; | |||
import java.io.IOException; | |||
/** | |||
* Common Parent for Text Extractors | |||
* of POI Documents. | |||
@@ -27,7 +30,7 @@ package org.apache.poi; | |||
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor | |||
* @see org.apache.poi.hwpf.extractor.WordExtractor | |||
*/ | |||
public abstract class POITextExtractor { | |||
public abstract class POITextExtractor implements Closeable { | |||
/** The POIDocument that's open */ | |||
protected POIDocument document; | |||
@@ -61,4 +64,15 @@ public abstract class POITextExtractor { | |||
* metadata / properties, such as author and title. | |||
*/ | |||
public abstract POITextExtractor getMetadataTextExtractor(); | |||
/** | |||
* Allows to free resources of the Extractor as soon as | |||
* it is not needed any more. This may include closing | |||
* open file handles and freeing memory. | |||
* | |||
* The Extractor cannot be used after close has been called. | |||
*/ | |||
public void close() throws IOException { | |||
// nothing to do in abstract class, derived classes may perform actions. | |||
} | |||
} |
@@ -17,6 +17,8 @@ | |||
package org.apache.poi; | |||
import java.io.IOException; | |||
import org.apache.poi.POIXMLProperties.CoreProperties; | |||
import org.apache.poi.POIXMLProperties.CustomProperties; | |||
import org.apache.poi.POIXMLProperties.ExtendedProperties; | |||
@@ -75,4 +77,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { | |||
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { | |||
return new POIXMLPropertiesTextExtractor(_document); | |||
} | |||
@Override | |||
public void close() throws IOException { | |||
// e.g. XSSFEventBaseExcelExtractor passes a null-document | |||
if(_document != null) { | |||
OPCPackage pkg = _document.getPackage(); | |||
if(pkg != null) { | |||
pkg.close(); | |||
} | |||
} | |||
super.close(); | |||
} | |||
} |
@@ -191,6 +191,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { | |||
} | |||
} | |||
@Override | |||
public void close() throws IOException { | |||
if (container != null) { | |||
container.close(); | |||
container = null; | |||
} | |||
super.close(); | |||
} | |||
protected class SheetTextExtractor implements SheetContentsHandler { | |||
private final StringBuffer output; | |||
private boolean firstCellOfRow = true; |
@@ -46,6 +46,9 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { | |||
assertTrue(text.contains("LastModifiedBy = Yury Batrakov")); | |||
assertTrue(cText.contains("LastModifiedBy = Yury Batrakov")); | |||
textExt.close(); | |||
ext.close(); | |||
} | |||
public void testCore() throws Exception { | |||
@@ -63,6 +66,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { | |||
assertTrue(text.contains("LastModifiedBy = Yury Batrakov")); | |||
assertTrue(cText.contains("LastModifiedBy = Yury Batrakov")); | |||
ext.close(); | |||
} | |||
public void testExtended() throws Exception { | |||
@@ -82,6 +87,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { | |||
assertTrue(text.contains("Company = Mera")); | |||
assertTrue(eText.contains("Application = Microsoft Excel")); | |||
assertTrue(eText.contains("Company = Mera")); | |||
ext.close(); | |||
} | |||
public void testCustom() throws Exception { | |||
@@ -99,6 +106,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { | |||
assertTrue(text.contains("description = another value")); | |||
assertTrue(cText.contains("description = another value")); | |||
ext.close(); | |||
} | |||
/** | |||
@@ -118,5 +127,7 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { | |||
assertFalse(text.contains("Created =")); // With date is null | |||
assertTrue(text.contains("CreatedString = ")); // Via string is blank | |||
assertTrue(text.contains("LastModifiedBy = IT Client Services")); | |||
ext.close(); | |||
} | |||
} |
@@ -43,8 +43,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
* Get text out of the simple file | |||
*/ | |||
public void testGetSimpleText() throws Exception { | |||
new XSLFPowerPointExtractor(xmlA); | |||
new XSLFPowerPointExtractor(pkg); | |||
new XSLFPowerPointExtractor(xmlA).close(); | |||
new XSLFPowerPointExtractor(pkg).close(); | |||
XSLFPowerPointExtractor extractor = | |||
new XSLFPowerPointExtractor(xmlA); | |||
@@ -148,6 +148,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
assertEquals( | |||
"\n\n\n\n", text | |||
); | |||
extractor.close(); | |||
} | |||
public void testGetComments() throws Exception { | |||
@@ -165,6 +167,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
// Check the authors came through too | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01")); | |||
extractor.close(); | |||
} | |||
public void testGetMasterText() throws Exception { | |||
@@ -206,6 +210,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
"This text comes from the Master Slide\n" | |||
, text | |||
); | |||
extractor.close(); | |||
} | |||
public void testTable() throws Exception { | |||
@@ -219,6 +225,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
// Check comments are there | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -267,6 +275,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { | |||
"Text missing for " + filename + "\n" + text, | |||
text.contains("Mystery") | |||
); | |||
extractor.close(); | |||
} | |||
} | |||
} |
@@ -30,12 +30,10 @@ import org.apache.poi.xssf.XSSFTestDataSamples; | |||
/** | |||
* Tests for {@link XSSFEventBasedExcelExtractor} | |||
*/ | |||
public final class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { | |||
return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples. | |||
openSamplePackage(sampleName)); | |||
public class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { | |||
return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples. | |||
openSamplePackage(sampleName)); | |||
} | |||
/** | |||
@@ -97,6 +95,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
CHUNK2 + | |||
"Sheet3\n" | |||
, text); | |||
extractor.close(); | |||
} | |||
public void testGetComplexText() throws Exception { | |||
@@ -112,6 +112,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
"Avgtxfull\n" + | |||
"(iii) AVERAGE TAX RATES ON ANNUAL" | |||
)); | |||
extractor.close(); | |||
} | |||
public void testInlineStrings() throws Exception { | |||
@@ -134,6 +136,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
// Formulas | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -159,5 +163,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { | |||
Matcher m = pattern.matcher(text); | |||
assertTrue(m.matches()); | |||
} | |||
ole2Extractor.close(); | |||
ooxmlExtractor.close(); | |||
} | |||
} |
@@ -0,0 +1,29 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.extractor; | |||
import org.apache.poi.extractor.ExtractorFactory; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor { | |||
@Override | |||
protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { | |||
ExtractorFactory.setAllThreadsPreferEventExtractors(true); | |||
return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); | |||
} | |||
} |
@@ -17,6 +17,7 @@ | |||
package org.apache.poi.xssf.extractor; | |||
import java.io.IOException; | |||
import java.util.regex.Matcher; | |||
import java.util.regex.Pattern; | |||
@@ -30,17 +31,16 @@ import org.apache.poi.xssf.XSSFTestDataSamples; | |||
/** | |||
* Tests for {@link XSSFExcelExtractor} | |||
*/ | |||
public final class TestXSSFExcelExtractor extends TestCase { | |||
private static final XSSFExcelExtractor getExtractor(String sampleName) { | |||
public class TestXSSFExcelExtractor extends TestCase { | |||
protected XSSFExcelExtractor getExtractor(String sampleName) { | |||
return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName)); | |||
} | |||
/** | |||
* Get text out of the simple file | |||
* @throws IOException | |||
*/ | |||
public void testGetSimpleText() { | |||
public void testGetSimpleText() throws IOException { | |||
// a very simple file | |||
XSSFExcelExtractor extractor = getExtractor("sample.xlsx"); | |||
extractor.getText(); | |||
@@ -96,9 +96,11 @@ public final class TestXSSFExcelExtractor extends TestCase { | |||
CHUNK2 + | |||
"Sheet3\n" | |||
, text); | |||
extractor.close(); | |||
} | |||
public void testGetComplexText() { | |||
public void testGetComplexText() throws IOException { | |||
// A fairly complex file | |||
XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx"); | |||
extractor.getText(); | |||
@@ -112,14 +114,17 @@ public final class TestXSSFExcelExtractor extends TestCase { | |||
"Avgtxfull\n" + | |||
"null\t(iii) AVERAGE TAX RATES ON ANNUAL" | |||
)); | |||
extractor.close(); | |||
} | |||
/** | |||
* Test that we return pretty much the same as | |||
* ExcelExtractor does, when we're both passed | |||
* the same file, just saved as xls and xlsx | |||
* @throws IOException | |||
*/ | |||
public void testComparedToOLE2() { | |||
public void testComparedToOLE2() throws IOException { | |||
// A fairly simple file - ooxml | |||
XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx"); | |||
@@ -137,12 +142,16 @@ public final class TestXSSFExcelExtractor extends TestCase { | |||
Matcher m = pattern.matcher(text); | |||
assertTrue(m.matches()); | |||
} | |||
ole2Extractor.close(); | |||
ooxmlExtractor.close(); | |||
} | |||
/** | |||
* From bug #45540 | |||
* @throws IOException | |||
*/ | |||
public void testHeaderFooter() { | |||
public void testHeaderFooter() throws IOException { | |||
String[] files = new String[] { | |||
"45540_classic_Header.xlsx", "45540_form_Header.xlsx", | |||
"45540_classic_Footer.xlsx", "45540_form_Footer.xlsx", | |||
@@ -152,15 +161,17 @@ public final class TestXSSFExcelExtractor extends TestCase { | |||
String text = extractor.getText(); | |||
assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc")); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); | |||
extractor.close(); | |||
} | |||
} | |||
/** | |||
* From bug #45544 | |||
* @throws IOException | |||
*/ | |||
public void testComments() { | |||
public void testComments() throws IOException { | |||
XSSFExcelExtractor extractor = getExtractor("45544.xlsx"); | |||
String text = extractor.getText(); | |||
@@ -173,9 +184,11 @@ public final class TestXSSFExcelExtractor extends TestCase { | |||
text = extractor.getText(); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); | |||
extractor.close(); | |||
} | |||
public void testInlineStrings() { | |||
public void testInlineStrings() throws IOException { | |||
XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx"); | |||
extractor.setFormulasNotResults(true); | |||
String text = extractor.getText(); | |||
@@ -195,5 +208,7 @@ public final class TestXSSFExcelExtractor extends TestCase { | |||
// Formulas | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); | |||
extractor.close(); | |||
} | |||
} |
@@ -0,0 +1,37 @@ | |||
/* ==================================================================== | |||
Licensed to the Apache Software Foundation (ASF) under one or more | |||
contributor license agreements. See the NOTICE file distributed with | |||
this work for additional information regarding copyright ownership. | |||
The ASF licenses this file to You under the Apache License, Version 2.0 | |||
(the "License"); you may not use this file except in compliance with | |||
the License. You may obtain a copy of the License at | |||
http://www.apache.org/licenses/LICENSE-2.0 | |||
Unless required by applicable law or agreed to in writing, software | |||
distributed under the License is distributed on an "AS IS" BASIS, | |||
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | |||
See the License for the specific language governing permissions and | |||
limitations under the License. | |||
==================================================================== */ | |||
package org.apache.poi.xssf.extractor; | |||
import org.apache.poi.extractor.ExtractorFactory; | |||
import org.apache.poi.hssf.HSSFTestDataSamples; | |||
/** | |||
* Tests for {@link XSSFExcelExtractor} | |||
*/ | |||
public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor { | |||
@Override | |||
protected final XSSFExcelExtractor getExtractor(String sampleName) { | |||
ExtractorFactory.setAllThreadsPreferEventExtractors(false); | |||
ExtractorFactory.setThreadPrefersEventExtractors(false); | |||
try { | |||
return (XSSFExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); | |||
} catch (Exception e) { | |||
throw new RuntimeException(e); | |||
} | |||
} | |||
} |
@@ -57,6 +57,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
} | |||
} | |||
assertEquals(3, ps); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -93,6 +95,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
} | |||
} | |||
assertEquals(134, ps); | |||
extractor.close(); | |||
} | |||
public void testGetWithHyperlinks() throws IOException { | |||
@@ -118,6 +122,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
"We have a hyperlink <http://poi.apache.org/> here, and another.\n", | |||
extractor.getText() | |||
); | |||
extractor.close(); | |||
} | |||
public void testHeadersFooters() throws IOException { | |||
@@ -141,7 +147,11 @@ public class TestXWPFWordExtractor extends TestCase { | |||
// Now another file, expect multiple headers | |||
// and multiple footers | |||
doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx"); | |||
extractor.close(); | |||
extractor = new XWPFWordExtractor(doc); | |||
extractor.close(); | |||
extractor = | |||
new XWPFWordExtractor(doc); | |||
extractor.getText(); | |||
@@ -161,6 +171,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
"Footer Left\tFooter Middle\tFooter Right\n", | |||
extractor.getText() | |||
); | |||
extractor.close(); | |||
} | |||
public void testFootnotes() throws IOException { | |||
@@ -169,6 +181,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
String text = extractor.getText(); | |||
assertTrue(text.contains("snoska")); | |||
assertTrue(text.contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy")); | |||
extractor.close(); | |||
} | |||
@@ -177,6 +191,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc); | |||
assertTrue(extractor.getText().contains("snoska")); | |||
extractor.close(); | |||
} | |||
public void testFormFootnotes() throws IOException { | |||
@@ -186,6 +202,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
String text = extractor.getText(); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); | |||
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); | |||
extractor.close(); | |||
} | |||
public void testEndnotes() throws IOException { | |||
@@ -194,6 +212,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
String text = extractor.getText(); | |||
assertTrue(text.contains("XXX")); | |||
assertTrue(text.contains("tilaka [endnoteRef:2]or 'tika'")); | |||
extractor.close(); | |||
} | |||
public void testInsertedDeletedText() throws IOException { | |||
@@ -202,6 +222,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
assertTrue(extractor.getText().contains("pendant worn")); | |||
assertTrue(extractor.getText().contains("extremely well")); | |||
extractor.close(); | |||
} | |||
public void testParagraphHeader() throws IOException { | |||
@@ -211,6 +233,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
assertTrue(extractor.getText().contains("Section 1")); | |||
assertTrue(extractor.getText().contains("Section 2")); | |||
assertTrue(extractor.getText().contains("Section 3")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -225,6 +249,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
assertTrue(extractor.getText().contains("2004")); | |||
assertTrue(extractor.getText().contains("2008")); | |||
assertTrue(extractor.getText().contains("(120 ")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -244,6 +270,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
// Now check the first paragraph in total | |||
assertTrue(extractor.getText().contains("a\tb\n")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -258,6 +286,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
assertTrue(text.length() > 0); | |||
assertFalse(text.contains("AUTHOR")); | |||
assertFalse(text.contains("CREATEDATE")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -271,6 +301,8 @@ public class TestXWPFWordExtractor extends TestCase { | |||
String text = extractor.getText(); | |||
assertTrue(text.length() > 0); | |||
assertTrue(text.contains("FldSimple.docx")); | |||
extractor.close(); | |||
} | |||
/** | |||
@@ -282,5 +314,7 @@ public class TestXWPFWordExtractor extends TestCase { | |||
XWPFWordExtractor extractor = new XWPFWordExtractor(doc); | |||
String text = extractor.getText(); | |||
assertTrue(text.length() > 0); | |||
extractor.close(); | |||
} | |||
} |
@@ -239,7 +239,7 @@ public final class TestExcelExtractor extends TestCase { | |||
) > -1 | |||
); | |||
assertTrue( | |||
assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'", | |||
text.indexOf( | |||
"nn.nn\t10.52\n" | |||
) > -1 |