From: Dominik Stadler Date: Mon, 17 Jun 2013 07:53:59 +0000 (+0000) Subject: Bug 54982: Add a close() interface to POITextExtractor which can be used to free... X-Git-Tag: 3.10-beta1~19 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=fa1977747654220d2b1afc0077e61fa91e2e4587;p=poi.git Bug 54982: Add a close() interface to POITextExtractor which can be used to free resources later. Implement close() where necessary so resources are closed. Add close() to tests and run existing unit tests also against the Extractor that is built via the Factory. Also add a small test-suite to quickly execute all extractor-related tests. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1493669 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/java/org/apache/poi/POITextExtractor.java b/src/java/org/apache/poi/POITextExtractor.java index 0b69894d08..e18078b461 100644 --- a/src/java/org/apache/poi/POITextExtractor.java +++ b/src/java/org/apache/poi/POITextExtractor.java @@ -16,6 +16,9 @@ ==================================================================== */ package org.apache.poi; +import java.io.Closeable; +import java.io.IOException; + /** * Common Parent for Text Extractors * of POI Documents. @@ -27,7 +30,7 @@ package org.apache.poi; * @see org.apache.poi.hdgf.extractor.VisioTextExtractor * @see org.apache.poi.hwpf.extractor.WordExtractor */ -public abstract class POITextExtractor { +public abstract class POITextExtractor implements Closeable { /** The POIDocument that's open */ protected POIDocument document; @@ -61,4 +64,15 @@ public abstract class POITextExtractor { * metadata / properties, such as author and title. */ public abstract POITextExtractor getMetadataTextExtractor(); + + /** + * Allows to free resources of the Extractor as soon as + * it is not needed any more. This may include closing + * open file handles and freeing memory. + * + * The Extractor cannot be used after close has been called. + */ + public void close() throws IOException { + // nothing to do in abstract class, derived classes may perform actions. + } } diff --git a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java index eee1d25abd..eeb03f7aad 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java +++ b/src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java @@ -17,6 +17,8 @@ package org.apache.poi; +import java.io.IOException; + import org.apache.poi.POIXMLProperties.CoreProperties; import org.apache.poi.POIXMLProperties.CustomProperties; import org.apache.poi.POIXMLProperties.ExtendedProperties; @@ -75,4 +77,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor { public POIXMLPropertiesTextExtractor getMetadataTextExtractor() { return new POIXMLPropertiesTextExtractor(_document); } + + @Override + public void close() throws IOException { + // e.g. XSSFEventBaseExcelExtractor passes a null-document + if(_document != null) { + OPCPackage pkg = _document.getPackage(); + if(pkg != null) { + pkg.close(); + } + } + super.close(); + } } diff --git a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java index efc42cff5f..0c31fe04d5 100644 --- a/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java +++ b/src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java @@ -191,6 +191,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor { } } + @Override + public void close() throws IOException { + if (container != null) { + container.close(); + container = null; + } + super.close(); + } + protected class SheetTextExtractor implements SheetContentsHandler { private final StringBuffer output; private boolean firstCellOfRow = true; diff --git a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java index 8ad2f78c4b..0df6ccf816 100644 --- a/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java @@ -46,6 +46,9 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("LastModifiedBy = Yury Batrakov")); assertTrue(cText.contains("LastModifiedBy = Yury Batrakov")); + + textExt.close(); + ext.close(); } public void testCore() throws Exception { @@ -63,6 +66,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("LastModifiedBy = Yury Batrakov")); assertTrue(cText.contains("LastModifiedBy = Yury Batrakov")); + + ext.close(); } public void testExtended() throws Exception { @@ -82,6 +87,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("Company = Mera")); assertTrue(eText.contains("Application = Microsoft Excel")); assertTrue(eText.contains("Company = Mera")); + + ext.close(); } public void testCustom() throws Exception { @@ -99,6 +106,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertTrue(text.contains("description = another value")); assertTrue(cText.contains("description = another value")); + + ext.close(); } /** @@ -118,5 +127,7 @@ public final class TestXMLPropertiesTextExtractor extends TestCase { assertFalse(text.contains("Created =")); // With date is null assertTrue(text.contains("CreatedString = ")); // Via string is blank assertTrue(text.contains("LastModifiedBy = IT Client Services")); + + ext.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java index cb7ad736f1..35ee3f1cb4 100644 --- a/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java @@ -43,8 +43,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { * Get text out of the simple file */ public void testGetSimpleText() throws Exception { - new XSLFPowerPointExtractor(xmlA); - new XSLFPowerPointExtractor(pkg); + new XSLFPowerPointExtractor(xmlA).close(); + new XSLFPowerPointExtractor(pkg).close(); XSLFPowerPointExtractor extractor = new XSLFPowerPointExtractor(xmlA); @@ -148,6 +148,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { assertEquals( "\n\n\n\n", text ); + + extractor.close(); } public void testGetComments() throws Exception { @@ -165,6 +167,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check the authors came through too assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01")); + + extractor.close(); } public void testGetMasterText() throws Exception { @@ -206,6 +210,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { "This text comes from the Master Slide\n" , text ); + + extractor.close(); } public void testTable() throws Exception { @@ -219,6 +225,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { // Check comments are there assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST")); + + extractor.close(); } /** @@ -267,6 +275,8 @@ public class TestXSLFPowerPointExtractor extends TestCase { "Text missing for " + filename + "\n" + text, text.contains("Mystery") ); + + extractor.close(); } } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java index eade64f2fe..eac3700e7d 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java @@ -30,12 +30,10 @@ import org.apache.poi.xssf.XSSFTestDataSamples; /** * Tests for {@link XSSFEventBasedExcelExtractor} */ -public final class TestXSSFEventBasedExcelExtractor extends TestCase { - - - private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { - return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples. - openSamplePackage(sampleName)); +public class TestXSSFEventBasedExcelExtractor extends TestCase { + protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { + return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples. + openSamplePackage(sampleName)); } /** @@ -97,6 +95,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { CHUNK2 + "Sheet3\n" , text); + + extractor.close(); } public void testGetComplexText() throws Exception { @@ -112,6 +112,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { "Avgtxfull\n" + "(iii) AVERAGE TAX RATES ON ANNUAL" )); + + extractor.close(); } public void testInlineStrings() throws Exception { @@ -134,6 +136,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { // Formulas assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); + + extractor.close(); } /** @@ -159,5 +163,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase { Matcher m = pattern.matcher(text); assertTrue(m.matches()); } + + ole2Extractor.close(); + ooxmlExtractor.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java new file mode 100644 index 0000000000..05e790fd59 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java @@ -0,0 +1,29 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ +package org.apache.poi.xssf.extractor; + +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; + + +public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor { + @Override + protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception { + ExtractorFactory.setAllThreadsPreferEventExtractors(true); + return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java index 2d14cd535d..bc86d6f9b9 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java @@ -17,6 +17,7 @@ package org.apache.poi.xssf.extractor; +import java.io.IOException; import java.util.regex.Matcher; import java.util.regex.Pattern; @@ -30,17 +31,16 @@ import org.apache.poi.xssf.XSSFTestDataSamples; /** * Tests for {@link XSSFExcelExtractor} */ -public final class TestXSSFExcelExtractor extends TestCase { - - - private static final XSSFExcelExtractor getExtractor(String sampleName) { +public class TestXSSFExcelExtractor extends TestCase { + protected XSSFExcelExtractor getExtractor(String sampleName) { return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName)); } /** * Get text out of the simple file + * @throws IOException */ - public void testGetSimpleText() { + public void testGetSimpleText() throws IOException { // a very simple file XSSFExcelExtractor extractor = getExtractor("sample.xlsx"); extractor.getText(); @@ -96,9 +96,11 @@ public final class TestXSSFExcelExtractor extends TestCase { CHUNK2 + "Sheet3\n" , text); + + extractor.close(); } - public void testGetComplexText() { + public void testGetComplexText() throws IOException { // A fairly complex file XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx"); extractor.getText(); @@ -112,14 +114,17 @@ public final class TestXSSFExcelExtractor extends TestCase { "Avgtxfull\n" + "null\t(iii) AVERAGE TAX RATES ON ANNUAL" )); + + extractor.close(); } /** * Test that we return pretty much the same as * ExcelExtractor does, when we're both passed * the same file, just saved as xls and xlsx + * @throws IOException */ - public void testComparedToOLE2() { + public void testComparedToOLE2() throws IOException { // A fairly simple file - ooxml XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx"); @@ -137,12 +142,16 @@ public final class TestXSSFExcelExtractor extends TestCase { Matcher m = pattern.matcher(text); assertTrue(m.matches()); } + + ole2Extractor.close(); + ooxmlExtractor.close(); } /** * From bug #45540 + * @throws IOException */ - public void testHeaderFooter() { + public void testHeaderFooter() throws IOException { String[] files = new String[] { "45540_classic_Header.xlsx", "45540_form_Header.xlsx", "45540_classic_Footer.xlsx", "45540_form_Footer.xlsx", @@ -152,15 +161,17 @@ public final class TestXSSFExcelExtractor extends TestCase { String text = extractor.getText(); assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc")); - assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + + extractor.close(); } } /** * From bug #45544 + * @throws IOException */ - public void testComments() { - + public void testComments() throws IOException { XSSFExcelExtractor extractor = getExtractor("45544.xlsx"); String text = extractor.getText(); @@ -173,9 +184,11 @@ public final class TestXSSFExcelExtractor extends TestCase { text = extractor.getText(); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + + extractor.close(); } - public void testInlineStrings() { + public void testInlineStrings() throws IOException { XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx"); extractor.setFormulasNotResults(true); String text = extractor.getText(); @@ -195,5 +208,7 @@ public final class TestXSSFExcelExtractor extends TestCase { // Formulas assertTrue("Unable to find expected word in text\n" + text, text.contains("A2")); assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2")); + + extractor.close(); } } diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java new file mode 100644 index 0000000000..fd5cde38e3 --- /dev/null +++ b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java @@ -0,0 +1,37 @@ +/* ==================================================================== + Licensed to the Apache Software Foundation (ASF) under one or more + contributor license agreements. See the NOTICE file distributed with + this work for additional information regarding copyright ownership. + The ASF licenses this file to You under the Apache License, Version 2.0 + (the "License"); you may not use this file except in compliance with + the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. +==================================================================== */ + +package org.apache.poi.xssf.extractor; + +import org.apache.poi.extractor.ExtractorFactory; +import org.apache.poi.hssf.HSSFTestDataSamples; + +/** + * Tests for {@link XSSFExcelExtractor} + */ +public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor { + @Override + protected final XSSFExcelExtractor getExtractor(String sampleName) { + ExtractorFactory.setAllThreadsPreferEventExtractors(false); + ExtractorFactory.setThreadPrefersEventExtractors(false); + try { + return (XSSFExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); + } catch (Exception e) { + throw new RuntimeException(e); + } + } +} diff --git a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java index 34d20aae74..f62749f524 100644 --- a/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java +++ b/src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java @@ -57,6 +57,8 @@ public class TestXWPFWordExtractor extends TestCase { } } assertEquals(3, ps); + + extractor.close(); } /** @@ -93,6 +95,8 @@ public class TestXWPFWordExtractor extends TestCase { } } assertEquals(134, ps); + + extractor.close(); } public void testGetWithHyperlinks() throws IOException { @@ -118,6 +122,8 @@ public class TestXWPFWordExtractor extends TestCase { "We have a hyperlink here, and another.\n", extractor.getText() ); + + extractor.close(); } public void testHeadersFooters() throws IOException { @@ -141,7 +147,11 @@ public class TestXWPFWordExtractor extends TestCase { // Now another file, expect multiple headers // and multiple footers doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx"); + extractor.close(); + extractor = new XWPFWordExtractor(doc); + extractor.close(); + extractor = new XWPFWordExtractor(doc); extractor.getText(); @@ -161,6 +171,8 @@ public class TestXWPFWordExtractor extends TestCase { "Footer Left\tFooter Middle\tFooter Right\n", extractor.getText() ); + + extractor.close(); } public void testFootnotes() throws IOException { @@ -169,6 +181,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue(text.contains("snoska")); assertTrue(text.contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy")); + + extractor.close(); } @@ -177,6 +191,8 @@ public class TestXWPFWordExtractor extends TestCase { XWPFWordExtractor extractor = new XWPFWordExtractor(doc); assertTrue(extractor.getText().contains("snoska")); + + extractor.close(); } public void testFormFootnotes() throws IOException { @@ -186,6 +202,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc")); assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); + + extractor.close(); } public void testEndnotes() throws IOException { @@ -194,6 +212,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue(text.contains("XXX")); assertTrue(text.contains("tilaka [endnoteRef:2]or 'tika'")); + + extractor.close(); } public void testInsertedDeletedText() throws IOException { @@ -202,6 +222,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("pendant worn")); assertTrue(extractor.getText().contains("extremely well")); + + extractor.close(); } public void testParagraphHeader() throws IOException { @@ -211,6 +233,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("Section 1")); assertTrue(extractor.getText().contains("Section 2")); assertTrue(extractor.getText().contains("Section 3")); + + extractor.close(); } /** @@ -225,6 +249,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(extractor.getText().contains("2004")); assertTrue(extractor.getText().contains("2008")); assertTrue(extractor.getText().contains("(120 ")); + + extractor.close(); } /** @@ -244,6 +270,8 @@ public class TestXWPFWordExtractor extends TestCase { // Now check the first paragraph in total assertTrue(extractor.getText().contains("a\tb\n")); + + extractor.close(); } /** @@ -258,6 +286,8 @@ public class TestXWPFWordExtractor extends TestCase { assertTrue(text.length() > 0); assertFalse(text.contains("AUTHOR")); assertFalse(text.contains("CREATEDATE")); + + extractor.close(); } /** @@ -271,6 +301,8 @@ public class TestXWPFWordExtractor extends TestCase { String text = extractor.getText(); assertTrue(text.length() > 0); assertTrue(text.contains("FldSimple.docx")); + + extractor.close(); } /** @@ -282,5 +314,7 @@ public class TestXWPFWordExtractor extends TestCase { XWPFWordExtractor extractor = new XWPFWordExtractor(doc); String text = extractor.getText(); assertTrue(text.length() > 0); + + extractor.close(); } } diff --git a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java index 00b3afad8f..de82210291 100644 --- a/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java +++ b/src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java @@ -239,7 +239,7 @@ public final class TestExcelExtractor extends TestCase { ) > -1 ); - assertTrue( + assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'", text.indexOf( "nn.nn\t10.52\n" ) > -1