]> source.dussan.org Git - poi.git/commitdiff
Bug 54982: Add a close() interface to POITextExtractor which can be used to free...
author Dominik Stadler <centic@apache.org>
Mon, 17 Jun 2013 07:53:59 +0000 (07:53 +0000)
committer Dominik Stadler <centic@apache.org>
Mon, 17 Jun 2013 07:53:59 +0000 (07:53 +0000)
Implement close() where necessary so that resources are released. Add close()
calls to the tests and also run the existing unit tests against the Extractor
that is built via the Factory. Also add a small test suite to quickly execute
all extractor-related tests.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1493669 13f79535-47bb-0310-9956-ffa450edef68

src/java/org/apache/poi/POITextExtractor.java
src/ooxml/java/org/apache/poi/POIXMLTextExtractor.java
src/ooxml/java/org/apache/poi/xssf/extractor/XSSFEventBasedExcelExtractor.java
src/ooxml/testcases/org/apache/poi/TestXMLPropertiesTextExtractor.java
src/ooxml/testcases/org/apache/poi/xslf/extractor/TestXSLFPowerPointExtractor.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractor.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractor.java
src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java [new file with mode: 0644]
src/ooxml/testcases/org/apache/poi/xwpf/extractor/TestXWPFWordExtractor.java
src/testcases/org/apache/poi/hssf/extractor/TestExcelExtractor.java

index 0b69894d0853c66fbc86992a45dedd6d1707c997..e18078b461f4cec23dad0d137a13d661983d546c 100644 (file)
@@ -16,6 +16,9 @@
 ==================================================================== */
 package org.apache.poi;
 
+import java.io.Closeable;
+import java.io.IOException;
+
 /**
  * Common Parent for Text Extractors
  *  of POI Documents. 
@@ -27,7 +30,7 @@ package org.apache.poi;
  * @see org.apache.poi.hdgf.extractor.VisioTextExtractor
  * @see org.apache.poi.hwpf.extractor.WordExtractor
  */
-public abstract class POITextExtractor {
+public abstract class POITextExtractor implements Closeable {
        /** The POIDocument that's open */
        protected POIDocument document;
 
@@ -61,4 +64,15 @@ public abstract class POITextExtractor {
         *  metadata / properties, such as author and title.
         */
        public abstract POITextExtractor getMetadataTextExtractor();
+       
+       /**
+        * Frees the resources held by this extractor, which may include
+        * closing open file handles and freeing memory.
+        * The Extractor cannot be used after close has been called.
+        * 
+        * @throws IOException declared so that overriding classes can throw it; this base version never does
+        */
+       public void close() throws IOException {
+               // nothing to do in abstract class, derived classes may perform actions.
+       }
 }
index eee1d25abdbbd8212e5fd6ae5ba44b34900d15d0..eeb03f7aad15827d6dbff034b9673cc69c7be20d 100644 (file)
@@ -17,6 +17,8 @@
 
 package org.apache.poi;
 
+import java.io.IOException;
+
 import org.apache.poi.POIXMLProperties.CoreProperties;
 import org.apache.poi.POIXMLProperties.CustomProperties;
 import org.apache.poi.POIXMLProperties.ExtendedProperties;
@@ -75,4 +77,16 @@ public abstract class POIXMLTextExtractor extends POITextExtractor {
        public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
                return new POIXMLPropertiesTextExtractor(_document);
        }
+
+       @Override
+       public void close() throws IOException {
+               // _document may be null, e.g. XSSFEventBasedExcelExtractor passes a null document
+               if(_document != null) {
+                       OPCPackage pkg = _document.getPackage();
+                       if(pkg != null) {
+                               pkg.close(); // NOTE(review): may write pending changes back for writable packages - confirm this is wanted for an extractor
+                       }
+               }
+               super.close();
+       }
 }
index efc42cff5f171bd4f0b720b73f9fb6a7a1794127..0c31fe04d5a8ec1b8772f8cfde492ed7ccda1a56 100644 (file)
@@ -191,6 +191,15 @@ public class XSSFEventBasedExcelExtractor extends POIXMLTextExtractor {
        }
    }
    
+       @Override
+       public void close() throws IOException {
+               if (container != null) {
+                       container.close();
+                       container = null; // cleared so that a repeated close() skips this branch
+               }
+               super.close(); // then let the parent class run its own close()
+       }
+
    protected class SheetTextExtractor implements SheetContentsHandler {
       private final StringBuffer output;
       private boolean firstCellOfRow = true;
index 8ad2f78c4b5d41011302ce659a0073dee1a0b243..0df6ccf8164dc60fb8748d933109e0740a5b9228 100644 (file)
@@ -46,6 +46,9 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
 
                assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
                assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
+               
+               textExt.close();
+               ext.close();
        }
 
        public void testCore() throws Exception {
@@ -63,6 +66,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
 
                assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
                assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
+               
+               ext.close();
        }
 
        public void testExtended() throws Exception {
@@ -82,6 +87,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
                assertTrue(text.contains("Company = Mera"));
                assertTrue(eText.contains("Application = Microsoft Excel"));
                assertTrue(eText.contains("Company = Mera"));
+
+               ext.close();
        }
 
        public void testCustom() throws Exception {
@@ -99,6 +106,8 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
       
       assertTrue(text.contains("description = another value"));
       assertTrue(cText.contains("description = another value"));
+
+      ext.close();
        }
        
        /**
@@ -118,5 +127,7 @@ public final class TestXMLPropertiesTextExtractor extends TestCase {
       assertFalse(text.contains("Created =")); // With date is null
       assertTrue(text.contains("CreatedString = ")); // Via string is blank
       assertTrue(text.contains("LastModifiedBy = IT Client Services"));
+               
+      ext.close();
        }
 }
index cb7ad736f1bc74bc9e1d7dabdbe4a7ed8abe6b39..35ee3f1cb41cdb17c5b5cfbf3eb877efa0a268a3 100644 (file)
@@ -43,8 +43,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
         * Get text out of the simple file
         */
        public void testGetSimpleText() throws Exception {
-               new XSLFPowerPointExtractor(xmlA);
-               new XSLFPowerPointExtractor(pkg);
+               new XSLFPowerPointExtractor(xmlA).close();
+               new XSLFPowerPointExtractor(pkg).close();
                
                XSLFPowerPointExtractor extractor = 
                        new XSLFPowerPointExtractor(xmlA);
@@ -148,6 +148,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
                assertEquals(
                                "\n\n\n\n", text
                );
+               
+               extractor.close();
        }
        
    public void testGetComments() throws Exception {
@@ -165,6 +167,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
 
       // Check the authors came through too
       assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
+               
+               extractor.close();
    }
        
        public void testGetMasterText() throws Exception {
@@ -206,6 +210,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
             "This text comes from the Master Slide\n"
             , text
       );
+               
+               extractor.close();
        }
 
     public void testTable() throws Exception {
@@ -219,6 +225,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
 
         // Check comments are there
         assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
+               
+               extractor.close();
     }
     
     /**
@@ -267,6 +275,8 @@ public class TestXSLFPowerPointExtractor extends TestCase {
                "Text missing for " + filename + "\n" + text, 
                text.contains("Mystery")
          );
+         
+                extractor.close();
        }
     }
 }
index eade64f2fe4eb74581c9b544bae6e650acfbb015..eac3700e7db001c47ccf19f131b16eb096338d5d 100644 (file)
@@ -30,12 +30,10 @@ import org.apache.poi.xssf.XSSFTestDataSamples;
 /**
  * Tests for {@link XSSFEventBasedExcelExtractor}
  */
-public final class TestXSSFEventBasedExcelExtractor extends TestCase {
-
-
-       private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
-               return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
-                     openSamplePackage(sampleName));
+public class TestXSSFEventBasedExcelExtractor extends TestCase {
+       protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+        return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
+                openSamplePackage(sampleName));
        }
 
        /**
@@ -97,6 +95,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
                                CHUNK2 +
                                "Sheet3\n"
                                , text);
+               
+               extractor.close();
        }
        
        public void testGetComplexText() throws Exception {
@@ -112,6 +112,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
                                                "Avgtxfull\n" +
                                                "(iii) AVERAGE TAX RATES ON ANNUAL"     
                ));
+               
+               extractor.close();
        }
        
    public void testInlineStrings() throws Exception {
@@ -134,6 +136,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
       // Formulas
       assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
       assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
+               
+      extractor.close();
    }
    
        /**
@@ -159,5 +163,8 @@ public final class TestXSSFEventBasedExcelExtractor extends TestCase {
                        Matcher m = pattern.matcher(text);
                        assertTrue(m.matches());                        
                }
+               
+               ole2Extractor.close();
+               ooxmlExtractor.close();
        }
 }
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFEventBasedExcelExtractorUsingFactory.java
new file mode 100644 (file)
index 0000000..05e790f
--- /dev/null
@@ -0,0 +1,29 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hssf.HSSFTestDataSamples;
+
+
+public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor {
+       @Override
+       protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+               ExtractorFactory.setAllThreadsPreferEventExtractors(true); // NOTE(review): never reset afterwards - presumably leaks into tests that run later; confirm
+               return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName)); // the cast relies on the factory honouring the preference set above
+       }
+}
index 2d14cd535d9e4eb4b18a6de44d2d16a48159c309..bc86d6f9b9b190ad52a7c13a39d31b111878661b 100644 (file)
@@ -17,6 +17,7 @@
 
 package org.apache.poi.xssf.extractor;
 
+import java.io.IOException;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
@@ -30,17 +31,16 @@ import org.apache.poi.xssf.XSSFTestDataSamples;
 /**
  * Tests for {@link XSSFExcelExtractor}
  */
-public final class TestXSSFExcelExtractor extends TestCase {
-
-
-       private static final XSSFExcelExtractor getExtractor(String sampleName) {
+public class TestXSSFExcelExtractor extends TestCase {
+       protected XSSFExcelExtractor getExtractor(String sampleName) {
                return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName));
        }
 
        /**
         * Get text out of the simple file
+        * @throws IOException 
         */
-       public void testGetSimpleText() {
+       public void testGetSimpleText() throws IOException {
                // a very simple file
                XSSFExcelExtractor extractor = getExtractor("sample.xlsx");
                extractor.getText();
@@ -96,9 +96,11 @@ public final class TestXSSFExcelExtractor extends TestCase {
                                CHUNK2 +
                                "Sheet3\n"
                                , text);
+               
+               extractor.close();
        }
        
-       public void testGetComplexText() {
+       public void testGetComplexText() throws IOException {
                // A fairly complex file
                XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx");
                extractor.getText();
@@ -112,14 +114,17 @@ public final class TestXSSFExcelExtractor extends TestCase {
                                                "Avgtxfull\n" +
                                                "null\t(iii) AVERAGE TAX RATES ON ANNUAL"       
                ));
+               
+               extractor.close();
        }
        
        /**
         * Test that we return pretty much the same as
         *  ExcelExtractor does, when we're both passed
         *  the same file, just saved as xls and xlsx
+        * @throws IOException 
         */
-       public void testComparedToOLE2() {
+       public void testComparedToOLE2() throws IOException {
                // A fairly simple file - ooxml
                XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
 
@@ -137,12 +142,16 @@ public final class TestXSSFExcelExtractor extends TestCase {
                        Matcher m = pattern.matcher(text);
                        assertTrue(m.matches());                        
                }
+
+               ole2Extractor.close();
+               ooxmlExtractor.close();
        }
        
        /**
         * From bug #45540
+        * @throws IOException 
         */
-       public void testHeaderFooter() {
+       public void testHeaderFooter() throws IOException {
                String[] files = new String[] {
                        "45540_classic_Header.xlsx", "45540_form_Header.xlsx",
                        "45540_classic_Footer.xlsx", "45540_form_Footer.xlsx",
@@ -152,15 +161,17 @@ public final class TestXSSFExcelExtractor extends TestCase {
                        String text = extractor.getText();
                        
                        assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc"));
-                       assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase")); 
+                       assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+                       
+                       extractor.close();
                }
        }
 
        /**
         * From bug #45544
+        * @throws IOException 
         */
-       public void testComments() {
-               
+       public void testComments() throws IOException {
                XSSFExcelExtractor extractor = getExtractor("45544.xlsx");
                String text = extractor.getText();
 
@@ -173,9 +184,11 @@ public final class TestXSSFExcelExtractor extends TestCase {
                text = extractor.getText();
                assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
                assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+               
+               extractor.close();
        }
        
-       public void testInlineStrings() {
+       public void testInlineStrings() throws IOException {
       XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx");
       extractor.setFormulasNotResults(true);
       String text = extractor.getText();
@@ -195,5 +208,7 @@ public final class TestXSSFExcelExtractor extends TestCase {
       // Formulas
       assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
       assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
+      
+      extractor.close();
        }
 }
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java b/src/ooxml/testcases/org/apache/poi/xssf/extractor/TestXSSFExcelExtractorUsingFactory.java
new file mode 100644 (file)
index 0000000..fd5cde3
--- /dev/null
@@ -0,0 +1,37 @@
+/* ====================================================================
+   Licensed to the Apache Software Foundation (ASF) under one or more
+   contributor license agreements.  See the NOTICE file distributed with
+   this work for additional information regarding copyright ownership.
+   The ASF licenses this file to You under the Apache License, Version 2.0
+   (the "License"); you may not use this file except in compliance with
+   the License.  You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.extractor;
+
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hssf.HSSFTestDataSamples;
+
+/**
+ * Runs the {@link XSSFExcelExtractor} tests against an extractor built via {@link ExtractorFactory}
+ */
+public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor {
+       @Override
+       protected final XSSFExcelExtractor getExtractor(String sampleName) {
+               ExtractorFactory.setAllThreadsPreferEventExtractors(false); // switch off the all-threads preference for event extractors
+               ExtractorFactory.setThreadPrefersEventExtractors(false); // and the per-thread one; the cast below expects the non-event extractor
+               try {
+                       return (XSSFExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
+               } catch (Exception e) {
+                       throw new RuntimeException(e); // the overridden signature declares no checked exceptions
+               }
+       }
+}
index 34d20aae7453c1678b0b0c3c141dbb3b86d7712f..f62749f524128fbd797dc79c43001259e048b702 100644 (file)
@@ -57,6 +57,8 @@ public class TestXWPFWordExtractor extends TestCase {
             }
         }
         assertEquals(3, ps);
+        
+        extractor.close();
     }
 
     /**
@@ -93,6 +95,8 @@ public class TestXWPFWordExtractor extends TestCase {
             }
         }
         assertEquals(134, ps);
+        
+        extractor.close();
     }
 
     public void testGetWithHyperlinks() throws IOException {
@@ -118,6 +122,8 @@ public class TestXWPFWordExtractor extends TestCase {
                                "We have a hyperlink <http://poi.apache.org/> here, and another.\n",
                 extractor.getText()
         );
+        
+        extractor.close();
     }
 
     public void testHeadersFooters() throws IOException {
@@ -141,7 +147,11 @@ public class TestXWPFWordExtractor extends TestCase {
         // Now another file, expect multiple headers
         //  and multiple footers
         doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx");
+        extractor.close();
+
         extractor = new XWPFWordExtractor(doc);
+        extractor.close();
+
         extractor =
                 new XWPFWordExtractor(doc);
         extractor.getText();
@@ -161,6 +171,8 @@ public class TestXWPFWordExtractor extends TestCase {
                         "Footer Left\tFooter Middle\tFooter Right\n",
                 extractor.getText()
         );
+        
+        extractor.close();
     }
 
     public void testFootnotes() throws IOException {
@@ -169,6 +181,8 @@ public class TestXWPFWordExtractor extends TestCase {
         String text = extractor.getText();
         assertTrue(text.contains("snoska"));
         assertTrue(text.contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy"));
+        
+        extractor.close();
     }
 
 
@@ -177,6 +191,8 @@ public class TestXWPFWordExtractor extends TestCase {
         XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
 
         assertTrue(extractor.getText().contains("snoska"));
+        
+        extractor.close();
     }
 
     public void testFormFootnotes() throws IOException {
@@ -186,6 +202,8 @@ public class TestXWPFWordExtractor extends TestCase {
         String text = extractor.getText();
         assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
         assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+        
+        extractor.close();
     }
 
     public void testEndnotes() throws IOException {
@@ -194,6 +212,8 @@ public class TestXWPFWordExtractor extends TestCase {
         String text = extractor.getText();
         assertTrue(text.contains("XXX"));
         assertTrue(text.contains("tilaka [endnoteRef:2]or 'tika'"));
+        
+        extractor.close();
     }
 
     public void testInsertedDeletedText() throws IOException {
@@ -202,6 +222,8 @@ public class TestXWPFWordExtractor extends TestCase {
 
         assertTrue(extractor.getText().contains("pendant worn"));
         assertTrue(extractor.getText().contains("extremely well"));
+        
+        extractor.close();
     }
 
     public void testParagraphHeader() throws IOException {
@@ -211,6 +233,8 @@ public class TestXWPFWordExtractor extends TestCase {
         assertTrue(extractor.getText().contains("Section 1"));
         assertTrue(extractor.getText().contains("Section 2"));
         assertTrue(extractor.getText().contains("Section 3"));
+        
+        extractor.close();
     }
 
     /**
@@ -225,6 +249,8 @@ public class TestXWPFWordExtractor extends TestCase {
         assertTrue(extractor.getText().contains("2004"));
         assertTrue(extractor.getText().contains("2008"));
         assertTrue(extractor.getText().contains("(120 "));
+        
+        extractor.close();
     }
     
     /**
@@ -244,6 +270,8 @@ public class TestXWPFWordExtractor extends TestCase {
        
        // Now check the first paragraph in total
        assertTrue(extractor.getText().contains("a\tb\n"));
+       
+       extractor.close();
     }
     
     /**
@@ -258,6 +286,8 @@ public class TestXWPFWordExtractor extends TestCase {
         assertTrue(text.length() > 0);
         assertFalse(text.contains("AUTHOR"));
         assertFalse(text.contains("CREATEDATE"));
+        
+        extractor.close();
     }
     
     /**
@@ -271,6 +301,8 @@ public class TestXWPFWordExtractor extends TestCase {
         String text = extractor.getText();
         assertTrue(text.length() > 0);
         assertTrue(text.contains("FldSimple.docx"));
+        
+        extractor.close();
     }
 
     /**
@@ -282,5 +314,7 @@ public class TestXWPFWordExtractor extends TestCase {
         XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
         String text = extractor.getText();
         assertTrue(text.length() > 0);
+        
+        extractor.close();
     }
 }
index 00b3afad8ff337e06f8b01e62a2034cceabc6ed5..de82210291505bfaea49688a1a66d272ed283089 100644 (file)
@@ -239,7 +239,7 @@ public final class TestExcelExtractor extends TestCase {
             ) > -1
       );
       
-      assertTrue(
+      assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'",
             text.indexOf(
                "nn.nn\t10.52\n"
             ) > -1