==================================================================== */
package org.apache.poi;
+import java.io.Closeable;
+import java.io.IOException;
+
/**
* Common Parent for Text Extractors
* of POI Documents.
* @see org.apache.poi.hdgf.extractor.VisioTextExtractor
* @see org.apache.poi.hwpf.extractor.WordExtractor
*/
-public abstract class POITextExtractor {
+public abstract class POITextExtractor implements Closeable {
/** The POIDocument that's open */
protected POIDocument document;
* metadata / properties, such as author and title.
*/
public abstract POITextExtractor getMetadataTextExtractor();
+
+ /**
+ * Releases the resources held by this extractor as soon as
+ * it is no longer needed. This may include closing
+ * open file handles and freeing memory.
+ *
+ * The extractor cannot be used after close has been called.
+ *
+ * @throws IOException declared so that overriding classes can report
+ * failures while releasing their resources; this base
+ * implementation performs no action
+ */
+ public void close() throws IOException {
+ // nothing to do in abstract class, derived classes may perform actions.
+ }
}
package org.apache.poi;
+import java.io.IOException;
+
import org.apache.poi.POIXMLProperties.CoreProperties;
import org.apache.poi.POIXMLProperties.CustomProperties;
import org.apache.poi.POIXMLProperties.ExtendedProperties;
public POIXMLPropertiesTextExtractor getMetadataTextExtractor() {
return new POIXMLPropertiesTextExtractor(_document);
}
+
+ /**
+ * Releases the resources held by this extractor, including the
+ * package of the underlying document when there is one.
+ *
+ * The package is reverted rather than closed: closing a package that
+ * was opened with write access saves it, and extracting text
+ * should never re-write the source file.
+ *
+ * @throws IOException if the base class fails to release its resources
+ */
+ @Override
+ public void close() throws IOException {
+     try {
+         // e.g. XSSFEventBaseExcelExtractor passes a null-document
+         if(_document != null) {
+             OPCPackage pkg = _document.getPackage();
+             if(pkg != null) {
+                 // revert() frees the package WITHOUT saving its content
+                 pkg.revert();
+             }
+         }
+     } finally {
+         // always give the base class the chance to clean up,
+         // even when releasing the package failed
+         super.close();
+     }
+ }
}
}
}
+ /**
+ * Closes the container held by this extractor, if it is still set,
+ * clears the reference to it and then delegates to the
+ * parent class.
+ *
+ * @throws IOException if closing the container or the parent fails
+ */
+ @Override
+ public void close() throws IOException {
+     // no container (never set, or already closed): only the parent needs closing
+     if (container == null) {
+         super.close();
+         return;
+     }
+
+     container.close();
+     // drop the reference so that a repeated close() does not touch it again
+     container = null;
+     super.close();
+ }
+
protected class SheetTextExtractor implements SheetContentsHandler {
private final StringBuffer output;
private boolean firstCellOfRow = true;
assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
+
+ textExt.close();
+ ext.close();
}
public void testCore() throws Exception {
assertTrue(text.contains("LastModifiedBy = Yury Batrakov"));
assertTrue(cText.contains("LastModifiedBy = Yury Batrakov"));
+
+ ext.close();
}
public void testExtended() throws Exception {
assertTrue(text.contains("Company = Mera"));
assertTrue(eText.contains("Application = Microsoft Excel"));
assertTrue(eText.contains("Company = Mera"));
+
+ ext.close();
}
public void testCustom() throws Exception {
assertTrue(text.contains("description = another value"));
assertTrue(cText.contains("description = another value"));
+
+ ext.close();
}
/**
assertFalse(text.contains("Created =")); // With date is null
assertTrue(text.contains("CreatedString = ")); // Via string is blank
assertTrue(text.contains("LastModifiedBy = IT Client Services"));
+
+ ext.close();
}
}
* Get text out of the simple file
*/
public void testGetSimpleText() throws Exception {
- new XSLFPowerPointExtractor(xmlA);
- new XSLFPowerPointExtractor(pkg);
+ new XSLFPowerPointExtractor(xmlA).close();
+ new XSLFPowerPointExtractor(pkg).close();
XSLFPowerPointExtractor extractor =
new XSLFPowerPointExtractor(xmlA);
assertEquals(
"\n\n\n\n", text
);
+
+ extractor.close();
}
public void testGetComments() throws Exception {
// Check the authors came through too
assertTrue("Unable to find expected word in text\n" + text, text.contains("XPVMWARE01"));
+
+ extractor.close();
}
public void testGetMasterText() throws Exception {
"This text comes from the Master Slide\n"
, text
);
+
+ extractor.close();
}
public void testTable() throws Exception {
// Check comments are there
assertTrue("Unable to find expected word in text\n" + text, text.contains("TEST"));
+
+ extractor.close();
}
/**
"Text missing for " + filename + "\n" + text,
text.contains("Mystery")
);
+
+ extractor.close();
}
}
}
/**
* Tests for {@link XSSFEventBasedExcelExtractor}
*/
-public final class TestXSSFEventBasedExcelExtractor extends TestCase {
-
-
- private static final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
- return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
- openSamplePackage(sampleName));
+public class TestXSSFEventBasedExcelExtractor extends TestCase {
+ /**
+ * Creates the extractor under test for the given sample, opening the
+ * sample via {@code XSSFTestDataSamples.openSamplePackage}.
+ * Protected and non-final so that a subclass can supply the
+ * extractor in a different way.
+ *
+ * @param sampleName name of the sample to open
+ * @return a new extractor for the sample
+ * @throws Exception if opening the sample or creating the extractor fails
+ */
+ protected XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+ return new XSSFEventBasedExcelExtractor(XSSFTestDataSamples.
+ openSamplePackage(sampleName));
}
/**
CHUNK2 +
"Sheet3\n"
, text);
+
+ extractor.close();
}
public void testGetComplexText() throws Exception {
"Avgtxfull\n" +
"(iii) AVERAGE TAX RATES ON ANNUAL"
));
+
+ extractor.close();
}
public void testInlineStrings() throws Exception {
// Formulas
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
+
+ extractor.close();
}
/**
Matcher m = pattern.matcher(text);
assertTrue(m.matches());
}
+
+ ole2Extractor.close();
+ ooxmlExtractor.close();
}
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+package org.apache.poi.xssf.extractor;
+
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hssf.HSSFTestDataSamples;
+
+
+/**
+ * Runs the tests of {@link TestXSSFEventBasedExcelExtractor} with
+ * extractors obtained from {@link ExtractorFactory} instead of
+ * extractors that are constructed directly.
+ */
+public class TestXSSFEventBasedExcelExtractorUsingFactory extends TestXSSFEventBasedExcelExtractor {
+    /**
+     * Creates the extractor for the given sample through the
+     * {@link ExtractorFactory}, with event based extractors preferred.
+     *
+     * @param sampleName name of the sample file to open
+     * @return the extractor created by the factory
+     * @throws Exception if the sample cannot be opened or no extractor can be created
+     */
+    @Override
+    protected final XSSFEventBasedExcelExtractor getExtractor(String sampleName) throws Exception {
+        ExtractorFactory.setAllThreadsPreferEventExtractors(true);
+        try {
+            return (XSSFEventBasedExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
+        } finally {
+            // the preference applies to all threads: clear it again so that
+            // it does not leak into tests which run afterwards
+            ExtractorFactory.setAllThreadsPreferEventExtractors(null);
+        }
+    }
+}
package org.apache.poi.xssf.extractor;
+import java.io.IOException;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
* Tests for {@link XSSFExcelExtractor}
*/
-public final class TestXSSFExcelExtractor extends TestCase {
-
-
- private static final XSSFExcelExtractor getExtractor(String sampleName) {
+public class TestXSSFExcelExtractor extends TestCase {
+ /**
+ * Creates the extractor under test for the given sample, opening the
+ * sample via {@code XSSFTestDataSamples.openSampleWorkbook}.
+ * Protected and non-final so that a subclass can supply the
+ * extractor in a different way.
+ *
+ * @param sampleName name of the sample to open
+ * @return a new extractor for the sample
+ */
+ protected XSSFExcelExtractor getExtractor(String sampleName) {
return new XSSFExcelExtractor(XSSFTestDataSamples.openSampleWorkbook(sampleName));
}
/**
* Get text out of the simple file
+ * @throws IOException if closing the extractor fails
*/
- public void testGetSimpleText() {
+ public void testGetSimpleText() throws IOException {
// a very simple file
XSSFExcelExtractor extractor = getExtractor("sample.xlsx");
extractor.getText();
CHUNK2 +
"Sheet3\n"
, text);
+
+ extractor.close();
}
- public void testGetComplexText() {
+ public void testGetComplexText() throws IOException {
// A fairly complex file
XSSFExcelExtractor extractor = getExtractor("AverageTaxRates.xlsx");
extractor.getText();
"Avgtxfull\n" +
"null\t(iii) AVERAGE TAX RATES ON ANNUAL"
));
+
+ extractor.close();
}
/**
* Test that we return pretty much the same as
* ExcelExtractor does, when we're both passed
* the same file, just saved as xls and xlsx
+ * @throws IOException if closing one of the extractors fails
*/
- public void testComparedToOLE2() {
+ public void testComparedToOLE2() throws IOException {
// A fairly simple file - ooxml
XSSFExcelExtractor ooxmlExtractor = getExtractor("SampleSS.xlsx");
Matcher m = pattern.matcher(text);
assertTrue(m.matches());
}
+
+ ole2Extractor.close();
+ ooxmlExtractor.close();
}
/**
* From bug #45540
+ * @throws IOException if closing an extractor fails
*/
- public void testHeaderFooter() {
+ public void testHeaderFooter() throws IOException {
String[] files = new String[] {
"45540_classic_Header.xlsx", "45540_form_Header.xlsx",
"45540_classic_Footer.xlsx", "45540_form_Footer.xlsx",
String text = extractor.getText();
assertTrue("Unable to find expected word in text from " + sampleName + "\n" + text, text.contains("testdoc"));
- assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+ assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+
+ extractor.close();
}
}
/**
* From bug #45544
+ * @throws IOException if closing the extractor fails
*/
- public void testComments() {
-
+ public void testComments() throws IOException {
XSSFExcelExtractor extractor = getExtractor("45544.xlsx");
String text = extractor.getText();
text = extractor.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+
+ extractor.close();
}
- public void testInlineStrings() {
+ public void testInlineStrings() throws IOException {
XSSFExcelExtractor extractor = getExtractor("InlineStrings.xlsx");
extractor.setFormulasNotResults(true);
String text = extractor.getText();
// Formulas
assertTrue("Unable to find expected word in text\n" + text, text.contains("A2"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("A5-A$2"));
+
+ extractor.close();
}
}
--- /dev/null
+/* ====================================================================
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+==================================================================== */
+
+package org.apache.poi.xssf.extractor;
+
+import org.apache.poi.extractor.ExtractorFactory;
+import org.apache.poi.hssf.HSSFTestDataSamples;
+
+/**
+ * Tests for {@link XSSFExcelExtractor}
+ */
+/**
+ * Runs the tests of {@link TestXSSFExcelExtractor} with extractors
+ * obtained from {@link ExtractorFactory} instead of extractors
+ * that are constructed directly.
+ */
+public final class TestXSSFExcelExtractorUsingFactory extends TestXSSFExcelExtractor {
+    /**
+     * Creates the extractor for the given sample through the
+     * {@link ExtractorFactory}, with event based extractors switched
+     * off both for all threads and for the current thread.
+     *
+     * @param sampleName name of the sample file to open
+     * @return the extractor created by the factory
+     * @throws RuntimeException wrapping any exception raised while
+     *  opening the sample or creating the extractor
+     */
+    @Override
+    protected final XSSFExcelExtractor getExtractor(String sampleName) {
+        ExtractorFactory.setAllThreadsPreferEventExtractors(false);
+        ExtractorFactory.setThreadPrefersEventExtractors(false);
+        try {
+            return (XSSFExcelExtractor) ExtractorFactory.createExtractor(HSSFTestDataSamples.openSampleFileStream(sampleName));
+        } catch (Exception e) {
+            throw new RuntimeException(e);
+        } finally {
+            // the all-threads preference is global: clear it again so that
+            // it does not affect tests which run afterwards
+            ExtractorFactory.setAllThreadsPreferEventExtractors(null);
+        }
+    }
+}
}
}
assertEquals(3, ps);
+
+ extractor.close();
}
/**
}
}
assertEquals(134, ps);
+
+ extractor.close();
}
public void testGetWithHyperlinks() throws IOException {
"We have a hyperlink <http://poi.apache.org/> here, and another.\n",
extractor.getText()
);
+
+ extractor.close();
}
public void testHeadersFooters() throws IOException {
// Now another file, expect multiple headers
// and multiple footers
doc = XWPFTestDataSamples.openSampleDocument("DiffFirstPageHeadFoot.docx");
+ extractor.close();
+
extractor = new XWPFWordExtractor(doc);
+ extractor.close();
+
extractor =
new XWPFWordExtractor(doc);
extractor.getText();
"Footer Left\tFooter Middle\tFooter Right\n",
extractor.getText()
);
+
+ extractor.close();
}
public void testFootnotes() throws IOException {
String text = extractor.getText();
assertTrue(text.contains("snoska"));
assertTrue(text.contains("Eto ochen prostoy[footnoteRef:1] text so snoskoy"));
+
+ extractor.close();
}
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
assertTrue(extractor.getText().contains("snoska"));
+
+ extractor.close();
}
public void testFormFootnotes() throws IOException {
String text = extractor.getText();
assertTrue("Unable to find expected word in text\n" + text, text.contains("testdoc"));
assertTrue("Unable to find expected word in text\n" + text, text.contains("test phrase"));
+
+ extractor.close();
}
public void testEndnotes() throws IOException {
String text = extractor.getText();
assertTrue(text.contains("XXX"));
assertTrue(text.contains("tilaka [endnoteRef:2]or 'tika'"));
+
+ extractor.close();
}
public void testInsertedDeletedText() throws IOException {
assertTrue(extractor.getText().contains("pendant worn"));
assertTrue(extractor.getText().contains("extremely well"));
+
+ extractor.close();
}
public void testParagraphHeader() throws IOException {
assertTrue(extractor.getText().contains("Section 1"));
assertTrue(extractor.getText().contains("Section 2"));
assertTrue(extractor.getText().contains("Section 3"));
+
+ extractor.close();
}
/**
assertTrue(extractor.getText().contains("2004"));
assertTrue(extractor.getText().contains("2008"));
assertTrue(extractor.getText().contains("(120 "));
+
+ extractor.close();
}
/**
// Now check the first paragraph in total
assertTrue(extractor.getText().contains("a\tb\n"));
+
+ extractor.close();
}
/**
assertTrue(text.length() > 0);
assertFalse(text.contains("AUTHOR"));
assertFalse(text.contains("CREATEDATE"));
+
+ extractor.close();
}
/**
String text = extractor.getText();
assertTrue(text.length() > 0);
assertTrue(text.contains("FldSimple.docx"));
+
+ extractor.close();
}
/**
XWPFWordExtractor extractor = new XWPFWordExtractor(doc);
String text = extractor.getText();
assertTrue(text.length() > 0);
+
+ extractor.close();
}
}
) > -1
);
- assertTrue(
+ assertTrue("Had: " + text + ", but should contain 'nn.nn\\t10.52\\n'",
text.indexOf(
"nn.nn\t10.52\n"
) > -1