import org.apache.poi.util.NotImplemented;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
+import org.apache.poi.util.Removal;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xslf.usermodel.XSLFRelation;
POIOLE2TextExtractor extractor = createExtractor(fs);
extractor.setFilesystem(fs);
return extractor;
-
} catch (OfficeXmlFileException e) {
// ensure file-handle release
IOUtils.closeQuietly(fs);
return createExtractor(OPCPackage.open(f.toString(), PackageAccess.READ));
-
} catch (NotOLE2FileException ne) {
// ensure file-handle release
IOUtils.closeQuietly(fs);
throw new IllegalArgumentException("Your File was neither an OLE2 file, nor an OOXML file");
-
- } catch (OpenXML4JException e) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- throw e;
-
- } catch (XmlException e) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- throw e;
-
- } catch (IOException e) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- throw e;
-
- } catch (RuntimeException e) {
- // ensure file-handle release
- IOUtils.closeQuietly(fs);
- throw e;
- } catch (Error e) {
+ } catch (OpenXML4JException | Error | RuntimeException | IOException | XmlException e) {
// ensure file-handle release
IOUtils.closeQuietly(fs);
throw e;
}
- }
+ }
public static POITextExtractor createExtractor(InputStream inp) throws IOException, OpenXML4JException, XmlException {
InputStream is = FileMagic.prepareToCheckMagic(inp);
throw new IllegalArgumentException("No supported documents found in the OOXML package (found "+contentType+")");
- } catch (IOException e) {
- // ensure that we close the package again if there is an error opening it, however
- // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
- pkg.revert();
- throw e;
- } catch (OpenXML4JException e) {
- // ensure that we close the package again if there is an error opening it, however
- // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
- pkg.revert();
- throw e;
- } catch (XmlException e) {
- // ensure that we close the package again if there is an error opening it, however
- // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
- pkg.revert();
- throw e;
- } catch (RuntimeException e) {
- // ensure that we close the package again if there is an error opening it, however
- // we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
- pkg.revert();
- throw e;
- } catch (Error e) {
+ } catch (IOException | Error | RuntimeException | XmlException | OpenXML4JException e) {
// ensure that we close the package again if there is an error opening it, however
// we need to revert the package to not re-write the file via close(), which is very likely not wanted for a TextExtractor!
pkg.revert();
* If there are no embedded documents, you'll get back an
* empty array. Otherwise, you'll get one open
* {@link POITextExtractor} for each embedded file.
+ *
+ * @deprecated The method name misspells "embedded"; use {@link #getEmbeddedDocsTextExtractors(POIOLE2TextExtractor)} instead.
*/
+ @Deprecated
+ @Removal(version="4.2")
public static POITextExtractor[] getEmbededDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
+ return getEmbeddedDocsTextExtractors(ext); // thin forwarder to the correctly spelled method; kept only until the removal version declared above
+ }
+
+ /**
+ * Returns an array of text extractors, one for each of
+ * the embedded documents in the file (if there are any).
+ * If there are no embedded documents, you'll get back an
+ * empty array. Otherwise, you'll get one open
+ * {@link POITextExtractor} for each embedded file.
+ */
+ public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIOLE2TextExtractor ext) throws IOException, OpenXML4JException, XmlException {
// All the embedded directories we spotted
ArrayList<Entry> dirs = new ArrayList<>();
// For anything else not directly held in as a POIFS directory
// Ignore, just means it didn't contain
// a format we support as yet
logger.log(POILogger.INFO, "Format not supported yet", e.getLocalizedMessage());
- } catch (XmlException e) {
- throw new IOException(e.getMessage(), e);
- } catch (OpenXML4JException e) {
+ } catch (XmlException | OpenXML4JException e) {
throw new IOException(e.getMessage(), e);
}
}
* If there are no embedded documents, you'll get back an
* empty array. Otherwise, you'll get one open
* {@link POITextExtractor} for each embedded file.
+ *
+ * @deprecated The method name misspells "embedded"; use {@link #getEmbeddedDocsTextExtractors(POIXMLTextExtractor)} instead.
*/
+ @Deprecated
+ @Removal(version="4.2")
@NotImplemented
- @SuppressWarnings("UnusedParameters")
+ @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
public static POITextExtractor[] getEmbededDocsTextExtractors(POIXMLTextExtractor ext) {
+ return getEmbeddedDocsTextExtractors(ext); // forwards to the correctly spelled overload, which currently always throws IllegalStateException
+ }
+
+ /**
+ * Placeholder for returning an array of text extractors, one for each of
+ * the embedded documents in an OOXML file (if there are any).
+ * <p>
+ * Once implemented, the intended contract is an empty array when there
+ * are no embedded documents, and otherwise one open
+ * {@link POITextExtractor} for each embedded file.
+ * <p>
+ * This is not implemented yet: the method currently ignores its argument
+ * and always throws, so it never returns an array.
+ *
+ * @param ext the OOXML text extractor to inspect (currently unused)
+ * @return never returns normally at present
+ * @throws IllegalStateException always, with the message "Not yet supported"
+ */
+ @NotImplemented
+ @SuppressWarnings({"UnusedParameters", "UnusedReturnValue"})
+ public static POITextExtractor[] getEmbeddedDocsTextExtractors(POIXMLTextExtractor ext) {
throw new IllegalStateException("Not yet supported");
}
import org.apache.poi.openxml4j.opc.PackageAccess;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
-import org.apache.poi.util.IOUtils;
import org.apache.poi.util.POILogFactory;
import org.apache.poi.util.POILogger;
import org.apache.poi.xdgf.extractor.XDGFVisioExtractor;
import org.apache.poi.xslf.extractor.XSLFPowerPointExtractor;
import org.apache.poi.xssf.extractor.XSSFEventBasedExcelExtractor;
import org.apache.poi.xssf.extractor.XSSFExcelExtractor;
-import org.apache.poi.xssf.usermodel.TestMatrixFormulasFromXMLSpreadsheet;
import org.apache.poi.xwpf.extractor.XWPFWordExtractor;
import org.junit.BeforeClass;
import org.junit.Test;
}
/**
- * Test embeded docs text extraction. For now, only
- * does poifs embeded, but will do ooxml ones
+ * Test embedded docs text extraction. For now, only
+ * does poifs embedded, but will do ooxml ones
* at some point.
*/
+ @SuppressWarnings("deprecation")
@Test
- public void testEmbeded() throws Exception {
+ public void testEmbedded() throws Exception {
POIOLE2TextExtractor ext;
POITextExtractor[] embeds;
- // No embedings
+ // No embeddings
ext = (POIOLE2TextExtractor)
ExtractorFactory.createExtractor(xls);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
assertEquals(0, embeds.length);
ext.close();
+ // No embeddings (same check via the correctly spelled replacement method)
+ ext = (POIOLE2TextExtractor)
+ ExtractorFactory.createExtractor(xls);
+ embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
+ assertEquals(0, embeds.length);
+ ext.close();
+
// Excel
ext = (POIOLE2TextExtractor)
ExtractorFactory.createExtractor(xlsEmb);
embeds = ExtractorFactory.getEmbededDocsTextExtractors(ext);
+ assertNotNull(embeds);
+
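+ // Excel (same file via the correctly spelled replacement method)
+ // NOTE(review): the extractor opened above for the deprecated call is reassigned here without close() - confirm this is intended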
+ ext = (POIOLE2TextExtractor)
+ ExtractorFactory.createExtractor(xlsEmb);
+ embeds = ExtractorFactory.getEmbeddedDocsTextExtractors(ext);
assertEquals(6, embeds.length);
int numWord = 0, numXls = 0, numPpt = 0, numMsg = 0, numWordX;
}
}
+ @SuppressWarnings("deprecation")
@Test
public void testGetEmbeddedFromXMLExtractor() {
try {
} catch (IllegalStateException e) {
// expected here
}
+
+ try {
+ // currently not implemented
+ ExtractorFactory.getEmbeddedDocsTextExtractors((POIXMLTextExtractor)null);
+ fail("Unsupported currently");
+ } catch (IllegalStateException e) {
+ // expected here
+ }
}
// This bug is currently open. This test will fail with "expected error not thrown" when the bug has been fixed.
// bug 45565: text within TextBoxes is extracted by ExcelExtractor and WordExtractor
@Test(expected=AssertionError.class)
public void test45565() throws Exception {
- POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"));
- try {
+ try (POITextExtractor extractor = ExtractorFactory.createExtractor(HSSFTestDataSamples.getSampleFile("45565.xls"))) { // try-with-resources closes the extractor even when the assertions below fail, which is the expected outcome while the bug is open
String text = extractor.getText();
assertContains(text, "testdoc");
assertContains(text, "test phrase");
- } finally {
- extractor.close();
}
}
}