about | summary | refs | log | tree | commit | diff | stats
path: root/src
diff options
context:
space:
mode:
author: Nick Burch <nick@apache.org> 2016-07-11 22:47:02 +0000
committer: Nick Burch <nick@apache.org> 2016-07-11 22:47:02 +0000
commit: ef2af2d53d3ee229a67f84764c242bf1f82da69b (patch)
tree: ebfc25ac869aa318cf710666a7c18046665fc8f6 /src
parent: a5f19ab07f29493e6111e8afecf74abbd6b580c9 (diff)
download: poi-ef2af2d53d3ee229a67f84764c242bf1f82da69b.tar.gz
poi-ef2af2d53d3ee229a67f84764c242bf1f82da69b.zip
Start moving logic over into the main and scratchpad jars for OLE2
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1752226 13f79535-47bb-0310-9956-ffa450edef68
Diffstat (limited to 'src')
-rw-r--r-- src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java 69
-rw-r--r-- src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java 92
-rw-r--r-- src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java 6
-rw-r--r-- src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java 4
4 files changed, 79 insertions, 92 deletions
diff --git a/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java b/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
index 0db450aaf0..9facd39797 100644
--- a/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
+++ b/src/java/org/apache/poi/extractor/OLE2ExtractorFactory.java
@@ -20,8 +20,10 @@ import static org.apache.poi.hssf.model.InternalWorkbook.WORKBOOK_DIR_ENTRY_NAME
import java.io.IOException;
import java.io.InputStream;
+import java.lang.reflect.Method;
import java.util.ArrayList;
import java.util.Iterator;
+import java.util.List;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor;
@@ -33,6 +35,8 @@ import org.apache.poi.poifs.filesystem.Entry;
import org.apache.poi.poifs.filesystem.NPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.OPOIFSFileSystem;
import org.apache.poi.poifs.filesystem.POIFSFileSystem;
+import org.apache.poi.util.POILogFactory;
+import org.apache.poi.util.POILogger;
/**
* Figures out the correct POIOLE2TextExtractor for your supplied
@@ -48,6 +52,8 @@ import org.apache.poi.poifs.filesystem.POIFSFileSystem;
*/
@SuppressWarnings("WeakerAccess")
public class OLE2ExtractorFactory {
+ private static final POILogger LOGGER = POILogFactory.getLogger(OLE2ExtractorFactory.class);
+
/** Should this thread prefer event based over usermodel based extractors? */
private static final ThreadLocal<Boolean> threadPreferEventExtractors = new ThreadLocal<Boolean>() {
@Override
@@ -115,11 +121,38 @@ public class OLE2ExtractorFactory {
return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
}
- public static POITextExtractor createExtractor(InputStream input) {
- // TODO Something nasty with reflection...
- return null;
+ public static POITextExtractor createExtractor(InputStream input) throws IOException {
+ Class<?> cls = getOOXMLClass();
+ if (cls != null) {
+ // TODO Reflection
+ throw new IllegalArgumentException("TODO Reflection");
+ } else {
+ // Best hope it's OLE2....
+ return createExtractor(new NPOIFSFileSystem(input));
+ }
}
+ private static Class<?> getOOXMLClass() {
+ try {
+ return OLE2ExtractorFactory.class.getClassLoader().loadClass(
+ "org.apache.poi.extractor.ExtractorFactory"
+ );
+ } catch (ClassNotFoundException e) {
+ LOGGER.log(POILogger.WARN, "POI OOXML jar missing");
+ return null;
+ }
+ }
+ private static Class<?> getScratchpadClass() {
+ try {
+ return OLE2ExtractorFactory.class.getClassLoader().loadClass(
+ "org.apache.poi.extractor.OLE2ScrachpadExtractorFactory"
+ );
+ } catch (ClassNotFoundException e) {
+ LOGGER.log(POILogger.ERROR, "POI Scratchpad jar missing");
+ throw new IllegalStateException("POI Scratchpad jar missing, required for ExtractorFactory");
+ }
+ }
+
/**
* Create the Extractor, if possible. Generally needs the Scratchpad jar.
* Note that this won't check for embedded OOXML resources either, use
@@ -138,8 +171,16 @@ public class OLE2ExtractorFactory {
return new ExcelExtractor(poifsDir);
}
}
-
- // TODO Try to ask the Scratchpad
+
+ // Ask Scratchpad, or fail trying
+ Class<?> cls = getScratchpadClass();
+ try {
+ Method m = cls.getDeclaredMethod("createExtractor", DirectoryNode.class);
+ POITextExtractor ext = (POITextExtractor)m.invoke(null, poifsDir);
+ if (ext != null) return ext;
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Error creating Scratchpad Extractor", e);
+ }
throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
}
@@ -155,9 +196,9 @@ public class OLE2ExtractorFactory {
throws IOException
{
// All the embedded directories we spotted
- ArrayList<Entry> dirs = new ArrayList<Entry>();
+ List<Entry> dirs = new ArrayList<Entry>();
// For anything else not directly held in as a POIFS directory
- ArrayList<InputStream> nonPOIFS = new ArrayList<InputStream>();
+ List<InputStream> nonPOIFS = new ArrayList<InputStream>();
// Find all the embedded directories
DirectoryEntry root = ext.getRoot();
@@ -175,7 +216,15 @@ public class OLE2ExtractorFactory {
}
}
} else {
- // TODO Ask scratchpad
+ // Ask Scratchpad, or fail trying
+ Class<?> cls = getScratchpadClass();
+ try {
+ Method m = cls.getDeclaredMethod(
+ "identifyEmbeddedResources", POIOLE2TextExtractor.class, List.class, List.class);
+ m.invoke(null, ext, dirs, nonPOIFS);
+ } catch (Exception e) {
+ throw new IllegalArgumentException("Error checking for Scratchpad embedded resources", e);
+ }
}
// Create the extractors
@@ -195,10 +244,10 @@ public class OLE2ExtractorFactory {
} catch (IllegalArgumentException ie) {
// Ignore, just means it didn't contain
// a format we support as yet
- // TODO Should we log this?
+ LOGGER.log(POILogger.WARN, ie);
} catch (Exception xe) {
// Ignore, invalid format
- // TODO Should we log this?
+ LOGGER.log(POILogger.WARN, xe);
}
}
return e.toArray(new POITextExtractor[e.size()]);
diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
index 4ba8d8f2f8..830a4d82d8 100644
--- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
+++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java
@@ -78,23 +78,13 @@ public class ExtractorFactory {
protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT;
protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT;
-
- /** Should this thread prefer event based over usermodel based extractors? */
- private static final ThreadLocal<Boolean> threadPreferEventExtractors = new ThreadLocal<Boolean>() {
- @Override
- protected Boolean initialValue() { return Boolean.FALSE; }
- };
-
- /** Should all threads prefer event based over usermodel based extractors? */
- private static Boolean allPreferEventExtractors;
-
/**
* Should this thread prefer event based over usermodel based extractors?
* (usermodel extractors tend to be more accurate, but use more memory)
* Default is false.
*/
public static boolean getThreadPrefersEventExtractors() {
- return threadPreferEventExtractors.get();
+ return OLE2ExtractorFactory.getThreadPrefersEventExtractors();
}
/**
@@ -103,7 +93,7 @@ public class ExtractorFactory {
* Default is to use the thread level setting, which defaults to false.
*/
public static Boolean getAllThreadsPreferEventExtractors() {
- return allPreferEventExtractors;
+ return OLE2ExtractorFactory.getAllThreadsPreferEventExtractors();
}
/**
@@ -111,7 +101,7 @@ public class ExtractorFactory {
* Will only be used if the All Threads setting is null.
*/
public static void setThreadPrefersEventExtractors(boolean preferEventExtractors) {
- threadPreferEventExtractors.set(preferEventExtractors);
+ OLE2ExtractorFactory.setThreadPrefersEventExtractors(preferEventExtractors);
}
/**
@@ -119,7 +109,7 @@ public class ExtractorFactory {
* If set, will take preference over the Thread level setting.
*/
public static void setAllThreadsPreferEventExtractors(Boolean preferEventExtractors) {
- allPreferEventExtractors = preferEventExtractors;
+ OLE2ExtractorFactory.setAllThreadsPreferEventExtractors(preferEventExtractors);
}
/**
@@ -127,10 +117,7 @@ public class ExtractorFactory {
* Checks the all-threads one first, then thread specific.
*/
protected static boolean getPreferEventExtractor() {
- if(allPreferEventExtractors != null) {
- return allPreferEventExtractors;
- }
- return threadPreferEventExtractors.get();
+ return OLE2ExtractorFactory.getPreferEventExtractor();
}
public static POITextExtractor createExtractor(File f) throws IOException, InvalidFormatException, OpenXML4JException, XmlException {
@@ -281,83 +268,28 @@ public class ExtractorFactory {
}
public static POIOLE2TextExtractor createExtractor(POIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- // Only ever an OLE2 one from the root of the FS
- return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+ return OLE2ExtractorFactory.createExtractor(fs);
}
public static POIOLE2TextExtractor createExtractor(NPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- // Only ever an OLE2 one from the root of the FS
- return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+ return OLE2ExtractorFactory.createExtractor(fs);
}
public static POIOLE2TextExtractor createExtractor(OPOIFSFileSystem fs) throws IOException, OpenXML4JException, XmlException {
- // Only ever an OLE2 one from the root of the FS
- return (POIOLE2TextExtractor)createExtractor(fs.getRoot());
+ return OLE2ExtractorFactory.createExtractor(fs);
}
public static POITextExtractor createExtractor(DirectoryNode poifsDir) throws IOException,
OpenXML4JException, XmlException
{
- // Look for certain entries in the stream, to figure it
- // out from
- for (String workbookName : WORKBOOK_DIR_ENTRY_NAMES) {
- if (poifsDir.hasEntry(workbookName)) {
- if (getPreferEventExtractor()) {
- return new EventBasedExcelExtractor(poifsDir);
- }
- return new ExcelExtractor(poifsDir);
- }
- }
- if (poifsDir.hasEntry(OLD_WORKBOOK_DIR_ENTRY_NAME)) {
- throw new OldExcelFormatException("Old Excel Spreadsheet format (1-95) "
- + "found. Please call OldExcelExtractor directly for basic text extraction");
- }
-
- if (poifsDir.hasEntry("WordDocument")) {
- // Old or new style word document?
- try {
- return new WordExtractor(poifsDir);
- } catch (OldWordFileFormatException e) {
- return new Word6Extractor(poifsDir);
- }
- }
-
- if (poifsDir.hasEntry("PowerPoint Document")) {
- return new PowerPointExtractor(poifsDir);
- }
-
- if (poifsDir.hasEntry("VisioDocument")) {
- return new VisioTextExtractor(poifsDir);
- }
-
- if (poifsDir.hasEntry("Quill")) {
- return new PublisherTextExtractor(poifsDir);
- }
-
- final String[] outlookEntryNames = new String[] {
- // message bodies, saved as plain text (PtypString)
- // The first short (0x1000, 0x0047, 0x0037) refer to the Property ID (see [MS-OXPROPS].pdf)
- // the second short (0x001e, 0x001f, 0x0102) refer to the type of data stored in this entry
- // https://msdn.microsoft.com/endatatypes.Ex-us/library/cc433490(v=exchg.80).aspx
- // @see org.apache.poi.hsmf.Types.MAPIType
- "__substg1.0_1000001E", //PidTagBody ASCII
- "__substg1.0_1000001F", //PidTagBody Unicode
- "__substg1.0_0047001E", //PidTagMessageSubmissionId ASCII
- "__substg1.0_0047001F", //PidTagMessageSubmissionId Unicode
- "__substg1.0_0037001E", //PidTagSubject ASCII
- "__substg1.0_0037001F", //PidTagSubject Unicode
- };
- for (String entryName : outlookEntryNames) {
- if (poifsDir.hasEntry(entryName)) {
- return new OutlookTextExtactor(poifsDir);
- }
- }
-
+ // First, check for OOXML
for (String entryName : poifsDir.getEntryNames()) {
if (entryName.equals("Package")) {
OPCPackage pkg = OPCPackage.open(poifsDir.createDocumentInputStream("Package"));
return createExtractor(pkg);
}
}
- throw new IllegalArgumentException("No supported documents found in the OLE2 stream");
+
+ // If not, ask the OLE2 code to check, with Scratchpad if possible
+ return OLE2ExtractorFactory.createExtractor(poifsDir);
}
/**
diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
index 467bb31fdf..0aa022332e 100644
--- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
+++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java
@@ -150,6 +150,7 @@ public class TestExtractorFactory {
POITextExtractor extractor = ExtractorFactory.createExtractor(xlsx);
assertTrue(
+ extractor.getClass().getName(),
extractor
instanceof XSSFExcelExtractor
);
@@ -163,6 +164,7 @@ public class TestExtractorFactory {
extractor = ExtractorFactory.createExtractor(xltx);
assertTrue(
+ extractor.getClass().getName(),
extractor
instanceof XSSFExcelExtractor
);
@@ -340,6 +342,7 @@ public class TestExtractorFactory {
extractor = ExtractorFactory.createExtractor(new FileInputStream(xlsx));
assertTrue(
+ extractor.getClass().getName(),
extractor
instanceof XSSFExcelExtractor
);
@@ -359,6 +362,7 @@ public class TestExtractorFactory {
// Word
extractor = ExtractorFactory.createExtractor(new FileInputStream(doc));
assertTrue(
+ extractor.getClass().getName(),
extractor
instanceof WordExtractor
);
@@ -369,6 +373,7 @@ public class TestExtractorFactory {
extractor = ExtractorFactory.createExtractor(new FileInputStream(doc6));
assertTrue(
+ extractor.getClass().getName(),
extractor
instanceof Word6Extractor
);
@@ -379,6 +384,7 @@ public class TestExtractorFactory {
extractor = ExtractorFactory.createExtractor(new FileInputStream(doc95));
assertTrue(
+ extractor.getClass().getName(),
extractor
instanceof Word6Extractor
);
diff --git a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java b/src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java
index 90ede62367..b1e52f4d75 100644
--- a/src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java
+++ b/src/scratchpad/src/org/apache/poi/extractor/OLE2ScrachpadExtractorFactory.java
@@ -20,8 +20,8 @@ import java.io.ByteArrayInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
-import java.util.ArrayList;
import java.util.Iterator;
+import java.util.List;
import org.apache.poi.POIOLE2TextExtractor;
import org.apache.poi.POITextExtractor;
@@ -108,7 +108,7 @@ public class OLE2ScrachpadExtractorFactory {
* empty array. Otherwise, you'll get one open
* {@link POITextExtractor} for each embedded file.
*/
- public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, ArrayList<Entry> dirs, ArrayList<InputStream> nonPOIFS) throws IOException {
+ public static void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
// Find all the embedded directories
DirectoryEntry root = ext.getRoot();
if(root == null) {