about | summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Dominik Stadler <centic@apache.org> 2023-12-06 19:50:02 +0000
committer: Dominik Stadler <centic@apache.org> 2023-12-06 19:50:02 +0000
commit: c8c8130ae352d8647b03faba492732883471e8bf (patch)
tree: 20bbdb3c3c1069fc73874dced418758f2568a8e4
parent: 497482d4dc8d9b2eed05eb31ff366a09a610e8b6 (diff)
download: poi-c8c8130ae352d8647b03faba492732883471e8bf.tar.gz
          poi-c8c8130ae352d8647b03faba492732883471e8bf.zip
Make sure OLE2ScratchpadExtractorFactory is sorted first
Otherwise the order of found extractors would depend on jar-loading order and thus might have unexpected side-effects and missing features in text-extraction.

git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1914407 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java 15
-rw-r--r-- poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java 3
2 files changed, 17 insertions, 1 deletions
diff --git a/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java b/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java
index f784842274..ea3ee82f9a 100644
--- a/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java
+++ b/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java
@@ -88,6 +88,21 @@ public final class ExtractorFactory {
private ExtractorFactory() {
ClassLoader cl = ExtractorFactory.class.getClassLoader();
ServiceLoader.load(ExtractorProvider.class, cl).forEach(provider::add);
+
+ // loading of service-files is non-deterministic as it depends on order of loaded jars
+ // however we would like to "prefer" one Factory, so let's make sure the more
+ // powerful "OLE2ScratchpadExtractorFactory" is sorted first
+ provider.sort((o1, o2) -> {
+ if (o1.getClass() != o2.getClass()) {
+ if (o1.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) {
+ return -1;
+ } else if (o2.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) {
+ return 1;
+ }
+ }
+
+ return o1.getClass().getName().compareTo(o2.getClass().getName());
+ });
}
/**
diff --git a/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java b/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java
index 5669cd9fe7..777cd38709 100644
--- a/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java
+++ b/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java
@@ -70,7 +70,8 @@ public interface ExtractorProvider {
* @param dirs a list to be filled with directory references holding embedded
* @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries
*
- * @throws IOException when the format specific extraction fails because of invalid entires
+ * @throws IOException when the format specific extraction fails because of invalid entries
+ * @throws java.lang.IllegalArgumentException if implementations do not override this method
*/
default void identifyEmbeddedResources(POIOLE2TextExtractor ext, List<Entry> dirs, List<InputStream> nonPOIFS) throws IOException {
throw new IllegalArgumentException("Error checking for Scratchpad embedded resources");