From c8c8130ae352d8647b03faba492732883471e8bf Mon Sep 17 00:00:00 2001 From: Dominik Stadler Date: Wed, 6 Dec 2023 19:50:02 +0000 Subject: [PATCH] Make sure OLE2ScratchpadExtractorFactory is sorted first Otherwise order of found extractors would depend on jar-loading order and thus might have unexpected side-effects and missing features in text-extraction. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1914407 13f79535-47bb-0310-9956-ffa450edef68 --- .../apache/poi/extractor/ExtractorFactory.java | 15 +++++++++++++++ .../apache/poi/extractor/ExtractorProvider.java | 3 ++- 2 files changed, 17 insertions(+), 1 deletion(-) diff --git a/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java b/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java index f784842274..ea3ee82f9a 100644 --- a/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java @@ -88,6 +88,21 @@ public final class ExtractorFactory { private ExtractorFactory() { ClassLoader cl = ExtractorFactory.class.getClassLoader(); ServiceLoader.load(ExtractorProvider.class, cl).forEach(provider::add); + + // loading of service-files is non-deterministic as it depends on order of loaded jars + // however we would like to "prefer" one Factory, so let's make sure the more + // powerful "ScratchpadProvider" is sorted first + provider.sort((o1, o2) -> { + if (o1.getClass() != o2.getClass()) { + if (o1.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) { + return -1; + } else if (o2.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) { + return 1; + } + } + + return o1.getClass().getName().compareTo(o2.getClass().getName()); + }); } /** diff --git a/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java b/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java index 5669cd9fe7..777cd38709 100644 --- 
a/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java +++ b/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java @@ -70,7 +70,8 @@ public interface ExtractorProvider { * @param dirs a list to be filled with directory references holding embedded * @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries * - * @throws IOException when the format specific extraction fails because of invalid entires + * @throws IOException when the format specific extraction fails because of invalid entries + * @throws java.lang.IllegalArgumentException if implementations do not override this method */ default void identifyEmbeddedResources(POIOLE2TextExtractor ext, List dirs, List nonPOIFS) throws IOException { throw new IllegalArgumentException("Error checking for Scratchpad embedded resources"); -- 2.39.5