From: Dominik Stadler Date: Wed, 6 Dec 2023 19:50:02 +0000 (+0000) Subject: Make sure OLE2ScratchpadExtractorFactory is sorted first X-Git-Tag: REL_5_3_0~156 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=c8c8130ae352d8647b03faba492732883471e8bf;p=poi.git Make sure OLE2ScratchpadExtractorFactory is sorted first Otherwise order of found extractors would depend on jar-loading order and thus might have unexpected side-effects and missing features in text-extraction. git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1914407 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java b/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java index f784842274..ea3ee82f9a 100644 --- a/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/poi/src/main/java/org/apache/poi/extractor/ExtractorFactory.java @@ -88,6 +88,21 @@ public final class ExtractorFactory { private ExtractorFactory() { ClassLoader cl = ExtractorFactory.class.getClassLoader(); ServiceLoader.load(ExtractorProvider.class, cl).forEach(provider::add); + + // loading of service-files is non-deterministic as it depends on order of loaded jars + // however we would like to "prefer" one Factory, so let's make sure the more + // powerful "ScratchpadProvider" is sorted first + provider.sort((o1, o2) -> { + if (o1.getClass() != o2.getClass()) { + if (o1.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) { + return -1; + } else if (o2.getClass().getSimpleName().equals("OLE2ScratchpadExtractorFactory")) { + return 1; + } + } + + return o1.getClass().getName().compareTo(o2.getClass().getName()); + }); } /** diff --git a/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java b/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java index 5669cd9fe7..777cd38709 100644 --- a/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java +++ 
b/poi/src/main/java/org/apache/poi/extractor/ExtractorProvider.java @@ -70,7 +70,8 @@ public interface ExtractorProvider { * @param dirs a list to be filled with directory references holding embedded * @param nonPOIFS a list to be filled with streams which aren't based on POIFS entries * - * @throws IOException when the format specific extraction fails because of invalid entires + * @throws IOException when the format specific extraction fails because of invalid entries + * @throws java.lang.IllegalArgumentException if implementations do not overwrite this method */ default void identifyEmbeddedResources(POIOLE2TextExtractor ext, List dirs, List nonPOIFS) throws IOException { throw new IllegalArgumentException("Error checking for Scratchpad embedded resources");