From: Nick Burch Date: Wed, 11 Mar 2015 16:17:41 +0000 (+0000) Subject: Give a more helpful exception if a Visio VSDX ooxml file is passed to ExtractorFactory X-Git-Tag: REL_3_12_FINAL~90 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=47a2847cbe99eab98b5bb6673f925eef810edfcc;p=poi.git Give a more helpful exception if a Visio VSDX ooxml file is passed to ExtractorFactory git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665929 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 46cd2cd386..0bc7eb31db 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -68,6 +68,8 @@ import org.apache.xmlbeans.XmlException; public class ExtractorFactory { public static final String CORE_DOCUMENT_REL = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"; + public static final String VISIO_DOCUMENT_REL = + "http://schemas.microsoft.com/visio/2010/relationships/document"; /** Should this thread prefer event based over usermodel based extractors? */ @@ -158,12 +160,25 @@ public class ExtractorFactory { } public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { + // Check for the normal Office core document PackageRelationshipCollection core = pkg.getRelationshipsByType(CORE_DOCUMENT_REL); - if(core.size() != 1) { - throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); + + // If nothing was found, try some of the other OOXML-based core types + if (core.size() == 0) { + // Could it be a visio one? + PackageRelationshipCollection visio = + pkg.getRelationshipsByType(VISIO_DOCUMENT_REL); + if (visio.size() == 1) { + throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files"); + } + } + // Should just be a single core document, complain if not + if (core.size() != 1) { + throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); } + // Grab the core document part, and try to identify from that PackagePart corePart = pkg.getPart(core.getRelationship(0)); // Is it XSSF? diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 7f79afaad3..35198a80ea 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -71,6 +71,7 @@ public class TestExtractorFactory extends TestCase { private File msgEmbMsg; private File vsd; + private File vsdx; private File pub; @@ -109,6 +110,7 @@ public class TestExtractorFactory extends TestCase { POIDataSamples dgTests = POIDataSamples.getDiagramInstance(); vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd"); + vsdx = getFileAndCheck(dgTests, "test.vsdx"); POIDataSamples pubTests = POIDataSamples.getPublisherInstance(); pub = getFileAndCheck(pubTests, "Simple.pub"); @@ -230,7 +232,7 @@ public class TestExtractorFactory extends TestCase { ); extractor.close(); - // Visio + // Visio - binary assertTrue( ExtractorFactory.createExtractor(vsd) instanceof VisioTextExtractor @@ -238,6 +240,13 @@ public class TestExtractorFactory extends TestCase { assertTrue( ExtractorFactory.createExtractor(vsd).getText().length() > 50 ); + // Visio - vsdx + try { + ExtractorFactory.createExtractor(vsdx); + fail(); + } catch(IllegalArgumentException e) { + // Good + } // Publisher assertTrue( @@ -342,6 +351,13 @@ public class TestExtractorFactory extends TestCase { assertTrue( ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50 ); + // Visio - vsdx + try { + ExtractorFactory.createExtractor(new FileInputStream(vsdx)); + fail(); + } catch(IllegalArgumentException e) { + // Good + } // Publisher assertTrue( diff --git a/test-data/diagram/test.vsdx b/test-data/diagram/test.vsdx new file mode 100644 index 0000000000..1fa690356e Binary files /dev/null and b/test-data/diagram/test.vsdx differ