git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1665929 13f79535-47bb-0310-9956-ffa450edef68
tags/REL_3_12_FINAL
@@ -68,6 +68,8 @@ import org.apache.xmlbeans.XmlException; | |||
public class ExtractorFactory { | |||
public static final String CORE_DOCUMENT_REL = | |||
"http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"; | |||
public static final String VISIO_DOCUMENT_REL = | |||
"http://schemas.microsoft.com/visio/2010/relationships/document"; | |||
/** Should this thread prefer event based over usermodel based extractors? */ | |||
@@ -158,12 +160,25 @@ public class ExtractorFactory { | |||
} | |||
public static POIXMLTextExtractor createExtractor(OPCPackage pkg) throws IOException, OpenXML4JException, XmlException { | |||
// Check for the normal Office core document | |||
PackageRelationshipCollection core = | |||
pkg.getRelationshipsByType(CORE_DOCUMENT_REL); | |||
if(core.size() != 1) { | |||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); | |||
// If nothing was found, try some of the other OOXML-based core types | |||
if (core.size() == 0) { | |||
// Could it be a visio one? | |||
PackageRelationshipCollection visio = | |||
pkg.getRelationshipsByType(VISIO_DOCUMENT_REL); | |||
if (visio.size() == 1) { | |||
throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files"); | |||
} | |||
} | |||
// Should just be a single core document, complain if not | |||
if (core.size() != 1) { | |||
throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); | |||
} | |||
// Grab the core document part, and try to identify from that | |||
PackagePart corePart = pkg.getPart(core.getRelationship(0)); | |||
// Is it XSSF? |
@@ -71,6 +71,7 @@ public class TestExtractorFactory extends TestCase { | |||
private File msgEmbMsg; | |||
private File vsd; | |||
private File vsdx; | |||
private File pub; | |||
@@ -109,6 +110,7 @@ public class TestExtractorFactory extends TestCase { | |||
POIDataSamples dgTests = POIDataSamples.getDiagramInstance(); | |||
vsd = getFileAndCheck(dgTests, "Test_Visio-Some_Random_Text.vsd"); | |||
vsdx = getFileAndCheck(dgTests, "test.vsdx"); | |||
POIDataSamples pubTests = POIDataSamples.getPublisherInstance(); | |||
pub = getFileAndCheck(pubTests, "Simple.pub"); | |||
@@ -230,7 +232,7 @@ public class TestExtractorFactory extends TestCase { | |||
); | |||
extractor.close(); | |||
// Visio | |||
// Visio - binary | |||
assertTrue( | |||
ExtractorFactory.createExtractor(vsd) | |||
instanceof VisioTextExtractor | |||
@@ -238,6 +240,13 @@ public class TestExtractorFactory extends TestCase { | |||
assertTrue( | |||
ExtractorFactory.createExtractor(vsd).getText().length() > 50 | |||
); | |||
// Visio - vsdx | |||
try { | |||
ExtractorFactory.createExtractor(vsdx); | |||
fail(); | |||
} catch(IllegalArgumentException e) { | |||
// Good | |||
} | |||
// Publisher | |||
assertTrue( | |||
@@ -342,6 +351,13 @@ public class TestExtractorFactory extends TestCase { | |||
assertTrue( | |||
ExtractorFactory.createExtractor(new FileInputStream(vsd)).getText().length() > 50 | |||
); | |||
// Visio - vsdx | |||
try { | |||
ExtractorFactory.createExtractor(new FileInputStream(vsdx)); | |||
fail(); | |||
} catch(IllegalArgumentException e) { | |||
// Good | |||
} | |||
// Publisher | |||
assertTrue( |