From: Nick Burch Date: Fri, 13 Mar 2015 17:39:47 +0000 (+0000) Subject: Detect OOXML-strict, and give more helpful exceptions for them X-Git-Tag: REL_3_12_FINAL~75 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=0227765619298a12ed382d4f3ef999c18b8b422e;p=poi.git Detect OOXML-strict, and give more helpful exceptions for them git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1666525 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java index 4759ddbd3b..f051eb32af 100644 --- a/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java +++ b/src/ooxml/java/org/apache/poi/POIXMLDocumentPart.java @@ -94,6 +94,15 @@ public class POIXMLDocumentPart { */ public POIXMLDocumentPart(OPCPackage pkg) { PackageRelationship coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.CORE_DOCUMENT).getRelationship(0); + if (coreRel == null) { + coreRel = pkg.getRelationshipsByType(PackageRelationshipTypes.STRICT_CORE_DOCUMENT).getRelationship(0); + if (coreRel != null) { + throw new POIXMLException("Strict OOXML isn't currently supported, please see bug #57699"); + } + } + if (coreRel == null) { + throw new POIXMLException("OOXML file structure broken/invalid - no core document found!"); + } this.packagePart = pkg.getPart(coreRel); this.packageRel = coreRel; diff --git a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java index 0bc7eb31db..b484e2ed06 100644 --- a/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java +++ b/src/ooxml/java/org/apache/poi/extractor/ExtractorFactory.java @@ -47,6 +47,7 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageAccess; import org.apache.poi.openxml4j.opc.PackagePart; import org.apache.poi.openxml4j.opc.PackageRelationshipCollection; +import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.poifs.filesystem.DirectoryEntry; import org.apache.poi.poifs.filesystem.DirectoryNode; import org.apache.poi.poifs.filesystem.Entry; @@ -66,10 +67,9 @@ import org.apache.xmlbeans.XmlException; * document, and returns it. */ public class ExtractorFactory { - public static final String CORE_DOCUMENT_REL = - "http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument"; - public static final String VISIO_DOCUMENT_REL = - "http://schemas.microsoft.com/visio/2010/relationships/document"; + public static final String CORE_DOCUMENT_REL = PackageRelationshipTypes.CORE_DOCUMENT; + protected static final String VISIO_DOCUMENT_REL = PackageRelationshipTypes.VISIO_CORE_DOCUMENT; + protected static final String STRICT_DOCUMENT_REL = PackageRelationshipTypes.STRICT_CORE_DOCUMENT; /** Should this thread prefer event based over usermodel based extractors? */ @@ -165,6 +165,10 @@ public class ExtractorFactory { pkg.getRelationshipsByType(CORE_DOCUMENT_REL); // If nothing was found, try some of the other OOXML-based core types + if (core.size() == 0) { + // Could it be an OOXML-Strict one? + core = pkg.getRelationshipsByType(STRICT_DOCUMENT_REL); + } if (core.size() == 0) { // Could it be a visio one? PackageRelationshipCollection visio = @@ -173,6 +177,7 @@ public class ExtractorFactory { throw new IllegalArgumentException("Text extraction not supported for Visio OOXML files"); } } + // Should just be a single core document, complain if not if (core.size() != 1) { throw new IllegalArgumentException("Invalid OOXML Package received - expected 1 core document, found " + core.size()); diff --git a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java index 003c0eccf5..a8644f4202 100644 --- a/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java +++ b/src/ooxml/testcases/org/apache/poi/extractor/TestExtractorFactory.java @@ -25,6 +25,7 @@ import junit.framework.TestCase; import org.apache.poi.POIDataSamples; import org.apache.poi.POIOLE2TextExtractor; import org.apache.poi.POITextExtractor; +import org.apache.poi.POIXMLException; import org.apache.poi.POIXMLTextExtractor; import org.apache.poi.hdgf.extractor.VisioTextExtractor; import org.apache.poi.hpbf.extractor.PublisherTextExtractor; @@ -162,6 +163,12 @@ public class TestExtractorFactory extends TestCase { extractor.close(); // TODO Support OOXML-Strict, see bug #57699 + try { + extractor = ExtractorFactory.createExtractor(xlsxStrict); + fail("OOXML-Strict isn't yet supported"); + } catch (POIXMLException e) { + // Expected, for now + } // extractor = ExtractorFactory.createExtractor(xlsxStrict); // assertTrue( // extractor @@ -307,6 +314,14 @@ public class TestExtractorFactory extends TestCase { assertTrue( ExtractorFactory.createExtractor(new FileInputStream(xlsx)).getText().length() > 200 ); + // TODO Support OOXML-Strict, see bug #57699 +// assertTrue( +// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)) +// instanceof XSSFExcelExtractor +// ); +// assertTrue( +// ExtractorFactory.createExtractor(new FileInputStream(xlsxStrict)).getText().length() > 200 +// ); // Word assertTrue( diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java index 6334326c5e..bb9efa4073 100644 --- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java +++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java @@ -2293,13 +2293,19 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues { /** * OOXML-Strict files + * Not currently working - namespace mis-match from XMLBeans */ @Test - @Ignore + @Ignore("XMLBeans namespace mis-match on ooxml-strict files") public void test57699() throws Exception { - Workbook wb = XSSFTestDataSamples.openSampleWorkbook("sample.strict.xlsx"); + XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("sample.strict.xlsx"); assertEquals(3, wb.getNumberOfSheets()); + // TODO Check sheet contents + // TODO Check formula evaluation - // TODO Check the rest + XSSFWorkbook wbBack = XSSFTestDataSamples.writeOutAndReadBack(wb); + assertEquals(3, wbBack.getNumberOfSheets()); + // TODO Re-check sheet contents + // TODO Re-check formula evaluation } }