From: Nick Burch Date: Mon, 30 Jan 2012 12:59:54 +0000 (+0000) Subject: Fix bug #52540 - Relax the M4.1 constraint on reading OOXML files, as some Office... X-Git-Tag: REL_3_8_FINAL~73 X-Git-Url: https://source.dussan.org/?a=commitdiff_plain;h=bcb898e9778d4b2078f59474a4ee34c9cf82d293;p=poi.git Fix bug #52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1237631 13f79535-47bb-0310-9956-ffa450edef68 --- diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml index 7a021f3347..794df2f135 100644 --- a/src/documentation/content/xdocs/status.xml +++ b/src/documentation/content/xdocs/status.xml @@ -34,6 +34,7 @@ + 52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this 52462 - Added implementation for SUMIFS() POIXMLPropertiesTextExtractor support for extracting custom OOXML properties as text 52449 - Support writing XWPF documents with glossaries (Glossaries are not yet supported, but can now be written out again without changes) diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java index 541700a136..f384cc650c 100644 --- a/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java +++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java @@ -597,8 +597,13 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { } /** - * Load the parts of the archive if it has not been done yet The - * relationships of each part are not loaded + * Load the parts of the archive if it has not been done yet. The + * relationships of each part are not loaded. + * + * Note - Rule M4.1 states that there may only ever be one Core + * Properties Part, but Office produced files will sometimes + * have multiple! As Office ignores all but the first, we relax + * Compliance with Rule M4.1, and ignore all others silently too. * * @return All this package's parts. */ @@ -609,31 +614,36 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { if (partList == null) { /* Variables use to validate OPC Compliance */ - // Ensure rule M4.1 -> A format consumer shall consider more than + // Check rule M4.1 -> A format consumer shall consider more than // one core properties relationship for a package to be an error + // (We just log it and move on, as real files break this!) boolean hasCorePropertiesPart = false; + boolean needCorePropertiesPart = true; PackagePart[] parts = this.getPartsImpl(); this.partList = new PackagePartCollection(); for (PackagePart part : parts) { if (partList.containsKey(part._partName)) throw new InvalidFormatException( - "A part with the name '" - + part._partName - + "' already exist : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]"); + "A part with the name '" + + part._partName + + "' already exist : Packages shall not contain equivalent " + + "part names and package implementers shall neither create " + + "nor recognize packages with equivalent part names. [M1.12]"); // Check OPC compliance rule M4.1 if (part.getContentType().equals( ContentTypes.CORE_PROPERTIES_PART)) { - if (!hasCorePropertiesPart) + if (!hasCorePropertiesPart) { hasCorePropertiesPart = true; - else - throw new InvalidFormatException( - "OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !"); + } else { + logger.log(POILogger.WARN, "OPC Compliance error [M4.1]: " + + "there is more than one core properties relationship in the package! " + + "POI will use only the first, but other software may reject this file."); + } } - PartUnmarshaller partUnmarshaller = partUnmarshallers - .get(part._contentType); + PartUnmarshaller partUnmarshaller = partUnmarshallers.get(part._contentType); if (partUnmarshaller != null) { UnmarshallContext context = new UnmarshallContext(this, @@ -643,9 +653,14 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { .unmarshall(context, part.getInputStream()); partList.put(unmarshallPart._partName, unmarshallPart); - // Core properties case - if (unmarshallPart instanceof PackagePropertiesPart) + // Core properties case-- use first CoreProperties part we come across + // and ignore any subsequent ones + if (unmarshallPart instanceof PackagePropertiesPart && + hasCorePropertiesPart && + needCorePropertiesPart) { this.packageProperties = (PackagePropertiesPart) unmarshallPart; + needCorePropertiesPart = false; + } } catch (IOException ioe) { logger.log(POILogger.WARN, "Unmarshall operation : IOException for " + part._partName); @@ -718,19 +733,20 @@ public abstract class OPCPackage implements RelationshipSource, Closeable { if (partList.containsKey(partName) && !partList.get(partName).isDeleted()) { throw new PartAlreadyExistsException( - "A part with the name '" - + partName.getName() - + "' already exists : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]"); + "A part with the name '" + partName.getName() + "'" + + " already exists : Packages shall not contain equivalent part names and package" + + " implementers shall neither create nor recognize packages with equivalent part names. [M1.12]"); } /* Check OPC compliance */ - // Rule [M4.1]: The format designer shall specify and the format - // producer + // Rule [M4.1]: The format designer shall specify and the format producer // shall create at most one core properties relationship for a package. // A format consumer shall consider more than one core properties // relationship for a package to be an error. If present, the // relationship shall target the Core Properties part. + // Note - POI will read files with more than one Core Properties, which + // Office sometimes produces, but is strict on generation if (contentType.equals(ContentTypes.CORE_PROPERTIES_PART)) { if (this.packageProperties != null) throw new InvalidOperationException( diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java index b3a4a6320f..0cf5200092 100644 --- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java +++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java @@ -25,6 +25,7 @@ import java.net.URISyntaxException; import junit.framework.AssertionFailedError; import junit.framework.TestCase; +import org.apache.poi.POIDataSamples; import org.apache.poi.openxml4j.OpenXML4JTestDataSamples; import org.apache.poi.openxml4j.exceptions.InvalidFormatException; import org.apache.poi.openxml4j.exceptions.InvalidOperationException; @@ -33,7 +34,6 @@ import org.apache.poi.openxml4j.opc.OPCPackage; import org.apache.poi.openxml4j.opc.PackageRelationshipTypes; import org.apache.poi.openxml4j.opc.PackagingURIHelper; import org.apache.poi.openxml4j.opc.TargetMode; -import org.apache.poi.POIDataSamples; /** * Test core properties Open Packaging Convention compliance. @@ -42,6 +42,7 @@ import org.apache.poi.POIDataSamples; * at most one core properties relationship for a package. A format consumer * shall consider more than one core properties relationship for a package to be * an error. If present, the relationship shall target the Core Properties part. + * (POI relaxes this on reading, as Office sometimes breaks this) * * M4.2: The format designer shall not specify and the format producer shall not * create Core Properties that use the Markup Compatibility namespace as defined @@ -82,28 +83,43 @@ public final class TestOPCComplianceCoreProperties extends TestCase { } private static String extractInvalidFormatMessage(String sampleNameSuffix) { - InputStream is = OpenXML4JTestDataSamples.openComplianceSampleStream("OPCCompliance_CoreProperties_" + sampleNameSuffix); OPCPackage pkg; try { pkg = OPCPackage.open(is); } catch (InvalidFormatException e) { - // expected during successful test + // no longer required for successful test return e.getMessage(); } catch (IOException e) { throw new RuntimeException(e); } pkg.revert(); - // Normally must thrown an InvalidFormatException exception. throw new AssertionFailedError("expected OPC compliance exception was not thrown"); } /** * Test M4.1 rule. */ - public void testOnlyOneCorePropertiesPart() { - String msg = extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx"); - assertEquals("OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !", msg); + public void testOnlyOneCorePropertiesPart() throws Exception { + // We have relaxed this check, so we can read the file anyway + try { + extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx"); + fail("M4.1 should be being relaxed"); + } catch (AssertionFailedError e) {} + + // We will use the first core properties, and ignore the others + InputStream is = OpenXML4JTestDataSamples.openSampleStream("MultipleCoreProperties.docx"); + OPCPackage pkg = OPCPackage.open(is); + + // We can see 2 by type + assertEquals(2, pkg.getPartsByContentType(ContentTypes.CORE_PROPERTIES_PART).size()); + // But only the first one by relationship + assertEquals(1, pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).size()); + // It should be core.xml not the older core1.xml + assertEquals( + "/docProps/core.xml", + pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).get(0).getPartName().toString() + ); } private static URI createURI(String text) { @@ -131,7 +147,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase { try { pkg.addRelationship(PackagingURIHelper.createPartName(partUri), TargetMode.INTERNAL, PackageRelationshipTypes.CORE_PROPERTIES); - fail("expected OPC compliance exception was not thrown"); + // no longer fail on compliance error + //fail("expected OPC compliance exception was not thrown"); } catch (InvalidFormatException e) { throw new RuntimeException(e); } catch (InvalidOperationException e) { @@ -157,7 +174,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase { try { pkg.createPart(PackagingURIHelper.createPartName(partUri), ContentTypes.CORE_PROPERTIES_PART); - fail("expected OPC compliance exception was not thrown"); + // no longer fail on compliance error + //fail("expected OPC compliance exception was not thrown"); } catch (InvalidFormatException e) { throw new RuntimeException(e); } catch (InvalidOperationException e) { diff --git a/test-data/openxml4j/MultipleCoreProperties.docx b/test-data/openxml4j/MultipleCoreProperties.docx new file mode 100644 index 0000000000..4c19ae77aa Binary files /dev/null and b/test-data/openxml4j/MultipleCoreProperties.docx differ