]> source.dussan.org Git - poi.git/commitdiff
Fix bug #52540 - Relax the M4.1 constraint on reading OOXML files, as some Office...
authorNick Burch <nick@apache.org>
Mon, 30 Jan 2012 12:59:54 +0000 (12:59 +0000)
committerNick Burch <nick@apache.org>
Mon, 30 Jan 2012 12:59:54 +0000 (12:59 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1237631 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java
test-data/openxml4j/MultipleCoreProperties.docx [new file with mode: 0644]

index 7a021f33477006128c4ec07aa7d9f6e05fadc728..794df2f1357c2c160de7a07e718f4c042104370a 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta6" date="2012-??-??">
+           <action dev="poi-developers" type="fix">52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this</action>
            <action dev="poi-developers" type="add">52462 - Added implementation for SUMIFS()</action>
            <action dev="poi-developers" type="add">POIXMLPropertiesTextExtractor support for extracting custom OOXML properties as text</action>
            <action dev="poi-developers" type="fix">52449 - Support writing XWPF documents with glossaries (Glossaries are not yet supported, but can now be written out again without changes)</action>
index 541700a136bb67997c21b023eac69b1ee3285f43..f384cc650c508bf3481e50d10c4dc9ddce022b35 100644 (file)
@@ -597,8 +597,13 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
        }
 
        /**
-        * Load the parts of the archive if it has not been done yet The
-        * relationships of each part are not loaded
+        * Load the parts of the archive if it has not been done yet. The
+        * relationships of each part are not loaded.
+        * 
+        * Note - Rule M4.1 states that there may only ever be one Core
+        *  Properties Part, but Office produced files will sometimes
+        *  have multiple! As Office ignores all but the first, we relax
+        *  Compliance with Rule M4.1, and ignore all others silently too. 
         *
         * @return All this package's parts.
         */
@@ -609,31 +614,36 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
                if (partList == null) {
                        /* Variables use to validate OPC Compliance */
 
-                       // Ensure rule M4.1 -> A format consumer shall consider more than
+                       // Check rule M4.1 -> A format consumer shall consider more than
                        // one core properties relationship for a package to be an error
+                  // (We just log it and move on, as real files break this!)
                        boolean hasCorePropertiesPart = false;
+                       boolean needCorePropertiesPart = true;
 
                        PackagePart[] parts = this.getPartsImpl();
                        this.partList = new PackagePartCollection();
                        for (PackagePart part : parts) {
                                if (partList.containsKey(part._partName))
                                        throw new InvalidFormatException(
-                                                       "A part with the name '"
-                                                                       + part._partName
-                                                                       + "' already exist : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
+                                                       "A part with the name '" +
+                                                       part._partName +
+                                                       "' already exist : Packages shall not contain equivalent " +
+                                                       "part names and package implementers shall neither create " +
+                                                       "nor recognize packages with equivalent part names. [M1.12]");
 
                                // Check OPC compliance rule M4.1
                                if (part.getContentType().equals(
                                                ContentTypes.CORE_PROPERTIES_PART)) {
-                                       if (!hasCorePropertiesPart)
+                                       if (!hasCorePropertiesPart) {
                                                hasCorePropertiesPart = true;
-                                       else
-                                               throw new InvalidFormatException(
-                                                               "OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !");
+                                       } else {
+                                          logger.log(POILogger.WARN, "OPC Compliance error [M4.1]: " +
+                                                       "there is more than one core properties relationship in the package! " +
+                                                       "POI will use only the first, but other software may reject this file.");
+                                       }
                                }
 
-                               PartUnmarshaller partUnmarshaller = partUnmarshallers
-                                               .get(part._contentType);
+                               PartUnmarshaller partUnmarshaller = partUnmarshallers.get(part._contentType);
 
                                if (partUnmarshaller != null) {
                                        UnmarshallContext context = new UnmarshallContext(this,
@@ -643,9 +653,14 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
                                                                .unmarshall(context, part.getInputStream());
                                                partList.put(unmarshallPart._partName, unmarshallPart);
 
-                                               // Core properties case
-                                               if (unmarshallPart instanceof PackagePropertiesPart)
+                                               // Core properties case-- use first CoreProperties part we come across
+                                               // and ignore any subsequent ones
+                                               if (unmarshallPart instanceof PackagePropertiesPart &&
+                                                               hasCorePropertiesPart &&
+                                                               needCorePropertiesPart) {
                                                        this.packageProperties = (PackagePropertiesPart) unmarshallPart;
+                                                       needCorePropertiesPart = false;
+                                               }
                                        } catch (IOException ioe) {
                                                logger.log(POILogger.WARN, "Unmarshall operation : IOException for "
                                                                + part._partName);
@@ -718,19 +733,20 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
                if (partList.containsKey(partName)
                                && !partList.get(partName).isDeleted()) {
                        throw new PartAlreadyExistsException(
-                                       "A part with the name '"
-                                                       + partName.getName()
-                                                       + "' already exists : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
+                                       "A part with the name '" + partName.getName() + "'" +
+                                       " already exists : Packages shall not contain equivalent part names and package" +
+                                       " implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
                }
 
                /* Check OPC compliance */
 
-               // Rule [M4.1]: The format designer shall specify and the format
-               // producer
+               // Rule [M4.1]: The format designer shall specify and the format producer
                // shall create at most one core properties relationship for a package.
                // A format consumer shall consider more than one core properties
                // relationship for a package to be an error. If present, the
                // relationship shall target the Core Properties part.
+               // Note - POI will read files with more than one Core Properties, which
+               //  Office sometimes produces, but is strict on generation
                if (contentType.equals(ContentTypes.CORE_PROPERTIES_PART)) {
                        if (this.packageProperties != null)
                                throw new InvalidOperationException(
index b3a4a6320fab6f2cb7c3219b3645d1c4b724cf00..0cf5200092ea89853d2856bb3b6492e40f400170 100644 (file)
@@ -25,6 +25,7 @@ import java.net.URISyntaxException;
 import junit.framework.AssertionFailedError;
 import junit.framework.TestCase;
 
+import org.apache.poi.POIDataSamples;
 import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@@ -33,7 +34,6 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
 import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
 import org.apache.poi.openxml4j.opc.PackagingURIHelper;
 import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.POIDataSamples;
 
 /**
  * Test core properties Open Packaging Convention compliance.
@@ -42,6 +42,7 @@ import org.apache.poi.POIDataSamples;
  * at most one core properties relationship for a package. A format consumer
  * shall consider more than one core properties relationship for a package to be
  * an error. If present, the relationship shall target the Core Properties part.
+ * (POI relaxes this on reading, as Office sometimes breaks this)
  * 
  * M4.2: The format designer shall not specify and the format producer shall not
  * create Core Properties that use the Markup Compatibility namespace as defined
@@ -82,28 +83,43 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
        }
 
        private static String extractInvalidFormatMessage(String sampleNameSuffix) {
-
                InputStream is = OpenXML4JTestDataSamples.openComplianceSampleStream("OPCCompliance_CoreProperties_" + sampleNameSuffix);
                OPCPackage pkg;
                try {
                        pkg = OPCPackage.open(is);
                } catch (InvalidFormatException e) {
-                       // expected during successful test
+                       // no longer required for successful test
                        return e.getMessage();
                } catch (IOException e) {
                        throw new RuntimeException(e);
                }
                pkg.revert();
-               // Normally must thrown an InvalidFormatException exception.
                throw new AssertionFailedError("expected OPC compliance exception was not thrown");
        }
        
        /**
         * Test M4.1 rule.
         */
-       public void testOnlyOneCorePropertiesPart() {
-               String msg = extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx");
-               assertEquals("OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !", msg);
+       public void testOnlyOneCorePropertiesPart() throws Exception {
+          // We have relaxed this check, so we can read the file anyway
+          try {
+             extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx");
+             fail("M4.1 should be being relaxed");
+          } catch (AssertionFailedError e) {}
+          
+          // We will use the first core properties, and ignore the others
+      InputStream is = OpenXML4JTestDataSamples.openSampleStream("MultipleCoreProperties.docx");
+      OPCPackage pkg = OPCPackage.open(is);
+      
+      // We can see 2 by type
+      assertEquals(2, pkg.getPartsByContentType(ContentTypes.CORE_PROPERTIES_PART).size());
+      // But only the first one by relationship
+      assertEquals(1, pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).size());
+      // It should be core.xml not the older core1.xml
+      assertEquals(
+            "/docProps/core.xml",
+            pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).get(0).getPartName().toString()
+      );
        }
        
        private static URI createURI(String text) {
@@ -131,7 +147,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
                try {
                        pkg.addRelationship(PackagingURIHelper.createPartName(partUri), TargetMode.INTERNAL,
                                        PackageRelationshipTypes.CORE_PROPERTIES);
-                       fail("expected OPC compliance exception was not thrown");
+                       // no longer fail on compliance error
+                       //fail("expected OPC compliance exception was not thrown");
                } catch (InvalidFormatException e) {
                        throw new RuntimeException(e);
                } catch (InvalidOperationException e) {
@@ -157,7 +174,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
                try {
                        pkg.createPart(PackagingURIHelper.createPartName(partUri),
                                        ContentTypes.CORE_PROPERTIES_PART);
-                       fail("expected OPC compliance exception was not thrown");
+                       // no longer fail on compliance error
+                       //fail("expected OPC compliance exception was not thrown");
                } catch (InvalidFormatException e) {
                        throw new RuntimeException(e);
                } catch (InvalidOperationException e) {
diff --git a/test-data/openxml4j/MultipleCoreProperties.docx b/test-data/openxml4j/MultipleCoreProperties.docx
new file mode 100644 (file)
index 0000000..4c19ae7
Binary files /dev/null and b/test-data/openxml4j/MultipleCoreProperties.docx differ