aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorNick Burch <nick@apache.org>2012-01-30 12:59:54 +0000
committerNick Burch <nick@apache.org>2012-01-30 12:59:54 +0000
commitbcb898e9778d4b2078f59474a4ee34c9cf82d293 (patch)
treef98e44dbe0c0ea532353d3fdf010b4bec0b634c6
parent91f163c97a2c5683c77967e8998d018887348b8f (diff)
downloadpoi-bcb898e9778d4b2078f59474a4ee34c9cf82d293.tar.gz
poi-bcb898e9778d4b2078f59474a4ee34c9cf82d293.zip
Fix bug #52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1237631 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r--src/documentation/content/xdocs/status.xml1
-rw-r--r--src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java54
-rw-r--r--src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java36
-rw-r--r--test-data/openxml4j/MultipleCoreProperties.docxbin0 -> 43853 bytes
4 files changed, 63 insertions, 28 deletions
diff --git a/src/documentation/content/xdocs/status.xml b/src/documentation/content/xdocs/status.xml
index 7a021f3347..794df2f135 100644
--- a/src/documentation/content/xdocs/status.xml
+++ b/src/documentation/content/xdocs/status.xml
@@ -34,6 +34,7 @@
<changes>
<release version="3.8-beta6" date="2012-??-??">
+ <action dev="poi-developers" type="fix">52540 - Relax the M4.1 constraint on reading OOXML files, as some Office produced ones do have 2 Core Properties, despite the specification explicitly forbidding this</action>
<action dev="poi-developers" type="add">52462 - Added implementation for SUMIFS()</action>
<action dev="poi-developers" type="add">POIXMLPropertiesTextExtractor support for extracting custom OOXML properties as text</action>
<action dev="poi-developers" type="fix">52449 - Support writing XWPF documents with glossaries (Glossaries are not yet supported, but can now be written out again without changes)</action>
diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java
index 541700a136..f384cc650c 100644
--- a/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/OPCPackage.java
@@ -597,8 +597,13 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
}
/**
- * Load the parts of the archive if it has not been done yet The
- * relationships of each part are not loaded
+ * Load the parts of the archive if it has not been done yet. The
+ * relationships of each part are not loaded.
+ *
+ * Note - Rule M4.1 states that there may only ever be one Core
+ * Properties Part, but Office produced files will sometimes
+ * have multiple! As Office ignores all but the first, we relax
+ * Compliance with Rule M4.1, and ignore all others silently too.
*
* @return All this package's parts.
*/
@@ -609,31 +614,36 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
if (partList == null) {
/* Variables use to validate OPC Compliance */
- // Ensure rule M4.1 -> A format consumer shall consider more than
+ // Check rule M4.1 -> A format consumer shall consider more than
// one core properties relationship for a package to be an error
+ // (We just log it and move on, as real files break this!)
boolean hasCorePropertiesPart = false;
+ boolean needCorePropertiesPart = true;
PackagePart[] parts = this.getPartsImpl();
this.partList = new PackagePartCollection();
for (PackagePart part : parts) {
if (partList.containsKey(part._partName))
throw new InvalidFormatException(
- "A part with the name '"
- + part._partName
- + "' already exist : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
+ "A part with the name '" +
+ part._partName +
+ "' already exist : Packages shall not contain equivalent " +
+ "part names and package implementers shall neither create " +
+ "nor recognize packages with equivalent part names. [M1.12]");
// Check OPC compliance rule M4.1
if (part.getContentType().equals(
ContentTypes.CORE_PROPERTIES_PART)) {
- if (!hasCorePropertiesPart)
+ if (!hasCorePropertiesPart) {
hasCorePropertiesPart = true;
- else
- throw new InvalidFormatException(
- "OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !");
+ } else {
+ logger.log(POILogger.WARN, "OPC Compliance error [M4.1]: " +
+ "there is more than one core properties relationship in the package! " +
+ "POI will use only the first, but other software may reject this file.");
+ }
}
- PartUnmarshaller partUnmarshaller = partUnmarshallers
- .get(part._contentType);
+ PartUnmarshaller partUnmarshaller = partUnmarshallers.get(part._contentType);
if (partUnmarshaller != null) {
UnmarshallContext context = new UnmarshallContext(this,
@@ -643,9 +653,14 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
.unmarshall(context, part.getInputStream());
partList.put(unmarshallPart._partName, unmarshallPart);
- // Core properties case
- if (unmarshallPart instanceof PackagePropertiesPart)
+ // Core properties case-- use first CoreProperties part we come across
+ // and ignore any subsequent ones
+ if (unmarshallPart instanceof PackagePropertiesPart &&
+ hasCorePropertiesPart &&
+ needCorePropertiesPart) {
this.packageProperties = (PackagePropertiesPart) unmarshallPart;
+ needCorePropertiesPart = false;
+ }
} catch (IOException ioe) {
logger.log(POILogger.WARN, "Unmarshall operation : IOException for "
+ part._partName);
@@ -718,19 +733,20 @@ public abstract class OPCPackage implements RelationshipSource, Closeable {
if (partList.containsKey(partName)
&& !partList.get(partName).isDeleted()) {
throw new PartAlreadyExistsException(
- "A part with the name '"
- + partName.getName()
- + "' already exists : Packages shall not contain equivalent part names and package implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
+ "A part with the name '" + partName.getName() + "'" +
+ " already exists : Packages shall not contain equivalent part names and package" +
+ " implementers shall neither create nor recognize packages with equivalent part names. [M1.12]");
}
/* Check OPC compliance */
- // Rule [M4.1]: The format designer shall specify and the format
- // producer
+ // Rule [M4.1]: The format designer shall specify and the format producer
// shall create at most one core properties relationship for a package.
// A format consumer shall consider more than one core properties
// relationship for a package to be an error. If present, the
// relationship shall target the Core Properties part.
+ // Note - POI will read files with more than one Core Properties, which
+ // Office sometimes produces, but is strict on generation
if (contentType.equals(ContentTypes.CORE_PROPERTIES_PART)) {
if (this.packageProperties != null)
throw new InvalidOperationException(
diff --git a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java
index b3a4a6320f..0cf5200092 100644
--- a/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java
+++ b/src/ooxml/testcases/org/apache/poi/openxml4j/opc/compliance/TestOPCComplianceCoreProperties.java
@@ -25,6 +25,7 @@ import java.net.URISyntaxException;
import junit.framework.AssertionFailedError;
import junit.framework.TestCase;
+import org.apache.poi.POIDataSamples;
import org.apache.poi.openxml4j.OpenXML4JTestDataSamples;
import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@@ -33,7 +34,6 @@ import org.apache.poi.openxml4j.opc.OPCPackage;
import org.apache.poi.openxml4j.opc.PackageRelationshipTypes;
import org.apache.poi.openxml4j.opc.PackagingURIHelper;
import org.apache.poi.openxml4j.opc.TargetMode;
-import org.apache.poi.POIDataSamples;
/**
* Test core properties Open Packaging Convention compliance.
@@ -42,6 +42,7 @@ import org.apache.poi.POIDataSamples;
* at most one core properties relationship for a package. A format consumer
* shall consider more than one core properties relationship for a package to be
* an error. If present, the relationship shall target the Core Properties part.
+ * (POI relaxes this on reading, as Office sometimes breaks this)
*
* M4.2: The format designer shall not specify and the format producer shall not
* create Core Properties that use the Markup Compatibility namespace as defined
@@ -82,28 +83,43 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
}
private static String extractInvalidFormatMessage(String sampleNameSuffix) {
-
InputStream is = OpenXML4JTestDataSamples.openComplianceSampleStream("OPCCompliance_CoreProperties_" + sampleNameSuffix);
OPCPackage pkg;
try {
pkg = OPCPackage.open(is);
} catch (InvalidFormatException e) {
- // expected during successful test
+ // no longer required for successful test
return e.getMessage();
} catch (IOException e) {
throw new RuntimeException(e);
}
pkg.revert();
- // Normally must thrown an InvalidFormatException exception.
throw new AssertionFailedError("expected OPC compliance exception was not thrown");
}
/**
* Test M4.1 rule.
*/
- public void testOnlyOneCorePropertiesPart() {
- String msg = extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx");
- assertEquals("OPC Compliance error [M4.1]: there is more than one core properties relationship in the package !", msg);
+ public void testOnlyOneCorePropertiesPart() throws Exception {
+ // We have relaxed this check, so we can read the file anyway
+ try {
+ extractInvalidFormatMessage("OnlyOneCorePropertiesPartFAIL.docx");
+ fail("M4.1 should be being relaxed");
+ } catch (AssertionFailedError e) {}
+
+ // We will use the first core properties, and ignore the others
+ InputStream is = OpenXML4JTestDataSamples.openSampleStream("MultipleCoreProperties.docx");
+ OPCPackage pkg = OPCPackage.open(is);
+
+ // We can see 2 by type
+ assertEquals(2, pkg.getPartsByContentType(ContentTypes.CORE_PROPERTIES_PART).size());
+ // But only the first one by relationship
+ assertEquals(1, pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).size());
+ // It should be core.xml not the older core1.xml
+ assertEquals(
+ "/docProps/core.xml",
+ pkg.getPartsByRelationshipType(PackageRelationshipTypes.CORE_PROPERTIES).get(0).getPartName().toString()
+ );
}
private static URI createURI(String text) {
@@ -131,7 +147,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
try {
pkg.addRelationship(PackagingURIHelper.createPartName(partUri), TargetMode.INTERNAL,
PackageRelationshipTypes.CORE_PROPERTIES);
- fail("expected OPC compliance exception was not thrown");
+ // no longer fail on compliance error
+ //fail("expected OPC compliance exception was not thrown");
} catch (InvalidFormatException e) {
throw new RuntimeException(e);
} catch (InvalidOperationException e) {
@@ -157,7 +174,8 @@ public final class TestOPCComplianceCoreProperties extends TestCase {
try {
pkg.createPart(PackagingURIHelper.createPartName(partUri),
ContentTypes.CORE_PROPERTIES_PART);
- fail("expected OPC compliance exception was not thrown");
+ // no longer fail on compliance error
+ //fail("expected OPC compliance exception was not thrown");
} catch (InvalidFormatException e) {
throw new RuntimeException(e);
} catch (InvalidOperationException e) {
diff --git a/test-data/openxml4j/MultipleCoreProperties.docx b/test-data/openxml4j/MultipleCoreProperties.docx
new file mode 100644
index 0000000000..4c19ae77aa
--- /dev/null
+++ b/test-data/openxml4j/MultipleCoreProperties.docx
Binary files differ