aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Yegor Kozlov <yegor@apache.org> 2010-07-18 16:12:17 +0000
committer Yegor Kozlov <yegor@apache.org> 2010-07-18 16:12:17 +0000
commit 3726ef1074728a6d461cabd3426fcb1c7f49ba0a (patch)
tree 44ad771406b4a62856a9b78ad091e28823887a76
parent 40a14f81d3c84b1dc107a2065aa29dd86f352d33 (diff)
download poi-3726ef1074728a6d461cabd3426fcb1c7f49ba0a.tar.gz
poi-3726ef1074728a6d461cabd3426fcb1c7f49ba0a.zip
do case-insensitive comparison when detecting Content_Types part, tolerate backslashes in part names. see bug 49609
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@965258 13f79535-47bb-0310-9956-ffa450edef68
-rw-r--r-- src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java 27
-rw-r--r-- src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java 6
-rw-r--r-- src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java 15
-rwxr-xr-x test-data/spreadsheet/49609.xlsx bin 0 -> 105424 bytes
4 files changed, 44 insertions, 4 deletions
diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java
index d74e0a13ef..c4169d9dbf 100644
--- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java
@@ -484,7 +484,7 @@ public final class PackagingURIHelper {
throws InvalidFormatException {
URI partNameURI;
try {
- partNameURI = new URI(partName);
+ partNameURI = new URI(resolvePartName(partName));
} catch (URISyntaxException e) {
throw new InvalidFormatException(e.getMessage());
}
@@ -646,4 +646,29 @@ public final class PackagingURIHelper {
}
return retPartName;
}
+
+ /**
+ * If part name is not a valid URI, it is resolved as follows:
+ * <ol>
+ * <li>Percent-encode each open bracket ([) and close bracket (]).</li>
+ * <li>Percent-encode each percent (%) character that is not followed by a hexadecimal notation of an octet value.</li>
+ * <li>Un-percent-encode each percent-encoded unreserved character.</li>
+ * <li>Un-percent-encode each forward slash (/) and back slash (\).</li>
+ * <li>Convert all back slashes to forward slashes.</li>
+ * <li>If present in a segment containing non-dot (".") characters, remove trailing dot (".") characters from each segment.</li>
+ * <li>Replace each occurrence of multiple consecutive forward slashes (/) with a single forward slash.</li>
+ * <li>If a single trailing forward slash (/) is present, remove that trailing forward slash.</li>
+ * <li>Remove complete segments that consist of three or more dots.</li>
+ * <li>Resolve the relative reference against the base URI of the part holding the Unicode string, as it is defined
+ * in section 5.2 of RFC 3986. The path component of the resulting absolute URI is the part name.</li>
+ * </ol>
+ *
+ * <p>TODO YK: for now this method does only step 5 (every back slash becomes a forward slash). Finish the rest.</p>
+ *
+ * @param partName the name to resolve
+ * @return the resolved part name that should be OK to construct a URI; currently the input with each '\' replaced by '/'
+ */
+ public static String resolvePartName(String partName){
+ return partName.replace('\\', '/');
+ }
}
diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
index 2e211a64d1..114c75c06e 100644
--- a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
@@ -130,7 +130,7 @@ public final class ZipPackage extends Package {
Enumeration<? extends ZipEntry> entries = this.zipArchive.getEntries();
while (entries.hasMoreElements()) {
ZipEntry entry = entries.nextElement();
- if (entry.getName().equals(
+ if (entry.getName().equalsIgnoreCase(
ContentTypeManager.CONTENT_TYPES_PART_NAME)) {
try {
this.contentTypeManager = new ZipContentTypeManager(
@@ -208,7 +208,7 @@ public final class ZipPackage extends Package {
try {
// We get an error when we parse [Content_Types].xml
// because it's not a valid URI.
- if (entry.getName().equals(
+ if (entry.getName().equalsIgnoreCase(
ContentTypeManager.CONTENT_TYPES_PART_NAME)) {
return null;
}
@@ -218,7 +218,7 @@ public final class ZipPackage extends Package {
// We assume we can continue, even in degraded mode ...
logger.log(POILogger.WARN,"Entry "
+ entry.getName()
- + " is not valid, so this part won't be add to the package.");
+ + " is not valid, so this part won't be add to the package.", e);
return null;
}
}
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
index 504eceb0ac..5a6e250d9e 100644
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
@@ -397,4 +397,19 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
} catch(IllegalStateException e) {}
}
}
+
+ /**
+ * A problem file from a non-standard source (a scientific instrument that saves its
+ * output as an .xlsx file) that has two issues:
+ * 1. The Content Type part name is lower-case: [content_types].xml
+ * 2. The file appears to use backslashes as path separators
+ *
+ * The OPC spec tolerates both of these peculiarities, and so does POI.
+ */
+ public void test49609() throws Exception {
+ XSSFWorkbook wb = XSSFTestDataSamples.openSampleWorkbook("49609.xlsx");
+ assertEquals("FAM", wb.getSheetName(0));
+ assertEquals("Cycle", wb.getSheetAt(0).getRow(0).getCell(1).getStringCellValue());
+
+ }
}
diff --git a/test-data/spreadsheet/49609.xlsx b/test-data/spreadsheet/49609.xlsx
new file mode 100755
index 0000000000..03d9d12ca2
--- /dev/null
+++ b/test-data/spreadsheet/49609.xlsx
Binary files differ