do case-insensitive comparison when detecting Content_Types part, tolerate backslashe...

author Yegor Kozlov <yegor@apache.org>

Sun, 18 Jul 2010 16:12:17 +0000 (16:12 +0000)

committer Yegor Kozlov <yegor@apache.org>

Sun, 18 Jul 2010 16:12:17 +0000 (16:12 +0000)
author Yegor Kozlov <yegor@apache.org>
Sun, 18 Jul 2010 16:12:17 +0000 (16:12 +0000)
committer Yegor Kozlov <yegor@apache.org>
Sun, 18 Jul 2010 16:12:17 +0000 (16:12 +0000)
diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java

index d74e0a13eff3f326d6c597a1f04adb4aec9302f8..c4169d9dbf701eca6838f9a35dc3936b4e1bb54c 100644 (file)
--- a/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java
@@ -484,7 +484,7 @@ public final class PackagingURIHelper {
                         throws InvalidFormatException {
                 URI partNameURI;
                 try {
-                       partNameURI = new URI(partName);
+                       partNameURI = new URI(resolvePartName(partName));
                 } catch (URISyntaxException e) {
                         throw new InvalidFormatException(e.getMessage());
                 }
@@ -646,4 +646,29 @@ public final class PackagingURIHelper {
                 }
                 return retPartName;
         }
+
+    /**
+     * If the part name is not a valid URI, it is resolved as follows:
+     * <p>
+     * 1. Percent-encode each open bracket ([) and close bracket (]).
+     * 2. Percent-encode each percent (%) character that is not followed by a hexadecimal notation of an octet value.
+     * 3. Un-percent-encode each percent-encoded unreserved character.
+     * 4. Un-percent-encode each forward slash (/) and back slash (\).
+     * 5. Convert all back slashes to forward slashes.
+     * 6. If present in a segment containing non-dot (".") characters, remove trailing dot (".") characters from each segment.
+     * 7. Replace each occurrence of multiple consecutive forward slashes (/) with a single forward slash.
+     * 8. If a single trailing forward slash (/) is present, remove that trailing forward slash.
+     * 9. Remove complete segments that consist of three or more dots.
+     * 10. Resolve the relative reference against the base URI of the part holding the Unicode string, as it is defined
+     * in section 5.2 of RFC 3986. The path component of the resulting absolute URI is the part name.
+     *</p>
+     *
+     * @param partName the name to resolve
+     * @return  the resolved part name that should be OK to construct a URI
+     *
+     * TODO YK: for now this method does only (5). Finish the rest.
+     */
+    public static String resolvePartName(String partName){
+        return partName.replace('\\', '/');
+    }
  }
diff --git a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java

index 2e211a64d13c493f2b343e91662f7860454fd41b..114c75c06eb0f64bd54b6f6310dbdb8b124c3182 100644 (file)
--- a/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
+++ b/src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java
@@ -130,7 +130,7 @@ public final class ZipPackage extends Package {
                 Enumeration<? extends ZipEntry> entries = this.zipArchive.getEntries();
                 while (entries.hasMoreElements()) {
                         ZipEntry entry = entries.nextElement();
-                       if (entry.getName().equals(
+                       if (entry.getName().equalsIgnoreCase(
                                         ContentTypeManager.CONTENT_TYPES_PART_NAME)) {
                                 try {
                                         this.contentTypeManager = new ZipContentTypeManager(
@@ -208,7 +208,7 @@ public final class ZipPackage extends Package {
                 try {
                         // We get an error when we parse [Content_Types].xml
                         // because it's not a valid URI.
-                       if (entry.getName().equals(
+                       if (entry.getName().equalsIgnoreCase(
                                         ContentTypeManager.CONTENT_TYPES_PART_NAME)) {
                                 return null;
                         }
@@ -218,7 +218,7 @@ public final class ZipPackage extends Package {
                         // We assume we can continue, even in degraded mode ...
                         logger.log(POILogger.WARN,"Entry "
                                                         + entry.getName()
-                                                       + " is not valid, so this part won't be add to the package.");
+                                                       + " is not valid, so this part won't be add to the package.", e);
                         return null;
                 }
         }
diff --git a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java

index 504eceb0acb7d9cf2f427b43f85605bc5d52562f..5a6e250d9eb4398650a2205d46849e14cb1a61e3 100644 (file)
--- a/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
+++ b/src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java
@@ -397,4 +397,19 @@ public final class TestXSSFBugs extends BaseTestBugzillaIssues {
            } catch(IllegalStateException e) {}
         }
      }
+
+    /**
+     * A problem file from a non-standard source (a scientific instrument that saves its
+     * output as an .xlsx file) that has two issues:
+     * 1. The Content Type part name is lower-case:  [content_types].xml
+     * 2. The file appears to use backslashes as path separators
+     *
+     * The OPC spec tolerates both of these peculiarities, so does POI
+     */
+    public void test49609() throws Exception {
+        XSSFWorkbook wb =  XSSFTestDataSamples.openSampleWorkbook("49609.xlsx");
+        assertEquals("FAM", wb.getSheetName(0));
+        assertEquals("Cycle", wb.getSheetAt(0).getRow(0).getCell(1).getStringCellValue());
+
+    }
  }
diff --git a/test-data/spreadsheet/49609.xlsx b/test-data/spreadsheet/49609.xlsx

new file mode 100755 (executable)

index 0000000..03d9d12

Binary files /dev/null and b/test-data/spreadsheet/49609.xlsx differ
author	Yegor Kozlov <yegor@apache.org>
	Sun, 18 Jul 2010 16:12:17 +0000 (16:12 +0000)
committer	Yegor Kozlov <yegor@apache.org>
	Sun, 18 Jul 2010 16:12:17 +0000 (16:12 +0000)
src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java		patch \| blob \| history
src/ooxml/java/org/apache/poi/openxml4j/opc/ZipPackage.java		patch \| blob \| history
src/ooxml/testcases/org/apache/poi/xssf/usermodel/TestXSSFBugs.java		patch \| blob \| history
test-data/spreadsheet/49609.xlsx	[new file with mode: 0755]	patch \| blob