]> source.dussan.org Git - poi.git/commitdiff
allow white spaces and unicode in OPC relationship targets, see Bugzilla 50154
author Yegor Kozlov <yegor@apache.org>
Wed, 17 Nov 2010 20:40:35 +0000 (20:40 +0000)
committer Yegor Kozlov <yegor@apache.org>
Wed, 17 Nov 2010 20:40:35 +0000 (20:40 +0000)
git-svn-id: https://svn.apache.org/repos/asf/poi/trunk@1036215 13f79535-47bb-0310-9956-ffa450edef68

src/documentation/content/xdocs/status.xml
src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipCollection.java
src/ooxml/java/org/apache/poi/openxml4j/opc/PackageRelationshipTypes.java
src/ooxml/java/org/apache/poi/openxml4j/opc/PackagingURIHelper.java
src/ooxml/java/org/apache/poi/openxml4j/opc/internal/marshallers/ZipPartMarshaller.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestPackagingURIHelper.java
src/ooxml/testcases/org/apache/poi/openxml4j/opc/TestRelationships.java
test-data/openxml4j/50154.xlsx [new file with mode: 0644]

index 74eeae2ee79cad66c08d7c65ade24ca6f9de1bce..be82f7f1429db995b6887c03bd72c9ee42875628 100644 (file)
@@ -34,6 +34,7 @@
 
     <changes>
         <release version="3.8-beta1" date="2010-??-??">
+           <action dev="poi-developers" type="fix">50154 - Allow white spaces and unicode in OPC relationship targets </action>
            <action dev="poi-developers" type="fix">50113 - Remove cell from Calculation Chain after setting cell type to blank </action>
            <action dev="poi-developers" type="fix">49966 - Ensure that XSSFRow#removeCell cleares calculation chain entries </action>
            <action dev="poi-developers" type="fix">50096 - Fixed evaluation of cell references with column index greater than 255 </action>
index 990a3bf4cb4f98c992132f2233f8d1bf2bf9d83a..a5a40990c28b4a004d66a82daa812f8741699799 100644 (file)
@@ -351,16 +351,8 @@ public final class PackageRelationshipCollection implements
                                                        PackageRelationship.TARGET_ATTRIBUTE_NAME)
                                                        .getValue();
 
-                                       if (value.indexOf("\\") != -1) {
-                                               logger
-                                                               .log(POILogger.INFO, "target contains \\ therefore not a valid URI"
-                                                                               + value + " replaced by /");
-                                               value = value.replaceAll("\\\\", "/");
-                                               // word can save external relationship with a \ instead
-                                               // of /
-                                       }
-
-                                       target = new URI(value);
+                    target = PackagingURIHelper.toURI(value);
+
                                } catch (URISyntaxException e) {
                                        logger.log(POILogger.ERROR, "Cannot convert " + value
                                                        + " in a valid relationship URI-> ignored", e);
index 337dacb4e10a0f59aa730d29589cfa3a702e7244..01ed54c965d016551e10aeb6c10a109be5a55d7b 100644 (file)
@@ -75,6 +75,11 @@ public interface PackageRelationshipTypes {
         */
        String IMAGE_PART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/image";
 
+    /**
+     * Hyperlink type.
+     */
+    String HYPERLINK_PART = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink";
+
        /**
         * Style type.
         */
index c4169d9dbf701eca6838f9a35dc3936b4e1bb54c..4d48d2bd7219fe52497a4bab6c63e9e5a2cd4581 100644 (file)
@@ -19,6 +19,8 @@ package org.apache.poi.openxml4j.opc;
 
 import java.net.URI;
 import java.net.URISyntaxException;
+import java.nio.ByteBuffer;
+import java.io.UnsupportedEncodingException;
 
 import org.apache.poi.openxml4j.exceptions.InvalidFormatException;
 import org.apache.poi.openxml4j.exceptions.InvalidOperationException;
@@ -287,7 +289,7 @@ public final class PackagingURIHelper {
                //  form must actually be an absolute URI
                if(sourceURI.toString().equals("/")) {
             String path = targetURI.getPath();
-            if(msCompatible && path.charAt(0) == '/') {
+            if(msCompatible && path.length() > 0 && path.charAt(0) == '/') {
                 try {
                     targetURI = new URI(path.substring(1));
                 } catch (Exception e) {
@@ -362,6 +364,12 @@ public final class PackagingURIHelper {
                        }
                }
 
+        // if the target had a fragment then append it to the result
+        String fragment = targetURI.getRawFragment();
+        if (fragment != null) {
+            retVal.append("#").append(fragment);
+        }
+
                try {
                        return new URI(retVal.toString());
                } catch (Exception e) {
@@ -412,9 +420,9 @@ public final class PackagingURIHelper {
         * Get URI from a string path.
         */
        public static URI getURIFromPath(String path) {
-               URI retUri = null;
+               URI retUri;
                try {
-                       retUri = new URI(path);
+                       retUri = toURI(path);
                } catch (URISyntaxException e) {
                        throw new IllegalArgumentException("path");
                }
@@ -484,7 +492,7 @@ public final class PackagingURIHelper {
                        throws InvalidFormatException {
                URI partNameURI;
                try {
-                       partNameURI = new URI(resolvePartName(partName));
+                       partNameURI = toURI(partName);
                } catch (URISyntaxException e) {
                        throw new InvalidFormatException(e.getMessage());
                }
@@ -648,7 +656,9 @@ public final class PackagingURIHelper {
        }
 
     /**
-     *  If  part name is not a valid URI, it is resolved as follows:
+     * Convert a string to {@link java.net.URI}
+     *
+     * If  part name is not a valid URI, it is resolved as follows:
      * <p>
      * 1. Percent-encode each open bracket ([) and close bracket (]).</li>
      * 2. Percent-encode each percent (%) character that is not followed by a hexadecimal notation of an octet value.</li>
@@ -663,12 +673,72 @@ public final class PackagingURIHelper {
      * in ?5.2 of RFC 3986. The path component of the resulting absolute URI is the part name.
      *</p>
      *
-     * @param partName the name to resolve
+     * @param   value   the string to be parsed into a URI
      * @return  the resolved part name that should be OK to construct a URI
      *
      * TODO YK: for now this method does only (5). Finish the rest.
      */
-    public static String resolvePartName(String partName){
-        return partName.replace('\\', '/');
+    public static URI toURI(String value) throws URISyntaxException  {
+        //5. Convert all back slashes to forward slashes
+        if (value.indexOf("\\") != -1) {
+             value = value.replace('\\', '/');
+        }
+
+        // URI fragments (those starting with '#') are not encoded
+        // and may contain white spaces and raw unicode characters
+        int fragmentIdx = value.indexOf('#');
+        if(fragmentIdx != -1){
+            String path = value.substring(0, fragmentIdx);
+            String fragment = value.substring(fragmentIdx + 1);
+
+            value = path + "#" + encode(fragment);
+        }
+
+        return new URI(value);
+    }
+
+    /**
+     * Percent-encode the space character and every byte above 0x80 of the string's UTF-8 form.
+     * <p>
+     *   Examples:
+     *   'Apache POI' --> 'Apache%20POI'
+     *   'Apache\u0410POI' --> 'Apache%D0%90POI'
+     *
+     * @param s the string to encode
+     * @return  the encoded string
+     */
+    public static String encode(String s) {
+        int n = s.length();
+        if (n == 0) return s;
+
+        ByteBuffer bb;
+        try {
+            bb = ByteBuffer.wrap(s.getBytes("UTF-8"));
+        } catch (UnsupportedEncodingException e){
+            // should not happen
+            throw new RuntimeException(e);
+        }
+        StringBuilder sb = new StringBuilder();
+        while (bb.hasRemaining()) {
+            int b = bb.get() & 0xff;
+            if (isUnsafe(b)) {
+                sb.append('%');
+                sb.append(hexDigits[(b >> 4) & 0x0F]);
+                sb.append(hexDigits[(b >> 0) & 0x0F]);
+            } else {
+                sb.append((char)b);
+            }
+        }
+        return sb.toString();
     }
+
+    private final static char[] hexDigits = {
+        '0', '1', '2', '3', '4', '5', '6', '7',
+        '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
+    };
+
+    private static boolean isUnsafe(int ch) {
+        return ch > 0x80 || " ".indexOf(ch) >= 0;
+    }
+
 }
index c8bbb96af446d7fcd24d98cc82798cfa6fc1fdaf..4a9fec855e4de0bacdb8ea2918deaa3c847bf5e1 100644 (file)
@@ -163,10 +163,7 @@ public final class ZipPartMarshaller implements PartMarshaller {
                        } else {
                 URI targetURI = rel.getTargetURI();
                 targetValue = PackagingURIHelper.relativizeURI(
-                                               sourcePartURI, targetURI, true).getPath();
-                if (targetURI.getRawFragment() != null) {
-                    targetValue += "#" + targetURI.getRawFragment();
-                }
+                                               sourcePartURI, targetURI, true).toString();
                        }
                        relElem.addAttribute(PackageRelationship.TARGET_ATTRIBUTE_NAME,
                                        targetValue);
index 7064fc48d8cec8e7d33d7babffd524070ab9bc13..9e2297d53049290156f1a30fbc06a941c2fce1a0 100644 (file)
@@ -17,6 +17,7 @@
 package org.apache.poi.openxml4j.opc;
 
 import java.net.URI;
+import java.net.URISyntaxException;
 
 import junit.framework.TestCase;
 
@@ -35,7 +36,9 @@ public class TestPackagingURIHelper extends TestCase {
        public void testRelativizeURI() throws Exception {
                URI uri1 = new URI("/word/document.xml");
                URI uri2 = new URI("/word/media/image1.gif");
-               
+        URI uri3 = new URI("/word/media/image1.gif#Sheet1!A1");
+        URI uri4 = new URI("#'My%20Sheet1'!A1");
+
                // Document to image is down a directory
                URI retURI1to2 = PackagingURIHelper.relativizeURI(uri1, uri2);
                assertEquals("media/image1.gif", retURI1to2.getPath());
@@ -60,6 +63,12 @@ public class TestPackagingURIHelper extends TestCase {
         //URI compatible with MS Office and OpenOffice: leading slash is removed
         uriRes = PackagingURIHelper.relativizeURI(root, uri1, true);
         assertEquals("word/document.xml", uriRes.toString());
+
+        //preserve URI fragments
+        uriRes = PackagingURIHelper.relativizeURI(uri1, uri3, true);
+        assertEquals("media/image1.gif#Sheet1!A1", uriRes.toString());
+        uriRes = PackagingURIHelper.relativizeURI(root, uri4, true);
+        assertEquals("#'My%20Sheet1'!A1", uriRes.toString());
     }
 
        /**
@@ -104,4 +113,22 @@ public class TestPackagingURIHelper extends TestCase {
                                .equals(relativeName));
                pkg.revert();
        }
+
+    public void testCreateURIFromString() throws Exception {
+        String[] href = {
+                "..\\\\\\cygwin\\home\\yegor\\.vim\\filetype.vim",
+                "..\\Program%20Files\\AGEIA%20Technologies\\v2.3.3\\NxCooking.dll",
+                "file:///D:\\seva\\1981\\r810102ns.mp3",
+                "..\\cygwin\\home\\yegor\\dinom\\%5baccess%5d.2010-10-26.log",
+                "#'Instructions (Text)'!B21"
+        };
+        for(String s : href){
+            try {
+                URI uri = PackagingURIHelper.toURI(s);
+            } catch (URISyntaxException e){
+                fail("Failed to create URI from " + s);
+            }
+        }
+    }
+
 }
index 2b3b28138fce5df31646a1ac6fc971799dfa6dbd..7711b1d0c8b4b92a87ba610fe0ae54db408a9f26 100644 (file)
@@ -18,6 +18,7 @@
 package org.apache.poi.openxml4j.opc;
 
 import java.io.*;
+import java.net.URI;
 
 import junit.framework.TestCase;
 
@@ -254,4 +255,62 @@ public class TestRelationships extends TestCase {
                        pkg.getRelationshipsByType("http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties").getRelationship(0).getTargetURI().toString());
     }
 
+
+    public void testTargetWithSpecialChars() throws Exception{
+
+        OPCPackage pkg;
+
+        String filepath = OpenXML4JTestDataSamples.getSampleFileName("50154.xlsx");
+        pkg = OPCPackage.open(filepath);
+        assert_50154(pkg);
+
+        ByteArrayOutputStream baos = new ByteArrayOutputStream();
+        pkg.save(baos);
+        ByteArrayInputStream bais = new ByteArrayInputStream(baos.toByteArray());
+        pkg = OPCPackage.open(bais);
+
+        assert_50154(pkg);
+    }
+
+    public void assert_50154(OPCPackage pkg) throws Exception {
+        URI drawingURI = new URI("/xl/drawings/drawing1.xml");
+        PackagePart drawingPart = pkg.getPart(PackagingURIHelper.createPartName(drawingURI));
+        PackageRelationshipCollection drawingRels = drawingPart.getRelationships();
+
+        assertEquals(6, drawingRels.size());
+
+        // expected one image
+        assertEquals(1, drawingPart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/image").size());
+        // and five hyperlinks
+        assertEquals(5, drawingPart.getRelationshipsByType("http://schemas.openxmlformats.org/officeDocument/2006/relationships/hyperlink").size());
+
+        PackageRelationship rId1 = drawingPart.getRelationship("rId1");
+        URI parent = drawingPart.getPartName().getURI();
+        URI rel1 = parent.relativize(rId1.getTargetURI());
+        URI rel11 = PackagingURIHelper.relativizeURI(drawingPart.getPartName().getURI(), rId1.getTargetURI());
+        assertEquals("'Another Sheet'!A1", rel1.getFragment());
+
+        PackageRelationship rId2 = drawingPart.getRelationship("rId2");
+        URI rel2 = PackagingURIHelper.relativizeURI(drawingPart.getPartName().getURI(), rId2.getTargetURI());
+        assertEquals("../media/image1.png", rel2.getPath());
+
+        PackageRelationship rId3 = drawingPart.getRelationship("rId3");
+        URI rel3 = parent.relativize(rId3.getTargetURI());
+        assertEquals("ThirdSheet!A1", rel3.getFragment());
+
+        PackageRelationship rId4 = drawingPart.getRelationship("rId4");
+        URI rel4 = parent.relativize(rId4.getTargetURI());
+        assertEquals("'\u0410\u043F\u0430\u0447\u0435 \u041F\u041E\u0418'!A1", rel4.getFragment());
+
+        PackageRelationship rId5 = drawingPart.getRelationship("rId5");
+        URI rel5 = parent.relativize(rId5.getTargetURI());
+        // backslashes have been replaced with forward slashes
+        assertEquals("file:///D:/chan-chan.mp3", rel5.toString());
+
+        PackageRelationship rId6 = drawingPart.getRelationship("rId6");
+        URI rel6 = parent.relativize(rId6.getTargetURI());
+        assertEquals("../../../../../../../cygwin/home/yegor/dinom/&&&[access].2010-10-26.log", rel6.getPath());
+        assertEquals("'\u0410\u043F\u0430\u0447\u0435 \u041F\u041E\u0418'!A5", rel6.getFragment());
+    }
+
 }
diff --git a/test-data/openxml4j/50154.xlsx b/test-data/openxml4j/50154.xlsx
new file mode 100644 (file)
index 0000000..7637fb3
Binary files /dev/null and b/test-data/openxml4j/50154.xlsx differ