You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

URISpecification.java 4.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146
  1. /*
  2. * Licensed to the Apache Software Foundation (ASF) under one or more
  3. * contributor license agreements. See the NOTICE file distributed with
  4. * this work for additional information regarding copyright ownership.
  5. * The ASF licenses this file to You under the Apache License, Version 2.0
  6. * (the "License"); you may not use this file except in compliance with
  7. * the License. You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. */
  17. /* $Id$ */
  18. package org.apache.fop.datatypes;
  19. import java.io.UnsupportedEncodingException;
  /**
   * This class contains methods to deal with the {@code <uri-specification>} datatype
   * from XSL-FO.
   */
  public final class URISpecification {

      private static final String PUNCT = ",;:$&+=";

      private static final String RESERVED = PUNCT + "?/[]@";

      /** Unreserved characters other than ASCII letters and digits. */
      private static final String UNRESERVED_MARKS = "_-!.~'()*";

      private static final char[] HEX_DIGITS = {
          '0', '1', '2', '3', '4', '5', '6', '7',
          '8', '9', 'A', 'B', 'C', 'D', 'E', 'F'
      };

      /** Utility class, not meant to be instantiated. */
      private URISpecification() {
      }

      /**
       * Get the URL string from a wrapped URL.
       * <p>
       * Both the {@code url(...)} wrapped form and a bare URI are accepted. For the wrapped
       * form, the wrapper is removed, surrounding whitespace is trimmed and one pair of
       * matching single or double quotes around the content is removed. Any other input is
       * returned trimmed but otherwise unchanged.
       *
       * @param href the input wrapped URL (must not be {@code null})
       * @return the raw URL
       */
      public static String getURL(String href) {
          /*
           * According to section 5.11 a <uri-specification> is:
           *   "url(" + URI + ")"
           * according to 7.28.7 a <uri-specification> is:
           *   URI
           * So handle both.
           */
          href = href.trim();
          if (href.startsWith("url(") && href.indexOf(')') != -1) {
              href = href.substring(4, href.lastIndexOf(')')).trim();
              // Only strip quotes when there really is a pair: a lone quote character
              // both "starts with" and "ends with" the quote, but has nothing to unwrap.
              if (href.length() >= 2 && (isWrappedIn(href, '\'') || isWrappedIn(href, '"'))) {
                  href = href.substring(1, href.length() - 1);
              }
          }
          // Input without the url(...) wrapper is passed through as a bare URI.
          return href;
      }

      /** Tells whether a non-empty string starts and ends with the given quote character. */
      private static boolean isWrappedIn(String s, char quote) {
          return s.charAt(0) == quote && s.charAt(s.length() - 1) == quote;
      }

      private static boolean isDigit(char ch) {
          return ch >= '0' && ch <= '9';
      }

      private static boolean isAlpha(char ch) {
          return (ch >= 'A' && ch <= 'Z') || (ch >= 'a' && ch <= 'z');
      }

      private static boolean isHexDigit(char ch) {
          return isDigit(ch) || (ch >= 'A' && ch <= 'F') || (ch >= 'a' && ch <= 'f');
      }

      private static boolean isReserved(char ch) {
          // '#' is not a reserved character but is used for the fragment
          return RESERVED.indexOf(ch) >= 0 || ch == '#';
      }

      private static boolean isUnreserved(char ch) {
          return isDigit(ch) || isAlpha(ch) || UNRESERVED_MARKS.indexOf(ch) >= 0;
      }

      /** Appends the "%XX" escape (upper-case hex) for a single byte. */
      private static void appendEscape(StringBuilder sb, byte b) {
          sb.append('%').append(HEX_DIGITS[(b >> 4) & 0x0f]).append(HEX_DIGITS[b & 0x0f]);
      }

      /**
       * Escapes any illegal URI character in a given URI, for example, it escapes a space
       * to "%20". Existing escape sequences ('%' followed by two hex digits) are kept as is;
       * any other '%' is escaped to "%25". Characters that are neither reserved nor
       * unreserved are replaced by the escaped bytes of their UTF-8 encoding.
       * <p>
       * Note: This method does not "parse" the URI and therefore does not treat the
       * individual components (user-info, path, query etc.) individually.
       *
       * @param uri the URI to inspect; a {@code url(...)} wrapper is removed first
       *            (see {@link #getURL(String)})
       * @return the escaped URI
       */
      public static String escapeURI(String uri) {
          uri = getURL(uri);
          final int length = uri.length();
          StringBuilder sb = new StringBuilder(length);
          int i = 0;
          while (i < length) {
              char ch = uri.charAt(i);
              // An existing escape needs two more characters after the '%', i.e. the last
              // index touched is i + 2; this also covers an escape at the very end.
              boolean existingEscape = ch == '%'
                      && i + 2 < length
                      && isHexDigit(uri.charAt(i + 1))
                      && isHexDigit(uri.charAt(i + 2));
              if (existingEscape || isReserved(ch) || isUnreserved(ch)) {
                  //Note: this may not be accurate for some very special cases.
                  sb.append(ch);
                  i++;
                  continue;
              }
              // Encode whole code points so that surrogate pairs produce the proper
              // multi-byte UTF-8 sequence instead of two replacement bytes.
              int codePoint = uri.codePointAt(i);
              try {
                  byte[] utf8 = new String(Character.toChars(codePoint)).getBytes("UTF-8");
                  for (byte b : utf8) {
                      appendEscape(sb, b);
                  }
              } catch (UnsupportedEncodingException e) {
                  throw new Error("Incompatible JVM. UTF-8 not supported.", e);
              }
              i += Character.charCount(codePoint);
          }
          return sb.toString();
      }
  }