You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

QuotedString.java 10KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378
  1. /*
  2. * Copyright (C) 2008, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.util;
import java.util.Arrays;
import java.util.regex.Pattern;

import org.eclipse.jgit.lib.Constants;
  46. /** Utility functions related to quoted string handling. */
  47. public abstract class QuotedString {
  48. /** Quoting style that obeys the rules Git applies to file names */
  49. public static final GitPathStyle GIT_PATH = new GitPathStyle();
  50. /**
  51. * Quoting style used by the Bourne shell.
  52. * <p>
  53. * Quotes are unconditionally inserted during {@link #quote(String)}. This
  54. * protects shell meta-characters like <code>$</code> or <code>~</code> from
  55. * being recognized as special.
  56. */
  57. public static final BourneStyle BOURNE = new BourneStyle();
  58. /** Bourne style, but permits <code>~user</code> at the start of the string. */
  59. public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
  60. /**
  61. * Quote an input string by the quoting rules.
  62. * <p>
  63. * If the input string does not require any quoting, the same String
  64. * reference is returned to the caller.
  65. * <p>
  66. * Otherwise a quoted string is returned, including the opening and closing
  67. * quotation marks at the start and end of the string. If the style does not
  68. * permit raw Unicode characters then the string will first be encoded in
  69. * UTF-8, with unprintable sequences possibly escaped by the rules.
  70. *
  71. * @param in
  72. * any non-null Unicode string.
  73. * @return a quoted string. See above for details.
  74. */
  75. public abstract String quote(String in);
  76. /**
  77. * Clean a previously quoted input, decoding the result via UTF-8.
  78. * <p>
  79. * This method must match quote such that:
  80. *
  81. * <pre>
  82. * a.equals(dequote(quote(a)));
  83. * </pre>
  84. *
  85. * is true for any <code>a</code>.
  86. *
  87. * @param in
  88. * a Unicode string to remove quoting from.
  89. * @return the cleaned string.
  90. * @see #dequote(byte[], int, int)
  91. */
  92. public String dequote(final String in) {
  93. final byte[] b = Constants.encode(in);
  94. return dequote(b, 0, b.length);
  95. }
  96. /**
  97. * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
  98. * <p>
  99. * This method must match quote such that:
  100. *
  101. * <pre>
  102. * a.equals(dequote(Constants.encode(quote(a))));
  103. * </pre>
  104. *
  105. * is true for any <code>a</code>.
  106. * <p>
  107. * This method removes any opening/closing quotation marks added by
  108. * {@link #quote(String)}.
  109. *
  110. * @param in
  111. * the input buffer to parse.
  112. * @param offset
  113. * first position within <code>in</code> to scan.
  114. * @param end
  115. * one position past in <code>in</code> to scan.
  116. * @return the cleaned string.
  117. */
  118. public abstract String dequote(byte[] in, int offset, int end);
  119. /**
  120. * Quoting style used by the Bourne shell.
  121. * <p>
  122. * Quotes are unconditionally inserted during {@link #quote(String)}. This
  123. * protects shell meta-characters like <code>$</code> or <code>~</code> from
  124. * being recognized as special.
  125. */
  126. public static class BourneStyle extends QuotedString {
  127. @Override
  128. public String quote(final String in) {
  129. final StringBuilder r = new StringBuilder();
  130. r.append('\'');
  131. int start = 0, i = 0;
  132. for (; i < in.length(); i++) {
  133. switch (in.charAt(i)) {
  134. case '\'':
  135. case '!':
  136. r.append(in, start, i);
  137. r.append('\'');
  138. r.append('\\');
  139. r.append(in.charAt(i));
  140. r.append('\'');
  141. start = i + 1;
  142. break;
  143. }
  144. }
  145. r.append(in, start, i);
  146. r.append('\'');
  147. return r.toString();
  148. }
  149. @Override
  150. public String dequote(final byte[] in, int ip, final int ie) {
  151. boolean inquote = false;
  152. final byte[] r = new byte[ie - ip];
  153. int rPtr = 0;
  154. while (ip < ie) {
  155. final byte b = in[ip++];
  156. switch (b) {
  157. case '\'':
  158. inquote = !inquote;
  159. continue;
  160. case '\\':
  161. if (inquote || ip == ie)
  162. r[rPtr++] = b; // literal within a quote
  163. else
  164. r[rPtr++] = in[ip++];
  165. continue;
  166. default:
  167. r[rPtr++] = b;
  168. continue;
  169. }
  170. }
  171. return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
  172. }
  173. }
  174. /** Bourne style, but permits <code>~user</code> at the start of the string. */
  175. public static class BourneUserPathStyle extends BourneStyle {
  176. @Override
  177. public String quote(final String in) {
  178. if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
  179. // If the string is just "~user" we can assume they
  180. // mean "~user/".
  181. //
  182. return in + "/"; //$NON-NLS-1$
  183. }
  184. if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
  185. // If the string is of "~/path" or "~user/path"
  186. // we must not escape ~/ or ~user/ from the shell.
  187. //
  188. final int i = in.indexOf('/') + 1;
  189. if (i == in.length())
  190. return in;
  191. return in.substring(0, i) + super.quote(in.substring(i));
  192. }
  193. return super.quote(in);
  194. }
  195. }
  196. /** Quoting style that obeys the rules Git applies to file names */
  197. public static final class GitPathStyle extends QuotedString {
  198. private static final byte[] quote;
  199. static {
  200. quote = new byte[128];
  201. Arrays.fill(quote, (byte) -1);
  202. for (int i = '0'; i <= '9'; i++)
  203. quote[i] = 0;
  204. for (int i = 'a'; i <= 'z'; i++)
  205. quote[i] = 0;
  206. for (int i = 'A'; i <= 'Z'; i++)
  207. quote[i] = 0;
  208. quote[' '] = 0;
  209. quote['$'] = 0;
  210. quote['%'] = 0;
  211. quote['&'] = 0;
  212. quote['*'] = 0;
  213. quote['+'] = 0;
  214. quote[','] = 0;
  215. quote['-'] = 0;
  216. quote['.'] = 0;
  217. quote['/'] = 0;
  218. quote[':'] = 0;
  219. quote[';'] = 0;
  220. quote['='] = 0;
  221. quote['?'] = 0;
  222. quote['@'] = 0;
  223. quote['_'] = 0;
  224. quote['^'] = 0;
  225. quote['|'] = 0;
  226. quote['~'] = 0;
  227. quote['\u0007'] = 'a';
  228. quote['\b'] = 'b';
  229. quote['\f'] = 'f';
  230. quote['\n'] = 'n';
  231. quote['\r'] = 'r';
  232. quote['\t'] = 't';
  233. quote['\u000B'] = 'v';
  234. quote['\\'] = '\\';
  235. quote['"'] = '"';
  236. }
  237. @Override
  238. public String quote(final String instr) {
  239. if (instr.length() == 0)
  240. return "\"\""; //$NON-NLS-1$
  241. boolean reuse = true;
  242. final byte[] in = Constants.encode(instr);
  243. final StringBuilder r = new StringBuilder(2 + in.length);
  244. r.append('"');
  245. for (int i = 0; i < in.length; i++) {
  246. final int c = in[i] & 0xff;
  247. if (c < quote.length) {
  248. final byte style = quote[c];
  249. if (style == 0) {
  250. r.append((char) c);
  251. continue;
  252. }
  253. if (style > 0) {
  254. reuse = false;
  255. r.append('\\');
  256. r.append((char) style);
  257. continue;
  258. }
  259. }
  260. reuse = false;
  261. r.append('\\');
  262. r.append((char) (((c >> 6) & 03) + '0'));
  263. r.append((char) (((c >> 3) & 07) + '0'));
  264. r.append((char) (((c >> 0) & 07) + '0'));
  265. }
  266. if (reuse)
  267. return instr;
  268. r.append('"');
  269. return r.toString();
  270. }
  271. @Override
  272. public String dequote(final byte[] in, final int inPtr, final int inEnd) {
  273. if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
  274. return dq(in, inPtr + 1, inEnd - 1);
  275. return RawParseUtils.decode(Constants.CHARSET, in, inPtr, inEnd);
  276. }
  277. private static String dq(final byte[] in, int inPtr, final int inEnd) {
  278. final byte[] r = new byte[inEnd - inPtr];
  279. int rPtr = 0;
  280. while (inPtr < inEnd) {
  281. final byte b = in[inPtr++];
  282. if (b != '\\') {
  283. r[rPtr++] = b;
  284. continue;
  285. }
  286. if (inPtr == inEnd) {
  287. // Lone trailing backslash. Treat it as a literal.
  288. //
  289. r[rPtr++] = '\\';
  290. break;
  291. }
  292. switch (in[inPtr++]) {
  293. case 'a':
  294. r[rPtr++] = 0x07 /* \a = BEL */;
  295. continue;
  296. case 'b':
  297. r[rPtr++] = '\b';
  298. continue;
  299. case 'f':
  300. r[rPtr++] = '\f';
  301. continue;
  302. case 'n':
  303. r[rPtr++] = '\n';
  304. continue;
  305. case 'r':
  306. r[rPtr++] = '\r';
  307. continue;
  308. case 't':
  309. r[rPtr++] = '\t';
  310. continue;
  311. case 'v':
  312. r[rPtr++] = 0x0B/* \v = VT */;
  313. continue;
  314. case '\\':
  315. case '"':
  316. r[rPtr++] = in[inPtr - 1];
  317. continue;
  318. case '0':
  319. case '1':
  320. case '2':
  321. case '3': {
  322. int cp = in[inPtr - 1] - '0';
  323. for (int n = 1; n < 3 && inPtr < inEnd; n++) {
  324. final byte c = in[inPtr];
  325. if ('0' <= c && c <= '7') {
  326. cp <<= 3;
  327. cp |= c - '0';
  328. inPtr++;
  329. } else {
  330. break;
  331. }
  332. }
  333. r[rPtr++] = (byte) cp;
  334. continue;
  335. }
  336. default:
  337. // Any other code is taken literally.
  338. //
  339. r[rPtr++] = '\\';
  340. r[rPtr++] = in[inPtr - 1];
  341. continue;
  342. }
  343. }
  344. return RawParseUtils.decode(Constants.CHARSET, r, 0, rPtr);
  345. }
  346. private GitPathStyle() {
  347. // Singleton
  348. }
  349. }
  350. }