You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

QuotedString.java 10.0KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382
  1. /*
  2. * Copyright (C) 2008, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.util;
import static java.nio.charset.StandardCharsets.UTF_8;

import java.util.Arrays;
import java.util.regex.Pattern;

import org.eclipse.jgit.lib.Constants;
  47. /**
  48. * Utility functions related to quoted string handling.
  49. */
  50. public abstract class QuotedString {
  51. /** Quoting style that obeys the rules Git applies to file names */
  52. public static final GitPathStyle GIT_PATH = new GitPathStyle();
  53. /**
  54. * Quoting style used by the Bourne shell.
  55. * <p>
  56. * Quotes are unconditionally inserted during {@link #quote(String)}. This
  57. * protects shell meta-characters like <code>$</code> or <code>~</code> from
  58. * being recognized as special.
  59. */
  60. public static final BourneStyle BOURNE = new BourneStyle();
  61. /** Bourne style, but permits <code>~user</code> at the start of the string. */
  62. public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
  63. /**
  64. * Quote an input string by the quoting rules.
  65. * <p>
  66. * If the input string does not require any quoting, the same String
  67. * reference is returned to the caller.
  68. * <p>
  69. * Otherwise a quoted string is returned, including the opening and closing
  70. * quotation marks at the start and end of the string. If the style does not
  71. * permit raw Unicode characters then the string will first be encoded in
  72. * UTF-8, with unprintable sequences possibly escaped by the rules.
  73. *
  74. * @param in
  75. * any non-null Unicode string.
  76. * @return a quoted string. See above for details.
  77. */
  78. public abstract String quote(String in);
  79. /**
  80. * Clean a previously quoted input, decoding the result via UTF-8.
  81. * <p>
  82. * This method must match quote such that:
  83. *
  84. * <pre>
  85. * a.equals(dequote(quote(a)));
  86. * </pre>
  87. *
  88. * is true for any <code>a</code>.
  89. *
  90. * @param in
  91. * a Unicode string to remove quoting from.
  92. * @return the cleaned string.
  93. * @see #dequote(byte[], int, int)
  94. */
  95. public String dequote(String in) {
  96. final byte[] b = Constants.encode(in);
  97. return dequote(b, 0, b.length);
  98. }
  99. /**
  100. * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
  101. * <p>
  102. * This method must match quote such that:
  103. *
  104. * <pre>
  105. * a.equals(dequote(Constants.encode(quote(a))));
  106. * </pre>
  107. *
  108. * is true for any <code>a</code>.
  109. * <p>
  110. * This method removes any opening/closing quotation marks added by
  111. * {@link #quote(String)}.
  112. *
  113. * @param in
  114. * the input buffer to parse.
  115. * @param offset
  116. * first position within <code>in</code> to scan.
  117. * @param end
  118. * one position past in <code>in</code> to scan.
  119. * @return the cleaned string.
  120. */
  121. public abstract String dequote(byte[] in, int offset, int end);
  122. /**
  123. * Quoting style used by the Bourne shell.
  124. * <p>
  125. * Quotes are unconditionally inserted during {@link #quote(String)}. This
  126. * protects shell meta-characters like <code>$</code> or <code>~</code> from
  127. * being recognized as special.
  128. */
  129. public static class BourneStyle extends QuotedString {
  130. @Override
  131. public String quote(String in) {
  132. final StringBuilder r = new StringBuilder();
  133. r.append('\'');
  134. int start = 0, i = 0;
  135. for (; i < in.length(); i++) {
  136. switch (in.charAt(i)) {
  137. case '\'':
  138. case '!':
  139. r.append(in, start, i);
  140. r.append('\'');
  141. r.append('\\');
  142. r.append(in.charAt(i));
  143. r.append('\'');
  144. start = i + 1;
  145. break;
  146. }
  147. }
  148. r.append(in, start, i);
  149. r.append('\'');
  150. return r.toString();
  151. }
  152. @Override
  153. public String dequote(byte[] in, int ip, int ie) {
  154. boolean inquote = false;
  155. final byte[] r = new byte[ie - ip];
  156. int rPtr = 0;
  157. while (ip < ie) {
  158. final byte b = in[ip++];
  159. switch (b) {
  160. case '\'':
  161. inquote = !inquote;
  162. continue;
  163. case '\\':
  164. if (inquote || ip == ie)
  165. r[rPtr++] = b; // literal within a quote
  166. else
  167. r[rPtr++] = in[ip++];
  168. continue;
  169. default:
  170. r[rPtr++] = b;
  171. continue;
  172. }
  173. }
  174. return RawParseUtils.decode(UTF_8, r, 0, rPtr);
  175. }
  176. }
  177. /** Bourne style, but permits <code>~user</code> at the start of the string. */
  178. public static class BourneUserPathStyle extends BourneStyle {
  179. @Override
  180. public String quote(String in) {
  181. if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
  182. // If the string is just "~user" we can assume they
  183. // mean "~user/".
  184. //
  185. return in + "/"; //$NON-NLS-1$
  186. }
  187. if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
  188. // If the string is of "~/path" or "~user/path"
  189. // we must not escape ~/ or ~user/ from the shell.
  190. //
  191. final int i = in.indexOf('/') + 1;
  192. if (i == in.length())
  193. return in;
  194. return in.substring(0, i) + super.quote(in.substring(i));
  195. }
  196. return super.quote(in);
  197. }
  198. }
  199. /** Quoting style that obeys the rules Git applies to file names */
  200. public static final class GitPathStyle extends QuotedString {
  201. private static final byte[] quote;
  202. static {
  203. quote = new byte[128];
  204. Arrays.fill(quote, (byte) -1);
  205. for (int i = '0'; i <= '9'; i++)
  206. quote[i] = 0;
  207. for (int i = 'a'; i <= 'z'; i++)
  208. quote[i] = 0;
  209. for (int i = 'A'; i <= 'Z'; i++)
  210. quote[i] = 0;
  211. quote[' '] = 0;
  212. quote['$'] = 0;
  213. quote['%'] = 0;
  214. quote['&'] = 0;
  215. quote['*'] = 0;
  216. quote['+'] = 0;
  217. quote[','] = 0;
  218. quote['-'] = 0;
  219. quote['.'] = 0;
  220. quote['/'] = 0;
  221. quote[':'] = 0;
  222. quote[';'] = 0;
  223. quote['='] = 0;
  224. quote['?'] = 0;
  225. quote['@'] = 0;
  226. quote['_'] = 0;
  227. quote['^'] = 0;
  228. quote['|'] = 0;
  229. quote['~'] = 0;
  230. quote['\u0007'] = 'a';
  231. quote['\b'] = 'b';
  232. quote['\f'] = 'f';
  233. quote['\n'] = 'n';
  234. quote['\r'] = 'r';
  235. quote['\t'] = 't';
  236. quote['\u000B'] = 'v';
  237. quote['\\'] = '\\';
  238. quote['"'] = '"';
  239. }
  240. @Override
  241. public String quote(String instr) {
  242. if (instr.length() == 0)
  243. return "\"\""; //$NON-NLS-1$
  244. boolean reuse = true;
  245. final byte[] in = Constants.encode(instr);
  246. final StringBuilder r = new StringBuilder(2 + in.length);
  247. r.append('"');
  248. for (int i = 0; i < in.length; i++) {
  249. final int c = in[i] & 0xff;
  250. if (c < quote.length) {
  251. final byte style = quote[c];
  252. if (style == 0) {
  253. r.append((char) c);
  254. continue;
  255. }
  256. if (style > 0) {
  257. reuse = false;
  258. r.append('\\');
  259. r.append((char) style);
  260. continue;
  261. }
  262. }
  263. reuse = false;
  264. r.append('\\');
  265. r.append((char) (((c >> 6) & 03) + '0'));
  266. r.append((char) (((c >> 3) & 07) + '0'));
  267. r.append((char) (((c >> 0) & 07) + '0'));
  268. }
  269. if (reuse)
  270. return instr;
  271. r.append('"');
  272. return r.toString();
  273. }
  274. @Override
  275. public String dequote(byte[] in, int inPtr, int inEnd) {
  276. if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
  277. return dq(in, inPtr + 1, inEnd - 1);
  278. return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
  279. }
  280. private static String dq(byte[] in, int inPtr, int inEnd) {
  281. final byte[] r = new byte[inEnd - inPtr];
  282. int rPtr = 0;
  283. while (inPtr < inEnd) {
  284. final byte b = in[inPtr++];
  285. if (b != '\\') {
  286. r[rPtr++] = b;
  287. continue;
  288. }
  289. if (inPtr == inEnd) {
  290. // Lone trailing backslash. Treat it as a literal.
  291. //
  292. r[rPtr++] = '\\';
  293. break;
  294. }
  295. switch (in[inPtr++]) {
  296. case 'a':
  297. r[rPtr++] = 0x07 /* \a = BEL */;
  298. continue;
  299. case 'b':
  300. r[rPtr++] = '\b';
  301. continue;
  302. case 'f':
  303. r[rPtr++] = '\f';
  304. continue;
  305. case 'n':
  306. r[rPtr++] = '\n';
  307. continue;
  308. case 'r':
  309. r[rPtr++] = '\r';
  310. continue;
  311. case 't':
  312. r[rPtr++] = '\t';
  313. continue;
  314. case 'v':
  315. r[rPtr++] = 0x0B/* \v = VT */;
  316. continue;
  317. case '\\':
  318. case '"':
  319. r[rPtr++] = in[inPtr - 1];
  320. continue;
  321. case '0':
  322. case '1':
  323. case '2':
  324. case '3': {
  325. int cp = in[inPtr - 1] - '0';
  326. for (int n = 1; n < 3 && inPtr < inEnd; n++) {
  327. final byte c = in[inPtr];
  328. if ('0' <= c && c <= '7') {
  329. cp <<= 3;
  330. cp |= c - '0';
  331. inPtr++;
  332. } else {
  333. break;
  334. }
  335. }
  336. r[rPtr++] = (byte) cp;
  337. continue;
  338. }
  339. default:
  340. // Any other code is taken literally.
  341. //
  342. r[rPtr++] = '\\';
  343. r[rPtr++] = in[inPtr - 1];
  344. continue;
  345. }
  346. }
  347. return RawParseUtils.decode(UTF_8, r, 0, rPtr);
  348. }
  349. private GitPathStyle() {
  350. // Singleton
  351. }
  352. }
  353. }