You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

QuotedString.java 8.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
  1. /*
  2. * Copyright (C) 2008, 2019 Google Inc. and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.util;
  11. import static java.nio.charset.StandardCharsets.UTF_8;
  12. import java.util.Arrays;
  13. import org.eclipse.jgit.lib.Constants;
  14. /**
  15. * Utility functions related to quoted string handling.
  16. */
  17. public abstract class QuotedString {
  18. /** Quoting style that obeys the rules Git applies to file names; built with {@code new GitPathStyle(true)}, so bytes above 0x7F are octal-escaped as well. */
  19. public static final GitPathStyle GIT_PATH = new GitPathStyle(true);
  20. /**
  21. * Quoting style that obeys the rules Git applies to file names when
  22. * {@code core.quotePath = false}; built with {@code new GitPathStyle(false)}, so bytes above 0x7F are copied through without escaping.
  23. *
  24. * @since 5.6
  25. */
  26. public static final QuotedString GIT_PATH_MINIMAL = new GitPathStyle(false);
  27. /**
  28. * Quoting style used by the Bourne shell (shared {@link BourneStyle} instance).
  29. * <p>
  30. * Quotes are unconditionally inserted during {@link #quote(String)}. This
  31. * protects shell meta-characters like <code>$</code> or <code>~</code> from
  32. * being recognized as special.
  33. */
  34. public static final BourneStyle BOURNE = new BourneStyle();
  35. /** Bourne style, but permits an unquoted <code>~user</code> or <code>~/</code> prefix at the start of the string (shared {@link BourneUserPathStyle} instance). */
  36. public static final BourneUserPathStyle BOURNE_USER_PATH = new BourneUserPathStyle();
  37. /**
  38. * Quote an input string by the quoting rules of the concrete style.
  39. * <p>
  40. * If the input string does not require any quoting, the same String
  41. * reference is returned to the caller. Whether quoting is required is decided by each style; {@link BourneStyle}, for example, wraps every input in quotes.
  42. * <p>
  43. * Otherwise a quoted string is returned, including the opening and closing
  44. * quotation marks at the start and end of the string. If the style does not
  45. * permit raw Unicode characters then the string will first be encoded in
  46. * UTF-8, with unprintable sequences possibly escaped by the rules.
  47. *
  48. * @param in
  49. * any non-null Unicode string.
  50. * @return {@code in} itself or its quoted form. See above for details.
  51. */
  52. public abstract String quote(String in);
  53. /**
  54. * Clean a previously quoted input, decoding the result via UTF-8.
  55. * <p>
  56. * This method must match quote such that:
  57. *
  58. * <pre>
  59. * a.equals(dequote(quote(a)));
  60. * </pre>
  61. *
  62. * is true for any <code>a</code>.
  63. *
  64. * @param in
  65. * a Unicode string to remove quoting from.
  66. * @return the cleaned string.
  67. * @see #dequote(byte[], int, int)
  68. */
  69. public String dequote(String in) {
  70. final byte[] b = Constants.encode(in);
  71. return dequote(b, 0, b.length);
  72. }
  73. /**
  74. * Decode a previously quoted input, scanning a UTF-8 encoded buffer.
  75. * <p>
  76. * This method must match quote such that:
  77. *
  78. * <pre>
  79. * a.equals(dequote(Constants.encode(quote(a))));
  80. * </pre>
  81. *
  82. * is true for any <code>a</code>. (One visible exception: {@link BourneUserPathStyle#quote(String)} rewrites a bare <code>~user</code> to <code>~user/</code>, so that input does not round-trip unchanged.)
  83. * <p>
  84. * This method removes any opening/closing quotation marks added by
  85. * {@link #quote(String)}.
  86. *
  87. * @param in
  88. * the input buffer to parse.
  89. * @param offset
  90. * first position within <code>in</code> to scan.
  91. * @param end
  92. * one position past the last byte within <code>in</code> to scan (exclusive end).
  93. * @return the cleaned string.
  94. */
  95. public abstract String dequote(byte[] in, int offset, int end);
  96. /**
  97. * Quoting style used by the Bourne shell.
  98. * <p>
  99. * Quotes are unconditionally inserted during {@link #quote(String)}. This
  100. * protects shell meta-characters like <code>$</code> or <code>~</code> from
  101. * being recognized as special.
  102. */
  103. public static class BourneStyle extends QuotedString {
  104. @Override
  105. public String quote(String in) {
  106. final StringBuilder r = new StringBuilder();
  107. r.append('\'');
  108. int start = 0, i = 0;
  109. for (; i < in.length(); i++) {
  110. switch (in.charAt(i)) {
  111. case '\'':
  112. case '!':
  113. r.append(in, start, i);
  114. r.append('\'');
  115. r.append('\\');
  116. r.append(in.charAt(i));
  117. r.append('\'');
  118. start = i + 1;
  119. break;
  120. }
  121. }
  122. r.append(in, start, i);
  123. r.append('\'');
  124. return r.toString();
  125. }
  126. @Override
  127. public String dequote(byte[] in, int ip, int ie) {
  128. boolean inquote = false;
  129. final byte[] r = new byte[ie - ip];
  130. int rPtr = 0;
  131. while (ip < ie) {
  132. final byte b = in[ip++];
  133. switch (b) {
  134. case '\'':
  135. inquote = !inquote;
  136. continue;
  137. case '\\':
  138. if (inquote || ip == ie)
  139. r[rPtr++] = b; // literal within a quote
  140. else
  141. r[rPtr++] = in[ip++];
  142. continue;
  143. default:
  144. r[rPtr++] = b;
  145. continue;
  146. }
  147. }
  148. return RawParseUtils.decode(UTF_8, r, 0, rPtr);
  149. }
  150. }
  151. /** Bourne style, but permits <code>~user</code> at the start of the string. */
  152. public static class BourneUserPathStyle extends BourneStyle {
  153. @Override
  154. public String quote(String in) {
  155. if (in.matches("^~[A-Za-z0-9_-]+$")) { //$NON-NLS-1$
  156. // If the string is just "~user" we can assume they
  157. // mean "~user/".
  158. //
  159. return in + "/"; //$NON-NLS-1$
  160. }
  161. if (in.matches("^~[A-Za-z0-9_-]*/.*$")) { //$NON-NLS-1$
  162. // If the string is of "~/path" or "~user/path"
  163. // we must not escape ~/ or ~user/ from the shell.
  164. //
  165. final int i = in.indexOf('/') + 1;
  166. if (i == in.length())
  167. return in;
  168. return in.substring(0, i) + super.quote(in.substring(i));
  169. }
  170. return super.quote(in);
  171. }
  172. }
  173. /** Quoting style that obeys the rules Git applies to file names */
  174. public static final class GitPathStyle extends QuotedString {
  175. private static final byte[] quote;
  176. static {
  177. quote = new byte[128];
  178. Arrays.fill(quote, (byte) -1);
  179. for (int i = '0'; i <= '9'; i++)
  180. quote[i] = 0;
  181. for (int i = 'a'; i <= 'z'; i++)
  182. quote[i] = 0;
  183. for (int i = 'A'; i <= 'Z'; i++)
  184. quote[i] = 0;
  185. quote[' '] = 0;
  186. quote['$'] = 0;
  187. quote['%'] = 0;
  188. quote['&'] = 0;
  189. quote['*'] = 0;
  190. quote['+'] = 0;
  191. quote[','] = 0;
  192. quote['-'] = 0;
  193. quote['.'] = 0;
  194. quote['/'] = 0;
  195. quote[':'] = 0;
  196. quote[';'] = 0;
  197. quote['='] = 0;
  198. quote['?'] = 0;
  199. quote['@'] = 0;
  200. quote['_'] = 0;
  201. quote['^'] = 0;
  202. quote['|'] = 0;
  203. quote['~'] = 0;
  204. quote['\u0007'] = 'a';
  205. quote['\b'] = 'b';
  206. quote['\f'] = 'f';
  207. quote['\n'] = 'n';
  208. quote['\r'] = 'r';
  209. quote['\t'] = 't';
  210. quote['\u000B'] = 'v';
  211. quote['\\'] = '\\';
  212. quote['"'] = '"';
  213. }
  214. private final boolean quoteHigh;
  215. @Override
  216. public String quote(String instr) {
  217. if (instr.isEmpty()) {
  218. return "\"\""; //$NON-NLS-1$
  219. }
  220. boolean reuse = true;
  221. final byte[] in = Constants.encode(instr);
  222. final byte[] out = new byte[4 * in.length + 2];
  223. int o = 0;
  224. out[o++] = '"';
  225. for (byte element : in) {
  226. final int c = element & 0xff;
  227. if (c < quote.length) {
  228. final byte style = quote[c];
  229. if (style == 0) {
  230. out[o++] = (byte) c;
  231. continue;
  232. }
  233. if (style > 0) {
  234. reuse = false;
  235. out[o++] = '\\';
  236. out[o++] = style;
  237. continue;
  238. }
  239. } else if (!quoteHigh) {
  240. out[o++] = (byte) c;
  241. continue;
  242. }
  243. reuse = false;
  244. out[o++] = '\\';
  245. out[o++] = (byte) (((c >> 6) & 03) + '0');
  246. out[o++] = (byte) (((c >> 3) & 07) + '0');
  247. out[o++] = (byte) (((c >> 0) & 07) + '0');
  248. }
  249. if (reuse) {
  250. return instr;
  251. }
  252. out[o++] = '"';
  253. return new String(out, 0, o, UTF_8);
  254. }
  255. @Override
  256. public String dequote(byte[] in, int inPtr, int inEnd) {
  257. if (2 <= inEnd - inPtr && in[inPtr] == '"' && in[inEnd - 1] == '"')
  258. return dq(in, inPtr + 1, inEnd - 1);
  259. return RawParseUtils.decode(UTF_8, in, inPtr, inEnd);
  260. }
  261. private static String dq(byte[] in, int inPtr, int inEnd) {
  262. final byte[] r = new byte[inEnd - inPtr];
  263. int rPtr = 0;
  264. while (inPtr < inEnd) {
  265. final byte b = in[inPtr++];
  266. if (b != '\\') {
  267. r[rPtr++] = b;
  268. continue;
  269. }
  270. if (inPtr == inEnd) {
  271. // Lone trailing backslash. Treat it as a literal.
  272. //
  273. r[rPtr++] = '\\';
  274. break;
  275. }
  276. switch (in[inPtr++]) {
  277. case 'a':
  278. r[rPtr++] = 0x07 /* \a = BEL */;
  279. continue;
  280. case 'b':
  281. r[rPtr++] = '\b';
  282. continue;
  283. case 'f':
  284. r[rPtr++] = '\f';
  285. continue;
  286. case 'n':
  287. r[rPtr++] = '\n';
  288. continue;
  289. case 'r':
  290. r[rPtr++] = '\r';
  291. continue;
  292. case 't':
  293. r[rPtr++] = '\t';
  294. continue;
  295. case 'v':
  296. r[rPtr++] = 0x0B/* \v = VT */;
  297. continue;
  298. case '\\':
  299. case '"':
  300. r[rPtr++] = in[inPtr - 1];
  301. continue;
  302. case '0':
  303. case '1':
  304. case '2':
  305. case '3': {
  306. int cp = in[inPtr - 1] - '0';
  307. for (int n = 1; n < 3 && inPtr < inEnd; n++) {
  308. final byte c = in[inPtr];
  309. if ('0' <= c && c <= '7') {
  310. cp <<= 3;
  311. cp |= c - '0';
  312. inPtr++;
  313. } else {
  314. break;
  315. }
  316. }
  317. r[rPtr++] = (byte) cp;
  318. continue;
  319. }
  320. default:
  321. // Any other code is taken literally.
  322. //
  323. r[rPtr++] = '\\';
  324. r[rPtr++] = in[inPtr - 1];
  325. continue;
  326. }
  327. }
  328. return RawParseUtils.decode(UTF_8, r, 0, rPtr);
  329. }
  330. private GitPathStyle(boolean doQuote) {
  331. quoteHigh = doQuote;
  332. }
  333. }
  334. }