You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Strings.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434
  1. /*
  2. * Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.ignore.internal;
  44. import static java.lang.Character.isLetter;
  45. import java.text.MessageFormat;
  46. import java.util.ArrayList;
  47. import java.util.Arrays;
  48. import java.util.List;
  49. import java.util.regex.Pattern;
  50. import java.util.regex.PatternSyntaxException;
  51. import org.eclipse.jgit.errors.InvalidPatternException;
  52. import org.eclipse.jgit.ignore.FastIgnoreRule;
  53. import org.eclipse.jgit.internal.JGitText;
  54. /**
  55. * Various {@link String} related utility methods, written mostly to avoid
  56. * generation of new String objects (e.g. via splitting Strings etc).
  57. *
  58. * @since 3.6
  59. */
  60. public class Strings {
  61. static char getPathSeparator(Character pathSeparator) {
  62. return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
  63. : pathSeparator.charValue();
  64. }
  65. /**
  66. * @param pattern
  67. * non null
  68. * @param c
  69. * character to remove
  70. * @return new string with all trailing characters removed
  71. */
  72. public static String stripTrailing(String pattern, char c) {
  73. while (pattern.length() > 0
  74. && pattern.charAt(pattern.length() - 1) == c)
  75. pattern = pattern.substring(0, pattern.length() - 1);
  76. return pattern;
  77. }
  78. static int count(String s, char c, boolean ignoreFirstLast) {
  79. int start = 0;
  80. int count = 0;
  81. while (true) {
  82. start = s.indexOf(c, start);
  83. if (start == -1)
  84. break;
  85. if (!ignoreFirstLast || (start != 0 && start != s.length()))
  86. count++;
  87. start++;
  88. }
  89. return count;
  90. }
  91. /**
  92. * Splits given string to substrings by given separator
  93. *
  94. * @param pattern
  95. * non null
  96. * @param slash
  97. * separator char
  98. * @return list of substrings
  99. */
  100. public static List<String> split(String pattern, char slash) {
  101. int count = count(pattern, slash, true);
  102. if (count < 1)
  103. throw new IllegalStateException(
  104. "Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
  105. List<String> segments = new ArrayList<String>(count);
  106. int right = 0;
  107. while (true) {
  108. int left = right;
  109. right = pattern.indexOf(slash, right);
  110. if (right == -1) {
  111. if (left < pattern.length())
  112. segments.add(pattern.substring(left));
  113. break;
  114. }
  115. if (right - left > 0)
  116. if (left == 1)
  117. // leading slash should remain by the first pattern
  118. segments.add(pattern.substring(left - 1, right));
  119. else if (right == pattern.length() - 1)
  120. // trailing slash should remain too
  121. segments.add(pattern.substring(left, right + 1));
  122. else
  123. segments.add(pattern.substring(left, right));
  124. right++;
  125. }
  126. return segments;
  127. }
  128. static boolean isWildCard(String pattern) {
  129. return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
  130. }
  131. private static boolean isComplexWildcard(String pattern) {
  132. int idx1 = pattern.indexOf('[');
  133. if (idx1 != -1) {
  134. int idx2 = pattern.indexOf(']');
  135. if (idx2 > idx1)
  136. return true;
  137. }
  138. if (pattern.indexOf('?') != -1) {
  139. return true;
  140. } else {
  141. // check if the backslash escapes one of the glob special characters
  142. // if not, backslash is not part of a regex and treated literally
  143. int backSlash = pattern.indexOf('\\');
  144. if (backSlash >= 0) {
  145. int nextIdx = backSlash + 1;
  146. if (pattern.length() == nextIdx) {
  147. return false;
  148. }
  149. char nextChar = pattern.charAt(nextIdx);
  150. if (nextChar == '?' || nextChar == '*' || nextChar == '['
  151. // required to match escaped backslashes '\\\\'
  152. || nextChar == '\\') {
  153. return true;
  154. } else {
  155. return false;
  156. }
  157. }
  158. }
  159. return false;
  160. }
  161. static PatternState checkWildCards(String pattern) {
  162. if (isComplexWildcard(pattern))
  163. return PatternState.COMPLEX;
  164. int startIdx = pattern.indexOf('*');
  165. if (startIdx < 0)
  166. return PatternState.NONE;
  167. if (startIdx == pattern.length() - 1)
  168. return PatternState.TRAILING_ASTERISK_ONLY;
  169. if (pattern.lastIndexOf('*') == 0)
  170. return PatternState.LEADING_ASTERISK_ONLY;
  171. return PatternState.COMPLEX;
  172. }
  173. static enum PatternState {
  174. LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
  175. }
  176. final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
  177. "alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  178. // [:alnum:] [:alpha:] [:blank:] [:cntrl:]
  179. "digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  180. // [:digit:] [:graph:] [:lower:] [:print:]
  181. "punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  182. // [:punct:] [:space:] [:upper:] [:xdigit:]
  183. "word" //$NON-NLS-1$
  184. // [:word:] XXX I don't see it in
  185. // http://man7.org/linux/man-pages/man7/glob.7.html
  186. // but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
  187. );
  188. private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
  189. final static List<String> JAVA_CHAR_CLASSES = Arrays
  190. .asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  191. // [:alnum:] [:alpha:] [:blank:] [:cntrl:]
  192. "\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
  193. // [:digit:] [:graph:] [:lower:] [:print:]
  194. "\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  195. // [:punct:] [:space:] [:upper:] [:xdigit:]
  196. "[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
  197. // [:word:]
  198. );
  199. // Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
  200. // not supported by CLI git (at least not by 1.9.1)
  201. final static Pattern UNSUPPORTED = Pattern
  202. .compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
  203. /**
  204. * Conversion from glob to Java regex following two sources: <li>
  205. * http://man7.org/linux/man-pages/man7/glob.7.html <li>
  206. * org.eclipse.jgit.fnmatch.FileNameMatcher.java Seems that there are
  207. * various ways to define what "glob" can be.
  208. *
  209. * @param pattern
  210. * non null pattern
  211. *
  212. * @return Java regex pattern corresponding to given glob pattern
  213. * @throws InvalidPatternException
  214. */
  215. static Pattern convertGlob(String pattern) throws InvalidPatternException {
  216. if (UNSUPPORTED.matcher(pattern).find())
  217. throw new InvalidPatternException(
  218. "Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
  219. pattern);
  220. StringBuilder sb = new StringBuilder(pattern.length());
  221. int in_brackets = 0;
  222. boolean seenEscape = false;
  223. boolean ignoreLastBracket = false;
  224. boolean in_char_class = false;
  225. // 6 is the length of the longest posix char class "xdigit"
  226. char[] charClass = new char[6];
  227. for (int i = 0; i < pattern.length(); i++) {
  228. char c = pattern.charAt(i);
  229. switch (c) {
  230. case '*':
  231. if (seenEscape || in_brackets > 0)
  232. sb.append(c);
  233. else
  234. sb.append('.').append(c);
  235. break;
  236. case '.':
  237. if (seenEscape)
  238. sb.append(c);
  239. else
  240. sb.append('\\').append('.');
  241. break;
  242. case '?':
  243. if (seenEscape || in_brackets > 0)
  244. sb.append(c);
  245. else
  246. sb.append('.');
  247. break;
  248. case ':':
  249. if (in_brackets > 0)
  250. if (lookBehind(sb) == '['
  251. && isLetter(lookAhead(pattern, i)))
  252. in_char_class = true;
  253. sb.append(':');
  254. break;
  255. case '-':
  256. if (in_brackets > 0) {
  257. if (lookAhead(pattern, i) == ']')
  258. sb.append('\\').append(c);
  259. else
  260. sb.append(c);
  261. } else
  262. sb.append('-');
  263. break;
  264. case '\\':
  265. if (in_brackets > 0) {
  266. char lookAhead = lookAhead(pattern, i);
  267. if (lookAhead == ']' || lookAhead == '[')
  268. ignoreLastBracket = true;
  269. }
  270. sb.append(c);
  271. break;
  272. case '[':
  273. if (in_brackets > 0) {
  274. sb.append('\\').append('[');
  275. ignoreLastBracket = true;
  276. } else {
  277. if (!seenEscape) {
  278. in_brackets++;
  279. ignoreLastBracket = false;
  280. }
  281. sb.append('[');
  282. }
  283. break;
  284. case ']':
  285. if (seenEscape) {
  286. sb.append(']');
  287. ignoreLastBracket = true;
  288. break;
  289. }
  290. if (in_brackets <= 0) {
  291. sb.append('\\').append(']');
  292. ignoreLastBracket = true;
  293. break;
  294. }
  295. char lookBehind = lookBehind(sb);
  296. if ((lookBehind == '[' && !ignoreLastBracket)
  297. || lookBehind == '^') {
  298. sb.append('\\');
  299. sb.append(']');
  300. ignoreLastBracket = true;
  301. } else {
  302. ignoreLastBracket = false;
  303. if (!in_char_class) {
  304. in_brackets--;
  305. sb.append(']');
  306. } else {
  307. in_char_class = false;
  308. String charCl = checkPosixCharClass(charClass);
  309. // delete last \[:: chars and set the pattern
  310. if (charCl != null) {
  311. sb.setLength(sb.length() - 4);
  312. sb.append(charCl);
  313. }
  314. reset(charClass);
  315. }
  316. }
  317. break;
  318. case '!':
  319. if (in_brackets > 0) {
  320. if (lookBehind(sb) == '[')
  321. sb.append('^');
  322. else
  323. sb.append(c);
  324. } else
  325. sb.append(c);
  326. break;
  327. default:
  328. if (in_char_class)
  329. setNext(charClass, c);
  330. else
  331. sb.append(c);
  332. break;
  333. } // end switch
  334. seenEscape = c == '\\';
  335. } // end for
  336. if (in_brackets > 0)
  337. throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
  338. try {
  339. return Pattern.compile(sb.toString());
  340. } catch (PatternSyntaxException e) {
  341. InvalidPatternException patternException = new InvalidPatternException(
  342. MessageFormat.format(JGitText.get().invalidIgnoreRule,
  343. pattern),
  344. pattern);
  345. patternException.initCause(e);
  346. throw patternException;
  347. }
  348. }
  349. /**
  350. * @param buffer
  351. * @return zero of the buffer is empty, otherwise the last character from
  352. * buffer
  353. */
  354. private static char lookBehind(StringBuilder buffer) {
  355. return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
  356. }
  357. /**
  358. * @param pattern
  359. * @param i
  360. * current pointer in the pattern
  361. * @return zero of the index is out of range, otherwise the next character
  362. * from given position
  363. */
  364. private static char lookAhead(String pattern, int i) {
  365. int idx = i + 1;
  366. return idx >= pattern.length() ? 0 : pattern.charAt(idx);
  367. }
  368. private static void setNext(char[] buffer, char c) {
  369. for (int i = 0; i < buffer.length; i++)
  370. if (buffer[i] == 0) {
  371. buffer[i] = c;
  372. break;
  373. }
  374. }
  375. private static void reset(char[] buffer) {
  376. for (int i = 0; i < buffer.length; i++)
  377. buffer[i] = 0;
  378. }
  379. private static String checkPosixCharClass(char[] buffer) {
  380. for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
  381. String clazz = POSIX_CHAR_CLASSES.get(i);
  382. boolean match = true;
  383. for (int j = 0; j < clazz.length(); j++)
  384. if (buffer[j] != clazz.charAt(j)) {
  385. match = false;
  386. break;
  387. }
  388. if (match)
  389. return JAVA_CHAR_CLASSES.get(i);
  390. }
  391. return null;
  392. }
  393. }