You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

Strings.java 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528
  1. /*
  2. * Copyright (C) 2014, 2017 Andrey Loskutov <loskutov@gmx.de>
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.ignore.internal;
  44. import static java.lang.Character.isLetter;
  45. import java.text.MessageFormat;
  46. import java.util.ArrayList;
  47. import java.util.Arrays;
  48. import java.util.List;
  49. import java.util.regex.Pattern;
  50. import java.util.regex.PatternSyntaxException;
  51. import org.eclipse.jgit.errors.InvalidPatternException;
  52. import org.eclipse.jgit.ignore.FastIgnoreRule;
  53. import org.eclipse.jgit.internal.JGitText;
  54. /**
  55. * Various {@link java.lang.String} related utility methods, written mostly to
  56. * avoid generation of new String objects (e.g. via splitting Strings etc).
  57. */
  58. public class Strings {
  59. static char getPathSeparator(Character pathSeparator) {
  60. return pathSeparator == null ? FastIgnoreRule.PATH_SEPARATOR
  61. : pathSeparator.charValue();
  62. }
  63. /**
  64. * Strip trailing characters
  65. *
  66. * @param pattern
  67. * non null
  68. * @param c
  69. * character to remove
  70. * @return new string with all trailing characters removed
  71. */
  72. public static String stripTrailing(String pattern, char c) {
  73. for (int i = pattern.length() - 1; i >= 0; i--) {
  74. char charAt = pattern.charAt(i);
  75. if (charAt != c) {
  76. if (i == pattern.length() - 1) {
  77. return pattern;
  78. }
  79. return pattern.substring(0, i + 1);
  80. }
  81. }
  82. return ""; //$NON-NLS-1$
  83. }
  84. /**
  85. * Strip trailing whitespace characters
  86. *
  87. * @param pattern
  88. * non null
  89. * @return new string with all trailing whitespace removed
  90. */
  91. public static String stripTrailingWhitespace(String pattern) {
  92. for (int i = pattern.length() - 1; i >= 0; i--) {
  93. char charAt = pattern.charAt(i);
  94. if (!Character.isWhitespace(charAt)) {
  95. if (i == pattern.length() - 1) {
  96. return pattern;
  97. }
  98. return pattern.substring(0, i + 1);
  99. }
  100. }
  101. return ""; //$NON-NLS-1$
  102. }
  103. /**
  104. * Check if pattern is a directory pattern ending with a path separator
  105. *
  106. * @param pattern
  107. * non null
  108. * @return {@code true} if the last character, which is not whitespace, is a
  109. * path separator
  110. */
  111. public static boolean isDirectoryPattern(String pattern) {
  112. for (int i = pattern.length() - 1; i >= 0; i--) {
  113. char charAt = pattern.charAt(i);
  114. if (!Character.isWhitespace(charAt)) {
  115. return charAt == FastIgnoreRule.PATH_SEPARATOR;
  116. }
  117. }
  118. return false;
  119. }
  120. static int count(String s, char c, boolean ignoreFirstLast) {
  121. int start = 0;
  122. int count = 0;
  123. int length = s.length();
  124. while (start < length) {
  125. start = s.indexOf(c, start);
  126. if (start == -1) {
  127. break;
  128. }
  129. if (!ignoreFirstLast || (start != 0 && start != length - 1)) {
  130. count++;
  131. }
  132. start++;
  133. }
  134. return count;
  135. }
  136. /**
  137. * Splits given string to substrings by given separator
  138. *
  139. * @param pattern
  140. * non null
  141. * @param slash
  142. * separator char
  143. * @return list of substrings
  144. */
  145. public static List<String> split(String pattern, char slash) {
  146. int count = count(pattern, slash, true);
  147. if (count < 1)
  148. throw new IllegalStateException(
  149. "Pattern must have at least two segments: " + pattern); //$NON-NLS-1$
  150. List<String> segments = new ArrayList<>(count);
  151. int right = 0;
  152. while (true) {
  153. int left = right;
  154. right = pattern.indexOf(slash, right);
  155. if (right == -1) {
  156. if (left < pattern.length())
  157. segments.add(pattern.substring(left));
  158. break;
  159. }
  160. if (right - left > 0)
  161. if (left == 1)
  162. // leading slash should remain by the first pattern
  163. segments.add(pattern.substring(left - 1, right));
  164. else if (right == pattern.length() - 1)
  165. // trailing slash should remain too
  166. segments.add(pattern.substring(left, right + 1));
  167. else
  168. segments.add(pattern.substring(left, right));
  169. right++;
  170. }
  171. return segments;
  172. }
  173. static boolean isWildCard(String pattern) {
  174. return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
  175. }
  176. private static boolean isComplexWildcard(String pattern) {
  177. int idx1 = pattern.indexOf('[');
  178. if (idx1 != -1) {
  179. return true;
  180. }
  181. if (pattern.indexOf('?') != -1) {
  182. return true;
  183. }
  184. // check if the backslash escapes one of the glob special characters
  185. // if not, backslash is not part of a regex and treated literally
  186. int backSlash = pattern.indexOf('\\');
  187. if (backSlash >= 0) {
  188. int nextIdx = backSlash + 1;
  189. if (pattern.length() == nextIdx) {
  190. return false;
  191. }
  192. char nextChar = pattern.charAt(nextIdx);
  193. if (escapedByBackslash(nextChar)) {
  194. return true;
  195. }
  196. return false;
  197. }
  198. return false;
  199. }
  200. private static boolean escapedByBackslash(char nextChar) {
  201. return nextChar == '?' || nextChar == '*' || nextChar == '[';
  202. }
  203. static PatternState checkWildCards(String pattern) {
  204. if (isComplexWildcard(pattern))
  205. return PatternState.COMPLEX;
  206. int startIdx = pattern.indexOf('*');
  207. if (startIdx < 0)
  208. return PatternState.NONE;
  209. if (startIdx == pattern.length() - 1)
  210. return PatternState.TRAILING_ASTERISK_ONLY;
  211. if (pattern.lastIndexOf('*') == 0)
  212. return PatternState.LEADING_ASTERISK_ONLY;
  213. return PatternState.COMPLEX;
  214. }
  215. enum PatternState {
  216. LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
  217. }
  218. final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
  219. "alnum", "alpha", "blank", "cntrl", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  220. // [:alnum:] [:alpha:] [:blank:] [:cntrl:]
  221. "digit", "graph", "lower", "print", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  222. // [:digit:] [:graph:] [:lower:] [:print:]
  223. "punct", "space", "upper", "xdigit", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  224. // [:punct:] [:space:] [:upper:] [:xdigit:]
  225. "word" //$NON-NLS-1$
  226. // [:word:] XXX I don't see it in
  227. // http://man7.org/linux/man-pages/man7/glob.7.html
  228. // but this was in org.eclipse.jgit.fnmatch.GroupHead.java ???
  229. );
  230. private static final String DL = "\\p{javaDigit}\\p{javaLetter}"; //$NON-NLS-1$
  231. final static List<String> JAVA_CHAR_CLASSES = Arrays
  232. .asList("\\p{Alnum}", "\\p{javaLetter}", "\\p{Blank}", "\\p{Cntrl}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  233. // [:alnum:] [:alpha:] [:blank:] [:cntrl:]
  234. "\\p{javaDigit}", "[\\p{Graph}" + DL + "]", "\\p{Ll}", "[\\p{Print}" + DL + "]", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$ //$NON-NLS-5$ //$NON-NLS-6$
  235. // [:digit:] [:graph:] [:lower:] [:print:]
  236. "\\p{Punct}", "\\p{Space}", "\\p{Lu}", "\\p{XDigit}", //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ //$NON-NLS-4$
  237. // [:punct:] [:space:] [:upper:] [:xdigit:]
  238. "[" + DL + "_]" //$NON-NLS-1$ //$NON-NLS-2$
  239. // [:word:]
  240. );
  241. // Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are
  242. // not supported by CLI git (at least not by 1.9.1)
  243. final static Pattern UNSUPPORTED = Pattern
  244. .compile("\\[\\[[.=]\\w+[.=]\\]\\]"); //$NON-NLS-1$
  245. /**
  246. * Conversion from glob to Java regex following two sources: <li>
  247. * http://man7.org/linux/man-pages/man7/glob.7.html <li>
  248. * org.eclipse.jgit.fnmatch.FileNameMatcher.java Seems that there are
  249. * various ways to define what "glob" can be.
  250. *
  251. * @param pattern
  252. * non null pattern
  253. *
  254. * @return Java regex pattern corresponding to given glob pattern
  255. * @throws InvalidPatternException
  256. */
  257. static Pattern convertGlob(String pattern) throws InvalidPatternException {
  258. if (UNSUPPORTED.matcher(pattern).find())
  259. throw new InvalidPatternException(
  260. "Collating symbols [[.a.]] or equivalence class expressions [[=a=]] are not supported", //$NON-NLS-1$
  261. pattern);
  262. StringBuilder sb = new StringBuilder(pattern.length());
  263. int in_brackets = 0;
  264. boolean seenEscape = false;
  265. boolean ignoreLastBracket = false;
  266. boolean in_char_class = false;
  267. // 6 is the length of the longest posix char class "xdigit"
  268. char[] charClass = new char[6];
  269. for (int i = 0; i < pattern.length(); i++) {
  270. final char c = pattern.charAt(i);
  271. switch (c) {
  272. case '*':
  273. if (seenEscape || in_brackets > 0)
  274. sb.append(c);
  275. else
  276. sb.append('.').append(c);
  277. break;
  278. case '(': // fall-through
  279. case ')': // fall-through
  280. case '{': // fall-through
  281. case '}': // fall-through
  282. case '+': // fall-through
  283. case '$': // fall-through
  284. case '^': // fall-through
  285. case '|':
  286. if (seenEscape || in_brackets > 0)
  287. sb.append(c);
  288. else
  289. sb.append('\\').append(c);
  290. break;
  291. case '.':
  292. if (seenEscape)
  293. sb.append(c);
  294. else
  295. sb.append('\\').append('.');
  296. break;
  297. case '?':
  298. if (seenEscape || in_brackets > 0)
  299. sb.append(c);
  300. else
  301. sb.append('.');
  302. break;
  303. case ':':
  304. if (in_brackets > 0)
  305. if (lookBehind(sb) == '['
  306. && isLetter(lookAhead(pattern, i)))
  307. in_char_class = true;
  308. sb.append(':');
  309. break;
  310. case '-':
  311. if (in_brackets > 0) {
  312. if (lookAhead(pattern, i) == ']')
  313. sb.append('\\').append(c);
  314. else
  315. sb.append(c);
  316. } else
  317. sb.append('-');
  318. break;
  319. case '\\':
  320. if (in_brackets > 0) {
  321. char lookAhead = lookAhead(pattern, i);
  322. if (lookAhead == ']' || lookAhead == '[')
  323. ignoreLastBracket = true;
  324. } else {
  325. //
  326. char lookAhead = lookAhead(pattern, i);
  327. if (lookAhead != '\\' && lookAhead != '['
  328. && lookAhead != '?' && lookAhead != '*'
  329. && lookAhead != ' ' && lookBehind(sb) != '\\') {
  330. break;
  331. }
  332. }
  333. sb.append(c);
  334. break;
  335. case '[':
  336. if (in_brackets > 0) {
  337. if (!seenEscape) {
  338. sb.append('\\');
  339. }
  340. sb.append('[');
  341. ignoreLastBracket = true;
  342. } else {
  343. if (!seenEscape) {
  344. in_brackets++;
  345. ignoreLastBracket = false;
  346. }
  347. sb.append('[');
  348. }
  349. break;
  350. case ']':
  351. if (seenEscape) {
  352. sb.append(']');
  353. ignoreLastBracket = true;
  354. break;
  355. }
  356. if (in_brackets <= 0) {
  357. sb.append('\\').append(']');
  358. ignoreLastBracket = true;
  359. break;
  360. }
  361. char lookBehind = lookBehind(sb);
  362. if ((lookBehind == '[' && !ignoreLastBracket)
  363. || lookBehind == '^') {
  364. sb.append('\\');
  365. sb.append(']');
  366. ignoreLastBracket = true;
  367. } else {
  368. ignoreLastBracket = false;
  369. if (!in_char_class) {
  370. in_brackets--;
  371. sb.append(']');
  372. } else {
  373. in_char_class = false;
  374. String charCl = checkPosixCharClass(charClass);
  375. // delete last \[:: chars and set the pattern
  376. if (charCl != null) {
  377. sb.setLength(sb.length() - 4);
  378. sb.append(charCl);
  379. }
  380. reset(charClass);
  381. }
  382. }
  383. break;
  384. case '!':
  385. if (in_brackets > 0) {
  386. if (lookBehind(sb) == '[')
  387. sb.append('^');
  388. else
  389. sb.append(c);
  390. } else
  391. sb.append(c);
  392. break;
  393. default:
  394. if (in_char_class)
  395. setNext(charClass, c);
  396. else
  397. sb.append(c);
  398. break;
  399. } // end switch
  400. seenEscape = c == '\\';
  401. } // end for
  402. if (in_brackets > 0)
  403. throw new InvalidPatternException("Not closed bracket?", pattern); //$NON-NLS-1$
  404. try {
  405. return Pattern.compile(sb.toString(), Pattern.DOTALL);
  406. } catch (PatternSyntaxException e) {
  407. throw new InvalidPatternException(
  408. MessageFormat.format(JGitText.get().invalidIgnoreRule,
  409. pattern),
  410. pattern, e);
  411. }
  412. }
  413. /**
  414. * @param buffer
  415. * @return zero of the buffer is empty, otherwise the last character from
  416. * buffer
  417. */
  418. private static char lookBehind(StringBuilder buffer) {
  419. return buffer.length() > 0 ? buffer.charAt(buffer.length() - 1) : 0;
  420. }
  421. /**
  422. * @param pattern
  423. * @param i
  424. * current pointer in the pattern
  425. * @return zero of the index is out of range, otherwise the next character
  426. * from given position
  427. */
  428. private static char lookAhead(String pattern, int i) {
  429. int idx = i + 1;
  430. return idx >= pattern.length() ? 0 : pattern.charAt(idx);
  431. }
  432. private static void setNext(char[] buffer, char c) {
  433. for (int i = 0; i < buffer.length; i++)
  434. if (buffer[i] == 0) {
  435. buffer[i] = c;
  436. break;
  437. }
  438. }
  439. private static void reset(char[] buffer) {
  440. for (int i = 0; i < buffer.length; i++)
  441. buffer[i] = 0;
  442. }
  443. private static String checkPosixCharClass(char[] buffer) {
  444. for (int i = 0; i < POSIX_CHAR_CLASSES.size(); i++) {
  445. String clazz = POSIX_CHAR_CLASSES.get(i);
  446. boolean match = true;
  447. for (int j = 0; j < clazz.length(); j++)
  448. if (buffer[j] != clazz.charAt(j)) {
  449. match = false;
  450. break;
  451. }
  452. if (match)
  453. return JAVA_CHAR_CLASSES.get(i);
  454. }
  455. return null;
  456. }
  457. static String deleteBackslash(String s) {
  458. if (s.indexOf('\\') < 0) {
  459. return s;
  460. }
  461. StringBuilder sb = new StringBuilder(s.length());
  462. for (int i = 0; i < s.length(); i++) {
  463. char ch = s.charAt(i);
  464. if (ch == '\\') {
  465. if (i + 1 == s.length()) {
  466. continue;
  467. }
  468. char next = s.charAt(i + 1);
  469. if (next == '\\') {
  470. sb.append(ch);
  471. i++;
  472. continue;
  473. }
  474. if (!escapedByBackslash(next)) {
  475. continue;
  476. }
  477. }
  478. sb.append(ch);
  479. }
  480. return sb.toString();
  481. }
  482. }