You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

FileNameMatcher.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390
  1. /*
  2. * Copyright (C) 2008, Florian Köberle <florianskarten@web.de> and others
  3. *
  4. * This program and the accompanying materials are made available under the
  5. * terms of the Eclipse Distribution License v. 1.0 which is available at
  6. * https://www.eclipse.org/org/documents/edl-v10.php.
  7. *
  8. * SPDX-License-Identifier: BSD-3-Clause
  9. */
  10. package org.eclipse.jgit.fnmatch;
  11. import java.util.ArrayList;
  12. import java.util.Collections;
  13. import java.util.List;
  14. import java.util.ListIterator;
  15. import java.util.regex.Matcher;
  16. import java.util.regex.Pattern;
  17. import org.eclipse.jgit.errors.InvalidPatternException;
  18. import org.eclipse.jgit.errors.NoClosingBracketException;
  19. /**
  20. * This class can be used to match filenames against fnmatch like patterns. It
  21. * is not thread save.
  22. * <p>
  23. * Supported are the wildcard characters * and ? and groups with:
  24. * <ul>
  25. * <li>characters e.g. [abc]</li>
  26. * <li>ranges e.g. [a-z]</li>
  27. * <li>the following character classes
  28. * <ul>
  29. * <li>[:alnum:]</li>
  30. * <li>[:alpha:]</li>
  31. * <li>[:blank:]</li>
  32. * <li>[:cntrl:]</li>
  33. * <li>[:digit:]</li>
  34. * <li>[:graph:]</li>
  35. * <li>[:lower:]</li>
  36. * <li>[:print:]</li>
  37. * <li>[:punct:]</li>
  38. * <li>[:space:]</li>
  39. * <li>[:upper:]</li>
  40. * <li>[:word:]</li>
  41. * <li>[:xdigit:]</li>
  42. * </ul>
  43. * e. g. [[:xdigit:]]</li>
  44. * </ul>
  45. * Any character can be escaped by prepending it with a \
  46. */
  47. public class FileNameMatcher {
  48. static final List<Head> EMPTY_HEAD_LIST = Collections.emptyList();
  49. private static final Pattern characterClassStartPattern = Pattern
  50. .compile("\\[[.:=]"); //$NON-NLS-1$
  51. private List<Head> headsStartValue;
  52. private List<Head> heads;
  53. /**
  54. * {{@link #extendStringToMatchByOneCharacter(char)} needs a list for the
  55. * new heads, allocating a new array would be bad for the performance, as
  56. * the method gets called very often.
  57. *
  58. */
  59. private List<Head> listForLocalUseage;
  60. /**
  61. *
  62. * @param headsStartValue
  63. * must be a list which will never be modified.
  64. */
  65. private FileNameMatcher(List<Head> headsStartValue) {
  66. this(headsStartValue, headsStartValue);
  67. }
  68. /**
  69. *
  70. * @param headsStartValue
  71. * must be a list which will never be modified.
  72. * @param heads
  73. * a list which will be cloned and then used as current head
  74. * list.
  75. */
  76. private FileNameMatcher(final List<Head> headsStartValue,
  77. final List<Head> heads) {
  78. this.headsStartValue = headsStartValue;
  79. this.heads = new ArrayList<>(heads.size());
  80. this.heads.addAll(heads);
  81. this.listForLocalUseage = new ArrayList<>(heads.size());
  82. }
  83. /**
  84. * Constructor for FileNameMatcher
  85. *
  86. * @param patternString
  87. * must contain a pattern which fnmatch would accept.
  88. * @param invalidWildgetCharacter
  89. * if this parameter isn't null then this character will not
  90. * match at wildcards(* and ? are wildcards).
  91. * @throws org.eclipse.jgit.errors.InvalidPatternException
  92. * if the patternString contains a invalid fnmatch pattern.
  93. */
  94. public FileNameMatcher(final String patternString,
  95. final Character invalidWildgetCharacter)
  96. throws InvalidPatternException {
  97. this(createHeadsStartValues(patternString, invalidWildgetCharacter));
  98. }
  99. /**
  100. * A Copy Constructor which creates a new
  101. * {@link org.eclipse.jgit.fnmatch.FileNameMatcher} with the same state and
  102. * reset point like <code>other</code>.
  103. *
  104. * @param other
  105. * another {@link org.eclipse.jgit.fnmatch.FileNameMatcher}
  106. * instance.
  107. */
  108. public FileNameMatcher(FileNameMatcher other) {
  109. this(other.headsStartValue, other.heads);
  110. }
  111. private static List<Head> createHeadsStartValues(
  112. final String patternString, final Character invalidWildgetCharacter)
  113. throws InvalidPatternException {
  114. final List<AbstractHead> allHeads = parseHeads(patternString,
  115. invalidWildgetCharacter);
  116. List<Head> nextHeadsSuggestion = new ArrayList<>(2);
  117. nextHeadsSuggestion.add(LastHead.INSTANCE);
  118. for (int i = allHeads.size() - 1; i >= 0; i--) {
  119. final AbstractHead head = allHeads.get(i);
  120. // explanation:
  121. // a and * of the pattern "a*b"
  122. // need *b as newHeads
  123. // that's why * extends the list for it self and it's left neighbor.
  124. if (head.isStar()) {
  125. nextHeadsSuggestion.add(head);
  126. head.setNewHeads(nextHeadsSuggestion);
  127. } else {
  128. head.setNewHeads(nextHeadsSuggestion);
  129. nextHeadsSuggestion = new ArrayList<>(2);
  130. nextHeadsSuggestion.add(head);
  131. }
  132. }
  133. return nextHeadsSuggestion;
  134. }
  135. private static int findGroupEnd(final int indexOfStartBracket,
  136. final String pattern) throws InvalidPatternException {
  137. int firstValidCharClassIndex = indexOfStartBracket + 1;
  138. int firstValidEndBracketIndex = indexOfStartBracket + 2;
  139. if (indexOfStartBracket + 1 >= pattern.length())
  140. throw new NoClosingBracketException(indexOfStartBracket, "[", "]", //$NON-NLS-1$ //$NON-NLS-2$
  141. pattern);
  142. if (pattern.charAt(firstValidCharClassIndex) == '!') {
  143. firstValidCharClassIndex++;
  144. firstValidEndBracketIndex++;
  145. }
  146. final Matcher charClassStartMatcher = characterClassStartPattern
  147. .matcher(pattern);
  148. int groupEnd = -1;
  149. while (groupEnd == -1) {
  150. final int possibleGroupEnd = indexOfUnescaped(pattern, ']',
  151. firstValidEndBracketIndex);
  152. if (possibleGroupEnd == -1)
  153. throw new NoClosingBracketException(indexOfStartBracket, "[", //$NON-NLS-1$
  154. "]", pattern); //$NON-NLS-1$
  155. final boolean foundCharClass = charClassStartMatcher
  156. .find(firstValidCharClassIndex);
  157. if (foundCharClass
  158. && charClassStartMatcher.start() < possibleGroupEnd) {
  159. final String classStart = charClassStartMatcher.group(0);
  160. final String classEnd = classStart.charAt(1) + "]"; //$NON-NLS-1$
  161. final int classStartIndex = charClassStartMatcher.start();
  162. final int classEndIndex = pattern.indexOf(classEnd,
  163. classStartIndex + 2);
  164. if (classEndIndex == -1)
  165. throw new NoClosingBracketException(classStartIndex,
  166. classStart, classEnd, pattern);
  167. firstValidCharClassIndex = classEndIndex + 2;
  168. firstValidEndBracketIndex = firstValidCharClassIndex;
  169. } else {
  170. groupEnd = possibleGroupEnd;
  171. }
  172. }
  173. return groupEnd;
  174. }
  175. private static List<AbstractHead> parseHeads(final String pattern,
  176. final Character invalidWildgetCharacter)
  177. throws InvalidPatternException {
  178. int currentIndex = 0;
  179. List<AbstractHead> heads = new ArrayList<>();
  180. while (currentIndex < pattern.length()) {
  181. final int groupStart = indexOfUnescaped(pattern, '[', currentIndex);
  182. if (groupStart == -1) {
  183. final String patternPart = pattern.substring(currentIndex);
  184. heads.addAll(createSimpleHeads(patternPart,
  185. invalidWildgetCharacter));
  186. currentIndex = pattern.length();
  187. } else {
  188. final String patternPart = pattern.substring(currentIndex,
  189. groupStart);
  190. heads.addAll(createSimpleHeads(patternPart,
  191. invalidWildgetCharacter));
  192. final int groupEnd = findGroupEnd(groupStart, pattern);
  193. final String groupPart = pattern.substring(groupStart + 1,
  194. groupEnd);
  195. heads.add(new GroupHead(groupPart, pattern));
  196. currentIndex = groupEnd + 1;
  197. }
  198. }
  199. return heads;
  200. }
  201. private static List<AbstractHead> createSimpleHeads(
  202. final String patternPart, final Character invalidWildgetCharacter) {
  203. final List<AbstractHead> heads = new ArrayList<>(
  204. patternPart.length());
  205. boolean escaped = false;
  206. for (int i = 0; i < patternPart.length(); i++) {
  207. final char c = patternPart.charAt(i);
  208. if (escaped) {
  209. final CharacterHead head = new CharacterHead(c);
  210. heads.add(head);
  211. escaped = false;
  212. } else {
  213. switch (c) {
  214. case '*': {
  215. final AbstractHead head = createWildCardHead(
  216. invalidWildgetCharacter, true);
  217. heads.add(head);
  218. break;
  219. }
  220. case '?': {
  221. final AbstractHead head = createWildCardHead(
  222. invalidWildgetCharacter, false);
  223. heads.add(head);
  224. break;
  225. }
  226. case '\\':
  227. escaped = true;
  228. break;
  229. default:
  230. final CharacterHead head = new CharacterHead(c);
  231. heads.add(head);
  232. }
  233. }
  234. }
  235. return heads;
  236. }
  237. private static AbstractHead createWildCardHead(
  238. final Character invalidWildgetCharacter, final boolean star) {
  239. if (invalidWildgetCharacter != null) {
  240. return new RestrictedWildCardHead(invalidWildgetCharacter
  241. .charValue(), star);
  242. }
  243. return new WildCardHead(star);
  244. }
  245. /**
  246. * @param c new character to append
  247. * @return true to continue, false if the matcher can stop appending
  248. */
  249. private boolean extendStringToMatchByOneCharacter(char c) {
  250. final List<Head> newHeads = listForLocalUseage;
  251. newHeads.clear();
  252. List<Head> lastAddedHeads = null;
  253. for (int i = 0; i < heads.size(); i++) {
  254. final Head head = heads.get(i);
  255. final List<Head> headsToAdd = head.getNextHeads(c);
  256. // Why the next performance optimization isn't wrong:
  257. // Some times two heads return the very same list.
  258. // We save future effort if we don't add these heads again.
  259. // This is the case with the heads "a" and "*" of "a*b" which
  260. // both can return the list ["*","b"]
  261. if (headsToAdd != lastAddedHeads) {
  262. if (!headsToAdd.isEmpty())
  263. newHeads.addAll(headsToAdd);
  264. lastAddedHeads = headsToAdd;
  265. }
  266. }
  267. listForLocalUseage = heads;
  268. heads = newHeads;
  269. return !newHeads.isEmpty();
  270. }
  271. private static int indexOfUnescaped(final String searchString,
  272. final char ch, final int fromIndex) {
  273. for (int i = fromIndex; i < searchString.length(); i++) {
  274. char current = searchString.charAt(i);
  275. if (current == ch)
  276. return i;
  277. if (current == '\\')
  278. i++; // Skip the next char as it is escaped }
  279. }
  280. return -1;
  281. }
  282. /**
  283. * Append to the string which is matched against the patterns of this class
  284. *
  285. * @param stringToMatch
  286. * extends the string which is matched against the patterns of
  287. * this class.
  288. */
  289. public void append(String stringToMatch) {
  290. for (int i = 0; i < stringToMatch.length(); i++) {
  291. final char c = stringToMatch.charAt(i);
  292. if (!extendStringToMatchByOneCharacter(c))
  293. break;
  294. }
  295. }
  296. /**
  297. * Resets this matcher to it's state right after construction.
  298. */
  299. public void reset() {
  300. heads.clear();
  301. heads.addAll(headsStartValue);
  302. }
  303. /**
  304. * Create a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
  305. * uses the same pattern like this matcher, but has the current state of
  306. * this matcher as reset and start point
  307. *
  308. * @return a {@link org.eclipse.jgit.fnmatch.FileNameMatcher} instance which
  309. * uses the same pattern like this matcher, but has the current
  310. * state of this matcher as reset and start point.
  311. */
  312. public FileNameMatcher createMatcherForSuffix() {
  313. final List<Head> copyOfHeads = new ArrayList<>(heads.size());
  314. copyOfHeads.addAll(heads);
  315. return new FileNameMatcher(copyOfHeads);
  316. }
  317. /**
  318. * Whether the matcher matches
  319. *
  320. * @return whether the matcher matches
  321. */
  322. public boolean isMatch() {
  323. if (heads.isEmpty())
  324. return false;
  325. final ListIterator<Head> headIterator = heads
  326. .listIterator(heads.size());
  327. while (headIterator.hasPrevious()) {
  328. final Head head = headIterator.previous();
  329. if (head == LastHead.INSTANCE) {
  330. return true;
  331. }
  332. }
  333. return false;
  334. }
  335. /**
  336. * Whether a match can be appended
  337. *
  338. * @return a boolean.
  339. */
  340. public boolean canAppendMatch() {
  341. for (Head head : heads) {
  342. if (head != LastHead.INSTANCE) {
  343. return true;
  344. }
  345. }
  346. return false;
  347. }
  348. }