From 58f71c9c265323a5c341e365816521a7119eae07 Mon Sep 17 00:00:00 2001
From: Evgeny Mandrikov
Date: Thu, 20 Oct 2011 00:40:01 +0400
Subject: [PATCH] SONAR-2762 Fully refactor construction of regular expression
in WildcardPattern
As a result, regular expression operators are now escaped correctly.
---
.../org/sonar/api/utils/WildcardPattern.java | 158 +++++++++++++-----
.../sonar/api/utils/WildcardPatternTest.java | 60 ++++++-
2 files changed, 177 insertions(+), 41 deletions(-)
diff --git a/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java b/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java
index 0403d82aa3b..7649045ba78 100644
--- a/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java
+++ b/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java
@@ -19,18 +19,49 @@
*/
package org.sonar.api.utils;
-import org.apache.commons.lang.StringUtils;
-
import java.util.HashMap;
import java.util.Map;
import java.util.regex.Pattern;
+import org.apache.commons.lang.StringUtils;
+
/**
+ * Implementation of Ant-style matching patterns.
+ * Contrary to other implementations (like AntPathMatcher from Spring Framework) it is based on {@link Pattern Java Regular Expressions}.
+ * To increase performance it holds an internal cache of all processed patterns.
+ *
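+ * The following rules are applied:
+ * <ul>
+ * <li>? matches a single character (other than the directory separator)</li>
+ * <li>* matches zero or more characters (other than the directory separator)</li>
+ * <li>** matches zero or more 'directories'</li>
+ * </ul>
+ *
+ *
+ * Some examples of patterns:
+ * <ul>
+ * <li><code>org/T?st.java</code> - matches <code>org/Test.java</code> and also <code>org/Tost.java</code></li>
+ * <li><code>org/*.java</code> - matches all <code>.java</code> files in the <code>org</code> directory,
+ * i.e. <code>org/Foo.java</code> or <code>org/Bar.java</code></li>
+ * <li><code>org/**</code> - matches everything underneath the <code>org</code> directory,
+ * i.e. <code>org/Foo.java</code> or <code>org/foo/bar.jsp</code></li>
+ * <li><code>org/&#42;&#42;/Test.java</code> - matches all <code>Test.java</code> files underneath the <code>org</code> directory,
+ * i.e. <code>org/Test.java</code> or <code>org/foo/Test.java</code> or <code>org/foo/bar/Test.java</code></li>
+ * <li><code>org/&#42;&#42;/*.java</code> - matches all <code>.java</code> files underneath the <code>org</code> directory,
+ * i.e. <code>org/Foo.java</code> or <code>org/foo/Bar.java</code> or <code>org/foo/bar/Baz.java</code></li>
+ * </ul>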
+ *
+ *
+ * Another implementation, which is also based on Java Regular Expressions, can be found in
+ * FileUtil
+ * from IntelliJ OpenAPI.
+ *
+ *
* @since 1.10
*/
public class WildcardPattern {
- private static final Map patterns = new HashMap();
+ private static final Map CACHE = new HashMap();
private Pattern pattern;
private String stringRepresentation;
@@ -40,50 +71,63 @@ public class WildcardPattern {
this.pattern = Pattern.compile(toRegexp(pattern, directorySeparator));
}
- public boolean match(String value) {
- return pattern.matcher(removeSlahesToIgnore(value)).matches();
- }
-
- private String toRegexp(String wildcardPattern, String directorySeparator) {
- String patternStr = removeSlahesToIgnore(wildcardPattern);
- patternStr = StringUtils.replace(patternStr, "**/**", "**");
- patternStr = StringUtils.replace(patternStr, "**/", "(&/|)");
- patternStr = StringUtils.replace(patternStr, "/**", "/&");
- patternStr = StringUtils.replace(patternStr, "**", "&");
- StringBuilder sb = new StringBuilder();
-
- for (char c : patternStr.toCharArray()) {
- switch (c) {
- case '&':
- sb.append(".*");
- break;
- case '*':
- sb.append("[^\\").append(directorySeparator).append("]*");
- break;
- case '?':
- sb.append("[^\\").append(directorySeparator).append("]");
- break;
- case '.':
- sb.append("\\.");
- break;
- case '/':
- sb.append("\\").append(directorySeparator);
- break;
- default:
- sb.append(c);
+ private static String toRegexp(String antPattern, String directorySeparator) { // translates an Ant-style pattern into an anchored regular expression
+ final String escapedDirectorySeparator = '\\' + directorySeparator; // separator prefixed with a backslash for use inside the regexp
+
+ final StringBuilder sb = new StringBuilder(antPattern.length()); // pattern length is only an initial capacity hint
+
+ sb.append('^'); // anchor at the start of the input
+
+ int i = antPattern.startsWith("/") || antPattern.startsWith("\\") ? 1 : 0; // skip one leading slash or backslash of the pattern
+ while (i < antPattern.length()) {
+ final char ch = antPattern.charAt(i);
+
+ if (ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '^' || ch == '$' || ch == '.' || ch == '{' || ch == '}' || ch == '+' || ch == '|') {
+ // Regexp metacharacter: prefix it with a backslash so that it is matched literally
+ sb.append('\\').append(ch);
+ } else if (ch == '*') {
+ if (i + 1 < antPattern.length() && antPattern.charAt(i + 1) == '*') {
+ // Double asterisk:
+ // zero or more directories
+ if (i + 2 < antPattern.length() && isSlash(antPattern.charAt(i + 2))) {
+ sb.append("(?:.*").append(escapedDirectorySeparator).append("|)"); // either "anything followed by a separator" or nothing at all
+ i += 2; // also consume the second '*' and the slash; the first '*' is consumed by i++ below
+ } else {
+ sb.append(".*"); // "**" not followed by a slash matches anything, separators included
+ i += 1; // also consume the second '*'
+ }
+ } else {
+ // Single asterisk:
+ // zero or more characters excluding the directory separator (reluctant quantifier)
+ sb.append("[^").append(escapedDirectorySeparator).append("]*?");
+ }
+ } else if (ch == '?') {
+ // Exactly one character, which must not be the directory separator
+ sb.append("[^").append(escapedDirectorySeparator).append("]");
+ } else if (isSlash(ch)) {
+ // Slash or backslash in the pattern: both are emitted as the configured directory separator
+ sb.append(escapedDirectorySeparator);
+ } else {
+ // Any other character is copied to the regexp unchanged
+ sb.append(ch);
}
+
+ i++;
}
+ sb.append('$'); // anchor at the end of the input
+
return sb.toString();
}
- private String removeSlahesToIgnore(String wildcardPattern) {
- String patternStr = StringUtils.removeStart(wildcardPattern, "/");
- return StringUtils.removeEnd(patternStr, "/");
+ private static boolean isSlash(char ch) { // true for a forward slash as well as for a backslash
+ return ch == '/' || ch == '\\';
}
/**
- * This method is overridden since version 2.5-RC2.
+ * Returns string representation of this pattern.
+ *
+ * @since 2.5
*/
@Override
public String toString() {
@@ -91,6 +135,17 @@ public class WildcardPattern {
}
/**
+ * Returns true if specified value matches this pattern.
+ */
+ public boolean match(String value) {
+ value = StringUtils.removeStart(value, "/"); // a leading "/" is ignored; NOTE(review): toRegexp also skips a leading backslash in the pattern, the value does not get that treatment
+ value = StringUtils.removeEnd(value, "/"); // a trailing "/" is ignored; NOTE(review): toRegexp keeps a trailing slash of the pattern - confirm patterns ending in "/" are not expected to match
+ return pattern.matcher(value).matches(); // the whole remaining value has to match, not just a part of it
+ }
+
+ /**
+ * Returns true if specified value matches one of specified patterns.
+ *
* @since 2.4
*/
public static boolean match(WildcardPattern[] patterns, String value) {
@@ -102,10 +157,20 @@ public class WildcardPattern {
return false;
}
+ /**
+ * Creates pattern with "/" as a directory separator.
+ *
+ * @see #create(String, String)
+ */
public static WildcardPattern create(String pattern) {
return create(pattern, "/");
}
+ /**
+ * Creates array of patterns with "/" as a directory separator.
+ *
+ * @see #create(String, String)
+ */
public static WildcardPattern[] create(String[] patterns) {
if (patterns == null) {
return new WildcardPattern[0];
@@ -117,12 +182,25 @@ public class WildcardPattern {
return exclusionPAtterns;
}
+ /**
+ * Creates pattern with specified separator for directories.
+ *
+ * This is used to match Java classes, i.e. <code>org.foo.Bar</code> against <code>org/**</code>.
+ * However usage of character other than "/" as a directory separator is misleading and should be avoided,
+ * so method {@link #create(String)} is preferred over this one.
+ *
+ *
+ * Also note that no matter whether forward or backward slashes were used in the <code>antPattern</code>,
+ * the returned pattern will use <code>directorySeparator</code>.
+ * Thus, to match the Windows-style path "dir\file.ext" against the pattern "dir/file.ext", the path has to be normalized first.
+ *
+ */
public static WildcardPattern create(String pattern, String directorySeparator) {
String key = pattern + directorySeparator;
- WildcardPattern wildcardPattern = patterns.get(key);
+ WildcardPattern wildcardPattern = CACHE.get(key); // reuse an already compiled pattern when one is cached under this key
if (wildcardPattern == null) {
wildcardPattern = new WildcardPattern(pattern, directorySeparator);
- patterns.put(key, wildcardPattern);
+ CACHE.put(key, wildcardPattern); // NOTE(review): CACHE is a plain HashMap - confirm create() is never called concurrently
}
return wildcardPattern;
}
diff --git a/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java b/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java
index 0a5a7aeaff7..2cde1e5964b 100644
--- a/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java
+++ b/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java
@@ -36,6 +36,26 @@ public class WildcardPatternTest {
return new WildcardPattern(pattern, "/").match(value);
}
+ @Test
+ public void examples() {
+ assertTrue(match("org/T?st.java", "org/Test.java")); // '?' stands for exactly one character
+ assertTrue(match("org/T?st.java", "org/Tost.java"));
+
+ assertTrue(match("org/*.java", "org/Foo.java")); // '*' matches a file name inside the org directory
+ assertTrue(match("org/*.java", "org/Bar.java"));
+
+ assertTrue(match("org/**", "org/Foo.java")); // trailing '**' matches everything underneath org
+ assertTrue(match("org/**", "org/foo/bar.jsp"));
+
+ assertTrue(match("org/**/Test.java", "org/Test.java")); // '**/' may stand for zero directories ...
+ assertTrue(match("org/**/Test.java", "org/foo/Test.java")); // ... for one directory ...
+ assertTrue(match("org/**/Test.java", "org/foo/bar/Test.java")); // ... or for several directories
+
+ assertTrue(match("org/**/*.java", "org/Foo.java")); // '**/' combined with '*' for the file name
+ assertTrue(match("org/**/*.java", "org/foo/Bar.java"));
+ assertTrue(match("org/**/*.java", "org/foo/bar/Baz.java"));
+ }
+
@Test
public void javaResourcesShouldMatchWildcards() {
assertTrue(match("Foo", "Foo", "."));
@@ -114,12 +134,50 @@ public class WildcardPatternTest {
assertFalse(match("**/app/**", "com/application/MyService"));
}
+ /**
+ * Regexp operators in a pattern must be matched literally, see SONAR-2762.
+ */
+ @Test
+ public void shouldEscapeRegexpSpecificCharacters() {
+ assertFalse(match("**/*$*", "foo/bar")); // '$' must not act as an end-of-input anchor
+ assertTrue(match("**/*$*", "foo/bar$baz"));
+
+ assertFalse(match("a+", "aa")); // '+' must not act as a quantifier
+ assertTrue(match("a+", "a+"));
+
+ assertFalse(match("[ab]", "a")); // '[' and ']' must not form a character class
+ assertTrue(match("[ab]", "[ab]"));
+ }
+
+ @Test
+ public void backslash() {
+ assertFalse("backslash is not an escape character", match("\\n", "\n")); // pattern is a backslash followed by 'n', value is a newline character
+ assertTrue("backslash is the same as forward slash", match("foo\\bar", "foo/bar")); // pattern is foo, a backslash, then bar
+ }
+
+ @Test
+ public void shouldIgnoreStartingSlash() {
+ assertTrue(match("/foo", "foo")); // leading forward slash of the pattern is skipped
+ assertTrue(match("\\foo", "foo")); // leading backslash of the pattern is skipped as well
+ }
+
+ /**
+ * Godin: in my opinion this is an invalid pattern, however it might be constructed by {@link org.sonar.api.resources.JavaFile#matchFilePattern(String)},
+ * so it has to stay supported, at least for now, for backward compatibility.
+ */
+ @Test
+ public void cornerCase() {
+ assertTrue(match("org/**.*", "org.sonar.commons.Foo.java", ".")); // presumably the third argument is the directory separator - helper is not visible here, confirm
+ }
+
@Test
public void multiplePatterns() {
- WildcardPattern[] patterns = new WildcardPattern[] { WildcardPattern.create("Foo"), WildcardPattern.create("Bar") };
+ WildcardPattern[] patterns = WildcardPattern.create(new String[] { "Foo", "Bar" }); // goes through the array factory instead of creating each pattern separately
assertTrue(WildcardPattern.match(patterns, "Foo"));
assertTrue(WildcardPattern.match(patterns, "Bar"));
assertFalse(WildcardPattern.match(patterns, "Other"));
+
+ assertThat(WildcardPattern.create((String[]) null).length, is(0)); // a null array yields an empty array
}
@Test
--
2.39.5