From 58f71c9c265323a5c341e365816521a7119eae07 Mon Sep 17 00:00:00 2001 From: Evgeny Mandrikov Date: Thu, 20 Oct 2011 00:40:01 +0400 Subject: [PATCH] SONAR-2762 Fully refactor construction of regular expression in WildcardPattern Thus now it provides correct escaping for regular expression operators. --- .../org/sonar/api/utils/WildcardPattern.java | 158 +++++++++++++----- .../sonar/api/utils/WildcardPatternTest.java | 60 ++++++- 2 files changed, 177 insertions(+), 41 deletions(-) diff --git a/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java b/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java index 0403d82aa3b..7649045ba78 100644 --- a/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java +++ b/sonar-plugin-api/src/main/java/org/sonar/api/utils/WildcardPattern.java @@ -19,18 +19,49 @@ */ package org.sonar.api.utils; -import org.apache.commons.lang.StringUtils; - import java.util.HashMap; import java.util.Map; import java.util.regex.Pattern; +import org.apache.commons.lang.StringUtils; + /** + * Implementation of Ant-style matching patterns. + * Contrary to other implementations (like AntPathMatcher from Spring Framework) it is based on {@link Pattern Java Regular Expressions}. + * To increase performance it holds an internal cache of all processed patterns. + *

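+ * Following rules are applied:
+ * <ul>
+ * <li>? matches a single character, excluding the directory separator</li>
+ * <li>* matches zero or more characters, excluding the directory separator</li>
+ * <li>** matches zero or more directories</li>
+ * </ul>
+ *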

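+ * Some examples of patterns:
+ * <ul>
+ * <li>org/T?st.java - matches org/Test.java and also org/Tost.java</li>
+ * <li>org/*.java - matches org/Foo.java and org/Bar.java</li>
+ * <li>org/** - matches org/Foo.java and org/foo/bar.jsp</li>
+ * <li>org/**&#47;Test.java - matches org/Test.java, org/foo/Test.java and org/foo/bar/Test.java</li>
+ * <li>org/**&#47;*.java - matches org/Foo.java, org/foo/Bar.java and org/foo/bar/Baz.java</li>
+ * </ul>
+ *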

+ * Another implementation, which is also based on Java Regular Expressions, can be found in + * FileUtil + * from IntelliJ OpenAPI. + *

+ * * @since 1.10 */ public class WildcardPattern { - private static final Map patterns = new HashMap(); + private static final Map CACHE = new HashMap(); private Pattern pattern; private String stringRepresentation; @@ -40,50 +71,63 @@ public class WildcardPattern { this.pattern = Pattern.compile(toRegexp(pattern, directorySeparator)); } - public boolean match(String value) { - return pattern.matcher(removeSlahesToIgnore(value)).matches(); - } - - private String toRegexp(String wildcardPattern, String directorySeparator) { - String patternStr = removeSlahesToIgnore(wildcardPattern); - patternStr = StringUtils.replace(patternStr, "**/**", "**"); - patternStr = StringUtils.replace(patternStr, "**/", "(&/|)"); - patternStr = StringUtils.replace(patternStr, "/**", "/&"); - patternStr = StringUtils.replace(patternStr, "**", "&"); - StringBuilder sb = new StringBuilder(); - - for (char c : patternStr.toCharArray()) { - switch (c) { - case '&': - sb.append(".*"); - break; - case '*': - sb.append("[^\\").append(directorySeparator).append("]*"); - break; - case '?': - sb.append("[^\\").append(directorySeparator).append("]"); - break; - case '.': - sb.append("\\."); - break; - case '/': - sb.append("\\").append(directorySeparator); - break; - default: - sb.append(c); + private static String toRegexp(String antPattern, String directorySeparator) { + final String escapedDirectorySeparator = '\\' + directorySeparator; + + final StringBuilder sb = new StringBuilder(antPattern.length()); + + sb.append('^'); + + int i = antPattern.startsWith("/") || antPattern.startsWith("\\") ? 1 : 0; + while (i < antPattern.length()) { + final char ch = antPattern.charAt(i); + + if (ch == '(' || ch == ')' || ch == '[' || ch == ']' || ch == '^' || ch == '$' || ch == '.' 
|| ch == '{' || ch == '}' || ch == '+' || ch == '|') { + // Escape regexp-specific characters + sb.append('\\').append(ch); + } else if (ch == '*') { + if (i + 1 < antPattern.length() && antPattern.charAt(i + 1) == '*') { + // Double asterisk + // Zero or more directories + if (i + 2 < antPattern.length() && isSlash(antPattern.charAt(i + 2))) { + sb.append("(?:.*").append(escapedDirectorySeparator).append("|)"); + i += 2; + } else { + sb.append(".*"); + i += 1; + } + } else { + // Single asterisk + // Zero or more characters excluding directory separator + sb.append("[^").append(escapedDirectorySeparator).append("]*?"); + } + } else if (ch == '?') { + // Any single character excluding directory separator + sb.append("[^").append(escapedDirectorySeparator).append("]"); + } else if (isSlash(ch)) { + // Directory separator + sb.append(escapedDirectorySeparator); + } else { + // Single character + sb.append(ch); } + + i++; } + sb.append('$'); + return sb.toString(); } - private String removeSlahesToIgnore(String wildcardPattern) { - String patternStr = StringUtils.removeStart(wildcardPattern, "/"); - return StringUtils.removeEnd(patternStr, "/"); + private static boolean isSlash(char ch) { + return ch == '/' || ch == '\\'; } /** - * This method is overridden since version 2.5-RC2. + * Returns string representation of this pattern. + * + * @since 2.5 */ @Override public String toString() { @@ -91,6 +135,17 @@ public class WildcardPattern { } /** + * Returns true if specified value matches this pattern. + */ + public boolean match(String value) { + value = StringUtils.removeStart(value, "/"); + value = StringUtils.removeEnd(value, "/"); + return pattern.matcher(value).matches(); + } + + /** + * Returns true if specified value matches one of specified patterns. 
+ * * @since 2.4 */ public static boolean match(WildcardPattern[] patterns, String value) { @@ -102,10 +157,20 @@ public class WildcardPattern { return false; } + /** + * Creates pattern with "/" as a directory separator. + * + * @see #create(String, String) + */ public static WildcardPattern create(String pattern) { return create(pattern, "/"); } + /** + * Creates array of patterns with "/" as a directory separator. + * + * @see #create(String, String) + */ public static WildcardPattern[] create(String[] patterns) { if (patterns == null) { return new WildcardPattern[0]; @@ -117,12 +182,25 @@ public class WildcardPattern { return exclusionPAtterns; } + /** + * Creates pattern with specified separator for directories. + *

+ * This is used to match Java classes, e.g. org.foo.Bar against org/**. + * However, using a character other than "/" as a directory separator is misleading and should be avoided, + * so method {@link #create(String)} is preferred over this one. + *

+ *

+ * Also note that no matter whether forward or backward slashes were used in the antPattern, + * the returned pattern will use directorySeparator. + * Thus, to match the Windows-style path "dir\file.ext" against the pattern "dir/file.ext", the path should be normalized first. + *

+ */ public static WildcardPattern create(String pattern, String directorySeparator) { String key = pattern + directorySeparator; - WildcardPattern wildcardPattern = patterns.get(key); + WildcardPattern wildcardPattern = CACHE.get(key); if (wildcardPattern == null) { wildcardPattern = new WildcardPattern(pattern, directorySeparator); - patterns.put(key, wildcardPattern); + CACHE.put(key, wildcardPattern); } return wildcardPattern; } diff --git a/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java b/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java index 0a5a7aeaff7..2cde1e5964b 100644 --- a/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java +++ b/sonar-plugin-api/src/test/java/org/sonar/api/utils/WildcardPatternTest.java @@ -36,6 +36,26 @@ public class WildcardPatternTest { return new WildcardPattern(pattern, "/").match(value); } + @Test + public void examples() { + assertTrue(match("org/T?st.java", "org/Test.java")); + assertTrue(match("org/T?st.java", "org/Tost.java")); + + assertTrue(match("org/*.java", "org/Foo.java")); + assertTrue(match("org/*.java", "org/Bar.java")); + + assertTrue(match("org/**", "org/Foo.java")); + assertTrue(match("org/**", "org/foo/bar.jsp")); + + assertTrue(match("org/**/Test.java", "org/Test.java")); + assertTrue(match("org/**/Test.java", "org/foo/Test.java")); + assertTrue(match("org/**/Test.java", "org/foo/bar/Test.java")); + + assertTrue(match("org/**/*.java", "org/Foo.java")); + assertTrue(match("org/**/*.java", "org/foo/Bar.java")); + assertTrue(match("org/**/*.java", "org/foo/bar/Baz.java")); + } + @Test public void javaResourcesShouldMatchWildcards() { assertTrue(match("Foo", "Foo", ".")); @@ -114,12 +134,50 @@ public class WildcardPatternTest { assertFalse(match("**/app/**", "com/application/MyService")); } + /** + * See SONAR-2762 + */ + @Test + public void shouldEscapeRegexpSpecificCharacters() { + assertFalse(match("**/*$*", "foo/bar")); + 
assertTrue(match("**/*$*", "foo/bar$baz")); + + assertFalse(match("a+", "aa")); + assertTrue(match("a+", "a+")); + + assertFalse(match("[ab]", "a")); + assertTrue(match("[ab]", "[ab]")); + } + + @Test + public void backslash() { + assertFalse("backslash is not an escape character", match("\\n", "\n")); + assertTrue("backslash is the same as forward slash", match("foo\\bar", "foo/bar")); + } + + @Test + public void shouldIgnoreStartingSlash() { + assertTrue(match("/foo", "foo")); + assertTrue(match("\\foo", "foo")); + } + + /** + * Godin: in my opinion this is invalid pattern, however it might be constructed by {@link org.sonar.api.resources.JavaFile#matchFilePattern(String)}, + * so it should be supported at least for now for backward compatibility. + */ + @Test + public void cornerCase() { + assertTrue(match("org/**.*", "org.sonar.commons.Foo.java", ".")); + } + @Test public void multiplePatterns() { - WildcardPattern[] patterns = new WildcardPattern[] { WildcardPattern.create("Foo"), WildcardPattern.create("Bar") }; + WildcardPattern[] patterns = WildcardPattern.create(new String[] { "Foo", "Bar" }); assertTrue(WildcardPattern.match(patterns, "Foo")); assertTrue(WildcardPattern.match(patterns, "Bar")); assertFalse(WildcardPattern.match(patterns, "Other")); + + assertThat(WildcardPattern.create((String[]) null).length, is(0)); } @Test -- 2.39.5