From 2c940fa18a457c37e9aa7a8065e8d650c83589e2 Mon Sep 17 00:00:00 2001 From: Andrey Loskutov Date: Fri, 7 Nov 2014 16:30:44 +0100 Subject: [PATCH] Don't use java.util.regex for two simple wildcard cases To improve ignore parser performance we can avoid using java.util.regex code on simple wildcard patterns with leading or trailing asterisk. As those patterns represent a majority of ignore rules, the index diff performance can be drastically increased on huge repository with lot of ignore rules. Bug: 450466 Change-Id: I80428441cc8d5de5468813f841d89322413eed8b Signed-off-by: Andrey Loskutov Signed-off-by: Matthias Sohn --- .../jgit/ignore/FastIgnoreRuleTest.java | 14 ++-- .../ignore/IgnoreMatcherParametrizedTest.java | 6 +- .../ignore/IgnoreRuleSpecialCasesTest.java | 27 +++--- .../internal/LeadingAsteriskMatcher.java | 83 +++++++++++++++++++ .../jgit/ignore/internal/PathMatcher.java | 15 +++- .../eclipse/jgit/ignore/internal/Strings.java | 37 ++++++++- .../internal/TrailingAsteriskMatcher.java | 82 ++++++++++++++++++ 7 files changed, 237 insertions(+), 27 deletions(-) create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java create mode 100644 org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java index 656ba446d5..007bdcc5fc 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java @@ -62,14 +62,14 @@ import org.junit.runners.Parameterized.Parameters; @RunWith(Parameterized.class) public class FastIgnoreRuleTest { - @Parameters(name = "JGit? {0}") + @Parameters(name = "OldRule? {0}") public static Iterable data() { return Arrays.asList(new Boolean[][] { { Boolean.FALSE }, { Boolean.TRUE } }); } @Parameter - public Boolean useJGitRule; + public Boolean useOldRule; @Test public void testSimpleCharClass() { @@ -388,11 +388,11 @@ public class FastIgnoreRuleTest { @SuppressWarnings("boxing") @Test public void testWildmatch() { - if (useJGitRule) + if (useOldRule) System.err .println("IgnoreRule can't understand wildmatch rules, skipping testWildmatch!"); - Boolean assume = useJGitRule; + Boolean assume = useOldRule; assertMatched("**/a/b", "a/b", assume); assertMatched("**/a/b", "c/a/b", assume); assertMatched("**/a/b", "c/d/a/b", assume); @@ -428,11 +428,11 @@ public class FastIgnoreRuleTest { @SuppressWarnings("boxing") @Test public void testWildmatchDoNotMatch() { - if (useJGitRule) + if (useOldRule) System.err .println("IgnoreRule can't understand wildmatch rules, skipping testWildmatchDoNotMatch!"); - Boolean assume = useJGitRule; + Boolean assume = useOldRule; assertNotMatched("**/a/b", "a/c/b", assume); assertNotMatched("!/**/*.zip", "c/a/b.zip", assume); assertNotMatched("!**/*.zip", "c/a/b.zip", assume); @@ -542,7 +542,7 @@ public class FastIgnoreRuleTest { */ private boolean match(String pattern, String target) { boolean isDirectory = target.endsWith("/"); - if (useJGitRule.booleanValue()) { + if (useOldRule.booleanValue()) { IgnoreRule r = new IgnoreRule(pattern); // If speed of this test is ever an issue, we can use a presetRule // field diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java index 48649d6aee..cbfe6f2790 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java @@ -60,14 +60,14 @@ import org.junit.runners.Parameterized.Parameters; @RunWith(Parameterized.class) public class IgnoreMatcherParametrizedTest { - @Parameters(name = "JGit? {0}") + @Parameters(name = "OldRule? {0}") public static Iterable data() { return Arrays.asList(new Boolean[][] { { Boolean.FALSE }, { Boolean.TRUE } }); } @Parameter - public Boolean useJGitRule; + public Boolean useOldRule; @Test public void testBasic() { @@ -355,7 +355,7 @@ public class IgnoreMatcherParametrizedTest { */ private boolean match(String pattern, String target) { boolean isDirectory = target.endsWith("/"); - if (useJGitRule.booleanValue()) { + if (useOldRule.booleanValue()) { IgnoreRule r = new IgnoreRule(pattern); // If speed of this test is ever an issue, we can use a presetRule // field diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java index 109f28dabc..7afa69f441 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java @@ -58,19 +58,19 @@ import org.junit.runners.Parameterized.Parameters; @SuppressWarnings({ "deprecation", "boxing" }) public class IgnoreRuleSpecialCasesTest { - @Parameters(name = "JGit? {0}") + @Parameters(name = "OldRule? {0}") public static Iterable data() { return Arrays.asList(new Boolean[][] { { Boolean.FALSE }, { Boolean.TRUE } }); } @Parameter - public Boolean useJGitRule; + public Boolean useOldRule; private void assertMatch(final String pattern, final String input, final boolean matchExpected, Boolean... assume) { boolean assumeDir = input.endsWith("/"); - if (useJGitRule.booleanValue()) { + if (useOldRule.booleanValue()) { final IgnoreRule matcher = new IgnoreRule(pattern); if (assume.length == 0 || !assume[0].booleanValue()) assertEquals(matchExpected, matcher.isMatch(input, assumeDir)); @@ -88,7 +88,7 @@ public class IgnoreRuleSpecialCasesTest { private void assertFileNameMatch(final String pattern, final String input, final boolean matchExpected) { boolean assumeDir = input.endsWith("/"); - if (useJGitRule.booleanValue()) { + if (useOldRule.booleanValue()) { final IgnoreRule matcher = new IgnoreRule(pattern); assertEquals(matchExpected, matcher.isMatch(input, assumeDir)); } else { @@ -99,10 +99,10 @@ public class IgnoreRuleSpecialCasesTest { @Test public void testVerySimplePatternCase0() throws Exception { - if (useJGitRule) + if (useOldRule) System.err .println("IgnoreRule can't understand blank lines, skipping"); - Boolean assume = useJGitRule; + Boolean assume = useOldRule; assertMatch("", "", false, assume); } @@ -800,9 +800,14 @@ public class IgnoreRuleSpecialCasesTest { @Test public void testSpecialGroupCase9() throws Exception { - if (useJGitRule) + assertMatch("][", "][", true); + } + + @Test + public void testSpecialGroupCase10() throws Exception { + if (useOldRule) System.err.println("IgnoreRule can't understand [[:], skipping"); - Boolean assume = useJGitRule; + Boolean assume = useOldRule; // Second bracket is threated literally, so both [ and : should match assertMatch("[[:]", ":", true, assume); assertMatch("[[:]", "[", true, assume); @@ -861,10 +866,10 @@ public class IgnoreRuleSpecialCasesTest { @Test public void testEscapedBackslash() throws Exception { - if (useJGitRule) + if (useOldRule) System.err .println("IgnoreRule can't understand escaped backslashes, skipping"); - Boolean assume = useJGitRule; + Boolean assume = useOldRule; // In Git CLI a\\b matches a\b file assertMatch("a\\\\b", "a\\b", true, assume); } @@ -899,4 +904,4 @@ public class IgnoreRuleSpecialCasesTest { assertFileNameMatch("a?b", "a\\b", true); } -} \ No newline at end of file +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java new file mode 100644 index 0000000000..f1153d9c69 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java @@ -0,0 +1,83 @@ +/* + * Copyright (C) 2014, Andrey Loskutov + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.ignore.internal; + +/** + * Matcher for simple regex patterns starting with an asterisk, e.g. "*.tmp" + * + * @since 3.6 + */ +public class LeadingAsteriskMatcher extends NameMatcher { + + LeadingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) { + super(pattern, pathSeparator, dirOnly); + + if (subPattern.charAt(0) != '*') + throw new IllegalArgumentException( + "Pattern must have leading asterisk: " + pattern); //$NON-NLS-1$ + } + + public boolean matches(String segment, int startIncl, int endExcl, + boolean assumeDirectory) { + // faster local access, same as in string.indexOf() + String s = subPattern; + + // we don't need to count '*' character itself + int subLength = s.length() - 1; + // simple /*/ pattern + if (subLength == 0) + return true; + + if (subLength > (endExcl - startIncl)) + return false; + + for (int i = subLength, j = endExcl - 1; i > 0; i--, j--) { + char c1 = s.charAt(i); + char c2 = segment.charAt(j); + if (c1 != c2) + return false; + } + return true; + } + +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java index 830aab1cff..dcecf303c4 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java @@ -42,6 +42,7 @@ */ package org.eclipse.jgit.ignore.internal; +import static org.eclipse.jgit.ignore.internal.Strings.checkWildCards; import static org.eclipse.jgit.ignore.internal.Strings.count; import static org.eclipse.jgit.ignore.internal.Strings.getPathSeparator; import static org.eclipse.jgit.ignore.internal.Strings.isWildCard; @@ -52,6 +53,7 @@ import java.util.List; import org.eclipse.jgit.errors.InvalidPatternException; import org.eclipse.jgit.ignore.FastIgnoreRule; +import org.eclipse.jgit.ignore.internal.Strings.PatternState; /** * Matcher built by patterns consists of multiple path segments. @@ -132,9 +134,18 @@ public class PathMatcher extends AbstractMatcher { if (WildMatcher.WILDMATCH.equals(segment) || WildMatcher.WILDMATCH2.equals(segment)) return WILD; - if (isWildCard(segment)) + + PatternState state = checkWildCards(segment); + switch (state) { + case LEADING_ASTERISK_ONLY: + return new LeadingAsteriskMatcher(segment, pathSeparator, dirOnly); + case TRAILING_ASTERISK_ONLY: + return new TrailingAsteriskMatcher(segment, pathSeparator, dirOnly); + case COMPLEX: return new WildCardMatcher(segment, pathSeparator, dirOnly); - return new NameMatcher(segment, pathSeparator, dirOnly); + default: + return new NameMatcher(segment, pathSeparator, dirOnly); + } } public boolean matches(String path, boolean assumeDirectory) { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java index c694a14566..cd4d7536d9 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java @@ -132,10 +132,39 @@ public class Strings { } static boolean isWildCard(String pattern) { - return pattern.indexOf('*') != -1 || pattern.indexOf('?') != -1 - || pattern.indexOf('[') != -1 - // required to match escaped backslashes '\\\\' - || pattern.indexOf('\\') != -1 || pattern.indexOf(']') != -1; + return pattern.indexOf('*') != -1 || isComplexWildcard(pattern); + } + + private static boolean isComplexWildcard(String pattern) { + int idx1 = pattern.indexOf('['); + if (idx1 != -1) { + int idx2 = pattern.indexOf(']'); + if (idx2 > idx1) + return true; + } + // required to match escaped backslashes '\\\\' + if (pattern.indexOf('?') != -1 || pattern.indexOf('\\') != -1) + return true; + return false; + } + + static PatternState checkWildCards(String pattern) { + if (isComplexWildcard(pattern)) + return PatternState.COMPLEX; + int startIdx = pattern.indexOf('*'); + if (startIdx < 0) + return PatternState.NONE; + + if (startIdx == pattern.length() - 1) + return PatternState.TRAILING_ASTERISK_ONLY; + if (pattern.lastIndexOf('*') == 0) + return PatternState.LEADING_ASTERISK_ONLY; + + return PatternState.COMPLEX; + } + + static enum PatternState { + LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE } final static List POSIX_CHAR_CLASSES = Arrays.asList( diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java new file mode 100644 index 0000000000..4a1c780d99 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java @@ -0,0 +1,82 @@ +/* + * Copyright (C) 2014, Andrey Loskutov + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.ignore.internal; + +/** + * Matcher for simple patterns ending with an asterisk, e.g. "Makefile.*" + * + * @since 3.6 + */ +public class TrailingAsteriskMatcher extends NameMatcher { + + TrailingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) { + super(pattern, pathSeparator, dirOnly); + + if (subPattern.charAt(subPattern.length() - 1) != '*') + throw new IllegalArgumentException( + "Pattern must have trailing asterisk: " + pattern); //$NON-NLS-1$ + } + + public boolean matches(String segment, int startIncl, int endExcl, + boolean assumeDirectory) { + // faster local access, same as in string.indexOf() + String s = subPattern; + // we don't need to count '*' character itself + int subLenth = s.length() - 1; + // simple /*/ pattern + if (subLenth == 0) + return true; + + if (subLenth > (endExcl - startIncl)) + return false; + + for (int i = 0; i < subLenth; i++) { + char c1 = s.charAt(i); + char c2 = segment.charAt(i + startIncl); + if (c1 != c2) + return false; + } + return true; + } + +} -- 2.39.5