]> source.dussan.org Git - jgit.git/commitdiff
Don't use java.util.regex for two simple wildcard cases 44/36144/3
authorAndrey Loskutov <loskutov@gmx.de>
Fri, 7 Nov 2014 15:30:44 +0000 (16:30 +0100)
committerMatthias Sohn <matthias.sohn@sap.com>
Tue, 18 Nov 2014 09:58:26 +0000 (10:58 +0100)
To improve ignore parser performance we can avoid using java.util.regex
code on simple wildcard patterns with leading or trailing asterisk. As
those patterns represent a majority of ignore rules, the index diff
performance can be drastically increased on huge repository with lot of
ignore rules.

Bug: 450466
Change-Id: I80428441cc8d5de5468813f841d89322413eed8b
Signed-off-by: Andrey Loskutov <loskutov@gmx.de>
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/FastIgnoreRuleTest.java
org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreMatcherParametrizedTest.java
org.eclipse.jgit.test/tst/org/eclipse/jgit/ignore/IgnoreRuleSpecialCasesTest.java
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java [new file with mode: 0644]
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/PathMatcher.java
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/Strings.java
org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java [new file with mode: 0644]

index 656ba446d56ec82860f7a6e7a098e48409eaf534..007bdcc5fcd9809ec45e32206e56afda85a6b0a2 100644 (file)
@@ -62,14 +62,14 @@ import org.junit.runners.Parameterized.Parameters;
 @RunWith(Parameterized.class)
 public class FastIgnoreRuleTest {
 
-       @Parameters(name = "JGit? {0}")
+       @Parameters(name = "OldRule? {0}")
        public static Iterable<Boolean[]> data() {
                return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
                                { Boolean.TRUE } });
        }
 
        @Parameter
-       public Boolean useJGitRule;
+       public Boolean useOldRule;
 
        @Test
        public void testSimpleCharClass() {
@@ -388,11 +388,11 @@ public class FastIgnoreRuleTest {
        @SuppressWarnings("boxing")
        @Test
        public void testWildmatch() {
-               if (useJGitRule)
+               if (useOldRule)
                        System.err
                                        .println("IgnoreRule can't understand wildmatch rules, skipping testWildmatch!");
 
-               Boolean assume = useJGitRule;
+               Boolean assume = useOldRule;
                assertMatched("**/a/b", "a/b", assume);
                assertMatched("**/a/b", "c/a/b", assume);
                assertMatched("**/a/b", "c/d/a/b", assume);
@@ -428,11 +428,11 @@ public class FastIgnoreRuleTest {
        @SuppressWarnings("boxing")
        @Test
        public void testWildmatchDoNotMatch() {
-               if (useJGitRule)
+               if (useOldRule)
                        System.err
                                        .println("IgnoreRule can't understand wildmatch rules, skipping testWildmatchDoNotMatch!");
 
-               Boolean assume = useJGitRule;
+               Boolean assume = useOldRule;
                assertNotMatched("**/a/b", "a/c/b", assume);
                assertNotMatched("!/**/*.zip", "c/a/b.zip", assume);
                assertNotMatched("!**/*.zip", "c/a/b.zip", assume);
@@ -542,7 +542,7 @@ public class FastIgnoreRuleTest {
         */
        private boolean match(String pattern, String target) {
                boolean isDirectory = target.endsWith("/");
-               if (useJGitRule.booleanValue()) {
+               if (useOldRule.booleanValue()) {
                        IgnoreRule r = new IgnoreRule(pattern);
                        // If speed of this test is ever an issue, we can use a presetRule
                        // field
index 48649d6aeea77ee661ec7f2ae6a2b447437200ec..cbfe6f2790402b5cda9441e466644b4b8a8e6f01 100644 (file)
@@ -60,14 +60,14 @@ import org.junit.runners.Parameterized.Parameters;
 @RunWith(Parameterized.class)
 public class IgnoreMatcherParametrizedTest {
 
-       @Parameters(name = "JGit? {0}")
+       @Parameters(name = "OldRule? {0}")
        public static Iterable<Boolean[]> data() {
                return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
                                { Boolean.TRUE } });
        }
 
        @Parameter
-       public Boolean useJGitRule;
+       public Boolean useOldRule;
 
        @Test
        public void testBasic() {
@@ -355,7 +355,7 @@ public class IgnoreMatcherParametrizedTest {
         */
        private boolean match(String pattern, String target) {
                boolean isDirectory = target.endsWith("/");
-               if (useJGitRule.booleanValue()) {
+               if (useOldRule.booleanValue()) {
                        IgnoreRule r = new IgnoreRule(pattern);
                        // If speed of this test is ever an issue, we can use a presetRule
                        // field
index 109f28dabca7704e1a32a78dae74669d021424e4..7afa69f441fa44cba696e909adea6890bc8b1438 100644 (file)
@@ -58,19 +58,19 @@ import org.junit.runners.Parameterized.Parameters;
 @SuppressWarnings({ "deprecation", "boxing" })
 public class IgnoreRuleSpecialCasesTest {
 
-       @Parameters(name = "JGit? {0}")
+       @Parameters(name = "OldRule? {0}")
        public static Iterable<Boolean[]> data() {
                return Arrays.asList(new Boolean[][] { { Boolean.FALSE },
                                { Boolean.TRUE } });
        }
 
        @Parameter
-       public Boolean useJGitRule;
+       public Boolean useOldRule;
 
        private void assertMatch(final String pattern, final String input,
                        final boolean matchExpected, Boolean... assume) {
                boolean assumeDir = input.endsWith("/");
-               if (useJGitRule.booleanValue()) {
+               if (useOldRule.booleanValue()) {
                        final IgnoreRule matcher = new IgnoreRule(pattern);
                        if (assume.length == 0 || !assume[0].booleanValue())
                                assertEquals(matchExpected, matcher.isMatch(input, assumeDir));
@@ -88,7 +88,7 @@ public class IgnoreRuleSpecialCasesTest {
        private void assertFileNameMatch(final String pattern, final String input,
                        final boolean matchExpected) {
                boolean assumeDir = input.endsWith("/");
-               if (useJGitRule.booleanValue()) {
+               if (useOldRule.booleanValue()) {
                        final IgnoreRule matcher = new IgnoreRule(pattern);
                        assertEquals(matchExpected, matcher.isMatch(input, assumeDir));
                } else {
@@ -99,10 +99,10 @@ public class IgnoreRuleSpecialCasesTest {
 
        @Test
        public void testVerySimplePatternCase0() throws Exception {
-               if (useJGitRule)
+               if (useOldRule)
                        System.err
                                        .println("IgnoreRule can't understand blank lines, skipping");
-               Boolean assume = useJGitRule;
+               Boolean assume = useOldRule;
                assertMatch("", "", false, assume);
        }
 
@@ -800,9 +800,14 @@ public class IgnoreRuleSpecialCasesTest {
 
        @Test
        public void testSpecialGroupCase9() throws Exception {
-               if (useJGitRule)
+               assertMatch("][", "][", true);
+       }
+
+       @Test
+       public void testSpecialGroupCase10() throws Exception {
+               if (useOldRule)
                        System.err.println("IgnoreRule can't understand [[:], skipping");
-               Boolean assume = useJGitRule;
+               Boolean assume = useOldRule;
                // Second bracket is threated literally, so both [ and : should match
                assertMatch("[[:]", ":", true, assume);
                assertMatch("[[:]", "[", true, assume);
@@ -861,10 +866,10 @@ public class IgnoreRuleSpecialCasesTest {
 
        @Test
        public void testEscapedBackslash() throws Exception {
-               if (useJGitRule)
+               if (useOldRule)
                        System.err
                                        .println("IgnoreRule can't understand escaped backslashes, skipping");
-               Boolean assume = useJGitRule;
+               Boolean assume = useOldRule;
                // In Git CLI a\\b matches a\b file
                assertMatch("a\\\\b", "a\\b", true, assume);
        }
@@ -899,4 +904,4 @@ public class IgnoreRuleSpecialCasesTest {
                assertFileNameMatch("a?b", "a\\b", true);
        }
 
-}
\ No newline at end of file
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/LeadingAsteriskMatcher.java
new file mode 100644 (file)
index 0000000..f1153d9
--- /dev/null
@@ -0,0 +1,83 @@
+/*
+ * Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.ignore.internal;
+
+/**
+ * Matcher for simple regex patterns starting with an asterisk, e.g. "*.tmp"
+ *
+ * @since 3.6
+ */
+public class LeadingAsteriskMatcher extends NameMatcher {
+
+       LeadingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) {
+               super(pattern, pathSeparator, dirOnly);
+
+               if (subPattern.charAt(0) != '*')
+                       throw new IllegalArgumentException(
+                                       "Pattern must have leading asterisk: " + pattern); //$NON-NLS-1$
+       }
+
+       public boolean matches(String segment, int startIncl, int endExcl,
+                       boolean assumeDirectory) {
+               // faster local access, same as in string.indexOf()
+               String s = subPattern;
+
+               // we don't need to count '*' character itself
+               int subLength = s.length() - 1;
+               // simple /*/ pattern
+               if (subLength == 0)
+                       return true;
+
+               if (subLength > (endExcl - startIncl))
+                       return false;
+
+               for (int i = subLength, j = endExcl - 1; i > 0; i--, j--) {
+                       char c1 = s.charAt(i);
+                       char c2 = segment.charAt(j);
+                       if (c1 != c2)
+                               return false;
+               }
+               return true;
+       }
+
+}
index 830aab1cff7d0bb0b9ccfbaf8db3c9048681c9af..dcecf303c4394255271169488ba47989b7474c41 100644 (file)
@@ -42,6 +42,7 @@
  */
 package org.eclipse.jgit.ignore.internal;
 
+import static org.eclipse.jgit.ignore.internal.Strings.checkWildCards;
 import static org.eclipse.jgit.ignore.internal.Strings.count;
 import static org.eclipse.jgit.ignore.internal.Strings.getPathSeparator;
 import static org.eclipse.jgit.ignore.internal.Strings.isWildCard;
@@ -52,6 +53,7 @@ import java.util.List;
 
 import org.eclipse.jgit.errors.InvalidPatternException;
 import org.eclipse.jgit.ignore.FastIgnoreRule;
+import org.eclipse.jgit.ignore.internal.Strings.PatternState;
 
 /**
  * Matcher built by patterns consists of multiple path segments.
@@ -132,9 +134,18 @@ public class PathMatcher extends AbstractMatcher {
                if (WildMatcher.WILDMATCH.equals(segment)
                                || WildMatcher.WILDMATCH2.equals(segment))
                        return WILD;
-               if (isWildCard(segment))
+
+               PatternState state = checkWildCards(segment);
+               switch (state) {
+               case LEADING_ASTERISK_ONLY:
+                       return new LeadingAsteriskMatcher(segment, pathSeparator, dirOnly);
+               case TRAILING_ASTERISK_ONLY:
+                       return new TrailingAsteriskMatcher(segment, pathSeparator, dirOnly);
+               case COMPLEX:
                        return new WildCardMatcher(segment, pathSeparator, dirOnly);
-               return new NameMatcher(segment, pathSeparator, dirOnly);
+               default:
+                       return new NameMatcher(segment, pathSeparator, dirOnly);
+               }
        }
 
        public boolean matches(String path, boolean assumeDirectory) {
index c694a14566dac7631b7ae6b13fbd4b05e24a707f..cd4d7536d9039d4c80feb6ab576f875b70830483 100644 (file)
@@ -132,10 +132,39 @@ public class Strings {
        }
 
        static boolean isWildCard(String pattern) {
-               return pattern.indexOf('*') != -1 || pattern.indexOf('?') != -1
-                               || pattern.indexOf('[') != -1
-                               // required to match escaped backslashes '\\\\'
-                               || pattern.indexOf('\\') != -1 || pattern.indexOf(']') != -1;
+               return pattern.indexOf('*') != -1 || isComplexWildcard(pattern);
+       }
+
+       private static boolean isComplexWildcard(String pattern) {
+               int idx1 = pattern.indexOf('[');
+               if (idx1 != -1) {
+                       int idx2 = pattern.indexOf(']');
+                       if (idx2 > idx1)
+                               return true;
+               }
+               // required to match escaped backslashes '\\\\'
+               if (pattern.indexOf('?') != -1 || pattern.indexOf('\\') != -1)
+                       return true;
+               return false;
+       }
+
+       static PatternState checkWildCards(String pattern) {
+               if (isComplexWildcard(pattern))
+                       return PatternState.COMPLEX;
+               int startIdx = pattern.indexOf('*');
+               if (startIdx < 0)
+                       return PatternState.NONE;
+
+               if (startIdx == pattern.length() - 1)
+                       return PatternState.TRAILING_ASTERISK_ONLY;
+               if (pattern.lastIndexOf('*') == 0)
+                       return PatternState.LEADING_ASTERISK_ONLY;
+
+               return PatternState.COMPLEX;
+       }
+
+       static enum PatternState {
+               LEADING_ASTERISK_ONLY, TRAILING_ASTERISK_ONLY, COMPLEX, NONE
        }
 
        final static List<String> POSIX_CHAR_CLASSES = Arrays.asList(
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java b/org.eclipse.jgit/src/org/eclipse/jgit/ignore/internal/TrailingAsteriskMatcher.java
new file mode 100644 (file)
index 0000000..4a1c780
--- /dev/null
@@ -0,0 +1,82 @@
+/*
+ * Copyright (C) 2014, Andrey Loskutov <loskutov@gmx.de>
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ *   notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ *   copyright notice, this list of conditions and the following
+ *   disclaimer in the documentation and/or other materials provided
+ *   with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ *   names of its contributors may be used to endorse or promote
+ *   products derived from this software without specific prior
+ *   written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.ignore.internal;
+
+/**
+ * Matcher for simple patterns ending with an asterisk, e.g. "Makefile.*"
+ *
+ * @since 3.6
+ */
+public class TrailingAsteriskMatcher extends NameMatcher {
+
+       TrailingAsteriskMatcher(String pattern, Character pathSeparator, boolean dirOnly) {
+               super(pattern, pathSeparator, dirOnly);
+
+               if (subPattern.charAt(subPattern.length() - 1) != '*')
+                       throw new IllegalArgumentException(
+                                       "Pattern must have trailing asterisk: " + pattern); //$NON-NLS-1$
+       }
+
+       public boolean matches(String segment, int startIncl, int endExcl,
+                       boolean assumeDirectory) {
+               // faster local access, same as in string.indexOf()
+               String s = subPattern;
+               // we don't need to count '*' character itself
+               int subLenth = s.length() - 1;
+               // simple /*/ pattern
+               if (subLenth == 0)
+                       return true;
+
+               if (subLenth > (endExcl - startIncl))
+                       return false;
+
+               for (int i = 0; i < subLenth; i++) {
+                       char c1 = s.charAt(i);
+                       char c2 = segment.charAt(i + startIncl);
+                       if (c1 != c2)
+                               return false;
+               }
+               return true;
+       }
+
+}