diff options
3 files changed, 643 insertions, 30 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/filter/PathFilterGroupTest2.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/filter/PathFilterGroupTest2.java new file mode 100644 index 0000000000..6b9d934e4c --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/treewalk/filter/PathFilterGroupTest2.java @@ -0,0 +1,256 @@ +/* + * Copyright (C) 2013, Robin Rosenberg + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ +package org.eclipse.jgit.treewalk.filter; + +import static org.hamcrest.MatcherAssert.assertThat; +import static org.hamcrest.Matchers.lessThan; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; + +import org.eclipse.jgit.dircache.DirCache; +import org.eclipse.jgit.dircache.DirCacheEditor; +import org.eclipse.jgit.dircache.DirCacheEntry; +import org.eclipse.jgit.dircache.DirCacheIterator; +import org.eclipse.jgit.errors.IncorrectObjectTypeException; +import org.eclipse.jgit.errors.MissingObjectException; +import org.eclipse.jgit.lib.FileMode; +import org.eclipse.jgit.lib.ObjectReader; +import org.eclipse.jgit.treewalk.TreeWalk; +import org.junit.Test; + +/** + * Performance test suite for PathFilterGroup + */ +public class PathFilterGroupTest2 { + + private DirCache dc; + + private String[] paths = new String[100000]; + + private TreeFilter pf; + + private String data; + + private long tInit; + + public void setup(int pathsize, int filtersize, String[] filters) { + long t1 = System.nanoTime(); + String[] seed = { "abc", "def", "ghi", "jkl", "mno", "pqr", "stu", + "vwx", "xyz", "\u00e5\u00e4\u00f6" }; + dc = DirCache.newInCore(); + DirCacheEditor ed = dc.editor(); + int pi = 0; + B: for (String a : seed) { + for (String b : seed) { + for (String c : seed) { + for (String d : seed) { + for (String e : seed) { + if (pi >= pathsize) + break B; + String p1 = a + "/" + b + "/" + c + "/" + d + "/" + + e; + paths[pi] = p1; + ed.add(new DirCacheEditor.PathEdit(p1) { + + @Override + public void apply(DirCacheEntry ent) { + ent.setFileMode(FileMode.REGULAR_FILE); + } + }); + ++pi; + } + } + } + } + } + ed.finish(); + long t2 = System.nanoTime(); + if (filters != null) + pf = PathFilterGroup.createFromStrings(filters); + else { + // System.out.println(dc.getEntryCount()); + String[] filterPaths = new String[filtersize]; + System.arraycopy(paths, pathsize - filtersize, filterPaths, 0, + filtersize); + pf = PathFilterGroup.createFromStrings(filterPaths); + } + long t3 = System.nanoTime(); + data = "PX\t" + (t2 - t1) / 1E9 + "\t" + (t3 - t2) / 1E9 + "\t"; + tInit = t2-t1; + } + + public void test(int pathSize, int filterSize) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + setup(pathSize, filterSize, null); + data += pathSize + "\t" + filterSize + "\t"; + long t1 = System.nanoTime(); + TreeWalk tw = new TreeWalk((ObjectReader) null); + tw.reset(); + tw.addTree(new DirCacheIterator(dc)); + tw.setFilter(pf); + tw.setRecursive(true); + int n = 0; + while (tw.next()) + n++; + long t2 = System.nanoTime(); + data += (t2 - t1) / 1E9; + System.out.println(data); + assertEquals(filterSize, n); + } + + @SuppressWarnings("boxing") + public void test(int pathSize, int expectedWalkCount, String... filters) + throws MissingObjectException, IncorrectObjectTypeException, + IOException { + setup(pathSize, -1, filters); + data += pathSize + "\t" + filters.length + "\t"; + long t1 = System.nanoTime(); + TreeWalk tw = new TreeWalk((ObjectReader) null); + tw.reset(); + tw.addTree(new DirCacheIterator(dc)); + tw.setFilter(pf); + tw.setRecursive(true); + int n = 0; + while (tw.next()) + n++; + long t2 = System.nanoTime(); + data += (t2 - t1) / 1E9; + System.out.println(data); + assertEquals(expectedWalkCount, n); + + // Walk time should be (much) faster then setup time + assertThat(t2 - t1, lessThan(tInit / 2)); + } + + @Test + public void test1_1() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(1, 1); + } + + @Test + public void test2_2() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(2, 2); + } + + @Test + public void test10_10() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(10, 10); + } + + @Test + public void test100_100() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100, 100); + } + + @Test + public void test1000_1000() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(1000, 1000); + } + + @Test + public void test10000_10000() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(10000, 10000); + } + + @Test + public void test100000_100000() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 100000); + } + + @Test + public void test100000_10000() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 10000); + } + + @Test + public void test100000_1000() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 1000); + } + + @Test + public void test100000_100() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 100); + } + + @Test + public void test100000_10() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 10); + } + + @Test + public void test100000_1() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 1); + } + + @Test + public void test10000_1F() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 10000, "abc"); + } + + @Test + public void test10000_L2() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 20000, "abc", "def"); + } + + @Test + public void test10000_L10() throws MissingObjectException, + IncorrectObjectTypeException, IOException { + test(100000, 10000, "\u00e5\u00e4\u00f6"); + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java new file mode 100644 index 0000000000..0df24af24f --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java @@ -0,0 +1,318 @@ +/* + * Copyright (C) 2009, Google Inc. + * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com> + * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> + * Copyright (C) 2013, Robin Rosenberg + * and other copyright owners as documented in the project's IP log. + * + * This program and the accompanying materials are made available + * under the terms of the Eclipse Distribution License v1.0 which + * accompanies this distribution, is reproduced below, and is + * available at http://www.eclipse.org/org/documents/edl-v10.php + * + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or + * without modification, are permitted provided that the following + * conditions are met: + * + * - Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * - Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. + * + * - Neither the name of the Eclipse Foundation, Inc. nor the + * names of its contributors may be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND + * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, + * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; + * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER + * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +package org.eclipse.jgit.treewalk.filter; + +import org.eclipse.jgit.util.RawParseUtils; + +/** + * Specialized set for byte arrays, interpreted as strings for use in + * {@link PathFilterGroup.Group}. Most methods assume the hash is already know + * and therefore requires the caller to supply it beforehand. The implementation + * is a loose derivative of ObjectIdSubclassMap. + */ +class ByteArraySet { + + private int size; + + private int grow; + + private int mask; + + private byte[][] table; + + /** + * Create an empty set. + * + * @param capacity + */ + ByteArraySet(int capacity) { + initTable(1 << Integer.highestOneBit((capacity * 2) - 1)); + } + + private byte[] get(final byte[] toFind, int length, int hash) { + final int msk = mask; + int i = hash & msk; + final byte[][] tbl = table; + byte[] obj; + + while ((obj = tbl[i]) != null) { + if (equals(obj, toFind, length)) + return obj; + i = (i + 1) & msk; + } + return null; + } + + private static boolean equals(byte[] a, byte[] b, int length) { + if (a.length < length || b.length < length) + return false; + for (int i = 0; i < length; ++i) { + if (a[i] != b[i]) + return false; + } + return true; + } + + /** + * Returns true if this set contains the specified array. + * + * @param toFind + * array to find. + * @param length + * The number of bytes in toFind that are used + * @param hash + * pre-computed hash of toFind + * @return true if the mapping exists for this byte array; false otherwise. + */ + boolean contains(final byte[] toFind, int length, int hash) { + return get(toFind, length, hash) != null; + } + + /** + * Store a byte array for future lookup. + * <p> + * Stores {@code newValue}, but only if it does not already exist in the + * set. Callers can tell if the value is new by checking the return value + * with reference equality: + * + * <pre> + * byte[] obj = ...; + * boolean wasNew = map.addIfAbsent(array, length, hash) == array; + * </pre> + * + * @param newValue + * the array to store. + * @param length + * The number of bytes in newValue that are used + * @param hash + * pre-computed hash of toFind + * @return {@code newValue} if stored, or the prior value already stored and + * that would have been returned had the caller used + * {@code get(newValue)} first. + */ + byte[] addIfAbsent(final byte[] newValue, int length, int hash) { + final int msk = mask; + int i = hash & msk; + final byte[][] tbl = table; + byte[] obj; + + while ((obj = tbl[i]) != null) { + if (equals(obj, newValue, length)) + return obj; + i = (i + 1) & msk; + } + + byte[] valueToInsert = copyIfNotSameSize(newValue, length); + if (++size == grow) { + grow(); + insert(valueToInsert, hash); + } else + tbl[i] = valueToInsert; + return valueToInsert; + } + + private static byte[] copyIfNotSameSize(byte[] newValue, int length) { + if (newValue.length == length) + return newValue; + byte[] ret = new byte[length]; + System.arraycopy(newValue, 0, ret, 0, length); + return ret; + } + + /** + * @return number of arrays in the set + */ + int size() { + return size; + } + + /** @return true if {@link #size()} is 0. */ + boolean isEmpty() { + return size == 0; + } + + private void insert(final byte[] newValue, int hash) { + final int msk = mask; + int j = hash & msk; + final byte[][] tbl = table; + while (tbl[j] != null) + j = (j + 1) & msk; + tbl[j] = newValue; + } + + private Hasher hasher = new Hasher(null, 0); + + private void grow() { + final byte[][] oldTable = table; + final int oldSize = table.length; + + initTable(oldSize << 1); + for (int i = 0; i < oldSize; i++) { + final byte[] obj = oldTable[i]; + if (obj != null) { + hasher.init(obj, obj.length); + insert(obj, hasher.hash()); + } + } + } + + private void initTable(int sz) { + if (sz < 2) + sz = 2; + grow = sz >> 1; + mask = sz - 1; + table = new byte[sz][]; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + sb.append('['); + for (byte[] b : table) { + if (b == null) + continue; + if (sb.length() > 1) + sb.append(" , "); //$NON-NLS-1$ + sb.append('"'); + sb.append(RawParseUtils.decode(b)); + sb.append('"'); + sb.append('('); + sb.append(chainlength(b)); + sb.append(')'); + } + sb.append(']'); + return sb.toString(); + } + + private int chainlength(byte[] b) { + Hasher h = new Hasher(b, b.length); + int hash = h.hash(); + final int msk = mask; + int i = hash & msk; + final byte[][] tbl = table; + byte[] obj; + + int n = 0; + while ((obj = tbl[i]) != null) { + if (equals(obj, b, b.length)) + return n; + i = (i + 1) & msk; + ++n; + } + return -1; + } + + static class Hasher { + private int hash; + + private int pos; + + private byte[] data; + + private int length; + + Hasher(byte[] data, int length) { + init(data, length); + } + + void init(byte[] d, int l) { + this.data = d; + this.length = l; + pos = 0; + hash = 0; + } + + int hash() { + while (pos < length) + hash = hash * 31 + data[pos++]; + return hash; + } + + int nextHash() { + for (;;) { + hash = hash * 31 + data[pos]; + ++pos; + if (pos == length || data[pos] == '/') + return hash; + } + } + + int getHash() { + return hash; + } + + boolean hasNext() { + return pos < length; + } + + public int length() { + return pos; + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < pos; ++i) + sb.append((char) data[i]); + sb.append(" | "); //$NON-NLS-1$ + for (int i = pos; i < length; ++i) + sb.append((char) data[i]); + return sb.toString(); + } + } + + byte[][] toArray() { + byte[][] ret = new byte[size][]; + int i = 0; + for (byte[] entry : table) { + if (entry != null) + ret[i++] = entry; + } + return ret; + } + +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java index 51761a8126..66d9f87a77 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java @@ -44,13 +44,13 @@ package org.eclipse.jgit.treewalk.filter; -import java.util.Arrays; import java.util.Collection; -import java.util.Comparator; import org.eclipse.jgit.errors.StopWalkException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.treewalk.TreeWalk; +import org.eclipse.jgit.treewalk.filter.ByteArraySet.Hasher; +import org.eclipse.jgit.util.RawParseUtils; /** * Includes tree entries only if they match one or more configured paths. @@ -83,7 +83,8 @@ public class PathFilterGroup { */ public static TreeFilter createFromStrings(final Collection<String> paths) { if (paths.isEmpty()) - throw new IllegalArgumentException(JGitText.get().atLeastOnePathIsRequired); + throw new IllegalArgumentException( + JGitText.get().atLeastOnePathIsRequired); final PathFilter[] p = new PathFilter[paths.size()]; int i = 0; for (final String s : paths) @@ -131,7 +132,8 @@ public class PathFilterGroup { */ public static TreeFilter create(final Collection<PathFilter> paths) { if (paths.isEmpty()) - throw new IllegalArgumentException(JGitText.get().atLeastOnePathIsRequired); + throw new IllegalArgumentException( + JGitText.get().atLeastOnePathIsRequired); final PathFilter[] p = new PathFilter[paths.size()]; paths.toArray(p); return create(p); @@ -177,41 +179,74 @@ public class PathFilterGroup { } static class Group extends TreeFilter { - private static final Comparator<PathFilter> PATH_SORT = new Comparator<PathFilter>() { - public int compare(final PathFilter o1, final PathFilter o2) { - return o1.pathStr.compareTo(o2.pathStr); - } - }; - private final PathFilter[] paths; + private ByteArraySet fullpaths; + + private ByteArraySet prefixes; + + private byte[] max; + + private Group(final PathFilter[] pathFilters) { + fullpaths = new ByteArraySet(pathFilters.length); + prefixes = new ByteArraySet(pathFilters.length / 5); + // 5 is an empirically derived ratio of #paths/#prefixes from: + // egit/jgit: 8 + // git: 5 + // linux kernel: 13 + // eclipse.platform.ui: 7 + max = pathFilters[0].pathRaw; + Hasher hasher = new Hasher(null, 0); + for (PathFilter pf : pathFilters) { + hasher.init(pf.pathRaw, pf.pathRaw.length); + while (hasher.hasNext()) { + int hash = hasher.nextHash(); + if (hasher.hasNext()) + prefixes.addIfAbsent(pf.pathRaw, hasher.length(), hash); + } + fullpaths.addIfAbsent(pf.pathRaw, pf.pathRaw.length, + hasher.getHash()); + if (compare(max, pf.pathRaw) < 0) + max = pf.pathRaw; + } + } - private Group(final PathFilter[] p) { - paths = p; - Arrays.sort(paths, PATH_SORT); + private static int compare(byte[] a, byte[] b) { + int i = 0; + while (i < a.length && i < b.length) { + int ba = a[i] & 0xFF; + int bb = b[i] & 0xFF; + int cmp = ba - bb; + if (cmp != 0) + return cmp; + ++i; + } + return a.length - b.length; } @Override public boolean include(final TreeWalk walker) { - final int n = paths.length; - for (int i = 0;;) { - final byte[] r = paths[i].pathRaw; - final int cmp = walker.isPathPrefix(r, r.length); - if (cmp == 0) + + byte[] rp = walker.getRawPath(); + Hasher hasher = new Hasher(rp, walker.getPathLength()); + while (hasher.hasNext()) { + int hash = hasher.nextHash(); + if (fullpaths.contains(rp, hasher.length(), hash)) return true; - if (++i < n) - continue; - if (cmp > 0) - throw StopWalkException.INSTANCE; - return false; + if (!hasher.hasNext()) + if (prefixes.contains(rp, hasher.length(), hash)) + return true; } + + final int cmp = walker.isPathPrefix(max, max.length); + if (cmp > 0) + throw StopWalkException.INSTANCE; + + return false; } @Override public boolean shouldBeRecursive() { - for (final PathFilter p : paths) - if (p.shouldBeRecursive()) - return true; - return false; + return !prefixes.isEmpty(); } @Override @@ -222,13 +257,17 @@ public class PathFilterGroup { public String toString() { final StringBuilder r = new StringBuilder(); r.append("FAST("); //$NON-NLS-1$ - for (int i = 0; i < paths.length; i++) { - if (i > 0) + boolean first = true; + for (byte[] p : fullpaths.toArray()) { + if (!first) { r.append(" OR "); //$NON-NLS-1$ - r.append(paths[i].toString()); + } + r.append(RawParseUtils.decode(p)); + first = false; } r.append(")"); //$NON-NLS-1$ return r.toString(); } } + } |