aboutsummaryrefslogtreecommitdiffstats
path: root/org.eclipse.jgit/src/org/eclipse/jgit/treewalk
diff options
context:
space:
mode:
authorRobin Rosenberg <robin.rosenberg@dewire.com>2013-01-28 19:00:00 -0500
committerGerrit Code Review @ Eclipse.org <gerrit@eclipse.org>2013-01-28 19:00:00 -0500
commitee413067fded57d817af5b6b9978b8ceb6e6374e (patch)
tree76d00910ec5b361306fe224e77503f9e818a9424 /org.eclipse.jgit/src/org/eclipse/jgit/treewalk
parent33bc4f7c052fc7a1150f0c9671736329d8100e29 (diff)
parent522fc6a9c64bb28399f7ecadb239aa7a02c5a81b (diff)
downloadjgit-ee413067fded57d817af5b6b9978b8ceb6e6374e.tar.gz
jgit-ee413067fded57d817af5b6b9978b8ceb6e6374e.zip
Merge changes Ife0cc2da,If38507ef
* changes:
  - Speed up PathFilterGroup for large sets of paths
  - Add test case for PathFilterGroup
Diffstat (limited to 'org.eclipse.jgit/src/org/eclipse/jgit/treewalk')
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java318
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java99
2 files changed, 387 insertions, 30 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java
new file mode 100644
index 0000000000..0df24af24f
--- /dev/null
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/ByteArraySet.java
@@ -0,0 +1,318 @@
+/*
+ * Copyright (C) 2009, Google Inc.
+ * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
+ * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
+ * Copyright (C) 2013, Robin Rosenberg
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.treewalk.filter;
+
+import org.eclipse.jgit.util.RawParseUtils;
+
+/**
+ * Specialized set for byte arrays, interpreted as strings for use in
+ * {@link PathFilterGroup.Group}. Most methods assume the hash is already known
+ * and therefore require the caller to supply it beforehand. The implementation
+ * is a loose derivative of ObjectIdSubclassMap.
+ * <p>
+ * Entries live in an open-addressing table that is probed linearly. The table
+ * length is always a power of two, so {@code hash & mask} selects the first
+ * slot to probe, and the table is doubled when it becomes half full.
+ */
+class ByteArraySet {
+
+    /** Largest table length that is still a positive power of two. */
+    private static final int MAX_TABLE_SIZE = 1 << 30;
+
+    /** Number of arrays currently stored. */
+    private int size;
+
+    /** Value of {@link #size} at which the table is doubled. */
+    private int grow;
+
+    /** {@code table.length - 1}; usable as a bit mask for slot selection. */
+    private int mask;
+
+    private byte[][] table;
+
+    /**
+     * Create an empty set.
+     *
+     * @param capacity
+     *            expected number of entries. The table is sized to the
+     *            smallest power of two that is at least twice this value
+     *            (minimum 2).
+     */
+    ByteArraySet(int capacity) {
+        initTable(tableSizeFor(capacity));
+    }
+
+    /**
+     * Compute the initial table length for an expected number of entries.
+     * <p>
+     * {@link Integer#highestOneBit(int)} returns a <em>value</em> (a single
+     * bit), not a bit index, so it must be shifted left by one rather than
+     * used as a shift count.
+     *
+     * @param capacity
+     *            expected number of entries
+     * @return a power of two in the range {@code [2, MAX_TABLE_SIZE]}
+     */
+    private static int tableSizeFor(int capacity) {
+        if (capacity <= 1)
+            return 2;
+        // Avoid overflow of capacity * 2 and of the final shift.
+        if (capacity >= MAX_TABLE_SIZE / 2)
+            return MAX_TABLE_SIZE;
+        return Integer.highestOneBit((capacity * 2) - 1) << 1;
+    }
+
+    /**
+     * Look up the stored array equal to the first {@code length} bytes of
+     * {@code toFind}.
+     *
+     * @param toFind
+     *            array to find
+     * @param length
+     *            number of bytes in toFind that are used
+     * @param hash
+     *            pre-computed hash of those bytes
+     * @return the stored array, or null if there is none
+     */
+    private byte[] get(final byte[] toFind, int length, int hash) {
+        final int msk = mask;
+        int i = hash & msk;
+        final byte[][] tbl = table;
+        byte[] obj;
+
+        // Linear probing: an empty slot ends the probe sequence.
+        while ((obj = tbl[i]) != null) {
+            if (equals(obj, toFind, length))
+                return obj;
+            i = (i + 1) & msk;
+        }
+        return null;
+    }
+
+    /**
+     * Test whether a stored entry equals the first {@code length} bytes of a
+     * candidate array.
+     * <p>
+     * The stored entry must be exactly {@code length} bytes long. Accepting a
+     * longer stored entry would make e.g. a stored "abc" match a lookup of
+     * "ab" whenever both fall into the same probe sequence.
+     *
+     * @param storedObj
+     *            entry taken from the table
+     * @param toFind
+     *            candidate array; may be longer than {@code length}
+     * @param length
+     *            number of bytes of toFind that are used
+     * @return true if both denote the same byte sequence
+     */
+    private static boolean equals(byte[] storedObj, byte[] toFind, int length) {
+        if (storedObj.length != length || toFind.length < length)
+            return false;
+        for (int i = 0; i < length; ++i) {
+            if (storedObj[i] != toFind[i])
+                return false;
+        }
+        return true;
+    }
+
+    /**
+     * Returns true if this set contains the specified array.
+     *
+     * @param toFind
+     *            array to find.
+     * @param length
+     *            The number of bytes in toFind that are used
+     * @param hash
+     *            pre-computed hash of toFind
+     * @return true if the mapping exists for this byte array; false otherwise.
+     */
+    boolean contains(final byte[] toFind, int length, int hash) {
+        return get(toFind, length, hash) != null;
+    }
+
+    /**
+     * Store a byte array for future lookup.
+     * <p>
+     * Stores the first {@code length} bytes of {@code newValue}, but only if
+     * an equal array does not already exist in the set. If
+     * {@code newValue.length == length} the array itself is stored, otherwise
+     * a trimmed copy is stored. Because of that copy, reference equality of
+     * the return value with {@code newValue} tells whether the value was new
+     * only when the whole array is used:
+     *
+     * <pre>
+     * byte[] array = ...;
+     * boolean wasNew = set.addIfAbsent(array, array.length, hash) == array;
+     * </pre>
+     *
+     * @param newValue
+     *            the array to store.
+     * @param length
+     *            The number of bytes in newValue that are used
+     * @param hash
+     *            pre-computed hash of the used bytes of newValue
+     * @return the array now held by the set: the newly stored array (or
+     *         trimmed copy), or the equal array that was already present.
+     */
+    byte[] addIfAbsent(final byte[] newValue, int length, int hash) {
+        final int msk = mask;
+        int i = hash & msk;
+        final byte[][] tbl = table;
+        byte[] obj;
+
+        while ((obj = tbl[i]) != null) {
+            if (equals(obj, newValue, length))
+                return obj;
+            i = (i + 1) & msk;
+        }
+
+        byte[] valueToInsert = copyIfNotSameSize(newValue, length);
+        if (++size == grow) {
+            // The free slot found above belongs to the old table, so the
+            // value has to be probed for again after growing.
+            grow();
+            insert(valueToInsert, hash);
+        } else
+            tbl[i] = valueToInsert;
+        return valueToInsert;
+    }
+
+    /**
+     * Return an array holding exactly the first {@code length} bytes.
+     *
+     * @param newValue
+     *            source array
+     * @param length
+     *            number of bytes that are used
+     * @return {@code newValue} itself if it already has that length, else a
+     *         new trimmed copy
+     */
+    private static byte[] copyIfNotSameSize(byte[] newValue, int length) {
+        if (newValue.length == length)
+            return newValue;
+        byte[] ret = new byte[length];
+        System.arraycopy(newValue, 0, ret, 0, length);
+        return ret;
+    }
+
+    /**
+     * @return number of arrays in the set
+     */
+    int size() {
+        return size;
+    }
+
+    /** @return true if {@link #size()} is 0. */
+    boolean isEmpty() {
+        return size == 0;
+    }
+
+    /**
+     * Place a value into the first free slot of its probe sequence without
+     * checking for duplicates and without touching {@link #size}.
+     *
+     * @param newValue
+     *            array to place
+     * @param hash
+     *            hash of newValue
+     */
+    private void insert(final byte[] newValue, int hash) {
+        final int msk = mask;
+        int j = hash & msk;
+        final byte[][] tbl = table;
+        while (tbl[j] != null)
+            j = (j + 1) & msk;
+        tbl[j] = newValue;
+    }
+
+    /** Scratch hasher reused by {@link #grow()} to re-hash stored entries. */
+    private final Hasher hasher = new Hasher(null, 0);
+
+    /**
+     * Double the table and re-insert every stored entry.
+     * <p>
+     * Entries are re-hashed with {@link Hasher#hash()} over their full
+     * length, so this relies on callers having supplied hashes computed the
+     * same way.
+     */
+    private void grow() {
+        final byte[][] oldTable = table;
+        final int oldSize = table.length;
+
+        initTable(oldSize << 1);
+        for (int i = 0; i < oldSize; i++) {
+            final byte[] obj = oldTable[i];
+            if (obj != null) {
+                hasher.init(obj, obj.length);
+                insert(obj, hasher.hash());
+            }
+        }
+    }
+
+    /**
+     * Allocate an empty table and derive {@link #grow} and {@link #mask}.
+     *
+     * @param sz
+     *            table length; values below 2 are raised to 2. Expected to be
+     *            a power of two.
+     */
+    private void initTable(int sz) {
+        if (sz < 2)
+            sz = 2;
+        grow = sz >> 1;
+        mask = sz - 1;
+        table = new byte[sz][];
+    }
+
+    /**
+     * Debug representation: every entry in table order, decoded as text and
+     * followed by its probe distance in parentheses.
+     */
+    @Override
+    public String toString() {
+        StringBuilder sb = new StringBuilder();
+        sb.append('[');
+        for (byte[] b : table) {
+            if (b == null)
+                continue;
+            if (sb.length() > 1)
+                sb.append(" , "); //$NON-NLS-1$
+            sb.append('"');
+            sb.append(RawParseUtils.decode(b));
+            sb.append('"');
+            sb.append('(');
+            sb.append(chainlength(b));
+            sb.append(')');
+        }
+        sb.append(']');
+        return sb.toString();
+    }
+
+    /**
+     * Count the probe steps between an entry's home slot and the slot that
+     * actually holds it.
+     *
+     * @param b
+     *            entry to locate
+     * @return number of steps, 0 if the entry sits in its home slot, or -1
+     *         if it is not in the table
+     */
+    private int chainlength(byte[] b) {
+        Hasher h = new Hasher(b, b.length);
+        int hash = h.hash();
+        final int msk = mask;
+        int i = hash & msk;
+        final byte[][] tbl = table;
+        byte[] obj;
+
+        int n = 0;
+        while ((obj = tbl[i]) != null) {
+            if (equals(obj, b, b.length))
+                return n;
+            i = (i + 1) & msk;
+            ++n;
+        }
+        return -1;
+    }
+
+    /**
+     * Incremental hash over the first {@code length} bytes of an array,
+     * computed as {@code hash = hash * 31 + b} for each (signed) byte.
+     * <p>
+     * {@link #nextHash()} consumes one '/'-delimited segment at a time, so
+     * the hash of every leading path prefix is obtained in a single pass.
+     */
+    static class Hasher {
+        private int hash;
+
+        private int pos;
+
+        private byte[] data;
+
+        private int length;
+
+        Hasher(byte[] data, int length) {
+            init(data, length);
+        }
+
+        /**
+         * Reset this hasher to the start of a new byte sequence.
+         *
+         * @param d
+         *            bytes to hash
+         * @param l
+         *            number of bytes of d that are used
+         */
+        void init(byte[] d, int l) {
+            this.data = d;
+            this.length = l;
+            pos = 0;
+            hash = 0;
+        }
+
+        /**
+         * Consume all remaining bytes.
+         *
+         * @return hash of the whole sequence
+         */
+        int hash() {
+            while (pos < length)
+                hash = hash * 31 + data[pos++];
+            return hash;
+        }
+
+        /**
+         * Consume at least one byte and continue up to, but not including,
+         * the next '/' or the end of the sequence.
+         * <p>
+         * Must only be called while {@link #hasNext()} is true; the byte at
+         * the current position is read unconditionally.
+         *
+         * @return hash of all bytes consumed so far
+         */
+        int nextHash() {
+            for (;;) {
+                hash = hash * 31 + data[pos];
+                ++pos;
+                if (pos == length || data[pos] == '/')
+                    return hash;
+            }
+        }
+
+        /** @return hash of all bytes consumed so far. */
+        int getHash() {
+            return hash;
+        }
+
+        /** @return true while unconsumed bytes remain. */
+        boolean hasNext() {
+            return pos < length;
+        }
+
+        /** @return number of bytes consumed so far. */
+        public int length() {
+            return pos;
+        }
+
+        /** Debug representation: consumed bytes, " | ", then remaining bytes. */
+        @Override
+        public String toString() {
+            StringBuilder sb = new StringBuilder();
+            for (int i = 0; i < pos; ++i)
+                sb.append((char) data[i]);
+            sb.append(" | "); //$NON-NLS-1$
+            for (int i = pos; i < length; ++i)
+                sb.append((char) data[i]);
+            return sb.toString();
+        }
+    }
+
+    /**
+     * @return the stored arrays (not copies) in table order, which is
+     *         unrelated to insertion order
+     */
+    byte[][] toArray() {
+        byte[][] ret = new byte[size][];
+        int i = 0;
+        for (byte[] entry : table) {
+            if (entry != null)
+                ret[i++] = entry;
+        }
+        return ret;
+    }
+
+}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java
index 51761a8126..66d9f87a77 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/treewalk/filter/PathFilterGroup.java
@@ -44,13 +44,13 @@
package org.eclipse.jgit.treewalk.filter;
-import java.util.Arrays;
import java.util.Collection;
-import java.util.Comparator;
import org.eclipse.jgit.errors.StopWalkException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.treewalk.filter.ByteArraySet.Hasher;
+import org.eclipse.jgit.util.RawParseUtils;
/**
* Includes tree entries only if they match one or more configured paths.
@@ -83,7 +83,8 @@ public class PathFilterGroup {
*/
public static TreeFilter createFromStrings(final Collection<String> paths) {
if (paths.isEmpty())
- throw new IllegalArgumentException(JGitText.get().atLeastOnePathIsRequired);
+ throw new IllegalArgumentException(
+ JGitText.get().atLeastOnePathIsRequired);
final PathFilter[] p = new PathFilter[paths.size()];
int i = 0;
for (final String s : paths)
@@ -131,7 +132,8 @@ public class PathFilterGroup {
*/
public static TreeFilter create(final Collection<PathFilter> paths) {
if (paths.isEmpty())
- throw new IllegalArgumentException(JGitText.get().atLeastOnePathIsRequired);
+ throw new IllegalArgumentException(
+ JGitText.get().atLeastOnePathIsRequired);
final PathFilter[] p = new PathFilter[paths.size()];
paths.toArray(p);
return create(p);
@@ -177,41 +179,74 @@ public class PathFilterGroup {
}
static class Group extends TreeFilter {
- private static final Comparator<PathFilter> PATH_SORT = new Comparator<PathFilter>() {
- public int compare(final PathFilter o1, final PathFilter o2) {
- return o1.pathStr.compareTo(o2.pathStr);
- }
- };
- private final PathFilter[] paths;
+ private ByteArraySet fullpaths;
+
+ private ByteArraySet prefixes;
+
+ private byte[] max;
+
+ /**
+  * Index the given filters by hash.
+  * <p>
+  * Every complete path goes into {@code fullpaths}; every leading part of
+  * a path that ends just before a '/' goes into {@code prefixes}. The
+  * greatest path in unsigned byte order is remembered in {@code max}.
+  *
+  * @param pathFilters
+  *            filters to index; element 0 is read unconditionally, so the
+  *            array must not be empty
+  */
+ private Group(final PathFilter[] pathFilters) {
+ fullpaths = new ByteArraySet(pathFilters.length);
+ prefixes = new ByteArraySet(pathFilters.length / 5);
+ // 5 is an empirically derived ratio of #paths/#prefixes from:
+ // egit/jgit: 8
+ // git: 5
+ // linux kernel: 13
+ // eclipse.platform.ui: 7
+ max = pathFilters[0].pathRaw;
+ Hasher hasher = new Hasher(null, 0);
+ for (PathFilter pf : pathFilters) {
+ hasher.init(pf.pathRaw, pf.pathRaw.length);
+ // Each nextHash() call extends the running hash by one path segment.
+ while (hasher.hasNext()) {
+ int hash = hasher.nextHash();
+ // Bytes remain, so the hasher stopped in front of a '/': the
+ // bytes consumed so far form a directory prefix of this path.
+ if (hasher.hasNext())
+ prefixes.addIfAbsent(pf.pathRaw, hasher.length(), hash);
+ }
+ // The hasher has now consumed the whole path.
+ fullpaths.addIfAbsent(pf.pathRaw, pf.pathRaw.length,
+ hasher.getHash());
+ if (compare(max, pf.pathRaw) < 0)
+ max = pf.pathRaw;
+ }
+ }
- private Group(final PathFilter[] p) {
- paths = p;
- Arrays.sort(paths, PATH_SORT);
+ /**
+  * Compare two byte arrays lexicographically, treating each byte as an
+  * unsigned value; if one array is a prefix of the other, the shorter
+  * one sorts first.
+  *
+  * @param a
+  *            first array
+  * @param b
+  *            second array
+  * @return negative, zero or positive if a sorts before, equal to or
+  *         after b
+  */
+ private static int compare(byte[] a, byte[] b) {
+ int i = 0;
+ while (i < a.length && i < b.length) {
+ int ba = a[i] & 0xFF;
+ int bb = b[i] & 0xFF;
+ // Both operands are in 0..255, so the subtraction cannot overflow.
+ int cmp = ba - bb;
+ if (cmp != 0)
+ return cmp;
+ ++i;
+ }
+ return a.length - b.length;
}
@Override
public boolean include(final TreeWalk walker) {
- final int n = paths.length;
- for (int i = 0;;) {
- final byte[] r = paths[i].pathRaw;
- final int cmp = walker.isPathPrefix(r, r.length);
- if (cmp == 0)
+
+ // Hash the walker's current path one '/'-delimited segment at a time,
+ // looking each leading part up in the sets built by the constructor.
+ byte[] rp = walker.getRawPath();
+ Hasher hasher = new Hasher(rp, walker.getPathLength());
+ while (hasher.hasNext()) {
+ int hash = hasher.nextHash();
+ // A leading part of the current path (or the whole path) is one of
+ // the configured paths.
+ if (fullpaths.contains(rp, hasher.length(), hash))
return true;
- if (++i < n)
- continue;
- if (cmp > 0)
- throw StopWalkException.INSTANCE;
- return false;
+ // Only once the whole current path is consumed: it is a directory
+ // prefix of some configured path.
+ if (!hasher.hasNext())
+ if (prefixes.contains(rp, hasher.length(), hash))
+ return true;
}
+
+ // NOTE(review): presumably a positive result means the walk has moved
+ // past max, the greatest configured path, so nothing later can match -
+ // confirm against TreeWalk.isPathPrefix.
+ final int cmp = walker.isPathPrefix(max, max.length);
+ if (cmp > 0)
+ throw StopWalkException.INSTANCE;
+
+ return false;
}
@Override
public boolean shouldBeRecursive() {
- for (final PathFilter p : paths)
- if (p.shouldBeRecursive())
- return true;
- return false;
+ // True when the constructor recorded at least one directory prefix,
+ // i.e. some configured path has a '/' after its first byte.
+ return !prefixes.isEmpty();
}
@Override
@@ -222,13 +257,17 @@ public class PathFilterGroup {
public String toString() {
final StringBuilder r = new StringBuilder();
r.append("FAST("); //$NON-NLS-1$
- for (int i = 0; i < paths.length; i++) {
- if (i > 0)
+ // fullpaths.toArray() yields entries in hash-table order, not in the
+ // order the paths were supplied.
+ boolean first = true;
+ for (byte[] p : fullpaths.toArray()) {
+ if (!first) {
r.append(" OR "); //$NON-NLS-1$
- r.append(paths[i].toString());
+ }
+ r.append(RawParseUtils.decode(p));
+ first = false;
}
r.append(")"); //$NON-NLS-1$
return r.toString();
}
}
+
}