The ByteArraySet is a distorted copy of ObjectIdSubclassMap.
Bug: 397185
Change-Id: Ife0cc2da6cc46431be94a6b14f64f6adbe983afb
--- /dev/null
+/*
+ * Copyright (C) 2013, Robin Rosenberg
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+package org.eclipse.jgit.treewalk.filter;
+
+import static org.hamcrest.MatcherAssert.assertThat;
+import static org.hamcrest.Matchers.lessThan;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+
+import org.eclipse.jgit.dircache.DirCache;
+import org.eclipse.jgit.dircache.DirCacheEditor;
+import org.eclipse.jgit.dircache.DirCacheEntry;
+import org.eclipse.jgit.dircache.DirCacheIterator;
+import org.eclipse.jgit.errors.IncorrectObjectTypeException;
+import org.eclipse.jgit.errors.MissingObjectException;
+import org.eclipse.jgit.lib.FileMode;
+import org.eclipse.jgit.lib.ObjectReader;
+import org.eclipse.jgit.treewalk.TreeWalk;
+import org.junit.Test;
+
+/**
+ * Performance test suite for PathFilterGroup
+ */
+public class PathFilterGroupTest2 {
+
+ private DirCache dc;
+
+ private String[] paths = new String[100000];
+
+ private TreeFilter pf;
+
+ private String data;
+
+ private long tInit;
+
+ public void setup(int pathsize, int filtersize, String[] filters) {
+ long t1 = System.nanoTime();
+ String[] seed = { "abc", "def", "ghi", "jkl", "mno", "pqr", "stu",
+ "vwx", "xyz", "\u00e5\u00e4\u00f6" };
+ dc = DirCache.newInCore();
+ DirCacheEditor ed = dc.editor();
+ int pi = 0;
+ B: for (String a : seed) {
+ for (String b : seed) {
+ for (String c : seed) {
+ for (String d : seed) {
+ for (String e : seed) {
+ if (pi >= pathsize)
+ break B;
+ String p1 = a + "/" + b + "/" + c + "/" + d + "/"
+ + e;
+ paths[pi] = p1;
+ ed.add(new DirCacheEditor.PathEdit(p1) {
+
+ @Override
+ public void apply(DirCacheEntry ent) {
+ ent.setFileMode(FileMode.REGULAR_FILE);
+ }
+ });
+ ++pi;
+ }
+ }
+ }
+ }
+ }
+ ed.finish();
+ long t2 = System.nanoTime();
+ if (filters != null)
+ pf = PathFilterGroup.createFromStrings(filters);
+ else {
+ // System.out.println(dc.getEntryCount());
+ String[] filterPaths = new String[filtersize];
+ System.arraycopy(paths, pathsize - filtersize, filterPaths, 0,
+ filtersize);
+ pf = PathFilterGroup.createFromStrings(filterPaths);
+ }
+ long t3 = System.nanoTime();
+ data = "PX\t" + (t2 - t1) / 1E9 + "\t" + (t3 - t2) / 1E9 + "\t";
+ tInit = t2-t1;
+ }
+
+ public void test(int pathSize, int filterSize)
+ throws MissingObjectException, IncorrectObjectTypeException,
+ IOException {
+ setup(pathSize, filterSize, null);
+ data += pathSize + "\t" + filterSize + "\t";
+ long t1 = System.nanoTime();
+ TreeWalk tw = new TreeWalk((ObjectReader) null);
+ tw.reset();
+ tw.addTree(new DirCacheIterator(dc));
+ tw.setFilter(pf);
+ tw.setRecursive(true);
+ int n = 0;
+ while (tw.next())
+ n++;
+ long t2 = System.nanoTime();
+ data += (t2 - t1) / 1E9;
+ System.out.println(data);
+ assertEquals(filterSize, n);
+ }
+
+ @SuppressWarnings("boxing")
+ public void test(int pathSize, int expectedWalkCount, String... filters)
+ throws MissingObjectException, IncorrectObjectTypeException,
+ IOException {
+ setup(pathSize, -1, filters);
+ data += pathSize + "\t" + filters.length + "\t";
+ long t1 = System.nanoTime();
+ TreeWalk tw = new TreeWalk((ObjectReader) null);
+ tw.reset();
+ tw.addTree(new DirCacheIterator(dc));
+ tw.setFilter(pf);
+ tw.setRecursive(true);
+ int n = 0;
+ while (tw.next())
+ n++;
+ long t2 = System.nanoTime();
+ data += (t2 - t1) / 1E9;
+ System.out.println(data);
+ assertEquals(expectedWalkCount, n);
+
+ // Walk time should be (much) faster then setup time
+ assertThat(t2 - t1, lessThan(tInit / 2));
+ }
+
+ @Test
+ public void test1_1() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(1, 1);
+ }
+
+ @Test
+ public void test2_2() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(2, 2);
+ }
+
+ @Test
+ public void test10_10() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(10, 10);
+ }
+
+ @Test
+ public void test100_100() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100, 100);
+ }
+
+ @Test
+ public void test1000_1000() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(1000, 1000);
+ }
+
+ @Test
+ public void test10000_10000() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(10000, 10000);
+ }
+
+ @Test
+ public void test100000_100000() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 100000);
+ }
+
+ @Test
+ public void test100000_10000() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 10000);
+ }
+
+ @Test
+ public void test100000_1000() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 1000);
+ }
+
+ @Test
+ public void test100000_100() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 100);
+ }
+
+ @Test
+ public void test100000_10() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 10);
+ }
+
+ @Test
+ public void test100000_1() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 1);
+ }
+
+ @Test
+ public void test10000_1F() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 10000, "abc");
+ }
+
+ @Test
+ public void test10000_L2() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 20000, "abc", "def");
+ }
+
+ @Test
+ public void test10000_L10() throws MissingObjectException,
+ IncorrectObjectTypeException, IOException {
+ test(100000, 10000, "\u00e5\u00e4\u00f6");
+ }
+}
--- /dev/null
+/*
+ * Copyright (C) 2009, Google Inc.
+ * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
+ * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org>
+ * Copyright (C) 2013, Robin Rosenberg
+ * and other copyright owners as documented in the project's IP log.
+ *
+ * This program and the accompanying materials are made available
+ * under the terms of the Eclipse Distribution License v1.0 which
+ * accompanies this distribution, is reproduced below, and is
+ * available at http://www.eclipse.org/org/documents/edl-v10.php
+ *
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or
+ * without modification, are permitted provided that the following
+ * conditions are met:
+ *
+ * - Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *
+ * - Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following
+ * disclaimer in the documentation and/or other materials provided
+ * with the distribution.
+ *
+ * - Neither the name of the Eclipse Foundation, Inc. nor the
+ * names of its contributors may be used to endorse or promote
+ * products derived from this software without specific prior
+ * written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
+ * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
+ * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
+ * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
+ * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+ * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+ * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
+ * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+ * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
+ * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+package org.eclipse.jgit.treewalk.filter;
+
+import org.eclipse.jgit.util.RawParseUtils;
+
+/**
+ * Specialized set for byte arrays, interpreted as strings for use in
+ * {@link PathFilterGroup.Group}. Most methods assume the hash is already know
+ * and therefore requires the caller to supply it beforehand. The implementation
+ * is a loose derivative of ObjectIdSubclassMap.
+ */
+class ByteArraySet {
+
+ private int size;
+
+ private int grow;
+
+ private int mask;
+
+ private byte[][] table;
+
+ /**
+ * Create an empty set.
+ *
+ * @param capacity
+ */
+ ByteArraySet(int capacity) {
+ initTable(1 << Integer.highestOneBit((capacity * 2) - 1));
+ }
+
+ private byte[] get(final byte[] toFind, int length, int hash) {
+ final int msk = mask;
+ int i = hash & msk;
+ final byte[][] tbl = table;
+ byte[] obj;
+
+ while ((obj = tbl[i]) != null) {
+ if (equals(obj, toFind, length))
+ return obj;
+ i = (i + 1) & msk;
+ }
+ return null;
+ }
+
+ private static boolean equals(byte[] a, byte[] b, int length) {
+ if (a.length < length || b.length < length)
+ return false;
+ for (int i = 0; i < length; ++i) {
+ if (a[i] != b[i])
+ return false;
+ }
+ return true;
+ }
+
+ /**
+ * Returns true if this set contains the specified array.
+ *
+ * @param toFind
+ * array to find.
+ * @param length
+ * The number of bytes in toFind that are used
+ * @param hash
+ * pre-computed hash of toFind
+ * @return true if the mapping exists for this byte array; false otherwise.
+ */
+ boolean contains(final byte[] toFind, int length, int hash) {
+ return get(toFind, length, hash) != null;
+ }
+
+ /**
+ * Store a byte array for future lookup.
+ * <p>
+ * Stores {@code newValue}, but only if it does not already exist in the
+ * set. Callers can tell if the value is new by checking the return value
+ * with reference equality:
+ *
+ * <pre>
+ * byte[] obj = ...;
+ * boolean wasNew = map.addIfAbsent(array, length, hash) == array;
+ * </pre>
+ *
+ * @param newValue
+ * the array to store.
+ * @param length
+ * The number of bytes in newValue that are used
+ * @param hash
+ * pre-computed hash of toFind
+ * @return {@code newValue} if stored, or the prior value already stored and
+ * that would have been returned had the caller used
+ * {@code get(newValue)} first.
+ */
+ byte[] addIfAbsent(final byte[] newValue, int length, int hash) {
+ final int msk = mask;
+ int i = hash & msk;
+ final byte[][] tbl = table;
+ byte[] obj;
+
+ while ((obj = tbl[i]) != null) {
+ if (equals(obj, newValue, length))
+ return obj;
+ i = (i + 1) & msk;
+ }
+
+ byte[] valueToInsert = copyIfNotSameSize(newValue, length);
+ if (++size == grow) {
+ grow();
+ insert(valueToInsert, hash);
+ } else
+ tbl[i] = valueToInsert;
+ return valueToInsert;
+ }
+
+ private static byte[] copyIfNotSameSize(byte[] newValue, int length) {
+ if (newValue.length == length)
+ return newValue;
+ byte[] ret = new byte[length];
+ System.arraycopy(newValue, 0, ret, 0, length);
+ return ret;
+ }
+
+ /**
+ * @return number of arrays in the set
+ */
+ int size() {
+ return size;
+ }
+
+ /** @return true if {@link #size()} is 0. */
+ boolean isEmpty() {
+ return size == 0;
+ }
+
+ private void insert(final byte[] newValue, int hash) {
+ final int msk = mask;
+ int j = hash & msk;
+ final byte[][] tbl = table;
+ while (tbl[j] != null)
+ j = (j + 1) & msk;
+ tbl[j] = newValue;
+ }
+
+ private Hasher hasher = new Hasher(null, 0);
+
+ private void grow() {
+ final byte[][] oldTable = table;
+ final int oldSize = table.length;
+
+ initTable(oldSize << 1);
+ for (int i = 0; i < oldSize; i++) {
+ final byte[] obj = oldTable[i];
+ if (obj != null) {
+ hasher.init(obj, obj.length);
+ insert(obj, hasher.hash());
+ }
+ }
+ }
+
+ private void initTable(int sz) {
+ if (sz < 2)
+ sz = 2;
+ grow = sz >> 1;
+ mask = sz - 1;
+ table = new byte[sz][];
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ sb.append('[');
+ for (byte[] b : table) {
+ if (b == null)
+ continue;
+ if (sb.length() > 1)
+ sb.append(" , "); //$NON-NLS-1$
+ sb.append('"');
+ sb.append(RawParseUtils.decode(b));
+ sb.append('"');
+ sb.append('(');
+ sb.append(chainlength(b));
+ sb.append(')');
+ }
+ sb.append(']');
+ return sb.toString();
+ }
+
+ private int chainlength(byte[] b) {
+ Hasher h = new Hasher(b, b.length);
+ int hash = h.hash();
+ final int msk = mask;
+ int i = hash & msk;
+ final byte[][] tbl = table;
+ byte[] obj;
+
+ int n = 0;
+ while ((obj = tbl[i]) != null) {
+ if (equals(obj, b, b.length))
+ return n;
+ i = (i + 1) & msk;
+ ++n;
+ }
+ return -1;
+ }
+
+ static class Hasher {
+ private int hash;
+
+ private int pos;
+
+ private byte[] data;
+
+ private int length;
+
+ Hasher(byte[] data, int length) {
+ init(data, length);
+ }
+
+ void init(byte[] d, int l) {
+ this.data = d;
+ this.length = l;
+ pos = 0;
+ hash = 0;
+ }
+
+ int hash() {
+ while (pos < length)
+ hash = hash * 31 + data[pos++];
+ return hash;
+ }
+
+ int nextHash() {
+ for (;;) {
+ hash = hash * 31 + data[pos];
+ ++pos;
+ if (pos == length || data[pos] == '/')
+ return hash;
+ }
+ }
+
+ int getHash() {
+ return hash;
+ }
+
+ boolean hasNext() {
+ return pos < length;
+ }
+
+ public int length() {
+ return pos;
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder();
+ for (int i = 0; i < pos; ++i)
+ sb.append((char) data[i]);
+ sb.append(" | "); //$NON-NLS-1$
+ for (int i = pos; i < length; ++i)
+ sb.append((char) data[i]);
+ return sb.toString();
+ }
+ }
+
+ byte[][] toArray() {
+ byte[][] ret = new byte[size][];
+ int i = 0;
+ for (byte[] entry : table) {
+ if (entry != null)
+ ret[i++] = entry;
+ }
+ return ret;
+ }
+
+}
package org.eclipse.jgit.treewalk.filter;
-import java.util.Arrays;
import java.util.Collection;
-import java.util.Comparator;
import org.eclipse.jgit.errors.StopWalkException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.treewalk.TreeWalk;
+import org.eclipse.jgit.treewalk.filter.ByteArraySet.Hasher;
+import org.eclipse.jgit.util.RawParseUtils;
/**
* Includes tree entries only if they match one or more configured paths.
*/
public static TreeFilter createFromStrings(final Collection<String> paths) {
if (paths.isEmpty())
- throw new IllegalArgumentException(JGitText.get().atLeastOnePathIsRequired);
+ throw new IllegalArgumentException(
+ JGitText.get().atLeastOnePathIsRequired);
final PathFilter[] p = new PathFilter[paths.size()];
int i = 0;
for (final String s : paths)
*/
public static TreeFilter create(final Collection<PathFilter> paths) {
if (paths.isEmpty())
- throw new IllegalArgumentException(JGitText.get().atLeastOnePathIsRequired);
+ throw new IllegalArgumentException(
+ JGitText.get().atLeastOnePathIsRequired);
final PathFilter[] p = new PathFilter[paths.size()];
paths.toArray(p);
return create(p);
}
static class Group extends TreeFilter {
- private static final Comparator<PathFilter> PATH_SORT = new Comparator<PathFilter>() {
- public int compare(final PathFilter o1, final PathFilter o2) {
- return o1.pathStr.compareTo(o2.pathStr);
- }
- };
- private final PathFilter[] paths;
+ private ByteArraySet fullpaths;
+
+ private ByteArraySet prefixes;
+
+ private byte[] max;
+
+ private Group(final PathFilter[] pathFilters) {
+ fullpaths = new ByteArraySet(pathFilters.length);
+ prefixes = new ByteArraySet(pathFilters.length / 5);
+ // 5 is an empirically derived ratio of #paths/#prefixes from:
+ // egit/jgit: 8
+ // git: 5
+ // linux kernel: 13
+ // eclipse.platform.ui: 7
+ max = pathFilters[0].pathRaw;
+ Hasher hasher = new Hasher(null, 0);
+ for (PathFilter pf : pathFilters) {
+ hasher.init(pf.pathRaw, pf.pathRaw.length);
+ while (hasher.hasNext()) {
+ int hash = hasher.nextHash();
+ if (hasher.hasNext())
+ prefixes.addIfAbsent(pf.pathRaw, hasher.length(), hash);
+ }
+ fullpaths.addIfAbsent(pf.pathRaw, pf.pathRaw.length,
+ hasher.getHash());
+ if (compare(max, pf.pathRaw) < 0)
+ max = pf.pathRaw;
+ }
+ }
- private Group(final PathFilter[] p) {
- paths = p;
- Arrays.sort(paths, PATH_SORT);
+ private static int compare(byte[] a, byte[] b) {
+ int i = 0;
+ while (i < a.length && i < b.length) {
+ int ba = a[i] & 0xFF;
+ int bb = b[i] & 0xFF;
+ int cmp = ba - bb;
+ if (cmp != 0)
+ return cmp;
+ ++i;
+ }
+ return a.length - b.length;
}
@Override
public boolean include(final TreeWalk walker) {
- final int n = paths.length;
- for (int i = 0;;) {
- final byte[] r = paths[i].pathRaw;
- final int cmp = walker.isPathPrefix(r, r.length);
- if (cmp == 0)
+
+ byte[] rp = walker.getRawPath();
+ Hasher hasher = new Hasher(rp, walker.getPathLength());
+ while (hasher.hasNext()) {
+ int hash = hasher.nextHash();
+ if (fullpaths.contains(rp, hasher.length(), hash))
return true;
- if (++i < n)
- continue;
- if (cmp > 0)
- throw StopWalkException.INSTANCE;
- return false;
+ if (!hasher.hasNext())
+ if (prefixes.contains(rp, hasher.length(), hash))
+ return true;
}
+
+ final int cmp = walker.isPathPrefix(max, max.length);
+ if (cmp > 0)
+ throw StopWalkException.INSTANCE;
+
+ return false;
}
@Override
public boolean shouldBeRecursive() {
- for (final PathFilter p : paths)
- if (p.shouldBeRecursive())
- return true;
- return false;
+ return !prefixes.isEmpty();
}
@Override
public String toString() {
final StringBuilder r = new StringBuilder();
r.append("FAST("); //$NON-NLS-1$
- for (int i = 0; i < paths.length; i++) {
- if (i > 0)
+ boolean first = true;
+ for (byte[] p : fullpaths.toArray()) {
+ if (!first) {
r.append(" OR "); //$NON-NLS-1$
- r.append(paths[i].toString());
+ }
+ r.append(RawParseUtils.decode(p));
+ first = false;
}
r.append(")"); //$NON-NLS-1$
return r.toString();
}
}
+
}