summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java31
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java16
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java44
3 files changed, 45 insertions, 46 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java
index da21c483b5..27cf9252e8 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/PatienceDiffIndex.java
@@ -91,10 +91,11 @@ final class PatienceDiffIndex<S extends Sequence> {
/** 1 past the last valid entry in {@link #pCommon}. */
private final int pEnd;
- /** Keyed by {@code cmp.hash() & tableMask} to yield an entry offset. */
+ /** Keyed by {@link #hash(HashedSequence, int)} to get an entry offset. */
private final int[] table;
- private final int tableMask;
+ /** Number of low bits to discard from a key to index {@link #table}. */
+ private final int keyShift;
// To save memory the buckets for hash chains are stored in correlated
// arrays. This permits us to get 3 values per entry, without paying
@@ -158,8 +159,9 @@ final class PatienceDiffIndex<S extends Sequence> {
this.pEnd = pCnt;
final int sz = region.getLengthB();
- table = new int[tableSize(sz)];
- tableMask = table.length - 1;
+ final int tableBits = tableBits(sz);
+ table = new int[1 << tableBits];
+ keyShift = 32 - tableBits;
// As we insert elements we preincrement so that 0 is never a
// valid entry. Therefore we have to allocate one extra space.
@@ -187,7 +189,7 @@ final class PatienceDiffIndex<S extends Sequence> {
final int end = region.endB;
int pIdx = pBegin;
SCAN: while (ptr < end) {
- final int tIdx = cmp.hash(b, ptr) & tableMask;
+ final int tIdx = hash(b, ptr);
if (pIdx < pEnd) {
final long priorRec = pCommon[pIdx];
@@ -244,7 +246,7 @@ final class PatienceDiffIndex<S extends Sequence> {
final int end = region.endA;
int pLast = pBegin - 1;
SCAN: while (ptr < end) {
- final int tIdx = cmp.hash(a, ptr) & tableMask;
+ final int tIdx = hash(a, ptr);
for (int eIdx = table[tIdx]; eIdx != 0; eIdx = next[eIdx]) {
final long rec = ptrs[eIdx];
@@ -391,6 +393,10 @@ final class PatienceDiffIndex<S extends Sequence> {
return lcs;
}
+ private int hash(HashedSequence<S> s, int idx) {
+ return (cmp.hash(s, idx) * 0x9e370001 /* mix bits */) >>> keyShift;
+ }
+
private static boolean isDuplicate(long rec) {
return (((int) rec) & DUPLICATE_MASK) != 0;
}
@@ -407,11 +413,12 @@ final class PatienceDiffIndex<S extends Sequence> {
return (int) (rec >>> B_SHIFT);
}
- private static int tableSize(final int worstCaseBlockCnt) {
- int shift = 32 - Integer.numberOfLeadingZeros(worstCaseBlockCnt);
- int sz = 1 << (shift - 1);
- if (sz < worstCaseBlockCnt)
- sz <<= 1;
- return sz;
+ private static int tableBits(final int sz) {
+ int bits = 31 - Integer.numberOfLeadingZeros(sz);
+ if (bits == 0)
+ bits = 1;
+ if (1 << bits < sz)
+ bits++;
+ return bits;
}
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
index a1e2e6cece..f9cf376ab8 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawTextComparator.java
@@ -78,7 +78,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
protected int hashRegion(final byte[] raw, int ptr, final int end) {
int hash = 5381;
for (; ptr < end; ptr++)
- hash = (hash << 5) ^ (raw[ptr] & 0xff);
+ hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
return hash;
}
};
@@ -128,7 +128,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
for (; ptr < end; ptr++) {
byte c = raw[ptr];
if (!isWhitespace(c))
- hash = (hash << 5) ^ (c & 0xff);
+ hash = ((hash << 5) + hash) + (c & 0xff);
}
return hash;
}
@@ -163,9 +163,8 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
ptr = trimLeadingWhitespace(raw, ptr, end);
- for (; ptr < end; ptr++) {
- hash = (hash << 5) ^ (raw[ptr] & 0xff);
- }
+ for (; ptr < end; ptr++)
+ hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
return hash;
}
};
@@ -199,9 +198,8 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
protected int hashRegion(final byte[] raw, int ptr, int end) {
int hash = 5381;
end = trimTrailingWhitespace(raw, ptr, end);
- for (; ptr < end; ptr++) {
- hash = (hash << 5) ^ (raw[ptr] & 0xff);
- }
+ for (; ptr < end; ptr++)
+ hash = ((hash << 5) + hash) + (raw[ptr] & 0xff);
return hash;
}
};
@@ -247,7 +245,7 @@ public abstract class RawTextComparator extends SequenceComparator<RawText> {
end = trimTrailingWhitespace(raw, ptr, end);
while (ptr < end) {
byte c = raw[ptr];
- hash = (hash << 5) ^ (c & 0xff);
+ hash = ((hash << 5) + hash) + (c & 0xff);
if (isWhitespace(c))
ptr = trimLeadingWhitespace(raw, ptr, end);
else
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index 39bcebb4c3..6627268e48 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -68,20 +68,13 @@ class SimilarityIndex {
/** The {@link #idHash} table stops growing at {@code 1 << MAX_HASH_BITS}. */
private static final int MAX_HASH_BITS = 17;
- /** The {@link #idHash} table will not grow bigger than this, ever. */
- private static final int MAX_HASH_SIZE = 1 << MAX_HASH_BITS;
-
- /** Prime just before {@link #MAX_HASH_SIZE}. */
- private static final int P = 131071;
-
/**
* Shift to apply before storing a key.
* <p>
* Within the 64 bit table record space, we leave the highest bit unset so
- * all values are positive, and we need {@link #MAX_HASH_BITS} bits for the
- * keys. The lower 32 bits are used to count bytes impacted.
+ * all values are positive. The lower 32 bits to count bytes.
*/
- private static final int KEY_SHIFT = 64 - 1 - MAX_HASH_BITS;
+ private static final int KEY_SHIFT = 32;
/** Total size of the file we hashed into the structure. */
private long fileSize;
@@ -100,8 +93,12 @@ class SimilarityIndex {
*/
private long[] idHash;
+ /** {@code idHash.length == 1 << idHashBits}. */
+ private int idHashBits;
+
SimilarityIndex() {
- idHash = new long[256];
+ idHashBits = 8;
+ idHash = new long[1 << idHashBits];
}
long getFileSize() {
@@ -138,7 +135,7 @@ class SimilarityIndex {
int c = raw[ptr++] & 0xff;
if (c == '\n')
break;
- hash = (hash << 5) ^ c;
+ hash = (hash << 5) + hash + c;
} while (ptr < end && ptr - start < 64);
add(hash, ptr - start);
}
@@ -166,7 +163,7 @@ class SimilarityIndex {
int c = buf[ptr++] & 0xff;
if (c == '\n')
break;
- hash = (hash << 5) ^ c;
+ hash = (hash << 5) + hash + c;
} while (n < 64 && n < remaining);
add(hash, n);
remaining -= n;
@@ -272,7 +269,8 @@ class SimilarityIndex {
}
void add(int key, int cnt) {
- key = hash(key);
+ key = (key * 0x9e370001) >>> 1; // Mix bits and ensure not negative.
+
int j = slot(key);
for (;;) {
long v = idHash[j];
@@ -298,28 +296,24 @@ class SimilarityIndex {
}
}
- private static int hash(int key) {
- // Make the key fit into our table. Since we have a maximum size
- // that we cap the table at, all keys get squashed before going
- // into the table. This prevents overflow.
- //
- return (key >>> 1) % P;
- }
-
private int slot(int key) {
- return key % idHash.length;
+ // We use 31 - idHashBits because the upper bit was already forced
+ // to be 0 and we want the remaining high bits to be used as the
+ // table slot.
+ //
+ return key >>> (31 - idHashBits);
}
private boolean shouldGrow() {
- int n = idHash.length;
- return n < MAX_HASH_SIZE && n <= idSize * 2;
+ return idHashBits < MAX_HASH_BITS && idHash.length <= idSize * 2;
}
private void grow() {
long[] oldHash = idHash;
int oldSize = idHash.length;
- idHash = new long[2 * oldSize];
+ idHashBits++;
+ idHash = new long[1 << idHashBits];
for (int i = 0; i < oldSize; i++) {
long v = oldHash[i];
if (v != 0) {