*/
private static final int KEY_SHIFT = 32;
+ /** Maximum value of the count field, also mask to extract the count. */
+ private static final long MAX_COUNT = (1L << KEY_SHIFT) - 1;
+
/** Total size of the file we hashed into the structure. */
private long fileSize;
return (int) ((common(dst) * maxScore) / max);
}
- int common(SimilarityIndex dst) {
+ long common(SimilarityIndex dst) {
return common(this, dst);
}
- private static int common(SimilarityIndex src, SimilarityIndex dst) {
+ private static long common(SimilarityIndex src, SimilarityIndex dst) {
int srcIdx = src.packedIndex(0);
int dstIdx = dst.packedIndex(0);
long[] srcHash = src.idHash;
return common(srcHash, srcIdx, dstHash, dstIdx);
}
- private static int common(long[] srcHash, int srcIdx, //
+ private static long common(long[] srcHash, int srcIdx, //
long[] dstHash, int dstIdx) {
if (srcIdx == srcHash.length || dstIdx == dstHash.length)
return 0;
- int common = 0;
+ long common = 0;
int srcKey = keyOf(srcHash[srcIdx]);
int dstKey = keyOf(dstHash[dstIdx]);
j = slot(key);
continue;
}
- idHash[j] = (((long) key) << KEY_SHIFT) | cnt;
+ idHash[j] = pair(key, cnt);
idSize++;
return;
} else if (keyOf(v) == key) {
- // Same key, increment the counter.
- idHash[j] = v + cnt;
+ // Same key, increment the counter. If it overflows, fail
+ // indexing to prevent the key from being impacted.
+ //
+ idHash[j] = pair(key, countOf(v) + cnt);
return;
} else if (++j >= idHash.length) {
}
}
+ private static long pair(int key, long cnt) throws TableFullException {
+ if (MAX_COUNT < cnt)
+ throw new TableFullException();
+ return (((long) key) << KEY_SHIFT) | cnt;
+ }
+
private int slot(int key) {
// We use 31 - idHashBits because the upper bit was already forced
// to be 0 and we want the remaining high bits to be used as the
return (int) (v >>> KEY_SHIFT);
}
- private static int countOf(long v) {
- return (int) v;
+ private static long countOf(long v) {
+ return v & MAX_COUNT;
}
static class TableFullException extends Exception {