summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: Shawn Pearce <spearce@spearce.org> 2015-05-26 16:49:28 -0400
committer: Gerrit Code Review @ Eclipse.org <gerrit@eclipse.org> 2015-05-26 16:49:30 -0400
commit: 2ad2d85bcda42b5f2fde3c4126f07519e2c75c79 (patch)
tree: d08da381cc61eddb4d3fa0717fa73117a271638c
parent: 5635d9e1af61c054740037aa0934fca8ef34eaa4 (diff)
parent: 5e57cc95854dbf84bfafe3e61791a99b4d86746e (diff)
download: jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.tar.gz
jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.zip
Merge "Enable public access to SimilarityIndex scoring function"
-rw-r--r-- org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java 52
1 files changed, 48 insertions, 4 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
index f376b8e36e..1c40d7fcbf 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java
@@ -63,10 +63,13 @@ import org.eclipse.jgit.lib.ObjectStream;
* will not exceed 1 MiB per instance. The index starts out at a smaller size
* (closer to 2 KiB), but may grow as more distinct blocks within the scanned
* file are discovered.
+ *
+ * @since 4.0
*/
-class SimilarityIndex {
+public class SimilarityIndex {
/** A special {@link TableFullException} used in place of OutOfMemoryError. */
- private static final TableFullException TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
+ public static final TableFullException
+ TABLE_FULL_OUT_OF_MEMORY = new TableFullException();
/**
* Shift to apply before storing a key.
@@ -105,6 +108,26 @@ class SimilarityIndex {
/** {@code idHash.length == 1 << idHashBits}. */
private int idHashBits;
+ /**
+ * Create a new similarity index for the given object
+ *
+ * @param obj
+ * the object to hash
+ * @return similarity index for this object
+ * @throws IOException
+ * file contents cannot be read from the repository.
+ * @throws TableFullException
+ * object hashing overflowed the storage capacity of the
+ * SimilarityIndex.
+ */
+ public static SimilarityIndex create(ObjectLoader obj) throws IOException,
+ TableFullException {
+ SimilarityIndex idx = new SimilarityIndex();
+ idx.hash(obj);
+ idx.sort();
+ return idx;
+ }
+
SimilarityIndex() {
idHashBits = 8;
idHash = new long[1 << idHashBits];
@@ -212,7 +235,27 @@ class SimilarityIndex {
Arrays.sort(idHash);
}
- int score(SimilarityIndex dst, int maxScore) {
+ /**
+ * Compute the similarity score between this index and another.
+ * <p>
+ * A region of a file is defined as a line in a text file or a fixed-size
+ * block in a binary file. To prepare an index, each region in the file is
+ * hashed; the values and counts of hashes are retained in a sorted table.
+ * Define the similarity fraction F as the count of matching regions
+ * between the two files divided by the maximum count of regions in
+ * either file. The similarity score is F multiplied by the maxScore
+ * constant, yielding a range [0, maxScore]. It is defined as maxScore for
+ * the degenerate case of two empty files.
+ * <p>
+ * The similarity score is symmetrical; i.e. a.score(b, m) == b.score(a, m).
+ *
+ * @param dst
+ * the other index
+ * @param maxScore
+ * the score representing a 100% match
+ * @return the similarity score
+ */
+ public int score(SimilarityIndex dst, int maxScore) {
long max = Math.max(hashedCnt, dst.hashedCnt);
if (max == 0)
return maxScore;
@@ -381,7 +424,8 @@ class SimilarityIndex {
return v & MAX_COUNT;
}
- static class TableFullException extends Exception {
+ /** Thrown by {@code create()} when file is too large. */
+ public static class TableFullException extends Exception {
private static final long serialVersionUID = 1L;
}
}