diff options
author | Shawn Pearce <spearce@spearce.org> | 2015-05-26 16:49:28 -0400 |
---|---|---|
committer | Gerrit Code Review @ Eclipse.org <gerrit@eclipse.org> | 2015-05-26 16:49:30 -0400 |
commit | 2ad2d85bcda42b5f2fde3c4126f07519e2c75c79 (patch) | |
tree | d08da381cc61eddb4d3fa0717fa73117a271638c /org.eclipse.jgit | |
parent | 5635d9e1af61c054740037aa0934fca8ef34eaa4 (diff) | |
parent | 5e57cc95854dbf84bfafe3e61791a99b4d86746e (diff) | |
download | jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.tar.gz jgit-2ad2d85bcda42b5f2fde3c4126f07519e2c75c79.zip |
Merge "Enable public access to SimilarityIndex scoring function"
Diffstat (limited to 'org.eclipse.jgit')
-rw-r--r-- | org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java | 52 |
1 files changed, 48 insertions, 4 deletions
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java index f376b8e36e..1c40d7fcbf 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java @@ -63,10 +63,13 @@ import org.eclipse.jgit.lib.ObjectStream; * will not exceed 1 MiB per instance. The index starts out at a smaller size * (closer to 2 KiB), but may grow as more distinct blocks within the scanned * file are discovered. + * + * @since 4.0 */ -class SimilarityIndex { +public class SimilarityIndex { /** A special {@link TableFullException} used in place of OutOfMemoryError. */ - private static final TableFullException TABLE_FULL_OUT_OF_MEMORY = new TableFullException(); + public static final TableFullException + TABLE_FULL_OUT_OF_MEMORY = new TableFullException(); /** * Shift to apply before storing a key. @@ -105,6 +108,26 @@ class SimilarityIndex { /** {@code idHash.length == 1 << idHashBits}. */ private int idHashBits; + /** + * Create a new similarity index for the given object + * + * @param obj + * the object to hash + * @return similarity index for this object + * @throws IOException + * file contents cannot be read from the repository. + * @throws TableFullException + * object hashing overflowed the storage capacity of the + * SimilarityIndex. + */ + public static SimilarityIndex create(ObjectLoader obj) throws IOException, + TableFullException { + SimilarityIndex idx = new SimilarityIndex(); + idx.hash(obj); + idx.sort(); + return idx; + } + SimilarityIndex() { idHashBits = 8; idHash = new long[1 << idHashBits]; @@ -212,7 +235,27 @@ class SimilarityIndex { Arrays.sort(idHash); } - int score(SimilarityIndex dst, int maxScore) { + /** + * Compute the similarity score between this index and another. + * <p> + * A region of a file is defined as a line in a text file or a fixed-size + * block in a binary file. 
To prepare an index, each region in the file is + * hashed; the values and counts of hashes are retained in a sorted table. + * Define the similarity fraction F as the count of matching regions + * between the two files divided by the maximum count of regions in + * either file. The similarity score is F multiplied by the maxScore + * constant, yielding a range [0, maxScore]. It is defined as maxScore for + * the degenerate case of two empty files. + * <p> + * The similarity score is symmetrical; i.e. a.score(b) == b.score(a). + * + * @param dst + * the other index + * @param maxScore + * the score representing a 100% match + * @return the similarity score + */ + public int score(SimilarityIndex dst, int maxScore) { long max = Math.max(hashedCnt, dst.hashedCnt); if (max == 0) return maxScore; @@ -381,7 +424,8 @@ class SimilarityIndex { return v & MAX_COUNT; } - static class TableFullException extends Exception { + /** Thrown by {@code create()} when file is too large. */ + public static class TableFullException extends Exception { private static final long serialVersionUID = 1L; } } |