This is similar to change Idbc2c29bd that skipped detecting content renames for large files. With this change, we added a new option in RenameDetector called "skipContentRenamesForBinaryFiles", that when set, causes binary files with any slight modification to be identified as added/deleted. The default for this boolean is false, so preserving current behaviour. Change-Id: I4770b1f69c60b1037025ddd0940ba86df6047299tags/v5.12.0.202106011439-rc1
@@ -579,6 +579,57 @@ public class RenameDetectorTest extends RepositoryTestCase { | |||
assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1)); | |||
} | |||
@Test | |||
public void testExactRenameForBinaryFile_isIdentified() throws Exception { | |||
ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n"); | |||
DiffEntry a = DiffEntry.add(PATH_A, aId); | |||
DiffEntry b = DiffEntry.delete(PATH_Q, aId); | |||
rd.add(a); | |||
rd.add(b); | |||
List<DiffEntry> entries = rd.compute(); | |||
assertEquals(1, entries.size()); | |||
assertRename(b, a, 100, entries.get(0)); | |||
} | |||
@Test | |||
public void testInexactRenameForBinaryFile_identifiedByDefault() throws Exception { | |||
ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n"); | |||
ObjectId bId = blob("a\nb\nc\n\0\0\0d\n"); | |||
DiffEntry a = DiffEntry.add(PATH_A, aId); | |||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); | |||
rd.add(a); | |||
rd.add(b); | |||
rd.setRenameScore(40); | |||
List<DiffEntry> entries = rd.compute(); | |||
assertEquals(1, entries.size()); | |||
assertRename(b, a, 50, entries.get(0)); | |||
} | |||
@Test | |||
public void testInexactRenameForBinaryFile_notIdentifiedIfSkipParameterSet() throws Exception { | |||
ObjectId aId = blob("a\nb\nc\n\0\0\0\0d\n"); | |||
ObjectId bId = blob("a\nb\nc\n\0\0\0d\n"); | |||
DiffEntry a = DiffEntry.add(PATH_A, aId); | |||
DiffEntry b = DiffEntry.delete(PATH_Q, bId); | |||
rd.add(a); | |||
rd.add(b); | |||
rd.setRenameScore(40); | |||
rd.setSkipContentRenamesForBinaryFiles(true); | |||
List<DiffEntry> entries = rd.compute(); | |||
assertEquals(2, entries.size()); | |||
assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0)); | |||
assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1)); | |||
} | |||
@Test | |||
public void testSetRenameScore_IllegalArgs() throws Exception { | |||
try { |
@@ -104,6 +104,13 @@ public class RenameDetector { | |||
*/ | |||
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD; | |||
/** | |||
* Skip detecting content renames for binary files. Content renames are | |||
* those that are not exact, that is with a slight content modification | |||
* between the two files. | |||
*/ | |||
private boolean skipContentRenamesForBinaryFiles = false; | |||
/** Set if the number of adds or deletes was over the limit. */ | |||
private boolean overRenameLimit; | |||
@@ -235,6 +242,26 @@ public class RenameDetector { | |||
this.bigFileThreshold = threshold; | |||
} | |||
/** | |||
* Get skipping detecting content renames for binary files. | |||
* | |||
* @return true if content renames should be skipped for binary files, false otherwise. | |||
* @since 5.12 | |||
*/ | |||
public boolean getSkipContentRenamesForBinaryFiles() { | |||
return skipContentRenamesForBinaryFiles; | |||
} | |||
/** | |||
* Sets skipping detecting content renames for binary files. | |||
* | |||
* @param value true if content renames should be skipped for binary files, false otherwise. | |||
* @since 5.12 | |||
*/ | |||
public void setSkipContentRenamesForBinaryFiles(boolean value) { | |||
this.skipContentRenamesForBinaryFiles = value; | |||
} | |||
/** | |||
* Check if the detector is over the rename limit. | |||
* <p> | |||
@@ -521,6 +548,7 @@ public class RenameDetector { | |||
d = new SimilarityRenameDetector(reader, deleted, added); | |||
d.setRenameScore(getRenameScore()); | |||
d.setBigFileThreshold(getBigFileThreshold()); | |||
d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles()); | |||
d.compute(pm); | |||
overRenameLimit |= d.isTableOverflow(); | |||
deleted = d.getLeftOverSources(); |
@@ -102,6 +102,15 @@ public class SimilarityIndex { | |||
idGrowAt = growAt(idHashBits); | |||
} | |||
static boolean isBinary(ObjectLoader obj) throws IOException { | |||
if (obj.isLarge()) { | |||
try (ObjectStream in1 = obj.openStream()) { | |||
return RawText.isBinary(in1); | |||
} | |||
} | |||
return RawText.isBinary(obj.getCachedBytes()); | |||
} | |||
void hash(ObjectLoader obj) throws MissingObjectException, IOException, | |||
TableFullException { | |||
if (obj.isLarge()) { | |||
@@ -115,9 +124,7 @@ public class SimilarityIndex { | |||
private void hashLargeObject(ObjectLoader obj) throws IOException, | |||
TableFullException { | |||
boolean text; | |||
try (ObjectStream in1 = obj.openStream()) { | |||
text = !RawText.isBinary(in1); | |||
} | |||
text = !isBinary(obj); | |||
try (ObjectStream in2 = obj.openStream()) { | |||
hash(in2, in2.getSize(), text); |
@@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.CancelledException; | |||
import org.eclipse.jgit.internal.JGitText; | |||
import org.eclipse.jgit.lib.FileMode; | |||
import org.eclipse.jgit.lib.NullProgressMonitor; | |||
import org.eclipse.jgit.lib.ObjectLoader; | |||
import org.eclipse.jgit.lib.ProgressMonitor; | |||
class SimilarityRenameDetector { | |||
@@ -87,6 +88,9 @@ class SimilarityRenameDetector { | |||
*/ | |||
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD; | |||
/** Skip content renames for binary files. */ | |||
private boolean skipBinaryFiles = false; | |||
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */ | |||
private boolean tableOverflow; | |||
@@ -107,6 +111,10 @@ class SimilarityRenameDetector { | |||
bigFileThreshold = threshold; | |||
} | |||
void setSkipBinaryFiles(boolean value) { | |||
skipBinaryFiles = value; | |||
} | |||
void compute(ProgressMonitor pm) throws IOException, CancelledException { | |||
if (pm == null) | |||
pm = NullProgressMonitor.INSTANCE; | |||
@@ -271,7 +279,12 @@ class SimilarityRenameDetector { | |||
if (s == null) { | |||
try { | |||
s = hash(OLD, srcEnt); | |||
ObjectLoader loader = reader.open(OLD, srcEnt); | |||
if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) { | |||
pm.update(1); | |||
continue SRC; | |||
} | |||
s = hash(loader); | |||
} catch (TableFullException tableFull) { | |||
tableOverflow = true; | |||
continue SRC; | |||
@@ -280,7 +293,12 @@ class SimilarityRenameDetector { | |||
SimilarityIndex d; | |||
try { | |||
d = hash(NEW, dstEnt); | |||
ObjectLoader loader = reader.open(NEW, dstEnt); | |||
if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) { | |||
pm.update(1); | |||
continue; | |||
} | |||
d = hash(loader); | |||
} catch (TableFullException tableFull) { | |||
if (dstTooLarge == null) | |||
dstTooLarge = new BitSet(dsts.size()); | |||
@@ -364,10 +382,10 @@ class SimilarityRenameDetector { | |||
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100; | |||
} | |||
private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent) | |||
private SimilarityIndex hash(ObjectLoader objectLoader) | |||
throws IOException, TableFullException { | |||
SimilarityIndex r = new SimilarityIndex(); | |||
r.hash(reader.open(side, ent)); | |||
r.hash(objectLoader); | |||
r.sort(); | |||
return r; | |||
} |