Browse Source

Skip detecting content renames for binary files

This is similar to change Idbc2c29bd, which skipped detecting content
renames for large files. This change adds a new option to
RenameDetector called "skipContentRenamesForBinaryFiles" that, when set,
causes binary files with any slight modification to be identified as
added/deleted instead of renamed. The option defaults to false,
preserving the current behaviour.

Change-Id: I4770b1f69c60b1037025ddd0940ba86df6047299
tags/v5.12.0.202106011439-rc1
Youssef Elghareeb 3 years ago
parent
commit
1788b72d1a

+ 51
- 0
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java View File

@@ -579,6 +579,57 @@ public class RenameDetectorTest extends RepositoryTestCase {
assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
}

@Test
public void testExactRenameForBinaryFile_isIdentified() throws Exception {
	// Blob content embeds NUL bytes; the add and the delete share the same
	// blob id, so this is the "exact rename" case named by the test.
	ObjectId binaryBlob = blob("a\nb\nc\n\0\0\0\0d\n");
	DiffEntry added = DiffEntry.add(PATH_A, binaryBlob);
	DiffEntry deleted = DiffEntry.delete(PATH_Q, binaryBlob);

	rd.add(added);
	rd.add(deleted);
	List<DiffEntry> result = rd.compute();

	// The add/delete pair must collapse into one rename scored at 100.
	assertEquals(1, result.size());
	assertRename(deleted, added, 100, result.get(0));
}

@Test
public void testInexactRenameForBinaryFile_identifiedByDefault() throws Exception {
	// Two blobs with embedded NUL bytes that differ by a single NUL.
	ObjectId addedBlob = blob("a\nb\nc\n\0\0\0\0d\n");
	ObjectId deletedBlob = blob("a\nb\nc\n\0\0\0d\n");
	DiffEntry added = DiffEntry.add(PATH_A, addedBlob);
	DiffEntry deleted = DiffEntry.delete(PATH_Q, deletedBlob);

	rd.add(added);
	rd.add(deleted);
	rd.setRenameScore(40);
	List<DiffEntry> result = rd.compute();

	// The skip option is left untouched here, so the pair is still
	// reported as one rename, with a score of 50.
	assertEquals(1, result.size());
	assertRename(deleted, added, 50, result.get(0));
}

@Test
public void testInexactRenameForBinaryFile_notIdentifiedIfSkipParameterSet() throws Exception {
	// Same near-identical pair of NUL-containing blobs as the
	// "identified by default" scenario.
	ObjectId addedBlob = blob("a\nb\nc\n\0\0\0\0d\n");
	ObjectId deletedBlob = blob("a\nb\nc\n\0\0\0d\n");
	DiffEntry added = DiffEntry.add(PATH_A, addedBlob);
	DiffEntry deleted = DiffEntry.delete(PATH_Q, deletedBlob);

	rd.add(added);
	rd.add(deleted);
	rd.setRenameScore(40);
	rd.setSkipContentRenamesForBinaryFiles(true);
	List<DiffEntry> result = rd.compute();

	// With the skip option enabled the entries stay a separate add and
	// delete instead of being paired into a rename.
	assertEquals(2, result.size());
	assertAdd(PATH_A, addedBlob, FileMode.REGULAR_FILE, result.get(0));
	assertDelete(PATH_Q, deletedBlob, FileMode.REGULAR_FILE, result.get(1));
}

@Test
public void testSetRenameScore_IllegalArgs() throws Exception {
try {

+ 28
- 0
org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java View File

@@ -104,6 +104,13 @@ public class RenameDetector {
*/
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;

/**
 * Whether to skip detecting content renames for binary files. A content
 * rename is a rename that is not exact, i.e. the two files differ by a
 * slight content modification. Defaults to {@code false}.
 */
private boolean skipContentRenamesForBinaryFiles = false;

/** Set if the number of adds or deletes was over the limit. */
private boolean overRenameLimit;

@@ -235,6 +242,26 @@ public class RenameDetector {
this.bigFileThreshold = threshold;
}

/**
 * Tell whether content rename detection is skipped for binary files.
 *
 * @return {@code true} if content renames are skipped for binary files,
 *         {@code false} otherwise.
 * @since 5.12
 */
public boolean getSkipContentRenamesForBinaryFiles() {
	return this.skipContentRenamesForBinaryFiles;
}

/**
 * Configure whether content rename detection is skipped for binary
 * files.
 *
 * @param value
 *            {@code true} to skip content renames for binary files,
 *            {@code false} to keep detecting them.
 * @since 5.12
 */
public void setSkipContentRenamesForBinaryFiles(boolean value) {
	skipContentRenamesForBinaryFiles = value;
}

/**
* Check if the detector is over the rename limit.
* <p>
@@ -521,6 +548,7 @@ public class RenameDetector {
d = new SimilarityRenameDetector(reader, deleted, added);
d.setRenameScore(getRenameScore());
d.setBigFileThreshold(getBigFileThreshold());
d.setSkipBinaryFiles(getSkipContentRenamesForBinaryFiles());
d.compute(pm);
overRenameLimit |= d.isTableOverflow();
deleted = d.getLeftOverSources();

+ 10
- 3
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityIndex.java View File

@@ -102,6 +102,15 @@ public class SimilarityIndex {
idGrowAt = growAt(idHashBits);
}

/**
 * Check whether the content served by an object loader is binary.
 * <p>
 * Objects the loader reports as large are inspected through a stream,
 * which is closed before returning; all other objects are inspected
 * through their cached byte array. In both cases the decision is
 * delegated to {@code RawText.isBinary}.
 *
 * @param obj
 *            loader of the object to inspect.
 * @return {@code true} if {@code RawText} classifies the content as
 *         binary.
 * @throws IOException
 *             if the object content cannot be read.
 */
static boolean isBinary(ObjectLoader obj) throws IOException {
	if (!obj.isLarge()) {
		return RawText.isBinary(obj.getCachedBytes());
	}
	try (ObjectStream content = obj.openStream()) {
		return RawText.isBinary(content);
	}
}

void hash(ObjectLoader obj) throws MissingObjectException, IOException,
TableFullException {
if (obj.isLarge()) {
@@ -115,9 +124,7 @@ public class SimilarityIndex {
private void hashLargeObject(ObjectLoader obj) throws IOException,
TableFullException {
boolean text;
try (ObjectStream in1 = obj.openStream()) {
text = !RawText.isBinary(in1);
}
text = !isBinary(obj);

try (ObjectStream in2 = obj.openStream()) {
hash(in2, in2.getSize(), text);

+ 22
- 4
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java View File

@@ -26,6 +26,7 @@ import org.eclipse.jgit.errors.CancelledException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.lib.FileMode;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectLoader;
import org.eclipse.jgit.lib.ProgressMonitor;

class SimilarityRenameDetector {
@@ -87,6 +88,9 @@ class SimilarityRenameDetector {
*/
private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;

/**
 * Skip content renames for binary files. Assigned through
 * {@link #setSkipBinaryFiles(boolean)}; defaults to {@code false}.
 */
private boolean skipBinaryFiles = false;

/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
private boolean tableOverflow;

@@ -107,6 +111,10 @@ class SimilarityRenameDetector {
bigFileThreshold = threshold;
}

/**
 * Configure whether binary files are left out of content rename
 * detection.
 *
 * @param value
 *            {@code true} to skip entries whose content
 *            {@code SimilarityIndex.isBinary} reports as binary,
 *            {@code false} to hash them like any other entry.
 */
void setSkipBinaryFiles(boolean value) {
	this.skipBinaryFiles = value;
}

void compute(ProgressMonitor pm) throws IOException, CancelledException {
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
@@ -271,7 +279,12 @@ class SimilarityRenameDetector {

if (s == null) {
try {
s = hash(OLD, srcEnt);
ObjectLoader loader = reader.open(OLD, srcEnt);
if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
pm.update(1);
continue SRC;
}
s = hash(loader);
} catch (TableFullException tableFull) {
tableOverflow = true;
continue SRC;
@@ -280,7 +293,12 @@ class SimilarityRenameDetector {

SimilarityIndex d;
try {
d = hash(NEW, dstEnt);
ObjectLoader loader = reader.open(NEW, dstEnt);
if (skipBinaryFiles && SimilarityIndex.isBinary(loader)) {
pm.update(1);
continue;
}
d = hash(loader);
} catch (TableFullException tableFull) {
if (dstTooLarge == null)
dstTooLarge = new BitSet(dsts.size());
@@ -364,10 +382,10 @@ class SimilarityRenameDetector {
return (((dirScoreLtr + dirScoreRtl) * 25) + (fileScore * 50)) / 100;
}

private SimilarityIndex hash(DiffEntry.Side side, DiffEntry ent)
private SimilarityIndex hash(ObjectLoader objectLoader)
throws IOException, TableFullException {
SimilarityIndex r = new SimilarityIndex();
r.hash(reader.open(side, ent));
r.hash(objectLoader);
r.sort();
return r;
}

Loading…
Cancel
Save