aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorYoussef Elghareeb <ghareeb@google.com>2021-03-11 13:01:14 +0100
committerMatthias Sohn <matthias.sohn@sap.com>2021-03-14 11:38:13 +0100
commit4a78d911c578a6f9028d6e74b5668dfc384ef80f (patch)
tree6dc5c597adab84b5043bad901fc9aa1456bbb314
parent84ed57d2f63307437492f3a2c9b60ce7737b96e6 (diff)
downloadjgit-4a78d911c578a6f9028d6e74b5668dfc384ef80f.tar.gz
jgit-4a78d911c578a6f9028d6e74b5668dfc384ef80f.zip
Skip detecting content renames for large files
There are two code paths for detecting renames: one on tree diffs (using DiffFormatter#scan) and the other on single file diffs (using DiffFormatter#format). The latter skips binary and large files for rename detection - check [1], but the former doesn't. This change skips content rename detection for the tree diffs case for large files. This is essential to avoid expensive computations while reading the file, especially for callers who don't want to pay that cost. Content renames are those which involve files with slightly modified content. Exact renames will still be identified. The default threshold for file sizes is reused from PackConfig.DEFAULT_BIG_FILE_THRESHOLD: 50 MB. [1] https://git.eclipse.org/r/plugins/gitiles/jgit/jgit/+/232876421d067a1242e8afcaa33b9171342fee3e/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java#386 Change-Id: Idbc2c29bd381c6e387185204638f76fda47df41e Signed-off-by: Youssef Elghareeb <ghareeb@google.com>
-rw-r--r--org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java48
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java28
-rw-r--r--org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java16
3 files changed, 92 insertions, 0 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
index 6203feda48..2ea3cd7ebe 100644
--- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
+++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
@@ -543,6 +543,43 @@ public class RenameDetectorTest extends RepositoryTestCase {
}
@Test
+ public void testExactRename_LargeFile() throws Exception {
+ ObjectId aId = blob("blah\nblah\nfoo"); // size = 14
+
+ DiffEntry a = DiffEntry.add(PATH_A, aId);
+ DiffEntry b = DiffEntry.delete(PATH_Q, aId);
+
+ rd.add(a);
+ rd.add(b);
+
+ // Exact renames are identified for large files
+ rd.setBigFileThreshold(10);
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(1, entries.size());
+ assertRename(b, a, 100, entries.get(0));
+ }
+
+ @Test
+ public void testInexactRename_LargeFile() throws Exception {
+ ObjectId aId = blob("blah\nblah\nfoo"); // size = 14
+ ObjectId bId = blob("bla\nblah\nfoo"); // size = 13
+
+ DiffEntry a = DiffEntry.add(PATH_A, aId);
+ DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+ rd.add(a);
+ rd.add(b);
+
+ rd.setBigFileThreshold(10);
+
+ // Inexact renames are not detected for large files
+ List<DiffEntry> entries = rd.compute();
+ assertEquals(2, entries.size());
+ assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0));
+ assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
+ }
+
+ @Test
public void testSetRenameScore_IllegalArgs() throws Exception {
try {
rd.setRenameScore(-1);
@@ -634,4 +671,15 @@ public class RenameDetectorTest extends RepositoryTestCase {
assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId);
assertEquals(newMode, add.newMode);
}
+
+ private static void assertDelete(String oldName, ObjectId oldId,
+ FileMode oldMode, DiffEntry delete) {
+ assertEquals(DiffEntry.DEV_NULL, delete.newPath);
+ assertEquals(DiffEntry.A_ZERO, delete.newId);
+ assertEquals(FileMode.MISSING, delete.newMode);
+ assertEquals(ChangeType.DELETE, delete.changeType);
+ assertEquals(oldName, delete.oldPath);
+ assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId);
+ assertEquals(oldMode, delete.oldMode);
+ }
}
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
index 80e1b18291..75784c2556 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
@@ -12,6 +12,7 @@ package org.eclipse.jgit.diff;
import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
+import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
import java.io.IOException;
import java.util.ArrayList;
@@ -97,6 +98,12 @@ public class RenameDetector {
/** Limit in the number of files to consider for renames. */
private int renameLimit;
+ /**
+ * File size threshold (in bytes) for detecting renames. Files larger
+ * than this size will not be processed for renames.
+ */
+ private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
+
/** Set if the number of adds or deletes was over the limit. */
private boolean overRenameLimit;
@@ -209,6 +216,26 @@ public class RenameDetector {
}
/**
+ * Get file size threshold for detecting renames. Files larger
+ * than this size will not be processed for rename detection.
+ *
+ * @return threshold in bytes of the file size.
+ * @since 5.12
+ */
+ public int getBigFileThreshold() { return bigFileThreshold; }
+
+ /**
+ * Set the file size threshold for detecting renames. Files larger than this
+ * threshold will be skipped during rename detection computation.
+ *
+ * @param threshold file size threshold in bytes.
+ * @since 5.12
+ */
+ public void setBigFileThreshold(int threshold) {
+ this.bigFileThreshold = threshold;
+ }
+
+ /**
* Check if the detector is over the rename limit.
* <p>
* This method can be invoked either before or after {@code getEntries} has
@@ -493,6 +520,7 @@ public class RenameDetector {
d = new SimilarityRenameDetector(reader, deleted, added);
d.setRenameScore(getRenameScore());
+ d.setBigFileThreshold(getBigFileThreshold());
d.compute(pm);
overRenameLimit |= d.isTableOverflow();
deleted = d.getLeftOverSources();
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
index 74a11a024a..082f31d178 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java
@@ -12,6 +12,7 @@ package org.eclipse.jgit.diff;
import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
+import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
import java.io.IOException;
import java.util.ArrayList;
@@ -80,6 +81,12 @@ class SimilarityRenameDetector {
/** Score a pair must exceed to be considered a rename. */
private int renameScore = 60;
+ /**
+ * File size threshold (in bytes) for detecting renames. Files larger
+ * than this size will not be processed for renames.
+ */
+ private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
+
/** Set if any {@link SimilarityIndex.TableFullException} occurs. */
private boolean tableOverflow;
@@ -96,6 +103,10 @@ class SimilarityRenameDetector {
renameScore = score;
}
+ void setBigFileThreshold(int threshold) {
+ bigFileThreshold = threshold;
+ }
+
void compute(ProgressMonitor pm) throws IOException, CancelledException {
if (pm == null)
pm = NullProgressMonitor.INSTANCE;
@@ -253,6 +264,11 @@ class SimilarityRenameDetector {
continue;
}
+ if (max > bigFileThreshold) {
+ pm.update(1);
+ continue;
+ }
+
if (s == null) {
try {
s = hash(OLD, srcEnt);