]> source.dussan.org Git - jgit.git/commitdiff
Skip detecting content renames for large files 53/177553/5
authorYoussef Elghareeb <ghareeb@google.com>
Thu, 11 Mar 2021 12:01:14 +0000 (13:01 +0100)
committerMatthias Sohn <matthias.sohn@sap.com>
Sun, 14 Mar 2021 10:38:13 +0000 (11:38 +0100)
There are two code paths for detecting renames: one on tree diffs
(using DiffFormatter#scan) and the other on single file diffs (using
DiffFormatter#format). The latter skips binary and large files
for rename detection - check [1], but the former doesn't.

This change skips content rename detection for the tree diffs case for
large files. This is essential to avoid expensive computations while
reading the file, especially for callers who don't want to pay that
cost. Content renames are those which involve files with slightly
modified content. Exact renames will still be identified.

The default threshold for file sizes is reused from
PackConfig.DEFAULT_BIG_FILE_THRESHOLD: 50 MB.

[1] https://git.eclipse.org/r/plugins/gitiles/jgit/jgit/+/232876421d067a1242e8afcaa33b9171342fee3e/org.eclipse.jgit/src/org/eclipse/jgit/diff/RawText.java#386

Change-Id: Idbc2c29bd381c6e387185204638f76fda47df41e
Signed-off-by: Youssef Elghareeb <ghareeb@google.com>
org.eclipse.jgit.test/tst/org/eclipse/jgit/diff/RenameDetectorTest.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/RenameDetector.java
org.eclipse.jgit/src/org/eclipse/jgit/diff/SimilarityRenameDetector.java

index 6203feda48cf5d2345afdf262b2b0d81edd00e01..2ea3cd7ebec09de2ddd0f0155a866caa85c3587e 100644 (file)
@@ -542,6 +542,43 @@ public class RenameDetectorTest extends RepositoryTestCase {
                assertEquals(0, modify.score);
        }
 
+       @Test
+       public void testExactRename_LargeFile() throws Exception {
+               ObjectId aId = blob("blah\nblah\nfoo"); // size = 14
+
+               DiffEntry a = DiffEntry.add(PATH_A, aId);
+               DiffEntry b = DiffEntry.delete(PATH_Q, aId);
+
+               rd.add(a);
+               rd.add(b);
+
+               // Exact renames are identified for large files
+               rd.setBigFileThreshold(10);
+               List<DiffEntry> entries = rd.compute();
+               assertEquals(1, entries.size());
+               assertRename(b, a, 100, entries.get(0));
+       }
+
+       @Test
+       public void testInexactRename_LargeFile() throws Exception {
+               ObjectId aId = blob("blah\nblah\nfoo"); // size = 14
+               ObjectId bId = blob("bla\nblah\nfoo"); // size = 13
+
+               DiffEntry a = DiffEntry.add(PATH_A, aId);
+               DiffEntry b = DiffEntry.delete(PATH_Q, bId);
+
+               rd.add(a);
+               rd.add(b);
+
+               rd.setBigFileThreshold(10);
+
+               // Inexact renames are not detected for large files
+               List<DiffEntry> entries = rd.compute();
+               assertEquals(2, entries.size());
+               assertAdd(PATH_A, aId, FileMode.REGULAR_FILE, entries.get(0));
+               assertDelete(PATH_Q, bId, FileMode.REGULAR_FILE, entries.get(1));
+       }
+
        @Test
        public void testSetRenameScore_IllegalArgs() throws Exception {
                try {
@@ -634,4 +671,15 @@ public class RenameDetectorTest extends RepositoryTestCase {
                assertEquals(AbbreviatedObjectId.fromObjectId(newId), add.newId);
                assertEquals(newMode, add.newMode);
        }
+
+       private static void assertDelete(String oldName, ObjectId oldId,
+                       FileMode oldMode, DiffEntry delete) {
+               assertEquals(DiffEntry.DEV_NULL, delete.newPath);
+               assertEquals(DiffEntry.A_ZERO, delete.newId);
+               assertEquals(FileMode.MISSING, delete.newMode);
+               assertEquals(ChangeType.DELETE, delete.changeType);
+               assertEquals(oldName, delete.oldPath);
+               assertEquals(AbbreviatedObjectId.fromObjectId(oldId), delete.oldId);
+               assertEquals(oldMode, delete.oldMode);
+       }
 }
index 80e1b18291531cd8e1aad0ca869ccf5f090831b1..75784c2556603d4ada69bb59586d080dac7e6366 100644 (file)
@@ -12,6 +12,7 @@ package org.eclipse.jgit.diff;
 
 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
+import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -97,6 +98,12 @@ public class RenameDetector {
        /** Limit in the number of files to consider for renames. */
        private int renameLimit;
 
+       /**
+        * File size threshold (in bytes) for detecting renames. Files larger
+        * than this size will not be processed for renames.
+        */
+       private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
+
        /** Set if the number of adds or deletes was over the limit. */
        private boolean overRenameLimit;
 
@@ -208,6 +215,26 @@ public class RenameDetector {
                renameLimit = limit;
        }
 
+       /**
+        * Get file size threshold for detecting renames. Files larger
+        * than this size will not be processed for rename detection.
+        *
+        * @return threshold in bytes of the file size.
+        * @since 5.12
+        */
+       public int getBigFileThreshold() { return bigFileThreshold; }
+
+       /**
+        * Set the file size threshold for detecting renames. Files larger than this
+        * threshold will be skipped during rename detection computation.
+        *
+        * @param threshold file size threshold in bytes.
+        * @since 5.12
+        */
+       public void setBigFileThreshold(int threshold) {
+               this.bigFileThreshold = threshold;
+       }
+
        /**
         * Check if the detector is over the rename limit.
         * <p>
@@ -493,6 +520,7 @@ public class RenameDetector {
 
                        d = new SimilarityRenameDetector(reader, deleted, added);
                        d.setRenameScore(getRenameScore());
+                       d.setBigFileThreshold(getBigFileThreshold());
                        d.compute(pm);
                        overRenameLimit |= d.isTableOverflow();
                        deleted = d.getLeftOverSources();
index 74a11a024a9f2fef0150140cafb6818b376415c1..082f31d178b0810f851165b613f84d9ef303a815 100644 (file)
@@ -12,6 +12,7 @@ package org.eclipse.jgit.diff;
 
 import static org.eclipse.jgit.diff.DiffEntry.Side.NEW;
 import static org.eclipse.jgit.diff.DiffEntry.Side.OLD;
+import static org.eclipse.jgit.storage.pack.PackConfig.DEFAULT_BIG_FILE_THRESHOLD;
 
 import java.io.IOException;
 import java.util.ArrayList;
@@ -80,6 +81,12 @@ class SimilarityRenameDetector {
        /** Score a pair must exceed to be considered a rename. */
        private int renameScore = 60;
 
+       /**
+        * File size threshold (in bytes) for detecting renames. Files larger
+        * than this size will not be processed for renames.
+        */
+       private int bigFileThreshold = DEFAULT_BIG_FILE_THRESHOLD;
+
        /** Set if any {@link SimilarityIndex.TableFullException} occurs. */
        private boolean tableOverflow;
 
@@ -96,6 +103,10 @@ class SimilarityRenameDetector {
                renameScore = score;
        }
 
+       void setBigFileThreshold(int threshold) {
+               bigFileThreshold = threshold;
+       }
+
        void compute(ProgressMonitor pm) throws IOException, CancelledException {
                if (pm == null)
                        pm = NullProgressMonitor.INSTANCE;
@@ -253,6 +264,11 @@ class SimilarityRenameDetector {
                                        continue;
                                }
 
+                               if (max > bigFileThreshold) {
+                                       pm.update(1);
+                                       continue;
+                               }
+
                                if (s == null) {
                                        try {
                                                s = hash(OLD, srcEnt);