]> source.dussan.org Git - jgit.git/commitdiff
Reintroduce garbage pack coalescing when ttl > 0. 65/90465/4
authorThirumala Reddy Mutchukota <thirumala@google.com>
Mon, 6 Feb 2017 22:08:27 +0000 (14:08 -0800)
committerThirumala Reddy Mutchukota <thirumala@google.com>
Wed, 8 Feb 2017 04:34:31 +0000 (20:34 -0800)
Disabling the garbage pack coalescing when garbageTtl > 0 can result in
lot of garbage packs if they are created within the garbageTtl time.

To avoid a large number of garbage packs, re-introducing garbage pack
coalescing for the packs that are created within a single calendar day
when the garbageTtl is more than one day or one third of the garbageTtl.

Change-Id: If969716aeb55fb4fd0ff71d75f41a07638cd5a69
Signed-off-by: Thirumala Reddy Mutchukota <thirumala@google.com>
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollectorTest.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsGarbageCollector.java

index 188b723e6e0e98031a9b5008063fcfc467d5aca9..771aadc841f38ca462dba3d7acb6e55294718604 100644 (file)
@@ -16,12 +16,15 @@ import java.io.IOException;
 import java.util.concurrent.TimeUnit;
 
 import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
+import org.eclipse.jgit.junit.MockSystemReader;
 import org.eclipse.jgit.junit.TestRepository;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.Ref;
 import org.eclipse.jgit.lib.Repository;
 import org.eclipse.jgit.revwalk.RevCommit;
 import org.eclipse.jgit.revwalk.RevWalk;
+import org.eclipse.jgit.util.SystemReader;
+import org.junit.After;
 import org.junit.Before;
 import org.junit.Test;
 
@@ -29,6 +32,7 @@ public class DfsGarbageCollectorTest {
        private TestRepository<InMemoryRepository> git;
        private InMemoryRepository repo;
        private DfsObjDatabase odb;
+       private MockSystemReader mockSystemReader;
 
        @Before
        public void setUp() throws IOException {
@@ -36,6 +40,13 @@ public class DfsGarbageCollectorTest {
                git = new TestRepository<>(new InMemoryRepository(desc));
                repo = git.getRepository();
                odb = repo.getObjectDatabase();
+               mockSystemReader = new MockSystemReader();
+               SystemReader.setInstance(mockSystemReader);
+       }
+
+       @After
+       public void tearDown() {
+               SystemReader.setInstance(null);
        }
 
        @Test
@@ -171,6 +182,58 @@ public class DfsGarbageCollectorTest {
                }
        }
 
+       @Test
+       public void testCollectionWithGarbageCoalescenceWithShortTtl()
+                       throws Exception {
+               RevCommit commit0 = commit().message("0").create();
+               RevCommit commit1 = commit().message("1").parent(commit0).create();
+               git.update("master", commit0);
+
+               // Create commits at 1 minute intervals with 1 hour ttl.
+               for (int i = 0; i < 100; i++) {
+                       mockSystemReader.tick(60);
+                       commit1 = commit().message("g" + i).parent(commit1).create();
+
+                       DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+                       gc.setGarbageTtl(1, TimeUnit.HOURS);
+                       run(gc);
+
+                       // Make sure we don't have more than 4 UNREACHABLE_GARBAGE packs
+                       // because all the packs that are created in a 20 minutes interval
+                       // should be coalesced and the packs older than 60 minutes should be
+                       // removed due to ttl.
+                       int count = countPacks(UNREACHABLE_GARBAGE);
+                       assertTrue("Garbage pack count should not exceed 4, but found "
+                                       + count, count <= 4);
+               }
+       }
+
+       @Test
+       public void testCollectionWithGarbageCoalescenceWithLongTtl()
+                       throws Exception {
+               RevCommit commit0 = commit().message("0").create();
+               RevCommit commit1 = commit().message("1").parent(commit0).create();
+               git.update("master", commit0);
+
+               // Create commits at 1 hour intervals with 2 days ttl.
+               for (int i = 0; i < 100; i++) {
+                       mockSystemReader.tick(3600);
+                       commit1 = commit().message("g" + i).parent(commit1).create();
+
+                       DfsGarbageCollector gc = new DfsGarbageCollector(repo);
+                       gc.setGarbageTtl(2, TimeUnit.DAYS);
+                       run(gc);
+
+                       // Make sure we don't have more than 3 UNREACHABLE_GARBAGE packs
+                       // because all the packs that are created in a single day should
+                       // be coalesced and the packs older than 2 days should be
+                       // removed due to ttl.
+                       int count = countPacks(UNREACHABLE_GARBAGE);
+                       assertTrue("Garbage pack count should not exceed 3, but found "
+                                       + count, count <= 3);
+               }
+       }
+
        @Test
        public void testEstimateGcPackSizeInNewRepo() throws Exception {
                RevCommit commit0 = commit().message("0").create();
@@ -420,20 +483,17 @@ public class DfsGarbageCollectorTest {
                run(gc);
        }
 
-       private void gcWithTtl() throws InterruptedException, IOException {
-               // Wait for the system clock to move by at least 1 millisecond.
-               // This allows the DfsGarbageCollector to recognize the boundary.
-               long start = System.currentTimeMillis();
-               do {
-                       Thread.sleep(10);
-               } while (System.currentTimeMillis() <= start);
-
+       private void gcWithTtl() throws IOException {
+               // Move the clock forward by 1 minute and use the same as ttl.
+               mockSystemReader.tick(60);
                DfsGarbageCollector gc = new DfsGarbageCollector(repo);
-               gc.setGarbageTtl(1, TimeUnit.MILLISECONDS);
+               gc.setGarbageTtl(1, TimeUnit.MINUTES);
                run(gc);
        }
 
        private void run(DfsGarbageCollector gc) throws IOException {
+               // adjust the current time that will be used by the gc operation.
+               mockSystemReader.tick(1);
                assertTrue("gc repacked", gc.pack(null));
                odb.clearCache();
        }
index 56ff1b5c2eaca87738fd23ab76e596edcbf07396..968313254b8dd29968d991ff3aa9d33b57eac30d 100644 (file)
@@ -56,9 +56,11 @@ import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Calendar;
 import java.util.Collection;
 import java.util.Collections;
 import java.util.EnumSet;
+import java.util.GregorianCalendar;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
@@ -82,6 +84,7 @@ import org.eclipse.jgit.lib.RefDatabase;
 import org.eclipse.jgit.revwalk.RevWalk;
 import org.eclipse.jgit.storage.pack.PackConfig;
 import org.eclipse.jgit.storage.pack.PackStatistics;
+import org.eclipse.jgit.util.SystemReader;
 import org.eclipse.jgit.util.io.CountingOutputStream;
 
 /** Repack and garbage collect a repository. */
@@ -100,7 +103,8 @@ public class DfsGarbageCollector {
 
        private PackConfig packConfig;
 
-       // See pack(), below, for how these two variables interact.
+       // See packIsCoalesceableGarbage(), below, for how these two variables
+       // interact.
        private long coalesceGarbageLimit = 50 << 20;
        private long garbageTtlMillis = TimeUnit.DAYS.toMillis(1);
 
@@ -228,14 +232,8 @@ public class DfsGarbageCollector {
                if (packConfig.getIndexVersion() != 2)
                        throw new IllegalStateException(
                                        JGitText.get().supportOnlyPackIndexVersion2);
-               if (garbageTtlMillis > 0) {
-                       // We disable coalescing because the coalescing step will keep
-                       // refreshing the UNREACHABLE_GARBAGE pack and we wouldn't
-                       // actually prune anything.
-                       coalesceGarbageLimit = 0;
-               }
 
-               startTimeMillis = System.currentTimeMillis();
+               startTimeMillis = SystemReader.getInstance().getCurrentTime();
                ctx = (DfsReader) objdb.newReader();
                try {
                        refdb.refresh();
@@ -310,14 +308,14 @@ public class DfsGarbageCollector {
                expiredGarbagePacks = new ArrayList<DfsPackFile>(packs.length);
 
                long mostRecentGC = mostRecentGC(packs);
-               long now = System.currentTimeMillis();
+               long now = SystemReader.getInstance().getCurrentTime();
                for (DfsPackFile p : packs) {
                        DfsPackDescription d = p.getPackDescription();
                        if (d.getPackSource() != UNREACHABLE_GARBAGE) {
                                packsBefore.add(p);
                        } else if (packIsExpiredGarbage(d, mostRecentGC, now)) {
                                expiredGarbagePacks.add(p);
-                       } else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit) {
+                       } else if (packIsCoalesceableGarbage(d, now)) {
                                packsBefore.add(p);
                        }
                }
@@ -360,6 +358,68 @@ public class DfsGarbageCollector {
                                && now - d.getLastModified() >= garbageTtlMillis;
        }
 
+       private boolean packIsCoalesceableGarbage(DfsPackDescription d, long now) {
+               // An UNREACHABLE_GARBAGE pack can be coalesced if its size is less than
+               // the coalesceGarbageLimit and either garbageTtl is zero or if the pack
+               // is created in a close time interval (on a single calendar day when
+               // the garbageTtl is more than one day or one third of the garbageTtl).
+               //
+               // When the garbageTtl is more than 24 hours, garbage packs that are
+               // created within a single calendar day are coalesced together. This
+               // would make the effective ttl of the garbage pack as garbageTtl+23:59
+               // and limit the number of garbage to a maximum number of
+               // garbageTtl_in_days + 1 (assuming all of them are less than the size
+               // of coalesceGarbageLimit).
+               //
+               // When the garbageTtl is less than or equal to 24 hours, garbage packs
+               // that are created within a one third of garbageTtl are coalesced
+               // together. This would make the effective ttl of the garbage packs as
+               // garbageTtl + (garbageTtl / 3) and would limit the number of garbage
+               // packs to a maximum number of 4 (assuming all of them are less than
+               // the size of coalesceGarbageLimit).
+
+               if (d.getPackSource() != UNREACHABLE_GARBAGE
+                               || d.getFileSize(PackExt.PACK) >= coalesceGarbageLimit) {
+                       return false;
+               }
+
+               if (garbageTtlMillis == 0) {
+                       return true;
+               }
+
+               long lastModified = d.getLastModified();
+               long dayStartLastModified = dayStartInMillis(lastModified);
+               long dayStartToday = dayStartInMillis(now);
+
+               if (dayStartLastModified != dayStartToday) {
+                       return false; // this pack is not created today.
+               }
+
+               if (garbageTtlMillis > TimeUnit.DAYS.toMillis(1)) {
+                       return true; // ttl is more than one day and pack is created today.
+               }
+
+               long timeInterval = garbageTtlMillis / 3;
+               if (timeInterval == 0) {
+                       return false; // ttl is too small, don't try to coalesce.
+               }
+
+               long modifiedTimeSlot = (lastModified - dayStartLastModified) / timeInterval;
+               long presentTimeSlot = (now - dayStartToday) / timeInterval;
+               return modifiedTimeSlot == presentTimeSlot;
+       }
+
+       private static long dayStartInMillis(long timeInMillis) {
+               Calendar cal = new GregorianCalendar(
+                               SystemReader.getInstance().getTimeZone());
+               cal.setTimeInMillis(timeInMillis);
+               cal.set(Calendar.HOUR_OF_DAY, 0);
+               cal.set(Calendar.MINUTE, 0);
+               cal.set(Calendar.SECOND, 0);
+               cal.set(Calendar.MILLISECOND, 0);
+               return cal.getTimeInMillis();
+       }
+
        /** @return all of the source packs that fed into this compaction. */
        public List<DfsPackDescription> getSourcePacks() {
                return toPrune();