]> source.dussan.org Git - jgit.git/commitdiff
Do not exclude objects in locked packs from bitmap processing 25/203625/21
authorLuca Milanesio <luca.milanesio@gmail.com>
Fri, 11 Aug 2023 19:15:17 +0000 (20:15 +0100)
committerMatthias Sohn <matthias.sohn@sap.com>
Thu, 12 Oct 2023 20:46:08 +0000 (22:46 +0200)
Packfiles having an equivalent .keep file are associated with in-flight
pushes that haven't been completed, with potentially a set of git
objects not yet referenced by a ref.

If the Git client is not up-to-date, it may result in pushing a
packfile, generating a <packfile>.keep on the server, which
may also contain existing commits due to the lack of Git protocol
negotiation in the git-receive-pack.

The Git protocol negotiation is the phase where the client and the
server exchange the list of refs they have for trying to find a common
base and minimise the amount of objects to be transferred.

The repack phase in GC was previously skipping all objects that were
contained in all packfiles having a <packfile>.keep file associated
(aka "locked packfiles"), which did not take into consideration the
fact that excluding the existing commits would have resulted in the
generation of an invalid bitmap file.

The code for excluding the objects in the locked packfiles was written
well before the bitmap was introduced, hence could not consider a use
case that did not exist at that time.

However, when the bitmap was introduced, the exclusion of locked
packfiles was not changed, hence creating a potential problem.
The issue went unnoticed for many years because the bitmap generation
was disabled when JGit noticed any locked packfiles; however, the
bitmaps are enabled again since  Id722e68d9f , and the the issue is now
visible and is impacting the GC repack phase.

Introduce the '--pack-kept-objects' option in GC for including the
objects contained in the locked packfiles during the repack phase,
which is not an issue because of the following:

- If there are any existing commits duplicated in the packfiles
  they will be just considered once anyway because the repack doesn't
  generate duplicates in the output packfile.

- If there are any new commits that do not have any ref pointing to
  them, they will be automatically excluded from the output repacked
  packfile.

The same identical solution is adopted in the C implementation of git
in repack.c.

Because the locked packfile is not pruned, any new commits not pointed
by any refs will remain in the repository and there will not be any
accidental pruning or object loss as it is today before this change.

As a side-effect of this change, it is now potentially possible to still
have duplicate BLOBs after GC when the keep packfile contained existing
objects. However, it is way better to keep the duplication until the
next GC phase rather than omitting existing objects from repacking and,
therefore generating an invalid bitmap and incorrect packfile.

Bug: 582292
Bug: 582455
Change-Id: Ide3445e652fcf256a7912f881cb898897c99b8f8

org.eclipse.jgit.pgm/resources/org/eclipse/jgit/pgm/internal/CLIText.properties
org.eclipse.jgit.pgm/src/org/eclipse/jgit/pgm/Gc.java
org.eclipse.jgit.ssh.apache.agent/bin/.project [new file with mode: 0644]
org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/GcKeepFilesTest.java
org.eclipse.jgit/.settings/.api_filters
org.eclipse.jgit/src/org/eclipse/jgit/api/GarbageCollectCommand.java
org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/GC.java

index 97450033ee0cdc7817570f687ad848bfdc905429..ae10af2f61abf818947f433e4d72a1b27344e584 100644 (file)
@@ -265,6 +265,7 @@ usage_MakeCacheTree=Show the current cache tree structure
 usage_Match=Only consider tags matching the given glob(7) pattern or patterns, excluding the "refs/tags/" prefix.
 usage_MergeBase=Find as good common ancestors as possible for a merge
 usage_MergesTwoDevelopmentHistories=Merges two development histories
+usage_PackKeptObjects=Include objects in packs locked by a ".keep" file when repacking
 usage_PreserveOldPacks=Preserve old pack files by moving them into the preserved subdirectory instead of deleting them after repacking
 usage_PrunePreserved=Remove the preserved subdirectory containing previously preserved old pack files before repacking, and before preserving more old pack files
 usage_ReadDirCache= Read the DirCache 100 times
index c87f0b6dcf641cd82468284b5e5ea8ae8c48d777..35ac7a162a01838934c923e3e7bc7fbc5ff7bb57 100644 (file)
@@ -27,6 +27,9 @@ class Gc extends TextBuiltin {
        @Option(name = "--prune-preserved", usage = "usage_PrunePreserved")
        private Boolean prunePreserved;
 
+       @Option(name = "--pack-kept-objects", usage = "usage_PackKeptObjects")
+       private Boolean packKeptObjects;
+
        /** {@inheritDoc} */
        @Override
        protected void run() {
@@ -40,6 +43,9 @@ class Gc extends TextBuiltin {
                        if (prunePreserved != null) {
                                command.setPrunePreserved(prunePreserved.booleanValue());
                        }
+                       if (packKeptObjects != null) {
+                               command.setPackKeptObjects(packKeptObjects.booleanValue());
+                       }
                        command.call();
                } catch (GitAPIException e) {
                        throw die(e.getMessage(), e);
diff --git a/org.eclipse.jgit.ssh.apache.agent/bin/.project b/org.eclipse.jgit.ssh.apache.agent/bin/.project
new file mode 100644 (file)
index 0000000..73358f4
--- /dev/null
@@ -0,0 +1,28 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<projectDescription>
+       <name>org.eclipse.jgit.ssh.apache.agent</name>
+       <comment></comment>
+       <projects>
+       </projects>
+       <buildSpec>
+               <buildCommand>
+                       <name>org.eclipse.jdt.core.javabuilder</name>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
+               <buildCommand>
+                       <name>org.eclipse.pde.ManifestBuilder</name>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
+               <buildCommand>
+                       <name>org.eclipse.pde.SchemaBuilder</name>
+                       <arguments>
+                       </arguments>
+               </buildCommand>
+       </buildSpec>
+       <natures>
+               <nature>org.eclipse.pde.PluginNature</nature>
+               <nature>org.eclipse.jdt.core.javanature</nature>
+       </natures>
+</projectDescription>
index 67848ae7eaa241be580a182e715820e4dedd6254..5919192e54e9dbefb3f19d46d3e7d9dcc17cc0c7 100644 (file)
@@ -12,6 +12,7 @@ package org.eclipse.jgit.internal.storage.file;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
 import static org.junit.Assert.assertTrue;
 
 import java.util.Iterator;
@@ -19,9 +20,12 @@ import java.util.Iterator;
 import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
 import org.eclipse.jgit.internal.storage.pack.PackExt;
 import org.eclipse.jgit.junit.TestRepository.BranchBuilder;
+import org.eclipse.jgit.lib.ObjectId;
 import org.junit.Test;
 
 public class GcKeepFilesTest extends GcTestCase {
+       private static final int COMMIT_AND_TREE_OBJECTS = 2;
+
        @Test
        public void testKeepFiles() throws Exception {
                BranchBuilder bb = tr.branch("refs/heads/master");
@@ -72,4 +76,76 @@ public class GcKeepFilesTest extends GcTestCase {
                                                                + e.toObjectId(),
                                                ind2.hasObject(e.toObjectId()));
        }
+
+       @Test
+       public void testKeptObjectsAreIncluded() throws Exception {
+               BranchBuilder bb = tr.branch("refs/heads/master");
+               ObjectId commitObjectInLockedPack = bb.commit().create().toObjectId();
+               gc.gc();
+               stats = gc.getStatistics();
+               assertEquals(COMMIT_AND_TREE_OBJECTS, stats.numberOfPackedObjects);
+               assertEquals(1, stats.numberOfPackFiles);
+               assertTrue(getSinglePack().getPackFile().create(PackExt.KEEP).createNewFile());
+
+               bb.commit().create();
+               gc.setPackKeptObjects(true);
+               gc.gc();
+               stats = gc.getStatistics();
+               assertEquals(2 * COMMIT_AND_TREE_OBJECTS + 1,
+                               stats.numberOfPackedObjects);
+               assertEquals(2, stats.numberOfPackFiles);
+
+               PackIndex lockedPackIdx = null;
+               PackIndex newPackIdx = null;
+               for (Pack pack : repo.getObjectDatabase().getPacks()) {
+                       if (pack.getObjectCount() == COMMIT_AND_TREE_OBJECTS) {
+                               lockedPackIdx = pack.getIndex();
+                       } else {
+                               newPackIdx = pack.getIndex();
+                       }
+               }
+               assertNotNull(lockedPackIdx);
+               assertTrue(lockedPackIdx.hasObject(commitObjectInLockedPack));
+               assertNotNull(newPackIdx);
+               assertTrue(newPackIdx.hasObject(commitObjectInLockedPack));
+       }
+
+       @Test
+       public void testKeptObjectsAreNotIncludedByDefault() throws Exception {
+               BranchBuilder bb = tr.branch("refs/heads/master");
+               ObjectId commitObjectInLockedPack = bb.commit().create().toObjectId();
+               gc.gc();
+               stats = gc.getStatistics();
+               assertEquals(COMMIT_AND_TREE_OBJECTS, stats.numberOfPackedObjects);
+               assertEquals(1, stats.numberOfPackFiles);
+               assertTrue(getSinglePack().getPackFile().create(PackExt.KEEP).createNewFile());
+
+               bb.commit().create();
+               gc.gc();
+               stats = gc.getStatistics();
+               assertEquals(COMMIT_AND_TREE_OBJECTS + 1, stats.numberOfPackedObjects);
+               assertEquals(2, stats.numberOfPackFiles);
+
+               PackIndex lockedPackIdx = null;
+               PackIndex newPackIdx = null;
+               for (Pack pack : repo.getObjectDatabase().getPacks()) {
+                       if (pack.getObjectCount() == COMMIT_AND_TREE_OBJECTS) {
+                               lockedPackIdx = pack.getIndex();
+                       } else {
+                               newPackIdx = pack.getIndex();
+                       }
+               }
+               assertNotNull(lockedPackIdx);
+               assertTrue(lockedPackIdx.hasObject(commitObjectInLockedPack));
+               assertNotNull(newPackIdx);
+               assertFalse(newPackIdx.hasObject(commitObjectInLockedPack));
+       }
+
+       private Pack getSinglePack() {
+               Iterator<Pack> packIt = repo.getObjectDatabase().getPacks()
+                               .iterator();
+               Pack singlePack = packIt.next();
+               assertFalse(packIt.hasNext());
+               return singlePack;
+       }
 }
index 1d3917173b960a9863a9bc5a4f3c8dc45eee3133..24c6a3150fd1969b9c52f7477d0aac09f72996b8 100644 (file)
@@ -1,5 +1,13 @@
 <?xml version="1.0" encoding="UTF-8" standalone="no"?>
 <component id="org.eclipse.jgit" version="2">
+    <resource path="src/org/eclipse/jgit/api/GarbageCollectCommand.java" type="org.eclipse.jgit.api.GarbageCollectCommand">
+        <filter id="1142947843">
+            <message_arguments>
+                <message_argument value="5.13.3"/>
+                <message_argument value="setPackKeptObjects(boolean)"/>
+            </message_arguments>
+        </filter>
+    </resource>
     <resource path="src/org/eclipse/jgit/errors/PackMismatchException.java" type="org.eclipse.jgit.errors.PackMismatchException">
         <filter id="1142947843">
             <message_arguments>
index a2fbd411f60eea87d70cd4045a4ba19af917eebd..2e09d4a8e37c44394f808359b490cc0a0c026d7c 100644 (file)
@@ -62,6 +62,8 @@ public class GarbageCollectCommand extends GitCommand<Properties> {
 
        private PackConfig pconfig;
 
+       private boolean packKeptObjects;
+
        /**
         * Constructor for GarbageCollectCommand.
         *
@@ -130,6 +132,19 @@ public class GarbageCollectCommand extends GitCommand<Properties> {
                return this;
        }
 
+       /**
+        * Whether to include objects in `.keep` packs when repacking.
+        *
+        * @param packKeptObjects
+        *            whether to include objects in `.keep` files when repacking.
+        * @return this instance
+        * @since 5.13.3
+        */
+       public GarbageCollectCommand setPackKeptObjects(boolean packKeptObjects) {
+               this.packKeptObjects = packKeptObjects;
+               return this;
+       }
+
        /**
         * Whether to preserve old pack files instead of deleting them.
         *
@@ -174,6 +189,7 @@ public class GarbageCollectCommand extends GitCommand<Properties> {
                                gc.setProgressMonitor(monitor);
                                if (this.expire != null)
                                        gc.setExpire(expire);
+                               gc.setPackKeptObjects(packKeptObjects);
 
                                try {
                                        gc.gc();
index 4db922b2c123ecaa35d881d806c1bd6fa08216aa..63edfa64b919a21a1fa998aa68749ea15cdfdd2f 100644 (file)
@@ -158,6 +158,8 @@ public class GC {
 
        private Date packExpire;
 
+       private boolean packKeptObjects;
+
        private PackConfig pconfig;
 
        /**
@@ -839,8 +841,9 @@ public class GC {
                List<ObjectIdSet> excluded = new LinkedList<>();
                for (Pack p : repo.getObjectDatabase().getPacks()) {
                        checkCancelled();
-                       if (p.shouldBeKept())
+                       if (!packKeptObjects && p.shouldBeKept()) {
                                excluded.add(p.getIndex());
+                       }
                }
 
                // Don't exclude tags that are also branch tips
@@ -1307,6 +1310,15 @@ public class GC {
                }
        }
 
+       /**
+        * Define whether to include objects in `.keep` files when repacking.
+        *
+        * @param packKeptObjects Whether to include objects in `.keep` files when repacking.
+        */
+       public void setPackKeptObjects(boolean packKeptObjects) {
+               this.packKeptObjects = packKeptObjects;
+       }
+
        /**
         * A class holding statistical data for a FileRepository regarding how many
         * objects are stored as loose or packed objects