diff options
author | Christian Halstrick <christian.halstrick@sap.com> | 2016-12-08 15:44:23 +0100 |
---|---|---|
committer | Matthias Sohn <matthias.sohn@sap.com> | 2016-12-13 11:28:12 +0100 |
commit | 11d24e68442e97ba8a4132d749e7e7f4ebf1ff52 (patch) | |
tree | fb97b83a923eec0b64644a2a96752fb04d4545ff | |
parent | d62130558884e0600665236ab62b20ba0f5bac45 (diff) | |
download | jgit-11d24e68442e97ba8a4132d749e7e7f4ebf1ff52.tar.gz jgit-11d24e68442e97ba8a4132d749e7e7f4ebf1ff52.zip |
Fix FileSnapshot.isModified
FileSnapshot.isModified may have reported a file to be clean although it
was actually dirty.
Imagine you have a FileSnapshot on file f. lastmodified and lastread are
both t0. Now time is t1 and you
1) modify the file
2) update the FileSnapshot of the file (lastModified=t1, lastRead=t1)
3) modify the file again
4) wait 3 seconds
5) ask the Filesnapshot whether the file is dirty or not. It erroneously
answered it's clean.
Any file which has been modified longer than 2.5 seconds ago was
reported to be clean. As the test shows that's not always correct.
The real-world problem fixed by this change is the following:
* A gerrit server using JGit to serve git repositories is processing
fetch requests while simultaneously a native git garbage collection
runs on the repo.
* At time t1 native git writes temporary files in the pack folder
setting the mtime of the pack folder to t1.
* A fetch request causes JGit to search for new packfiles and JGit
remembers this scan in a Filesnapshot on the packs folder. Since the gc
is not finished JGit doesn't see any new packfiles.
* The fetch is processed and the gc ends while the filesystem timer is
still t1. GC writes a new packfile and deletes the old packfile.
* 3 seconds later another request arrives. JGit does not yet know about
the new packfile but is also not rescanning the pack folder because it
cached that the last scan happened at time t1 and pack folder's mtime is
also t1. Now JGit will not be able to resolve any object contained in
this new pack. This behavior may be persistent if objects referenced by
the ref/meta/config branch are affected so gerrit can't read permissions
stored in the refs/meta/config branch anymore and will not allow any
pushes anymore. The pack folder will not change its mtime and therefore
no rescan will take place.
Change-Id: I3efd0ccffeb97b01207dc3e7a6b85c6b06928fad
Signed-off-by: Matthias Sohn <matthias.sohn@sap.com>
3 files changed, 103 insertions, 23 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java index 8506753823..98a9501766 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/FileSnapshotTest.java @@ -42,7 +42,6 @@ */ package org.eclipse.jgit.internal.storage.file; -import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.File; @@ -98,22 +97,6 @@ public class FileSnapshotTest { } /** - * Create a file, wait long enough and verify that it has not been modified. - * 3.5 seconds mean any difference between file system timestamp and system - * clock should be significant. - * - * @throws Exception - */ - @Test - public void testOldFile() throws Exception { - File f1 = createFile("oldfile"); - waitNextSec(f1); - FileSnapshot save = FileSnapshot.save(f1); - Thread.sleep(3500); - assertFalse(save.isModified(f1)); - } - - /** * Create a file, but don't wait long enough for the difference between file * system clock and system clock to be significant. Assume the file may have * been modified. It may have been, but the clock alone cannot determine diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java index 7f247ff0ad..87554ae6a4 100644 --- a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/file/ObjectDirectoryTest.java @@ -42,6 +42,11 @@ package org.eclipse.jgit.internal.storage.file; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.File; +import java.io.FilenameFilter; import java.util.Collection; import java.util.Collections; import java.util.concurrent.Callable; @@ -50,8 +55,12 @@ import java.util.concurrent.Executors; import java.util.concurrent.Future; import org.eclipse.jgit.junit.RepositoryTestCase; +import org.eclipse.jgit.lib.ConfigConstants; import org.eclipse.jgit.lib.Constants; import org.eclipse.jgit.lib.ObjectId; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.storage.file.FileBasedConfig; +import org.junit.Assume; import org.junit.Test; public class ObjectDirectoryTest extends RepositoryTestCase { @@ -68,6 +77,100 @@ public class ObjectDirectoryTest extends RepositoryTestCase { } } + /** + * Test packfile scanning while a gc is done from the outside (different + * process or different Repository instance). This situation occurs e.g. if + * a gerrit server is serving fetch requests while native git is doing a + * garbage collection. The test shows that when core.trustfolderstat==true + * jgit may miss to detect that a new packfile was created. This situation + * is persistent until a new full rescan of the pack directory is triggered. + * + * The test works with two Repository instances working on the same disk + * location. One (db) for all write operations (creating commits, doing gc) + * and another one (receivingDB) which just reads and which in the end shows + * the bug + * + * @throws Exception + */ + @Test + public void testScanningForPackfiles() throws Exception { + ObjectId unknownID = ObjectId + .fromString("c0ffee09d0b63d694bf49bc1e6847473f42d4a8c"); + GC gc = new GC(db); + gc.setExpireAgeMillis(0); + gc.setPackExpireAgeMillis(0); + + // the default repo db is used to create the objects. The receivingDB + // repo is used to trigger gc's + try (FileRepository receivingDB = new FileRepository( + db.getDirectory())) { + // set trustfolderstat to true. If set to false the test always + // succeeds. + FileBasedConfig cfg = receivingDB.getConfig(); + cfg.setBoolean(ConfigConstants.CONFIG_CORE_SECTION, null, + ConfigConstants.CONFIG_KEY_TRUSTFOLDERSTAT, true); + cfg.save(); + + // setup a repo which has at least one pack file and trigger + // scanning of the packs directory + ObjectId id = commitFile("file.txt", "test", "master").getId(); + gc.gc(); + assertFalse(receivingDB.hasObject(unknownID)); + assertTrue(receivingDB.getObjectDatabase().hasPackedObject(id)); + + // preparations + File packsFolder = new File(receivingDB.getObjectsDirectory(), + "pack"); + // prepare creation of a temporary file in the pack folder. This + // simulates that a native git gc is happening starting to write + // temporary files but has not yet finished + File tmpFile = new File(packsFolder, "1.tmp"); + RevCommit id2 = commitFile("file.txt", "test2", "master"); + // wait until filesystem timer ticks. This raises probability that + // the next statements are executed in the same tick as the + // filesystem timer + fsTick(null); + + // create a Temp file in the packs folder and trigger a rescan of + // the packs folder. This lets receivingDB think it has scanned the + // packs folder at the current fs timestamp t1. The following gc + // will create new files which have the same timestamp t1 but this + // will not update the mtime of the packs folder. Because of that + // JGit will not rescan the packs folder later on and fails to see + // the pack file created during gc. + assertTrue(tmpFile.createNewFile()); + assertFalse(receivingDB.hasObject(unknownID)); + + // trigger a gc. This will create packfiles which have likely the + // same mtime than the packfolder + gc.gc(); + + // To deal with racy-git situations JGit's Filesnapshot class will + // report a file/folder potentially dirty if + // cachedLastReadTime-cachedLastModificationTime < 2500ms. This + // causes JGit to always rescan a file after modification. But: + // this was true only if the difference between current system time + // and cachedLastModification time was less than 2500ms. If the + // modification is more than 2500ms ago we may have reported a + // file/folder to be clean although it has not been rescanned. A + // Bug. To show the bug we sleep for more than 2500ms + Thread.sleep(2600); + + File[] ret = packsFolder.listFiles(new FilenameFilter() { + @Override + public boolean accept(File dir, String name) { + return name.endsWith(".pack"); + } + }); + assertTrue(ret.length == 1); + Assume.assumeTrue(tmpFile.lastModified() == ret[0].lastModified()); + + // all objects are in a new packfile but we will not detect it + assertFalse(receivingDB.hasObject(unknownID)); + assertTrue(receivingDB.hasObject(id2)); + } + } + private Collection<Callable<ObjectId>> blobInsertersForTheSameFanOutDir( final ObjectDirectory dir) { Callable<ObjectId> callable = new Callable<ObjectId>() { diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java index 8926d79306..97f3b573d0 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/file/FileSnapshot.java @@ -264,12 +264,6 @@ public class FileSnapshot { return false; } - // Our lastRead flag may be old, refresh and retry - lastRead = System.currentTimeMillis(); - if (notRacyClean(lastRead)) { - return false; - } - // We last read this path too close to its last observed // modification time. We may have missed a modification. // Scan again, to ensure we still see the same state. |