diff options
author | Masaya Suzuki <masayasuzuki@google.com> | 2020-01-23 16:47:40 -0800 |
---|---|---|
committer | Masaya Suzuki <masayasuzuki@google.com> | 2020-09-03 22:58:37 +0000 |
commit | 9d2055152c1315819839988e44e030a63977e93c (patch) | |
tree | 8b0bf44c5d9b66987a1ebc11b6cd39ebee9417e2 | |
parent | 957419610ad1af3791ff0c279bbc7cbadabd810d (diff) | |
download | jgit-9d2055152c1315819839988e44e030a63977e93c.tar.gz jgit-9d2055152c1315819839988e44e030a63977e93c.zip |
jgit: Add DfsBundleWriter
DfsBundleWriter writes out the entire repository to a Git bundle file.
It includes every object stored in the repository's pack files by
concatenating those pack files as-is. This makes bundle creation fast and
cheap, which is useful for backing up a repository as-is.
Change-Id: Iee20e4b1ab45b2a178dde8c72093c0dd83f04805
Signed-off-by: Masaya Suzuki <masayasuzuki@google.com>
4 files changed, 189 insertions, 9 deletions
diff --git a/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsBundleWriterTest.java b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsBundleWriterTest.java new file mode 100644 index 0000000000..4238ee6bf0 --- /dev/null +++ b/org.eclipse.jgit.test/tst/org/eclipse/jgit/internal/storage/dfs/DfsBundleWriterTest.java @@ -0,0 +1,85 @@ +/* + * Copyright (c) 2020, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. + * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.dfs; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertTrue; + +import java.io.ByteArrayInputStream; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.util.Collections; +import java.util.Set; + +import org.eclipse.jgit.junit.TestRepository; +import org.eclipse.jgit.lib.NullProgressMonitor; +import org.eclipse.jgit.lib.Ref; +import org.eclipse.jgit.lib.Repository; +import org.eclipse.jgit.revwalk.RevCommit; +import org.eclipse.jgit.transport.FetchResult; +import org.eclipse.jgit.transport.RefSpec; +import org.eclipse.jgit.transport.TransportBundleStream; +import org.eclipse.jgit.transport.URIish; +import org.junit.Before; +import org.junit.Test; + +public class DfsBundleWriterTest { + private TestRepository<InMemoryRepository> git; + + private InMemoryRepository repo; + + @Before + public void setUp() throws IOException { + DfsRepositoryDescription desc = new DfsRepositoryDescription("test"); + git = new TestRepository<>(new InMemoryRepository(desc)); + repo = git.getRepository(); + } + + @Test + public void testRepo() throws Exception { + RevCommit commit0 = git.commit().message("0").create(); + RevCommit commit1 = 
git.commit().message("1").parent(commit0).create(); + git.update("master", commit1); + + RevCommit commit2 = git.commit().message("0").create(); + + byte[] bundle = makeBundle(); + try (Repository newRepo = new InMemoryRepository( + new DfsRepositoryDescription("copy"))) { + fetchFromBundle(newRepo, bundle); + Ref ref = newRepo.exactRef("refs/heads/master"); + assertNotNull(ref); + assertEquals(commit1.toObjectId(), ref.getObjectId()); + + // Objects not reachable from any ref (here commit2) are included as well. + assertTrue(newRepo.getObjectDatabase().has(commit2)); + } + } + + private byte[] makeBundle() throws IOException { + ByteArrayOutputStream out = new ByteArrayOutputStream(); + DfsBundleWriter.writeEntireRepositoryAsBundle( + NullProgressMonitor.INSTANCE, out, repo); + return out.toByteArray(); + } + + private static FetchResult fetchFromBundle(Repository newRepo, + byte[] bundle) throws Exception { + URIish uri = new URIish("in-memory://"); + ByteArrayInputStream in = new ByteArrayInputStream(bundle); + RefSpec rs = new RefSpec("refs/heads/*:refs/heads/*"); + Set<RefSpec> refs = Collections.singleton(rs); + try (TransportBundleStream transport = new TransportBundleStream( + newRepo, uri, in)) { + return transport.fetch(NullProgressMonitor.INSTANCE, refs); + } + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBundleWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBundleWriter.java new file mode 100644 index 0000000000..736f381d78 --- /dev/null +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsBundleWriter.java @@ -0,0 +1,52 @@ +/* + * Copyright (c) 2020, Google LLC and others + * + * This program and the accompanying materials are made available under the + * terms of the Eclipse Distribution License v. 1.0 which is available at + * http://www.eclipse.org/org/documents/edl-v10.php. 
+ * + * SPDX-License-Identifier: BSD-3-Clause + */ +package org.eclipse.jgit.internal.storage.dfs; + +import java.io.IOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.List; + +import org.eclipse.jgit.internal.storage.pack.CachedPack; +import org.eclipse.jgit.lib.ProgressMonitor; +import org.eclipse.jgit.transport.BundleWriter; + +/** Writes {@link DfsRepository} to a Git bundle. */ +public class DfsBundleWriter { + /** + * Writes the entire {@link DfsRepository} to a Git bundle. + * <p> + * This method tries to avoid traversing the pack files as much as possible + * and dumps all objects as-is to a Git bundle. + * + * @param pm + * progress monitor + * @param os + * Git bundle output + * @param db + * repository + * @throws IOException + * thrown if the output stream throws one. + */ + public static void writeEntireRepositoryAsBundle(ProgressMonitor pm, + OutputStream os, DfsRepository db) throws IOException { + BundleWriter bw = new BundleWriter(db); + db.getRefDatabase().getRefs().forEach(bw::include); + List<CachedPack> packs = new ArrayList<>(); + for (DfsPackFile p : db.getObjectDatabase().getPacks()) { + packs.add(new DfsCachedPack(p)); + } + bw.addObjectsAsIs(packs); + bw.writeBundle(pm, os); + } + + private DfsBundleWriter() { + } +} diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java index 9e409490fa..3e4b5df6aa 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/pack/PackWriter.java @@ -756,6 +756,19 @@ public class PackWriter implements AutoCloseable { /** * Prepare the list of objects to be written to the pack stream. + * + * <p> + * PackWriter will concatenate and write out the specified packs as-is. + * + * @param c + * cached packs to be written. + */ + public void preparePack(Collection<? 
extends CachedPack> c) { + cachedPacks.addAll(c); + } + + /** + * Prepare the list of objects to be written to the pack stream. * <p> * Basing on these 2 sets, another set of objects to put in a pack file is * created: this set consists of all objects reachable (ancestors) from diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java index 57eed3ad2a..e1aa9d72fb 100644 --- a/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java +++ b/org.eclipse.jgit/src/org/eclipse/jgit/transport/BundleWriter.java @@ -17,12 +17,16 @@ import java.io.OutputStream; import java.io.OutputStreamWriter; import java.io.Writer; import java.text.MessageFormat; +import java.util.ArrayList; +import java.util.Collection; import java.util.HashSet; +import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import org.eclipse.jgit.internal.JGitText; +import org.eclipse.jgit.internal.storage.pack.CachedPack; import org.eclipse.jgit.internal.storage.pack.PackWriter; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.Constants; @@ -62,6 +66,8 @@ public class BundleWriter { private final Set<ObjectId> tagTargets; + private final List<CachedPack> cachedPacks = new ArrayList<>(); + private PackConfig packConfig; private ObjectCountCallback callback; @@ -150,6 +156,25 @@ public class BundleWriter { } /** + * Add objects to the bundle file. + * + * <p> + * When this method is used, object traversal is disabled and specified pack + * files are directly saved to the Git bundle file. + * + * <p> + * Unlike {@link #include}, this doesn't affect the refs. Even if the + * objects are not reachable from any ref, they will be included in the + * bundle file. + * + * @param c + * packs to include + */ + public void addObjectsAsIs(Collection<? extends CachedPack> c) { + cachedPacks.addAll(c); + } + + /** * Assume a commit is available on the recipient's side. 
* <p> * In order to fetch from a bundle the recipient must have any assumed @@ -187,19 +212,24 @@ public class BundleWriter { try (PackWriter packWriter = newPackWriter()) { packWriter.setObjectCountCallback(callback); - final HashSet<ObjectId> inc = new HashSet<>(); - final HashSet<ObjectId> exc = new HashSet<>(); - inc.addAll(include.values()); - for (RevCommit r : assume) - exc.add(r.getId()); packWriter.setIndexDisabled(true); packWriter.setDeltaBaseAsOffset(true); - packWriter.setThin(!exc.isEmpty()); packWriter.setReuseValidatingObjects(false); - if (exc.isEmpty()) { - packWriter.setTagTargets(tagTargets); + if (cachedPacks.isEmpty()) { + HashSet<ObjectId> inc = new HashSet<>(); + HashSet<ObjectId> exc = new HashSet<>(); + inc.addAll(include.values()); + for (RevCommit r : assume) { + exc.add(r.getId()); + } + if (exc.isEmpty()) { + packWriter.setTagTargets(tagTargets); + } + packWriter.setThin(!exc.isEmpty()); + packWriter.preparePack(monitor, inc, exc); + } else { + packWriter.preparePack(cachedPacks); } - packWriter.preparePack(monitor, inc, exc); final Writer w = new OutputStreamWriter(os, UTF_8); w.write(TransportBundle.V2_BUNDLE_SIGNATURE); |