/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 * names of its contributors may be used to endorse or promote
 * products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
-
package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.lib.RefDatabase.ALL;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.Constants;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.lib.Ref;
import org.eclipse.jgit.lib.RefDatabase;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.io.CountingOutputStream;
-
- /** Repack and garbage collect a repository. */
- public class DfsGarbageCollector {
- private final DfsRepository repo;
- private final RefDatabase refdb;
- private final DfsObjDatabase objdb;
-
- private final List<DfsPackDescription> newPackDesc;
-
- private final List<PackStatistics> newPackStats;
-
- private final List<ObjectIdSet> newPackObj;
-
- private DfsReader ctx;
-
- private PackConfig packConfig;
-
- private long coalesceGarbageLimit = 50 << 20;
-
- private Map<String, Ref> refsBefore;
-
- private List<DfsPackFile> packsBefore;
-
- private Set<ObjectId> allHeads;
-
- private Set<ObjectId> nonHeads;
-
- private Set<ObjectId> tagTargets;
-
- /**
- * Initialize a garbage collector.
- *
- * @param repository
- * repository objects to be packed will be read from.
- */
- public DfsGarbageCollector(DfsRepository repository) {
- repo = repository;
- refdb = repo.getRefDatabase();
- objdb = repo.getObjectDatabase();
- newPackDesc = new ArrayList<DfsPackDescription>(4);
- newPackStats = new ArrayList<PackStatistics>(4);
- newPackObj = new ArrayList<ObjectIdSet>(4);
-
- packConfig = new PackConfig(repo);
- packConfig.setIndexVersion(2);
- }
-
- /** @return configuration used to generate the new pack file. */
- public PackConfig getPackConfig() {
- return packConfig;
- }
-
- /**
- * @param newConfig
- * the new configuration to use when creating the pack file.
- * @return {@code this}
- */
- public DfsGarbageCollector setPackConfig(PackConfig newConfig) {
- packConfig = newConfig;
- return this;
- }
-
- /** @return garbage packs smaller than this size will be repacked. */
- public long getCoalesceGarbageLimit() {
- return coalesceGarbageLimit;
- }
-
- /**
- * Set the byte size limit for garbage packs to be repacked.
- * <p>
- * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at
- * the end of the run. This allows the garbage collector to coalesce
- * unreachable objects into a single file.
- * <p>
- * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will
- * be left alone by the garbage collector. This avoids unnecessary disk IO
- * reading and copying the objects.
- * <p>
- * If limit is set to 0 the UNREACHABLE_GARBAGE coalesce is disabled.<br>
- * If limit is set to {@link Long#MAX_VALUE}, everything is coalesced.
- * <p>
- * Keeping unreachable garbage prevents race conditions with repository
- * changes that may suddenly need an object whose only copy was stored in
- * the UNREACHABLE_GARBAGE pack.
- *
- * @param limit
- * size in bytes.
- * @return {@code this}
- */
- public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
- coalesceGarbageLimit = limit;
- return this;
- }
-
- /**
- * Create a single new pack file containing all of the live objects.
- * <p>
- * This method safely decides which packs can be expired after the new pack
- * is created by validating the references have not been modified in an
- * incompatible way.
- *
- * @param pm
- * progress monitor to receive updates on as packing may take a
- * while, depending on the size of the repository.
- * @return true if the repack was successful without race conditions. False
- * if a race condition was detected and the repack should be run
- * again later.
- * @throws IOException
- * a new pack cannot be created.
- */
- public boolean pack(ProgressMonitor pm) throws IOException {
- if (pm == null)
- pm = NullProgressMonitor.INSTANCE;
- if (packConfig.getIndexVersion() != 2)
- throw new IllegalStateException(
- JGitText.get().supportOnlyPackIndexVersion2);
-
- ctx = (DfsReader) objdb.newReader();
- try {
- refdb.refresh();
- objdb.clearCache();
-
- refsBefore = refdb.getRefs(ALL);
- packsBefore = packsToRebuild();
- if (packsBefore.isEmpty())
- return true;
-
- allHeads = new HashSet<ObjectId>();
- nonHeads = new HashSet<ObjectId>();
- tagTargets = new HashSet<ObjectId>();
- for (Ref ref : refsBefore.values()) {
- if (ref.isSymbolic() || ref.getObjectId() == null)
- continue;
- if (isHead(ref))
- allHeads.add(ref.getObjectId());
- else
- nonHeads.add(ref.getObjectId());
- if (ref.getPeeledObjectId() != null)
- tagTargets.add(ref.getPeeledObjectId());
- }
- tagTargets.addAll(allHeads);
-
- boolean rollback = true;
- try {
- packHeads(pm);
- packRest(pm);
- packGarbage(pm);
- objdb.commitPack(newPackDesc, toPrune());
- rollback = false;
- return true;
- } finally {
- if (rollback)
- objdb.rollbackPack(newPackDesc);
- }
- } finally {
- ctx.close();
- }
- }
-
- private List<DfsPackFile> packsToRebuild() throws IOException {
- DfsPackFile[] packs = objdb.getPacks();
- List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
- for (DfsPackFile p : packs) {
- DfsPackDescription d = p.getPackDescription();
- if (d.getPackSource() != UNREACHABLE_GARBAGE)
- out.add(p);
- else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
- out.add(p);
- }
- return out;
- }
-
- /** @return all of the source packs that fed into this compaction. */
- public List<DfsPackDescription> getSourcePacks() {
- return toPrune();
- }
-
- /** @return new packs created by this compaction. */
- public List<DfsPackDescription> getNewPacks() {
- return newPackDesc;
- }
-
- /** @return statistics corresponding to the {@link #getNewPacks()}. */
- public List<PackStatistics> getNewPackStatistics() {
- return newPackStats;
- }
-
- private List<DfsPackDescription> toPrune() {
- int cnt = packsBefore.size();
- List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
- for (DfsPackFile pack : packsBefore)
- all.add(pack.getPackDescription());
- return all;
- }
-
- private void packHeads(ProgressMonitor pm) throws IOException {
- if (allHeads.isEmpty())
- return;
-
- try (PackWriter pw = newPackWriter()) {
- pw.setTagTargets(tagTargets);
- pw.preparePack(pm, allHeads, Collections.<ObjectId> emptySet());
- if (0 < pw.getObjectCount())
- writePack(GC, pw, pm);
- }
- }
-
- private void packRest(ProgressMonitor pm) throws IOException {
- if (nonHeads.isEmpty())
- return;
-
- try (PackWriter pw = newPackWriter()) {
- for (ObjectIdSet packedObjs : newPackObj)
- pw.excludeObjects(packedObjs);
- pw.preparePack(pm, nonHeads, allHeads);
- if (0 < pw.getObjectCount())
- writePack(GC, pw, pm);
- }
- }
-
- private void packGarbage(ProgressMonitor pm) throws IOException {
- // TODO(sop) This is ugly. The garbage pack needs to be deleted.
- PackConfig cfg = new PackConfig(packConfig);
- cfg.setReuseDeltas(true);
- cfg.setReuseObjects(true);
- cfg.setDeltaCompress(false);
- cfg.setBuildBitmaps(false);
-
- try (PackWriter pw = new PackWriter(cfg, ctx);
- RevWalk pool = new RevWalk(ctx)) {
- pw.setDeltaBaseAsOffset(true);
- pw.setReuseDeltaCommits(true);
- pm.beginTask(JGitText.get().findingGarbage, objectsBefore());
- for (DfsPackFile oldPack : packsBefore) {
- PackIndex oldIdx = oldPack.getPackIndex(ctx);
- for (PackIndex.MutableEntry ent : oldIdx) {
- pm.update(1);
- ObjectId id = ent.toObjectId();
- if (pool.lookupOrNull(id) != null || anyPackHas(id))
- continue;
-
- int type = oldPack.getObjectType(ctx, ent.getOffset());
- pw.addObject(pool.lookupAny(id, type));
- }
- }
- pm.endTask();
- if (0 < pw.getObjectCount())
- writePack(UNREACHABLE_GARBAGE, pw, pm);
- }
- }
-
- private boolean anyPackHas(AnyObjectId id) {
- for (ObjectIdSet packedObjs : newPackObj)
- if (packedObjs.contains(id))
- return true;
- return false;
- }
-
- private static boolean isHead(Ref ref) {
- return ref.getName().startsWith(Constants.R_HEADS);
- }
-
- private int objectsBefore() {
- int cnt = 0;
- for (DfsPackFile p : packsBefore)
- cnt += p.getPackDescription().getObjectCount();
- return cnt;
- }
-
- private PackWriter newPackWriter() {
- PackWriter pw = new PackWriter(packConfig, ctx);
- pw.setDeltaBaseAsOffset(true);
- pw.setReuseDeltaCommits(false);
- return pw;
- }
-
- private DfsPackDescription writePack(PackSource source, PackWriter pw,
- ProgressMonitor pm) throws IOException {
- DfsOutputStream out;
- DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
- newPackDesc.add(pack);
-
- out = objdb.writeFile(pack, PACK);
- try {
- pw.writePack(pm, pm, out);
- pack.addFileExt(PACK);
- } finally {
- out.close();
- }
-
- out = objdb.writeFile(pack, INDEX);
- try {
- CountingOutputStream cnt = new CountingOutputStream(out);
- pw.writeIndex(cnt);
- pack.addFileExt(INDEX);
- pack.setFileSize(INDEX, cnt.getCount());
- pack.setIndexVersion(pw.getIndexVersion());
- } finally {
- out.close();
- }
-
- if (pw.prepareBitmapIndex(pm)) {
- out = objdb.writeFile(pack, BITMAP_INDEX);
- try {
- CountingOutputStream cnt = new CountingOutputStream(out);
- pw.writeBitmapIndex(cnt);
- pack.addFileExt(BITMAP_INDEX);
- pack.setFileSize(BITMAP_INDEX, cnt.getCount());
- } finally {
- out.close();
- }
- }
-
- PackStatistics stats = pw.getStatistics();
- pack.setPackStats(stats);
- newPackStats.add(stats);
- newPackObj.add(pw.getObjectSet());
-
- DfsBlockCache.getInstance().getOrCreate(pack, null);
- return pack;
- }
- }