You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

DfsGarbageCollector.java 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406
  1. /*
  2. * Copyright (C) 2011, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.internal.storage.dfs;
  44. import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
  45. import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.UNREACHABLE_GARBAGE;
  46. import static org.eclipse.jgit.internal.storage.pack.PackExt.BITMAP_INDEX;
  47. import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
  48. import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
  49. import static org.eclipse.jgit.lib.RefDatabase.ALL;
  50. import java.io.IOException;
  51. import java.util.ArrayList;
  52. import java.util.Collections;
  53. import java.util.HashSet;
  54. import java.util.List;
  55. import java.util.Map;
  56. import java.util.Set;
  57. import org.eclipse.jgit.internal.JGitText;
  58. import org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource;
  59. import org.eclipse.jgit.internal.storage.file.PackIndex;
  60. import org.eclipse.jgit.internal.storage.pack.PackExt;
  61. import org.eclipse.jgit.internal.storage.pack.PackWriter;
  62. import org.eclipse.jgit.lib.AnyObjectId;
  63. import org.eclipse.jgit.lib.Constants;
  64. import org.eclipse.jgit.lib.NullProgressMonitor;
  65. import org.eclipse.jgit.lib.ObjectId;
  66. import org.eclipse.jgit.lib.ObjectIdOwnerMap;
  67. import org.eclipse.jgit.lib.ProgressMonitor;
  68. import org.eclipse.jgit.lib.Ref;
  69. import org.eclipse.jgit.revwalk.RevWalk;
  70. import org.eclipse.jgit.storage.pack.PackConfig;
  71. import org.eclipse.jgit.util.io.CountingOutputStream;
  72. /** Repack and garbage collect a repository. */
  73. public class DfsGarbageCollector {
  74. private final DfsRepository repo;
  75. private final DfsRefDatabase refdb;
  76. private final DfsObjDatabase objdb;
  77. private final List<DfsPackDescription> newPackDesc;
  78. private final List<PackWriter.Statistics> newPackStats;
  79. private final List<PackWriter.ObjectIdSet> newPackObj;
  80. private DfsReader ctx;
  81. private PackConfig packConfig;
  82. private long coalesceGarbageLimit = 50 << 20;
  83. private Map<String, Ref> refsBefore;
  84. private List<DfsPackFile> packsBefore;
  85. private Set<ObjectId> allHeads;
  86. private Set<ObjectId> nonHeads;
  87. private Set<ObjectId> tagTargets;
  88. /**
  89. * Initialize a garbage collector.
  90. *
  91. * @param repository
  92. * repository objects to be packed will be read from.
  93. */
  94. public DfsGarbageCollector(DfsRepository repository) {
  95. repo = repository;
  96. refdb = repo.getRefDatabase();
  97. objdb = repo.getObjectDatabase();
  98. newPackDesc = new ArrayList<DfsPackDescription>(4);
  99. newPackStats = new ArrayList<PackWriter.Statistics>(4);
  100. newPackObj = new ArrayList<PackWriter.ObjectIdSet>(4);
  101. packConfig = new PackConfig(repo);
  102. packConfig.setIndexVersion(2);
  103. }
  104. /** @return configuration used to generate the new pack file. */
  105. public PackConfig getPackConfig() {
  106. return packConfig;
  107. }
  108. /**
  109. * @param newConfig
  110. * the new configuration to use when creating the pack file.
  111. * @return {@code this}
  112. */
  113. public DfsGarbageCollector setPackConfig(PackConfig newConfig) {
  114. packConfig = newConfig;
  115. return this;
  116. }
  117. /** @return garbage packs smaller than this size will be repacked. */
  118. public long getCoalesceGarbageLimit() {
  119. return coalesceGarbageLimit;
  120. }
  121. /**
  122. * Set the byte size limit for garbage packs to be repacked.
  123. * <p>
  124. * Any UNREACHABLE_GARBAGE pack smaller than this limit will be repacked at
  125. * the end of the run. This allows the garbage collector to coalesce
  126. * unreachable objects into a single file.
  127. * <p>
  128. * If an UNREACHABLE_GARBAGE pack is already larger than this limit it will
  129. * be left alone by the garbage collector. This avoids unnecessary disk IO
  130. * reading and copying the objects.
  131. * <p>
  132. * If limit is set to 0 the UNREACHABLE_GARBAGE coalesce is disabled.<br>
  133. * If limit is set to {@link Long#MAX_VALUE}, everything is coalesced.
  134. * <p>
  135. * Keeping unreachable garbage prevents race conditions with repository
  136. * changes that may suddenly need an object whose only copy was stored in
  137. * the UNREACHABLE_GARBAGE pack.
  138. *
  139. * @param limit
  140. * size in bytes.
  141. * @return {@code this}
  142. */
  143. public DfsGarbageCollector setCoalesceGarbageLimit(long limit) {
  144. coalesceGarbageLimit = limit;
  145. return this;
  146. }
  147. /**
  148. * Create a single new pack file containing all of the live objects.
  149. * <p>
  150. * This method safely decides which packs can be expired after the new pack
  151. * is created by validating the references have not been modified in an
  152. * incompatible way.
  153. *
  154. * @param pm
  155. * progress monitor to receive updates on as packing may take a
  156. * while, depending on the size of the repository.
  157. * @return true if the repack was successful without race conditions. False
  158. * if a race condition was detected and the repack should be run
  159. * again later.
  160. * @throws IOException
  161. * a new pack cannot be created.
  162. */
  163. public boolean pack(ProgressMonitor pm) throws IOException {
  164. if (pm == null)
  165. pm = NullProgressMonitor.INSTANCE;
  166. if (packConfig.getIndexVersion() != 2)
  167. throw new IllegalStateException(
  168. JGitText.get().supportOnlyPackIndexVersion2);
  169. ctx = (DfsReader) objdb.newReader();
  170. try {
  171. refdb.clearCache();
  172. objdb.clearCache();
  173. refsBefore = refdb.getRefs(ALL);
  174. packsBefore = packsToRebuild();
  175. if (packsBefore.isEmpty())
  176. return true;
  177. allHeads = new HashSet<ObjectId>();
  178. nonHeads = new HashSet<ObjectId>();
  179. tagTargets = new HashSet<ObjectId>();
  180. for (Ref ref : refsBefore.values()) {
  181. if (ref.isSymbolic() || ref.getObjectId() == null)
  182. continue;
  183. if (isHead(ref))
  184. allHeads.add(ref.getObjectId());
  185. else
  186. nonHeads.add(ref.getObjectId());
  187. if (ref.getPeeledObjectId() != null)
  188. tagTargets.add(ref.getPeeledObjectId());
  189. }
  190. tagTargets.addAll(allHeads);
  191. boolean rollback = true;
  192. try {
  193. packHeads(pm);
  194. packRest(pm);
  195. packGarbage(pm);
  196. objdb.commitPack(newPackDesc, toPrune());
  197. rollback = false;
  198. return true;
  199. } finally {
  200. if (rollback)
  201. objdb.rollbackPack(newPackDesc);
  202. }
  203. } finally {
  204. ctx.close();
  205. }
  206. }
  207. private List<DfsPackFile> packsToRebuild() throws IOException {
  208. DfsPackFile[] packs = objdb.getPacks();
  209. List<DfsPackFile> out = new ArrayList<DfsPackFile>(packs.length);
  210. for (DfsPackFile p : packs) {
  211. DfsPackDescription d = p.getPackDescription();
  212. if (d.getPackSource() != UNREACHABLE_GARBAGE)
  213. out.add(p);
  214. else if (d.getFileSize(PackExt.PACK) < coalesceGarbageLimit)
  215. out.add(p);
  216. }
  217. return out;
  218. }
  219. /** @return all of the source packs that fed into this compaction. */
  220. public List<DfsPackDescription> getSourcePacks() {
  221. return toPrune();
  222. }
  223. /** @return new packs created by this compaction. */
  224. public List<DfsPackDescription> getNewPacks() {
  225. return newPackDesc;
  226. }
  227. /** @return statistics corresponding to the {@link #getNewPacks()}. */
  228. public List<PackWriter.Statistics> getNewPackStatistics() {
  229. return newPackStats;
  230. }
  231. private List<DfsPackDescription> toPrune() {
  232. int cnt = packsBefore.size();
  233. List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
  234. for (DfsPackFile pack : packsBefore)
  235. all.add(pack.getPackDescription());
  236. return all;
  237. }
  238. private void packHeads(ProgressMonitor pm) throws IOException {
  239. if (allHeads.isEmpty())
  240. return;
  241. try (PackWriter pw = newPackWriter()) {
  242. pw.setTagTargets(tagTargets);
  243. pw.preparePack(pm, allHeads, Collections.<ObjectId> emptySet());
  244. if (0 < pw.getObjectCount())
  245. writePack(GC, pw, pm);
  246. }
  247. }
  248. private void packRest(ProgressMonitor pm) throws IOException {
  249. if (nonHeads.isEmpty())
  250. return;
  251. try (PackWriter pw = newPackWriter()) {
  252. for (PackWriter.ObjectIdSet packedObjs : newPackObj)
  253. pw.excludeObjects(packedObjs);
  254. pw.preparePack(pm, nonHeads, allHeads);
  255. if (0 < pw.getObjectCount())
  256. writePack(GC, pw, pm);
  257. }
  258. }
  259. private void packGarbage(ProgressMonitor pm) throws IOException {
  260. // TODO(sop) This is ugly. The garbage pack needs to be deleted.
  261. PackConfig cfg = new PackConfig(packConfig);
  262. cfg.setReuseDeltas(true);
  263. cfg.setReuseObjects(true);
  264. cfg.setDeltaCompress(false);
  265. cfg.setBuildBitmaps(false);
  266. try (PackWriter pw = new PackWriter(cfg, ctx);
  267. RevWalk pool = new RevWalk(ctx)) {
  268. pw.setDeltaBaseAsOffset(true);
  269. pw.setReuseDeltaCommits(true);
  270. pm.beginTask(JGitText.get().findingGarbage, objectsBefore());
  271. for (DfsPackFile oldPack : packsBefore) {
  272. PackIndex oldIdx = oldPack.getPackIndex(ctx);
  273. for (PackIndex.MutableEntry ent : oldIdx) {
  274. pm.update(1);
  275. ObjectId id = ent.toObjectId();
  276. if (pool.lookupOrNull(id) != null || anyPackHas(id))
  277. continue;
  278. int type = oldPack.getObjectType(ctx, ent.getOffset());
  279. pw.addObject(pool.lookupAny(id, type));
  280. }
  281. }
  282. pm.endTask();
  283. if (0 < pw.getObjectCount())
  284. writePack(UNREACHABLE_GARBAGE, pw, pm);
  285. }
  286. }
  287. private boolean anyPackHas(AnyObjectId id) {
  288. for (PackWriter.ObjectIdSet packedObjs : newPackObj)
  289. if (packedObjs.contains(id))
  290. return true;
  291. return false;
  292. }
  293. private static boolean isHead(Ref ref) {
  294. return ref.getName().startsWith(Constants.R_HEADS);
  295. }
  296. private int objectsBefore() {
  297. int cnt = 0;
  298. for (DfsPackFile p : packsBefore)
  299. cnt += p.getPackDescription().getObjectCount();
  300. return cnt;
  301. }
  302. private PackWriter newPackWriter() {
  303. PackWriter pw = new PackWriter(packConfig, ctx);
  304. pw.setDeltaBaseAsOffset(true);
  305. pw.setReuseDeltaCommits(false);
  306. return pw;
  307. }
  308. private DfsPackDescription writePack(PackSource source, PackWriter pw,
  309. ProgressMonitor pm) throws IOException {
  310. DfsOutputStream out;
  311. DfsPackDescription pack = repo.getObjectDatabase().newPack(source);
  312. newPackDesc.add(pack);
  313. out = objdb.writeFile(pack, PACK);
  314. try {
  315. pw.writePack(pm, pm, out);
  316. pack.addFileExt(PACK);
  317. } finally {
  318. out.close();
  319. }
  320. out = objdb.writeFile(pack, INDEX);
  321. try {
  322. CountingOutputStream cnt = new CountingOutputStream(out);
  323. pw.writeIndex(cnt);
  324. pack.addFileExt(INDEX);
  325. pack.setFileSize(INDEX, cnt.getCount());
  326. pack.setIndexVersion(pw.getIndexVersion());
  327. } finally {
  328. out.close();
  329. }
  330. if (pw.prepareBitmapIndex(pm)) {
  331. out = objdb.writeFile(pack, BITMAP_INDEX);
  332. try {
  333. CountingOutputStream cnt = new CountingOutputStream(out);
  334. pw.writeBitmapIndex(cnt);
  335. pack.addFileExt(BITMAP_INDEX);
  336. pack.setFileSize(BITMAP_INDEX, cnt.getCount());
  337. } finally {
  338. out.close();
  339. }
  340. }
  341. final ObjectIdOwnerMap<ObjectIdOwnerMap.Entry> packedObjs = pw
  342. .getObjectSet();
  343. newPackObj.add(new PackWriter.ObjectIdSet() {
  344. public boolean contains(AnyObjectId objectId) {
  345. return packedObjs.contains(objectId);
  346. }
  347. });
  348. PackWriter.Statistics stats = pw.getStatistics();
  349. pack.setPackStats(stats);
  350. newPackStats.add(stats);
  351. DfsBlockCache.getInstance().getOrCreate(pack, null);
  352. return pack;
  353. }
  354. }