
DfsPackCompactor.java

/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following
 * disclaimer in the documentation and/or other materials provided
 * with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 * names of its contributors may be used to endorse or promote
 * products derived from this software without specific prior
 * written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Comparator;
import java.util.List;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;

/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not as
 * space efficient, since new delta compression is disabled.
 * <p>
 * This method is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without expending
 * a lot of time repacking the entire repository.
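 * <p>
 * A minimal usage sketch (assuming {@code repo} is an already opened
 * {@link DfsRepository}; {@link #compact(ProgressMonitor)} also accepts a
 * {@code null} monitor):
 *
 * <pre>
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd(); // small packs are added, larger packs are excluded
 * compactor.compact(NullProgressMonitor.INSTANCE);
 * List&lt;DfsPackDescription&gt; newPacks = compactor.getNewPacks();
 * </pre>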
 */
public class DfsPackCompactor {
	private final DfsRepository repo;
	private final List<DfsPackFile> srcPacks;
	private final List<ObjectIdSet> exclude;
	private final List<DfsPackDescription> newPacks;
	private final List<PackStatistics> newStats;

	private int autoAddSize;

	private RevWalk rw;
	private RevFlag added;
	private RevFlag isBase;

	/**
	 * Initialize a pack compactor.
	 *
	 * @param repository
	 *            repository objects to be packed will be read from.
	 */
	public DfsPackCompactor(DfsRepository repository) {
		repo = repository;
		autoAddSize = 5 * 1024 * 1024; // 5 MiB
		srcPacks = new ArrayList<DfsPackFile>();
		exclude = new ArrayList<ObjectIdSet>(4);
		newPacks = new ArrayList<DfsPackDescription>(1);
		newStats = new ArrayList<PackStatistics>(1);
	}

	/**
	 * Add a pack to be compacted.
	 * <p>
	 * All of the objects in this pack will be copied into the resulting pack.
	 * The resulting pack will order objects according to the source pack's own
	 * description ordering (which is based on creation date), and then by the
	 * order the objects appear in the source pack.
	 *
	 * @param pack
	 *            a pack to combine into the resulting pack.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsPackFile pack) {
		srcPacks.add(pack);
		return this;
	}

	/**
	 * Automatically select packs to be included, and add them.
	 * <p>
	 * Packs are selected based on size: smaller packs are included, while
	 * bigger ones are excluded from the compaction.
	 *
	 * @return {@code this}
	 * @throws IOException
	 *             existing packs cannot be read.
	 */
	public DfsPackCompactor autoAdd() throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		for (DfsPackFile pack : objdb.getPacks()) {
			DfsPackDescription d = pack.getPackDescription();
			if (d.getFileSize(PACK) < autoAddSize)
				add(pack);
			else
				exclude(pack);
		}
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param set
	 *            objects to not include.
	 * @return {@code this}.
	 */
	public DfsPackCompactor exclude(ObjectIdSet set) {
		exclude.add(set);
		return this;
	}

	/**
	 * Exclude all objects contained in an existing pack.
	 *
	 * @param pack
	 *            pack whose objects should not be included.
	 * @return {@code this}.
	 * @throws IOException
	 *             pack index cannot be loaded.
	 */
	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
		final PackIndex idx;
		try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
			idx = pack.getPackIndex(ctx);
		}
		return exclude(idx);
	}

	/**
	 * Compact the pack files together.
	 *
	 * @param pm
	 *            progress monitor to receive updates on; packing may take a
	 *            while, depending on the size of the repository.
	 * @throws IOException
	 *             the packs cannot be compacted.
	 */
	public void compact(ProgressMonitor pm) throws IOException {
		if (pm == null)
			pm = NullProgressMonitor.INSTANCE;

		DfsObjDatabase objdb = repo.getObjectDatabase();
		try (DfsReader ctx = (DfsReader) objdb.newReader()) {
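			// Reuse deltas and whole objects verbatim from the source packs;
			// computing new deltas is deliberately disabled so the compaction
			// stays fast (see the class javadoc).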
			PackConfig pc = new PackConfig(repo);
			pc.setIndexVersion(2);
			pc.setDeltaCompress(false);
			pc.setReuseDeltas(true);
			pc.setReuseObjects(true);

			PackWriter pw = new PackWriter(pc, ctx);
			try {
				pw.setDeltaBaseAsOffset(true);
				pw.setReuseDeltaCommits(false);

				addObjectsToPack(pw, ctx, pm);
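				// Nothing was selected (every object was excluded or already
				// packed elsewhere); just prune the now-redundant source packs.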
				if (pw.getObjectCount() == 0) {
					List<DfsPackDescription> remove = toPrune();
					if (remove.size() > 0)
						objdb.commitPack(
								Collections.<DfsPackDescription>emptyList(),
								remove);
					return;
				}
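				// If anything below fails, the partially written pack is
				// rolled back, leaving the object database unchanged.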
				boolean rollback = true;
				DfsPackDescription pack = objdb.newPack(COMPACT);
				try {
					writePack(objdb, pack, pw, pm);
					writeIndex(objdb, pack, pw);

					PackStatistics stats = pw.getStatistics();
					pw.close();
					pw = null;

					pack.setPackStats(stats);
					objdb.commitPack(Collections.singletonList(pack), toPrune());
					newPacks.add(pack);
					newStats.add(stats);
					rollback = false;
				} finally {
					if (rollback)
						objdb.rollbackPack(Collections.singletonList(pack));
				}
			} finally {
				if (pw != null)
					pw.close();
			}
		} finally {
			rw = null;
		}
	}

	/** @return all of the source packs that fed into this compaction. */
	public List<DfsPackDescription> getSourcePacks() {
		return toPrune();
	}

	/** @return new packs created by this compaction. */
	public List<DfsPackDescription> getNewPacks() {
		return newPacks;
	}

	/** @return statistics for the packs returned by {@link #getNewPacks()}. */
	public List<PackStatistics> getNewPackStatistics() {
		return newStats;
	}

	private List<DfsPackDescription> toPrune() {
		int cnt = srcPacks.size();
		List<DfsPackDescription> all = new ArrayList<DfsPackDescription>(cnt);
		for (DfsPackFile pack : srcPacks)
			all.add(pack.getPackDescription());
		return all;
	}

	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
			ProgressMonitor pm) throws IOException,
			IncorrectObjectTypeException {
		// Sort packs by description ordering, this places newer packs before
		// older packs, allowing the PackWriter to be handed newer objects
		// first and older objects last.
		Collections.sort(srcPacks, new Comparator<DfsPackFile>() {
			public int compare(DfsPackFile a, DfsPackFile b) {
				return a.getPackDescription().compareTo(b.getPackDescription());
			}
		});

		rw = new RevWalk(ctx);
		added = rw.newFlag("ADDED"); //$NON-NLS-1$
		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
		List<RevObject> baseObjects = new BlockList<RevObject>();

		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
		for (DfsPackFile src : srcPacks) {
			List<ObjectIdWithOffset> want = toInclude(src, ctx);
			if (want.isEmpty())
				continue;

			PackReverseIndex rev = src.getReverseIdx(ctx);
			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
			for (ObjectIdWithOffset id : want) {
				int type = src.getObjectType(ctx, id.offset);
				RevObject obj = rw.lookupAny(id, type);
				if (obj.has(added))
					continue;

				pm.update(1);
				pw.addObject(obj);
				obj.add(added);

				src.representation(rep, id.offset, ctx, rev);
				if (rep.getFormat() != PACK_DELTA)
					continue;
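				// A reused delta is only valid if its base also ends up in the
				// output pack; remember bases that still have to be added.
				// (A delta always has the same type as its base.)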
				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
				if (!base.has(added) && !base.has(isBase)) {
					baseObjects.add(base);
					base.add(isBase);
				}
			}
		}
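		// Append any delta bases that were not copied in the loops above so
		// the reused deltas can still be resolved from the new pack.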
		for (RevObject obj : baseObjects) {
			if (!obj.has(added)) {
				pm.update(1);
				pw.addObject(obj);
				obj.add(added);
			}
		}
		pm.endTask();
	}

	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
			throws IOException {
		PackIndex srcIdx = src.getPackIndex(ctx);
		List<ObjectIdWithOffset> want = new BlockList<ObjectIdWithOffset>(
				(int) srcIdx.getObjectCount());
		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
			ObjectId id = ent.toObjectId();
			RevObject obj = rw.lookupOrNull(id);
			if (obj != null && (obj.has(added) || obj.has(isBase)))
				continue;
			for (ObjectIdSet e : exclude)
				if (e.contains(id))
					continue SCAN;
			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
		}
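		// Copy objects in the order they occur in the source pack (ascending
		// offset), matching the ordering promised by add(DfsPackFile).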
		Collections.sort(want, new Comparator<ObjectIdWithOffset>() {
			public int compare(ObjectIdWithOffset a, ObjectIdWithOffset b) {
				return Long.signum(a.offset - b.offset);
			}
		});
		return want;
	}

	private static void writePack(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw, ProgressMonitor pm) throws IOException {
		DfsOutputStream out = objdb.writeFile(pack, PACK);
		try {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
		} finally {
			out.close();
		}
	}

	private static void writeIndex(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw) throws IOException {
		DfsOutputStream out = objdb.writeFile(pack, INDEX);
		try {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setIndexVersion(pw.getIndexVersion());
		} finally {
			out.close();
		}
	}

	private static class ObjectIdWithOffset extends ObjectId {
		final long offset;

		ObjectIdWithOffset(AnyObjectId id, long ofs) {
			super(id);
			offset = ofs;
		}
	}
}