
DfsPackCompactor.java

/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.dfs;

import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;

import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.HashSet;
import java.util.Iterator;
import java.util.List;
import java.util.Set;

import org.eclipse.jgit.errors.IncorrectObjectTypeException;
import org.eclipse.jgit.internal.JGitText;
import org.eclipse.jgit.internal.storage.file.PackIndex;
import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
import org.eclipse.jgit.internal.storage.pack.PackWriter;
import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.NullProgressMonitor;
import org.eclipse.jgit.lib.ObjectId;
import org.eclipse.jgit.lib.ObjectIdSet;
import org.eclipse.jgit.lib.ProgressMonitor;
import org.eclipse.jgit.revwalk.RevFlag;
import org.eclipse.jgit.revwalk.RevObject;
import org.eclipse.jgit.revwalk.RevWalk;
import org.eclipse.jgit.storage.pack.PackConfig;
import org.eclipse.jgit.storage.pack.PackStatistics;
import org.eclipse.jgit.util.BlockList;
import org.eclipse.jgit.util.io.CountingOutputStream;
/**
 * Combine several pack files into one pack.
 * <p>
 * The compactor combines several pack files together by including all objects
 * contained in each pack file into the same output pack. If an object appears
 * multiple times, it is only included once in the result. Because the new pack
 * is constructed by enumerating the indexes of the source packs, it is quicker
 * than doing a full repack of the repository; however, the result is not
 * nearly as space efficient, as new delta compression is disabled.
 * <p>
 * This method is suitable for quickly combining several packs together after
 * receiving a number of small fetch or push operations into a repository,
 * allowing the system to maintain reasonable read performance without
 * expending a lot of time repacking the entire repository.
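 * <p>
 * A minimal usage sketch (assuming an existing {@code DfsRepository} instance
 * named {@code repo}; error handling omitted):
 *
 * <pre>{@code
 * DfsPackCompactor compactor = new DfsPackCompactor(repo);
 * compactor.autoAdd(); // pick up the small packs and reftables
 * compactor.compact(NullProgressMonitor.INSTANCE);
 * }</pre>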
 */
public class DfsPackCompactor {
	private final DfsRepository repo;
	private final List<DfsPackFile> srcPacks;
	private final List<DfsReftable> srcReftables;
	private final List<ObjectIdSet> exclude;

	private PackStatistics newStats;
	private DfsPackDescription outDesc;

	private int autoAddSize;
	private ReftableConfig reftableConfig;

	private RevWalk rw;
	private RevFlag added;
	private RevFlag isBase;

	/**
	 * Initialize a pack compactor.
	 *
	 * @param repository
	 *            repository objects to be packed will be read from.
	 */
	public DfsPackCompactor(DfsRepository repository) {
		repo = repository;
		autoAddSize = 5 * 1024 * 1024; // 5 MiB
		srcPacks = new ArrayList<>();
		srcReftables = new ArrayList<>();
		exclude = new ArrayList<>(4);
	}
	/**
	 * Set configuration to write a reftable.
	 *
	 * @param cfg
	 *            configuration to write a reftable. Reftable compacting is
	 *            disabled (default) when {@code cfg} is {@code null}.
	 * @return {@code this}
	 */
	public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
		reftableConfig = cfg;
		return this;
	}

	/**
	 * Add a pack to be compacted.
	 * <p>
	 * All of the objects in this pack will be copied into the resulting pack.
	 * The resulting pack will order objects according to the source pack's own
	 * description ordering (which is based on creation date), and then by the
	 * order the objects appear in the source pack.
	 *
	 * @param pack
	 *            a pack to combine into the resulting pack.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsPackFile pack) {
		srcPacks.add(pack);
		return this;
	}

	/**
	 * Add a reftable to be compacted.
	 *
	 * @param table
	 *            a reftable to combine.
	 * @return {@code this}
	 */
	public DfsPackCompactor add(DfsReftable table) {
		srcReftables.add(table);
		return this;
	}
	/**
	 * Automatically select packs and reftables to be included, and add them.
	 * <p>
	 * Packs are selected based on size: smaller packs are included while
	 * bigger ones are omitted.
	 *
	 * @return {@code this}
	 * @throws java.io.IOException
	 *             existing packs cannot be read.
	 */
	public DfsPackCompactor autoAdd() throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		for (DfsPackFile pack : objdb.getPacks()) {
			DfsPackDescription d = pack.getPackDescription();
			if (d.getFileSize(PACK) < autoAddSize)
				add(pack);
			else
				exclude(pack);
		}

		if (reftableConfig != null) {
			for (DfsReftable table : objdb.getReftables()) {
				DfsPackDescription d = table.getPackDescription();
				if (d.getPackSource() != GC
						&& d.getFileSize(REFTABLE) < autoAddSize) {
					add(table);
				}
			}
		}
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param set
	 *            objects to not include.
	 * @return {@code this}.
	 */
	public DfsPackCompactor exclude(ObjectIdSet set) {
		exclude.add(set);
		return this;
	}

	/**
	 * Exclude objects from the compacted pack.
	 *
	 * @param pack
	 *            objects to not include.
	 * @return {@code this}.
	 * @throws java.io.IOException
	 *             pack index cannot be loaded.
	 */
	public DfsPackCompactor exclude(DfsPackFile pack) throws IOException {
		final PackIndex idx;
		try (DfsReader ctx = (DfsReader) repo.newObjectReader()) {
			idx = pack.getPackIndex(ctx);
		}
		return exclude(idx);
	}

	/**
	 * Compact the pack files together.
	 *
	 * @param pm
	 *            progress monitor to receive updates on as packing may take a
	 *            while, depending on the size of the repository.
	 * @throws java.io.IOException
	 *             the packs cannot be compacted.
	 */
	public void compact(ProgressMonitor pm) throws IOException {
		if (pm == null) {
			pm = NullProgressMonitor.INSTANCE;
		}

		DfsObjDatabase objdb = repo.getObjectDatabase();
		try (DfsReader ctx = objdb.newReader()) {
			if (reftableConfig != null && !srcReftables.isEmpty()) {
				compactReftables(ctx);
			}
			compactPacks(ctx, pm);

			List<DfsPackDescription> commit = getNewPacks();
			Collection<DfsPackDescription> remove = toPrune();
			if (!commit.isEmpty() || !remove.isEmpty()) {
				objdb.commitPack(commit, remove);
			}
		} finally {
			rw = null;
		}
	}
	private void compactPacks(DfsReader ctx, ProgressMonitor pm)
			throws IOException, IncorrectObjectTypeException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
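		// Copy objects as-is: reuse existing deltas and whole objects from
		// the source packs, but do not search for new deltas (see the class
		// comment; delta compression is disabled for speed).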
		PackConfig pc = new PackConfig(repo);
		pc.setIndexVersion(2);
		pc.setDeltaCompress(false);
		pc.setReuseDeltas(true);
		pc.setReuseObjects(true);

		try (PackWriter pw = new PackWriter(pc, ctx)) {
			pw.setDeltaBaseAsOffset(true);
			pw.setReuseDeltaCommits(false);

			addObjectsToPack(pw, ctx, pm);
			if (pw.getObjectCount() == 0) {
				return;
			}

			boolean rollback = true;
			initOutDesc(objdb);
			try {
				writePack(objdb, outDesc, pw, pm);
				writeIndex(objdb, outDesc, pw);

				PackStatistics stats = pw.getStatistics();
				outDesc.setPackStats(stats);
				newStats = stats;

				rollback = false;
			} finally {
				if (rollback) {
					objdb.rollbackPack(Collections.singletonList(outDesc));
				}
			}
		}
	}
	private long estimatePackSize() {
		// Every pack file contains 12 bytes of header and 20 bytes of trailer.
		// Include the final pack file header and trailer size here and ignore
		// the same from individual pack files.
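		// For example, combining two source packs of 1000 and 2000 bytes
		// yields an estimate of 32 + (1000 - 32) + (2000 - 32) = 2968 bytes.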
		long size = 32;
		for (DfsPackFile pack : srcPacks) {
			size += pack.getPackDescription().getFileSize(PACK) - 32;
		}
		return size;
	}
	private void compactReftables(DfsReader ctx) throws IOException {
		DfsObjDatabase objdb = repo.getObjectDatabase();
		Collections.sort(srcReftables, objdb.reftableComparator());

		try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) {
			initOutDesc(objdb);
			ReftableCompactor compact = new ReftableCompactor();
			compact.addAll(stack.readers());
			compact.setIncludeDeletes(true);
			writeReftable(objdb, outDesc, compact);
		}
	}
	private void initOutDesc(DfsObjDatabase objdb) throws IOException {
		if (outDesc == null) {
			outDesc = objdb.newPack(COMPACT, estimatePackSize());
		}
	}

	/**
	 * Get all of the source packs that fed into this compaction.
	 *
	 * @return all of the source packs that fed into this compaction.
	 */
	public Collection<DfsPackDescription> getSourcePacks() {
		Set<DfsPackDescription> src = new HashSet<>();
		for (DfsPackFile pack : srcPacks) {
			src.add(pack.getPackDescription());
		}
		for (DfsReftable table : srcReftables) {
			src.add(table.getPackDescription());
		}
		return src;
	}

	/**
	 * Get new packs created by this compaction.
	 *
	 * @return new packs created by this compaction.
	 */
	public List<DfsPackDescription> getNewPacks() {
		return outDesc != null
				? Collections.singletonList(outDesc)
				: Collections.emptyList();
	}
	/**
	 * Get statistics corresponding to {@link #getNewPacks()}. May be null if
	 * statistics are not available.
	 *
	 * @return statistics corresponding to {@link #getNewPacks()}.
	 */
	public List<PackStatistics> getNewPackStatistics() {
		return outDesc != null
				? Collections.singletonList(newStats)
				: Collections.emptyList();
	}
	private Collection<DfsPackDescription> toPrune() {
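		// A DfsPackDescription can describe both a pack and a reftable that
		// share one name. Only prune a description when every component it
		// carries was consumed by this compaction; otherwise the surviving
		// component would be deleted along with it.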
		Set<DfsPackDescription> packs = new HashSet<>();
		for (DfsPackFile pack : srcPacks) {
			packs.add(pack.getPackDescription());
		}

		Set<DfsPackDescription> reftables = new HashSet<>();
		for (DfsReftable table : srcReftables) {
			reftables.add(table.getPackDescription());
		}

		for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
			DfsPackDescription d = i.next();
			if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
				i.remove();
			}
		}

		for (Iterator<DfsPackDescription> i = reftables.iterator();
				i.hasNext();) {
			DfsPackDescription d = i.next();
			if (d.hasFileExt(PACK) && !packs.contains(d)) {
				i.remove();
			}
		}

		Set<DfsPackDescription> toPrune = new HashSet<>();
		toPrune.addAll(packs);
		toPrune.addAll(reftables);
		return toPrune;
	}
	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
			ProgressMonitor pm) throws IOException,
			IncorrectObjectTypeException {
		// Sort packs by description ordering; this places newer packs before
		// older packs, allowing the PackWriter to be handed newer objects
		// first and older objects last.
		Collections.sort(
				srcPacks,
				Comparator.comparing(
						DfsPackFile::getPackDescription,
						DfsPackDescription.objectLookupComparator()));

		rw = new RevWalk(ctx);
		added = rw.newFlag("ADDED"); //$NON-NLS-1$
		isBase = rw.newFlag("IS_BASE"); //$NON-NLS-1$
		List<RevObject> baseObjects = new BlockList<>();

		pm.beginTask(JGitText.get().countingObjects, ProgressMonitor.UNKNOWN);
		for (DfsPackFile src : srcPacks) {
			List<ObjectIdWithOffset> want = toInclude(src, ctx);
			if (want.isEmpty())
				continue;

			PackReverseIndex rev = src.getReverseIdx(ctx);
			DfsObjectRepresentation rep = new DfsObjectRepresentation(src);
			for (ObjectIdWithOffset id : want) {
				int type = src.getObjectType(ctx, id.offset);
				RevObject obj = rw.lookupAny(id, type);
				if (obj.has(added))
					continue;

				pm.update(1);
				pw.addObject(obj);
				obj.add(added);

				src.representation(rep, id.offset, ctx, rev);
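				// If this object is stored as a delta, make sure its base is
				// also included in the output so the reused delta remains
				// resolvable.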
				if (rep.getFormat() != PACK_DELTA)
					continue;

				RevObject base = rw.lookupAny(rep.getDeltaBase(), type);
				if (!base.has(added) && !base.has(isBase)) {
					baseObjects.add(base);
					base.add(isBase);
				}
			}
		}
		for (RevObject obj : baseObjects) {
			if (!obj.has(added)) {
				pm.update(1);
				pw.addObject(obj);
				obj.add(added);
			}
		}
		pm.endTask();
	}
	private List<ObjectIdWithOffset> toInclude(DfsPackFile src, DfsReader ctx)
			throws IOException {
		PackIndex srcIdx = src.getPackIndex(ctx);
		List<ObjectIdWithOffset> want = new BlockList<>(
				(int) srcIdx.getObjectCount());
		SCAN: for (PackIndex.MutableEntry ent : srcIdx) {
			ObjectId id = ent.toObjectId();
			RevObject obj = rw.lookupOrNull(id);
			if (obj != null && (obj.has(added) || obj.has(isBase)))
				continue;
			for (ObjectIdSet e : exclude)
				if (e.contains(id))
					continue SCAN;
			want.add(new ObjectIdWithOffset(id, ent.getOffset()));
		}
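		// Copy objects in the order they appear in the source pack, so reads
		// from the pack are sequential rather than random access.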
		Collections.sort(want, (ObjectIdWithOffset a,
				ObjectIdWithOffset b) -> Long.signum(a.offset - b.offset));
		return want;
	}
	private static void writePack(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw, ProgressMonitor pm) throws IOException {
		try (DfsOutputStream out = objdb.writeFile(pack, PACK)) {
			pw.writePack(pm, pm, out);
			pack.addFileExt(PACK);
			pack.setBlockSize(PACK, out.blockSize());
		}
	}

	private static void writeIndex(DfsObjDatabase objdb,
			DfsPackDescription pack,
			PackWriter pw) throws IOException {
		try (DfsOutputStream out = objdb.writeFile(pack, INDEX)) {
			CountingOutputStream cnt = new CountingOutputStream(out);
			pw.writeIndex(cnt);
			pack.addFileExt(INDEX);
			pack.setFileSize(INDEX, cnt.getCount());
			pack.setBlockSize(INDEX, out.blockSize());
			pack.setIndexVersion(pw.getIndexVersion());
		}
	}

	private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack,
			ReftableCompactor compact) throws IOException {
		try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
			compact.setConfig(configureReftable(reftableConfig, out));
			compact.compact(out);
			pack.addFileExt(REFTABLE);
			pack.setReftableStats(compact.getStats());
		}
	}
	static ReftableConfig configureReftable(ReftableConfig cfg,
			DfsOutputStream out) {
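		// When the storage layer reports its block size, copy the supplied
		// configuration and align reftable blocks to that size so block
		// reads map cleanly onto storage blocks.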
		int bs = out.blockSize();
		if (bs > 0) {
			cfg = new ReftableConfig(cfg);
			cfg.setRefBlockSize(bs);
			cfg.setAlignBlocks(true);
		}
		return cfg;
	}

	private static class ObjectIdWithOffset extends ObjectId {
		final long offset;

		ObjectIdWithOffset(AnyObjectId id, long ofs) {
			super(id);
			offset = ofs;
		}
	}
}