From d126bcc5c800ef03e8292e7ad1dd7fba7c3358d2 Mon Sep 17 00:00:00 2001
From: Shawn Pearce
Date: Tue, 25 Jul 2017 15:37:56 -0700
Subject: [PATCH] dfs: compact reftables during DfsPackCompactor

Combine intermediate, non-GC reftables when combining pack files.
This shrinks the reftable stack, improving lookup times.

Change-Id: I5dbba41806f99af5ecaff3a3119f6630e9404256
---
 .../storage/dfs/DfsPackCompactor.java        | 244 +++++++++++++-----
 .../storage/reftable/ReftableCompactor.java  |  21 +-
 2 files changed, 203 insertions(+), 62 deletions(-)

diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java
index ac14c0bc32..99663eb738 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/dfs/DfsPackCompactor.java
@@ -44,21 +44,29 @@
 package org.eclipse.jgit.internal.storage.dfs;
 
 import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.COMPACT;
+import static org.eclipse.jgit.internal.storage.dfs.DfsObjDatabase.PackSource.GC;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.INDEX;
 import static org.eclipse.jgit.internal.storage.pack.PackExt.PACK;
+import static org.eclipse.jgit.internal.storage.pack.PackExt.REFTABLE;
 import static org.eclipse.jgit.internal.storage.pack.StoredObjectRepresentation.PACK_DELTA;
 
 import java.io.IOException;
 import java.util.ArrayList;
+import java.util.Collection;
 import java.util.Collections;
 import java.util.Comparator;
+import java.util.HashSet;
+import java.util.Iterator;
 import java.util.List;
+import java.util.Set;
 
 import org.eclipse.jgit.errors.IncorrectObjectTypeException;
 import org.eclipse.jgit.internal.JGitText;
 import org.eclipse.jgit.internal.storage.file.PackIndex;
 import org.eclipse.jgit.internal.storage.file.PackReverseIndex;
 import org.eclipse.jgit.internal.storage.pack.PackWriter;
+import org.eclipse.jgit.internal.storage.reftable.ReftableCompactor;
+import org.eclipse.jgit.internal.storage.reftable.ReftableConfig;
 import org.eclipse.jgit.lib.AnyObjectId;
 import org.eclipse.jgit.lib.NullProgressMonitor;
 import org.eclipse.jgit.lib.ObjectId;
@@ -89,16 +97,15 @@ import org.eclipse.jgit.util.io.CountingOutputStream;
  */
 public class DfsPackCompactor {
 	private final DfsRepository repo;
-
 	private final List<DfsPackFile> srcPacks;
-
+	private final List<DfsReftable> srcReftables;
 	private final List<ObjectIdSet> exclude;
 
-	private final List<DfsPackDescription> newPacks;
-
-	private final List<PackStatistics> newStats;
+	private PackStatistics newStats;
+	private DfsPackDescription outDesc;
 
 	private int autoAddSize;
+	private ReftableConfig reftableConfig;
 
 	private RevWalk rw;
 	private RevFlag added;
@@ -114,9 +121,19 @@ public class DfsPackCompactor {
 		repo = repository;
 		autoAddSize = 5 * 1024 * 1024; // 5 MiB
 		srcPacks = new ArrayList<>();
+		srcReftables = new ArrayList<>();
 		exclude = new ArrayList<>(4);
-		newPacks = new ArrayList<>(1);
-		newStats = new ArrayList<>(1);
+	}
+
+	/**
+	 * @param cfg
+	 *            configuration to write a reftable. Reftable compacting is
+	 *            disabled (default) when {@code cfg} is {@code null}.
+	 * @return {@code this}
+	 */
+	public DfsPackCompactor setReftableConfig(ReftableConfig cfg) {
+		reftableConfig = cfg;
+		return this;
 	}
 
 	/**
@@ -137,7 +154,19 @@
 	}
 
 	/**
-	 * Automatically select packs to be included, and add them.
+	 * Add a reftable to be compacted.
+	 *
+	 * @param table
+	 *            a reftable to combine.
+	 * @return {@code this}
+	 */
+	public DfsPackCompactor add(DfsReftable table) {
+		srcReftables.add(table);
+		return this;
+	}
+
+	/**
+	 * Automatically select packs and reftables to be included, and add them.
 	 * <p>
 	 * Packs are selected based on size, smaller packs get included while bigger
 	 * ones are omitted.
@@ -155,6 +184,16 @@ public class DfsPackCompactor {
 			else
 				exclude(pack);
 		}
+
+		if (reftableConfig != null) {
+			for (DfsReftable table : objdb.getReftables()) {
+				DfsPackDescription d = table.getPackDescription();
+				if (d.getPackSource() != GC
+						&& d.getFileSize(REFTABLE) < autoAddSize) {
+					add(table);
+				}
+			}
+		}
 		return this;
 	}
 
@@ -197,58 +236,68 @@
 	 *             the packs cannot be compacted.
 	 */
 	public void compact(ProgressMonitor pm) throws IOException {
-		if (pm == null)
+		if (pm == null) {
 			pm = NullProgressMonitor.INSTANCE;
+		}
 
 		DfsObjDatabase objdb = repo.getObjectDatabase();
 		try (DfsReader ctx = objdb.newReader()) {
-			PackConfig pc = new PackConfig(repo);
-			pc.setIndexVersion(2);
-			pc.setDeltaCompress(false);
-			pc.setReuseDeltas(true);
-			pc.setReuseObjects(true);
+			if (reftableConfig != null && !srcReftables.isEmpty()) {
+				compactReftables(ctx);
+			}
+			compactPacks(ctx, pm);
+
+			List<DfsPackDescription> commit = getNewPacks();
+			Collection<DfsPackDescription> remove = toPrune();
+			if (!commit.isEmpty() || !remove.isEmpty()) {
+				objdb.commitPack(commit, remove);
+			}
+		} finally {
+			rw = null;
+		}
+	}
 
-			PackWriter pw = new PackWriter(pc, ctx);
+	private void compactPacks(DfsReader ctx, ProgressMonitor pm)
+			throws IOException, IncorrectObjectTypeException {
+		DfsObjDatabase objdb = repo.getObjectDatabase();
+		PackConfig pc = new PackConfig(repo);
+		pc.setIndexVersion(2);
+		pc.setDeltaCompress(false);
+		pc.setReuseDeltas(true);
+		pc.setReuseObjects(true);
+
+		PackWriter pw = new PackWriter(pc, ctx);
+		try {
+			pw.setDeltaBaseAsOffset(true);
+			pw.setReuseDeltaCommits(false);
+
+			addObjectsToPack(pw, ctx, pm);
+			if (pw.getObjectCount() == 0) {
+				return;
+			}
+
+			boolean rollback = true;
+			initOutDesc(objdb);
 			try {
-				pw.setDeltaBaseAsOffset(true);
-				pw.setReuseDeltaCommits(false);
-
-				addObjectsToPack(pw, ctx, pm);
-				if (pw.getObjectCount() == 0) {
-					List<DfsPackDescription> remove = toPrune();
-					if (remove.size() > 0)
-						objdb.commitPack(
-								Collections.<DfsPackDescription> emptyList(),
-								remove);
-					return;
-				}
+				writePack(objdb, outDesc, pw, pm);
+				writeIndex(objdb, outDesc, pw);
 
-				boolean rollback = true;
-				DfsPackDescription pack = objdb.newPack(COMPACT,
-						estimatePackSize());
-				try {
-					writePack(objdb, pack, pw, pm);
-					writeIndex(objdb, pack, pw);
-
-					PackStatistics stats = pw.getStatistics();
-					pw.close();
-					pw = null;
-
-					pack.setPackStats(stats);
-					objdb.commitPack(Collections.singletonList(pack), toPrune());
-					newPacks.add(pack);
-					newStats.add(stats);
-					rollback = false;
-				} finally {
-					if (rollback)
-						objdb.rollbackPack(Collections.singletonList(pack));
-				}
+				PackStatistics stats = pw.getStatistics();
+				pw.close();
+				pw = null;
+
+				outDesc.setPackStats(stats);
+				newStats = stats;
+				rollback = false;
 			} finally {
-				if (pw != null)
-					pw.close();
+				if (rollback) {
+					objdb.rollbackPack(Collections.singletonList(outDesc));
+				}
 			}
 		} finally {
-			rw = null;
+			if (pw != null) {
+				pw.close();
+			}
 		}
 	}
 
@@ -263,27 +312,81 @@
 		return size;
 	}
 
+	private void compactReftables(DfsReader ctx) throws IOException {
+		DfsObjDatabase objdb = repo.getObjectDatabase();
+		Collections.sort(srcReftables, objdb.reftableComparator());
+
+		try (ReftableStack stack = ReftableStack.open(ctx, srcReftables)) {
+			initOutDesc(objdb);
+			ReftableCompactor compact = new ReftableCompactor();
+			compact.addAll(stack.readers());
+			compact.setIncludeDeletes(true);
+			writeReftable(objdb, outDesc, compact);
+		}
+	}
+
+	private void initOutDesc(DfsObjDatabase objdb) throws IOException {
+		if (outDesc == null) {
+			outDesc = objdb.newPack(COMPACT, estimatePackSize());
+		}
+	}
+
 	/** @return all of the source packs that fed into this compaction. */
-	public List<DfsPackDescription> getSourcePacks() {
-		return toPrune();
+	public Collection<DfsPackDescription> getSourcePacks() {
+		Set<DfsPackDescription> src = new HashSet<>();
+		for (DfsPackFile pack : srcPacks) {
+			src.add(pack.getPackDescription());
+		}
+		for (DfsReftable table : srcReftables) {
+			src.add(table.getPackDescription());
+		}
+		return src;
 	}
 
 	/** @return new packs created by this compaction. */
 	public List<DfsPackDescription> getNewPacks() {
-		return newPacks;
+		return outDesc != null
+				? Collections.singletonList(outDesc)
+				: Collections.emptyList();
 	}
 
 	/** @return statistics corresponding to the {@link #getNewPacks()}. */
 	public List<PackStatistics> getNewPackStatistics() {
-		return newStats;
+		return newStats != null
+				? Collections.singletonList(newStats)
+				: Collections.emptyList();
 	}
 
-	private List<DfsPackDescription> toPrune() {
-		int cnt = srcPacks.size();
-		List<DfsPackDescription> all = new ArrayList<>(cnt);
-		for (DfsPackFile pack : srcPacks)
-			all.add(pack.getPackDescription());
-		return all;
+	private Collection<DfsPackDescription> toPrune() {
+		Set<DfsPackDescription> packs = new HashSet<>();
+		for (DfsPackFile pack : srcPacks) {
+			packs.add(pack.getPackDescription());
+		}
+
+		Set<DfsPackDescription> reftables = new HashSet<>();
+		for (DfsReftable table : srcReftables) {
+			reftables.add(table.getPackDescription());
+		}
+
+		for (Iterator<DfsPackDescription> i = packs.iterator(); i.hasNext();) {
+			DfsPackDescription d = i.next();
+			if (d.hasFileExt(REFTABLE) && !reftables.contains(d)) {
+				i.remove();
+			}
+		}
+
+		for (Iterator<DfsPackDescription> i = reftables.iterator();
+				i.hasNext();) {
+			DfsPackDescription d = i.next();
+			if (d.hasFileExt(PACK) && !packs.contains(d)) {
+				i.remove();
+			}
+		}
+
+		Set<DfsPackDescription> toPrune = new HashSet<>();
+		toPrune.addAll(packs);
+		toPrune.addAll(reftables);
+		return toPrune;
 	}
 
 	private void addObjectsToPack(PackWriter pw, DfsReader ctx,
@@ -390,6 +493,27 @@
 		}
 	}
 
+	private void writeReftable(DfsObjDatabase objdb, DfsPackDescription pack,
+			ReftableCompactor compact) throws IOException {
+		try (DfsOutputStream out = objdb.writeFile(pack, REFTABLE)) {
+			compact.setConfig(configureReftable(reftableConfig, out));
+			compact.compact(out);
+			pack.addFileExt(REFTABLE);
+			pack.setReftableStats(compact.getStats());
+		}
+	}
+
+	static ReftableConfig configureReftable(ReftableConfig cfg,
+			DfsOutputStream out) {
+		int bs = out.blockSize();
+		if (bs > 0) {
+			cfg = new ReftableConfig(cfg);
+			cfg.setRefBlockSize(bs);
+			cfg.setAlignBlocks(true);
+		}
+		return cfg;
+	}
+
 	private static class ObjectIdWithOffset extends ObjectId {
 		final long offset;
 
diff --git a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java
index 3927f7b170..c22157784c 100644
--- a/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java
+++ b/org.eclipse.jgit/src/org/eclipse/jgit/internal/storage/reftable/ReftableCompactor.java
@@ -159,9 +159,16 @@ public class ReftableCompactor {
 	 *            tables to compact. Tables should be ordered oldest first/most
 	 *            recent last so that the more recent tables can shadow the
 	 *            older results. Caller is responsible for closing the readers.
+	 * @throws IOException
+	 *             update indexes of a reader cannot be accessed.
 	 */
-	public void addAll(List<? extends Reftable> readers) {
+	public void addAll(List<? extends Reftable> readers) throws IOException {
 		tables.addAll(readers);
+		for (Reftable r : readers) {
+			if (r instanceof ReftableReader) {
+				adjustUpdateIndexes((ReftableReader) r);
+			}
+		}
 	}
 
 	/**
@@ -178,7 +185,7 @@ public class ReftableCompactor {
 	 * @return {@code true} if the compactor accepted this table; {@code false}
 	 *         if the compactor has reached its limit.
 	 * @throws IOException
-	 *             if size of {@code reader} cannot be read.
+	 *             if size of {@code reader}, or its update indexes, cannot be read.
 	 */
 	public boolean tryAddFirst(ReftableReader reader) throws IOException {
 		long sz = reader.size();
@@ -186,10 +193,20 @@ public class ReftableCompactor {
 			return false;
 		}
 		bytesToCompact += sz;
+		adjustUpdateIndexes(reader);
 		tables.addFirst(reader);
 		return true;
 	}
 
+	private void adjustUpdateIndexes(ReftableReader reader) throws IOException {
+		if (minUpdateIndex == 0) {
+			minUpdateIndex = reader.minUpdateIndex();
+		} else {
+			minUpdateIndex = Math.min(minUpdateIndex, reader.minUpdateIndex());
+		}
+		maxUpdateIndex = Math.max(maxUpdateIndex, reader.maxUpdateIndex());
+	}
+
 	/**
 	 * Write a compaction to {@code out}.
 	 *
-- 
2.39.5
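
For anyone trying the change out: the snippet below is one way a DFS backend could drive the compactor once this lands, enabling reftable compaction alongside pack compaction. It is an illustrative sketch, not code from the patch; `repo` is assumed to be whatever DfsRepository subclass the backend already provides, and the default ReftableConfig is used for simplicity.

    // Illustrative only: combine small packs and small non-GC reftables in one pass.
    // Assumes `repo` is an existing org.eclipse.jgit.internal.storage.dfs.DfsRepository.
    DfsPackCompactor compactor = new DfsPackCompactor(repo)
            .setReftableConfig(new ReftableConfig()); // leaving this null (the default) skips reftables
    compactor.autoAdd();                              // selects packs and non-GC reftables under 5 MiB
    compactor.compact(NullProgressMonitor.INSTANCE);  // writes one COMPACT pack plus a compacted reftable
    List<DfsPackDescription> created = compactor.getNewPacks();

Because the reftable is written into the same output DfsPackDescription as the compacted pack, a single commitPack() call replaces both the source packs and the intermediate reftables, which is what keeps the reftable stack short.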