/* * Copyright (C) 2025, Google LLC * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.internal.storage.midx; import java.util.ArrayList; import java.util.Iterator; import java.util.List; import java.util.Map; import java.util.NoSuchElementException; import java.util.stream.Collectors; import org.eclipse.jgit.internal.storage.file.PackIndex; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.MutableObjectId; /** * Collect the stats and offers an iterator over the union of n-pack indexes. *

* The multipack index is a list of (sha1, packid, offset) ordered by sha1. We * can build it from the individual pack indexes (sha1, offset) ordered by sha1, * with a simple merge ignoring duplicates. *

* This class encapsulates the merging logic and precalculates the stats that * the index needs (like total count of objects). To limit memory consumption, * it does the merge as it goes during the iteration and iterators use mutable * entries. The stats of the combined index are calculated in an iteration at * construction time. */ class PackIndexMerger { private static final int LIMIT_31_BITS = (1 << 31) - 1; private static final long LIMIT_32_BITS = (1L << 32) - 1; /** * Object returned by the iterator. *

* The iterator returns (on each next()) the same instance with different * values, to avoid allocating many short-lived objects. Callers should not * keep a reference to that returned value. */ static class MidxMutableEntry { // The object id private final MutableObjectId oid = new MutableObjectId(); // Position of the pack in the ordered list of pack in this merger private int packId; // Offset in its pack private long offset; public AnyObjectId getObjectId() { return oid; } public int getPackId() { return packId; } public long getOffset() { return offset; } /** * Copy values from another mutable entry * * @param packId * packId * @param other * another mutable entry */ private void fill(int packId, PackIndex.MutableEntry other) { other.copyOidTo(oid); this.packId = packId; this.offset = other.getOffset(); } } private final List packNames; private final List indexes; private final boolean needsLargeOffsetsChunk; private final int offsetsOver31BitsCount; private final int uniqueObjectCount; PackIndexMerger(Map packs) { this.packNames = packs.keySet().stream().sorted() .collect(Collectors.toUnmodifiableList()); this.indexes = packNames.stream().map(packs::get) .collect(Collectors.toUnmodifiableList()); // Iterate for duplicates int objectCount = 0; boolean hasLargeOffsets = false; int over31bits = 0; MutableObjectId lastSeen = new MutableObjectId(); MultiIndexIterator it = new MultiIndexIterator(indexes); while (it.hasNext()) { MidxMutableEntry entry = it.next(); if (lastSeen.equals(entry.oid)) { continue; } // If there is at least one offset value larger than 2^32-1, then // the large offset chunk must exist, and offsets larger than // 2^31-1 must be stored in it instead if (entry.offset > LIMIT_32_BITS) { hasLargeOffsets = true; } if (entry.offset > LIMIT_31_BITS) { over31bits++; } lastSeen.fromObjectId(entry.oid); objectCount++; } uniqueObjectCount = objectCount; offsetsOver31BitsCount = over31bits; needsLargeOffsetsChunk = hasLargeOffsets; } /** * Object count of the merged index (i.e. without duplicates) * * @return object count of the merged index */ int getUniqueObjectCount() { return uniqueObjectCount; } /** * If any object in any of the indexes has an offset over 2^32-1 * * @return true if there is any object with offset > 2^32 -1 */ boolean needsLargeOffsetsChunk() { return needsLargeOffsetsChunk; } /** * How many object have offsets over 2^31-1 *

* Per multipack index spec, IF there is large offset chunk, all this * offsets should be there. * * @return number of objects with offsets over 2^31-1 */ int getOffsetsOver31BitsCount() { return offsetsOver31BitsCount; } /** * List of pack names in alphabetical order. *

* Order matters: In case of duplicates, the multipack index prefers the * first package with it. This is in the same order we are using to * prioritize duplicates. * * @return List of pack names, in the order used by the merge. */ List getPackNames() { return packNames; } /** * How many packs are being merged * * @return count of packs merged */ int getPackCount() { return packNames.size(); } /** * Iterator over the merged indexes in sha1 order without duplicates *

* The returned entry in the iterator is mutable, callers should NOT keep a * reference to it. * * @return an iterator in sha1 order without duplicates. */ Iterator bySha1Iterator() { return new DedupMultiIndexIterator(new MultiIndexIterator(indexes), getUniqueObjectCount()); } /** * For testing. Iterate all entries, not skipping duplicates (stable order) * * @return an iterator of all objects in sha1 order, including duplicates. */ Iterator rawIterator() { return new MultiIndexIterator(indexes); } /** * Iterator over n-indexes in ObjectId order. *

* It returns duplicates if the same object id is in different indexes. Wrap * it with {@link DedupMultiIndexIterator (Iterator, int)} to avoid * duplicates. */ private static final class MultiIndexIterator implements Iterator { private final List indexIterators; private final MidxMutableEntry mutableEntry = new MidxMutableEntry(); MultiIndexIterator(List indexes) { this.indexIterators = new ArrayList<>(indexes.size()); for (int i = 0; i < indexes.size(); i++) { PackIndexPeekIterator it = new PackIndexPeekIterator(i, indexes.get(i)); // Position in the first element if (it.next() != null) { indexIterators.add(it); } } } @Override public boolean hasNext() { return !indexIterators.isEmpty(); } @Override public MidxMutableEntry next() { PackIndexPeekIterator winner = null; for (int index = 0; index < indexIterators.size(); index++) { PackIndexPeekIterator current = indexIterators.get(index); if (winner == null || current.peek().compareBySha1To(winner.peek()) < 0) { winner = current; } } if (winner == null) { throw new NoSuchElementException(); } mutableEntry.fill(winner.getPackId(), winner.peek()); if (winner.next() == null) { indexIterators.remove(winner); }; return mutableEntry; } } private static class DedupMultiIndexIterator implements Iterator { private final MultiIndexIterator src; private int remaining; private final MutableObjectId lastOid = new MutableObjectId(); DedupMultiIndexIterator(MultiIndexIterator src, int totalCount) { this.src = src; this.remaining = totalCount; } @Override public boolean hasNext() { return remaining > 0; } @Override public MidxMutableEntry next() { MidxMutableEntry next = src.next(); while (next != null && lastOid.equals(next.oid)) { next = src.next(); } if (next == null) { throw new NoSuchElementException(); } lastOid.fromObjectId(next.oid); remaining--; return next; } } /** * Convenience around the PackIndex iterator to read the current value * multiple times without consuming it. *

* This is used to merge indexes in the multipack index, where we need to * compare the current value between indexes multiple times to find the * next. *

* We could also implement this keeping the position (int) and * MutableEntry#getObjectId, but that would create an ObjectId per entry. * This implementation reuses the MutableEntry and avoid instantiations. */ // Visible for testing static class PackIndexPeekIterator { private final Iterator it; private final int packId; PackIndex.MutableEntry current; PackIndexPeekIterator(int packId, PackIndex index) { it = index.iterator(); this.packId = packId; } PackIndex.MutableEntry next() { if (it.hasNext()) { current = it.next(); } else { current = null; } return current; } PackIndex.MutableEntry peek() { return current; } int getPackId() { return packId; } } }