/* * Copyright (C) 2023, Google LLC and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.internal.storage.file; import java.text.MessageFormat; import org.eclipse.jgit.errors.CorruptObjectException; import org.eclipse.jgit.errors.PackMismatchException; import org.eclipse.jgit.internal.JGitText; import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry; import org.eclipse.jgit.lib.ObjectId; /** * Reverse index for forward pack index which is computed from the forward pack * index. *

* Creating an instance uses an insertion sort of the entries in the forward * index, so it runs in quadratic time on average. */ final class PackReverseIndexComputed implements PackReverseIndex { /** * Index we were created from, and that has our ObjectId data. */ private final PackIndex index; /** * The difference in offset between the start of an offset bucket and the * start of its succeeding bucket. */ private final long bucketSize; /** * The indexes into indexPosInOffsetOrder at which the next bucket starts. *

* For example, given offset o (and therefore bucket = o / bucketSize), the * indexPos corresponding to o will be contained in the range * indexPosInOffsetOrder[nextBucketStart[bucket - 1]] inclusive to * indexPosInOffsetOrder[nextBucketStart[bucket]] exclusive. *

* This range information can speed up #binarySearch by identifying the * relevant bucket and only searching within its range. *

* See {@link #binarySearch} */ private final int[] nextBucketStart; /** * Mapping from indices in offset order to indices in SHA-1 order. */ private final int[] indexPosInOffsetOrder; /** * Create reverse index from straight/forward pack index, by indexing all * its entries. * * @param packIndex * forward index - entries to (reverse) index. */ PackReverseIndexComputed(PackIndex packIndex) { index = packIndex; long rawCnt = index.getObjectCount(); if (rawCnt + 1 > Integer.MAX_VALUE) { throw new IllegalArgumentException( JGitText.get().hugeIndexesAreNotSupportedByJgitYet); } int cnt = (int) rawCnt; if (cnt == 0) { bucketSize = Long.MAX_VALUE; nextBucketStart = new int[1]; indexPosInOffsetOrder = new int[0]; return; } // Sort the index positions according to the corresponding pack offsets. // Use bucket sort since the offsets are somewhat uniformly distributed // over the range (0, pack size). long[] offsetsInIndexOrder = new long[cnt]; long maxOffset = 0; int i = 0; for (MutableEntry entry : index) { long offset = entry.getOffset(); offsetsInIndexOrder[i++] = offset; if (offset > maxOffset) { maxOffset = offset; } } bucketSize = maxOffset / cnt + 1; // The values in each bucket, stored as a linked list. Given a bucket, // headValues[bucket] contains the first value, // furtherValues[headValues[bucket]] contains the second, // furtherValues[furtherValues[headValues[bucket]]] the third, and so // on. The linked list stops when a value is 0. The values themselves // are shifted index positions. There won't be any // collisions because every index position is unique. int[] headValues = new int[cnt]; int[] furtherValues = new int[cnt + 1]; for (int indexPos = 0; indexPos < cnt; indexPos++) { // The offset determines which bucket this index position falls // into, since the goal is sort into offset order. long offset = offsetsInIndexOrder[indexPos]; int bucket = (int) (offset / bucketSize); // Store the index positions as 1-indexed so that default // initialized value 0 can be interpreted as the end of the bucket // values. int asBucketValue = indexPos + 1; // If there is an existing value in this bucket, push the value to // the front of the linked list. int current = headValues[bucket]; headValues[bucket] = asBucketValue; furtherValues[asBucketValue] = current; } int nthByOffset = 0; indexPosInOffsetOrder = new int[cnt]; nextBucketStart = headValues; // Reuse the allocation for (int bi = 0; bi < headValues.length; bi++) { // Insertion sort of the values in the bucket. int start = nthByOffset; for (int vi = headValues[bi]; vi > 0; vi = furtherValues[vi]) { int nthBySha1 = vi - 1; long o = offsetsInIndexOrder[nthBySha1]; int insertion = nthByOffset++; for (; start < insertion; insertion--) { if (o > offsetsInIndexOrder[indexPosInOffsetOrder[insertion - 1]]) { break; } indexPosInOffsetOrder[insertion] = indexPosInOffsetOrder[insertion - 1]; } indexPosInOffsetOrder[insertion] = nthBySha1; } nextBucketStart[bi] = nthByOffset; } } @Override public void verifyPackChecksum(String packFilePath) throws PackMismatchException { // There is no file with a checksum. } @Override public ObjectId findObject(long offset) { final int ith = binarySearch(offset); if (ith < 0) { return null; } return index.getObjectId(indexPosInOffsetOrder[ith]); } @Override public long findNextOffset(long offset, long maxOffset) throws CorruptObjectException { final int ith = binarySearch(offset); if (ith < 0) { throw new CorruptObjectException(MessageFormat.format(JGitText .get().cantFindObjectInReversePackIndexForTheSpecifiedOffset, Long.valueOf(offset))); } if (ith + 1 == indexPosInOffsetOrder.length) { return maxOffset; } return index.getOffset(indexPosInOffsetOrder[ith + 1]); } @Override public int findPosition(long offset) { return binarySearch(offset); } private int binarySearch(long offset) { int bucket = (int) (offset / bucketSize); int low = bucket == 0 ? 0 : nextBucketStart[bucket - 1]; int high = nextBucketStart[bucket]; while (low < high) { final int mid = (low + high) >>> 1; final long o = index.getOffset(indexPosInOffsetOrder[mid]); if (offset < o) { high = mid; } else if (offset == o) { return mid; } else { low = mid + 1; } } return -1; } @Override public ObjectId findObjectByPosition(int nthPosition) { return index.getObjectId(indexPosInOffsetOrder[nthPosition]); } }