You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackReverseIndex.java 6.9KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218
  1. /*
  2. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.internal.storage.file;
  44. import java.text.MessageFormat;
  45. import org.eclipse.jgit.errors.CorruptObjectException;
  46. import org.eclipse.jgit.internal.JGitText;
  47. import org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry;
  48. import org.eclipse.jgit.lib.ObjectId;
  49. /**
  50. * <p>
  51. * Reverse index for forward pack index. Provides operations based on offset
  52. * instead of object id. Such offset-based reverse lookups are performed in
  53. * O(log n) time.
  54. * </p>
  55. *
  56. * @see PackIndex
  57. * @see PackFile
  58. */
  59. public class PackReverseIndex {
  60. /** Index we were created from, and that has our ObjectId data. */
  61. private final PackIndex index;
  62. /** The number of bytes per entry in the offsetIndex. */
  63. private final long bucketSize;
  64. /**
  65. * An index into the nth mapping, where the value is the position after the
  66. * the last index that contains the values of the bucket. For example given
  67. * offset o (and bucket = o / bucketSize), the offset will be contained in
  68. * the range nth[offsetIndex[bucket - 1]] inclusive to
  69. * nth[offsetIndex[bucket]] exclusive.
  70. *
  71. * See {@link #binarySearch}
  72. */
  73. private final int[] offsetIndex;
  74. /** Mapping from indices in offset order to indices in SHA-1 order. */
  75. private final int[] nth;
  76. /**
  77. * Create reverse index from straight/forward pack index, by indexing all
  78. * its entries.
  79. *
  80. * @param packIndex
  81. * forward index - entries to (reverse) index.
  82. */
  83. public PackReverseIndex(PackIndex packIndex) {
  84. index = packIndex;
  85. final long cnt = index.getObjectCount();
  86. if (cnt + 1 > Integer.MAX_VALUE)
  87. throw new IllegalArgumentException(
  88. JGitText.get().hugeIndexesAreNotSupportedByJgitYet);
  89. if (cnt == 0) {
  90. bucketSize = Long.MAX_VALUE;
  91. offsetIndex = new int[1];
  92. nth = new int[0];
  93. return;
  94. }
  95. final long[] offsetsBySha1 = new long[(int) cnt];
  96. long maxOffset = 0;
  97. int ith = 0;
  98. for (MutableEntry me : index) {
  99. final long o = me.getOffset();
  100. offsetsBySha1[ith++] = o;
  101. if (o > maxOffset)
  102. maxOffset = o;
  103. }
  104. bucketSize = maxOffset / cnt + 1;
  105. int[] bucketIndex = new int[(int) cnt];
  106. int[] bucketValues = new int[(int) cnt + 1];
  107. for (int oi = 0; oi < offsetsBySha1.length; oi++) {
  108. final long o = offsetsBySha1[oi];
  109. final int bucket = (int) (o / bucketSize);
  110. final int bucketValuesPos = oi + 1;
  111. final int current = bucketIndex[bucket];
  112. bucketIndex[bucket] = bucketValuesPos;
  113. bucketValues[bucketValuesPos] = current;
  114. }
  115. int nthByOffset = 0;
  116. nth = new int[offsetsBySha1.length];
  117. offsetIndex = bucketIndex; // Reuse the allocation
  118. for (int bi = 0; bi < bucketIndex.length; bi++) {
  119. final int start = nthByOffset;
  120. // Insertion sort of the values in the bucket.
  121. for (int vi = bucketIndex[bi]; vi > 0; vi = bucketValues[vi]) {
  122. final int nthBySha1 = vi - 1;
  123. final long o = offsetsBySha1[nthBySha1];
  124. int insertion = nthByOffset++;
  125. for (; start < insertion; insertion--) {
  126. if (o > offsetsBySha1[nth[insertion - 1]])
  127. break;
  128. nth[insertion] = nth[insertion - 1];
  129. }
  130. nth[insertion] = nthBySha1;
  131. }
  132. offsetIndex[bi] = nthByOffset;
  133. }
  134. }
  135. /**
  136. * Search for object id with the specified start offset in this pack
  137. * (reverse) index.
  138. *
  139. * @param offset
  140. * start offset of object to find.
  141. * @return object id for this offset, or null if no object was found.
  142. */
  143. public ObjectId findObject(long offset) {
  144. final int ith = binarySearch(offset);
  145. if (ith < 0)
  146. return null;
  147. return index.getObjectId(nth[ith]);
  148. }
  149. /**
  150. * Search for the next offset to the specified offset in this pack (reverse)
  151. * index.
  152. *
  153. * @param offset
  154. * start offset of previous object (must be valid-existing
  155. * offset).
  156. * @param maxOffset
  157. * maximum offset in a pack (returned when there is no next
  158. * offset).
  159. * @return offset of the next object in a pack or maxOffset if provided
  160. * offset was the last one.
  161. * @throws org.eclipse.jgit.errors.CorruptObjectException
  162. * when there is no object with the provided offset.
  163. */
  164. public long findNextOffset(long offset, long maxOffset)
  165. throws CorruptObjectException {
  166. final int ith = binarySearch(offset);
  167. if (ith < 0)
  168. throw new CorruptObjectException(
  169. MessageFormat.format(
  170. JGitText.get().cantFindObjectInReversePackIndexForTheSpecifiedOffset,
  171. Long.valueOf(offset)));
  172. if (ith + 1 == nth.length)
  173. return maxOffset;
  174. return index.getOffset(nth[ith + 1]);
  175. }
  176. int findPostion(long offset) {
  177. return binarySearch(offset);
  178. }
  179. private int binarySearch(long offset) {
  180. int bucket = (int) (offset / bucketSize);
  181. int low = bucket == 0 ? 0 : offsetIndex[bucket - 1];
  182. int high = offsetIndex[bucket];
  183. while (low < high) {
  184. final int mid = (low + high) >>> 1;
  185. final long o = index.getOffset(nth[mid]);
  186. if (offset < o)
  187. high = mid;
  188. else if (offset == o)
  189. return mid;
  190. else
  191. low = mid + 1;
  192. }
  193. return -1;
  194. }
  195. ObjectId findObjectByPosition(int nthPosition) {
  196. return index.getObjectId(nth[nthPosition]);
  197. }
  198. }