You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

PackIndex.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359
  1. /*
  2. * Copyright (C) 2008, Marek Zawirski <marek.zawirski@gmail.com>
  3. * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
  4. *
  5. * This program and the accompanying materials are made available under the
  6. * terms of the Eclipse Distribution License v. 1.0 which is available at
  7. * https://www.eclipse.org/org/documents/edl-v10.php.
  8. *
  9. * SPDX-License-Identifier: BSD-3-Clause
  10. */
  11. package org.eclipse.jgit.internal.storage.file;
  12. import java.io.File;
  13. import java.io.FileNotFoundException;
  14. import java.io.IOException;
  15. import java.io.InputStream;
  16. import java.text.MessageFormat;
  17. import java.util.Iterator;
  18. import java.util.Set;
  19. import org.eclipse.jgit.errors.CorruptObjectException;
  20. import org.eclipse.jgit.errors.MissingObjectException;
  21. import org.eclipse.jgit.errors.UnsupportedPackIndexVersionException;
  22. import org.eclipse.jgit.internal.JGitText;
  23. import org.eclipse.jgit.lib.AbbreviatedObjectId;
  24. import org.eclipse.jgit.lib.AnyObjectId;
  25. import org.eclipse.jgit.lib.MutableObjectId;
  26. import org.eclipse.jgit.lib.ObjectId;
  27. import org.eclipse.jgit.lib.ObjectIdSet;
  28. import org.eclipse.jgit.util.IO;
  29. import org.eclipse.jgit.util.NB;
  30. import org.eclipse.jgit.util.io.SilentFileInputStream;
  31. /**
  32. * Access path to locate objects by {@link org.eclipse.jgit.lib.ObjectId} in a
  33. * {@link org.eclipse.jgit.internal.storage.file.Pack}.
  34. * <p>
  35. * Indexes are strictly redundant information in that we can rebuild all of the
  36. * data held in the index file from the on disk representation of the pack file
  37. * itself, but it is faster to access for random requests because data is stored
  38. * by ObjectId.
  39. * </p>
  40. */
  41. public abstract class PackIndex
  42. implements Iterable<PackIndex.MutableEntry>, ObjectIdSet {
  43. /**
  44. * Open an existing pack <code>.idx</code> file for reading.
  45. * <p>
  46. * The format of the file will be automatically detected and a proper access
  47. * implementation for that format will be constructed and returned to the
  48. * caller. The file may or may not be held open by the returned instance.
  49. * </p>
  50. *
  51. * @param idxFile
  52. * existing pack .idx to read.
  53. * @return access implementation for the requested file.
  54. * @throws FileNotFoundException
  55. * the file does not exist.
  56. * @throws java.io.IOException
  57. * the file exists but could not be read due to security errors,
  58. * unrecognized data version, or unexpected data corruption.
  59. */
  60. public static PackIndex open(File idxFile) throws IOException {
  61. try (SilentFileInputStream fd = new SilentFileInputStream(
  62. idxFile)) {
  63. return read(fd);
  64. } catch (IOException ioe) {
  65. throw new IOException(
  66. MessageFormat.format(JGitText.get().unreadablePackIndex,
  67. idxFile.getAbsolutePath()),
  68. ioe);
  69. }
  70. }
  71. /**
  72. * Read an existing pack index file from a buffered stream.
  73. * <p>
  74. * The format of the file will be automatically detected and a proper access
  75. * implementation for that format will be constructed and returned to the
  76. * caller. The file may or may not be held open by the returned instance.
  77. *
  78. * @param fd
  79. * stream to read the index file from. The stream must be
  80. * buffered as some small IOs are performed against the stream.
  81. * The caller is responsible for closing the stream.
  82. * @return a copy of the index in-memory.
  83. * @throws java.io.IOException
  84. * the stream cannot be read.
  85. * @throws org.eclipse.jgit.errors.CorruptObjectException
  86. * the stream does not contain a valid pack index.
  87. */
  88. public static PackIndex read(InputStream fd) throws IOException,
  89. CorruptObjectException {
  90. final byte[] hdr = new byte[8];
  91. IO.readFully(fd, hdr, 0, hdr.length);
  92. if (isTOC(hdr)) {
  93. final int v = NB.decodeInt32(hdr, 4);
  94. switch (v) {
  95. case 2:
  96. return new PackIndexV2(fd);
  97. default:
  98. throw new UnsupportedPackIndexVersionException(v);
  99. }
  100. }
  101. return new PackIndexV1(fd, hdr);
  102. }
  103. private static boolean isTOC(byte[] h) {
  104. final byte[] toc = PackIndexWriter.TOC;
  105. for (int i = 0; i < toc.length; i++)
  106. if (h[i] != toc[i])
  107. return false;
  108. return true;
  109. }
  110. /** Footer checksum applied on the bottom of the pack file. */
  111. protected byte[] packChecksum;
  112. /**
  113. * Determine if an object is contained within the pack file.
  114. *
  115. * @param id
  116. * the object to look for. Must not be null.
  117. * @return true if the object is listed in this index; false otherwise.
  118. */
  119. public boolean hasObject(AnyObjectId id) {
  120. return findOffset(id) != -1;
  121. }
  122. /** {@inheritDoc} */
  123. @Override
  124. public boolean contains(AnyObjectId id) {
  125. return findOffset(id) != -1;
  126. }
  127. /**
  128. * {@inheritDoc}
  129. * <p>
  130. * Provide iterator that gives access to index entries. Note, that iterator
  131. * returns reference to mutable object, the same reference in each call -
  132. * for performance reason. If client needs immutable objects, it must copy
  133. * returned object on its own.
  134. * <p>
  135. * Iterator returns objects in SHA-1 lexicographical order.
  136. * </p>
  137. */
  138. @Override
  139. public abstract Iterator<MutableEntry> iterator();
  140. /**
  141. * Obtain the total number of objects described by this index.
  142. *
  143. * @return number of objects in this index, and likewise in the associated
  144. * pack that this index was generated from.
  145. */
  146. public abstract long getObjectCount();
  147. /**
  148. * Obtain the total number of objects needing 64 bit offsets.
  149. *
  150. * @return number of objects in this index using a 64 bit offset; that is an
  151. * object positioned after the 2 GB position within the file.
  152. */
  153. public abstract long getOffset64Count();
  154. /**
  155. * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
  156. * <p>
  157. * This method is a constant-time replacement for the following loop:
  158. *
  159. * <pre>
  160. * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
  161. * int curPosition = 0;
  162. * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
  163. * eItr.next();
  164. * ObjectId result = eItr.next().toObjectId();
  165. * </pre>
  166. *
  167. * @param nthPosition
  168. * position within the traversal of {@link #iterator()} that the
  169. * caller needs the object for. The first returned
  170. * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
  171. * is 0, the second is 1, etc.
  172. * @return the ObjectId for the corresponding entry.
  173. */
  174. public abstract ObjectId getObjectId(long nthPosition);
  175. /**
  176. * Get ObjectId for the n-th object entry returned by {@link #iterator()}.
  177. * <p>
  178. * This method is a constant-time replacement for the following loop:
  179. *
  180. * <pre>
  181. * Iterator&lt;MutableEntry&gt; eItr = index.iterator();
  182. * int curPosition = 0;
  183. * while (eItr.hasNext() &amp;&amp; curPosition++ &lt; nthPosition)
  184. * eItr.next();
  185. * ObjectId result = eItr.next().toObjectId();
  186. * </pre>
  187. *
  188. * @param nthPosition
  189. * unsigned 32 bit position within the traversal of
  190. * {@link #iterator()} that the caller needs the object for. The
  191. * first returned
  192. * {@link org.eclipse.jgit.internal.storage.file.PackIndex.MutableEntry}
  193. * is 0, the second is 1, etc. Positions past 2**31-1 are
  194. * negative, but still valid.
  195. * @return the ObjectId for the corresponding entry.
  196. */
  197. public final ObjectId getObjectId(int nthPosition) {
  198. if (nthPosition >= 0)
  199. return getObjectId((long) nthPosition);
  200. final int u31 = nthPosition >>> 1;
  201. final int one = nthPosition & 1;
  202. return getObjectId(((long) u31) << 1 | one);
  203. }
  204. /**
  205. * Get offset in a pack for the n-th object entry returned by
  206. * {@link #iterator()}.
  207. *
  208. * @param nthPosition
  209. * unsigned 32 bit position within the traversal of
  210. * {@link #iterator()} for which the caller needs the offset. The
  211. * first returned {@link MutableEntry} is 0, the second is 1,
  212. * etc. Positions past 2**31-1 are negative, but still valid.
  213. * @return the offset in a pack for the corresponding entry.
  214. */
  215. abstract long getOffset(long nthPosition);
  216. /**
  217. * Locate the file offset position for the requested object.
  218. *
  219. * @param objId
  220. * name of the object to locate within the pack.
  221. * @return offset of the object's header and compressed content; -1 if the
  222. * object does not exist in this index and is thus not stored in the
  223. * associated pack.
  224. */
  225. public abstract long findOffset(AnyObjectId objId);
  226. /**
  227. * Retrieve stored CRC32 checksum of the requested object raw-data
  228. * (including header).
  229. *
  230. * @param objId
  231. * id of object to look for
  232. * @return CRC32 checksum of specified object (at 32 less significant bits)
  233. * @throws org.eclipse.jgit.errors.MissingObjectException
  234. * when requested ObjectId was not found in this index
  235. * @throws java.lang.UnsupportedOperationException
  236. * when this index doesn't support CRC32 checksum
  237. */
  238. public abstract long findCRC32(AnyObjectId objId)
  239. throws MissingObjectException, UnsupportedOperationException;
  240. /**
  241. * Check whether this index supports (has) CRC32 checksums for objects.
  242. *
  243. * @return true if CRC32 is stored, false otherwise
  244. */
  245. public abstract boolean hasCRC32Support();
  246. /**
  247. * Find objects matching the prefix abbreviation.
  248. *
  249. * @param matches
  250. * set to add any located ObjectIds to. This is an output
  251. * parameter.
  252. * @param id
  253. * prefix to search for.
  254. * @param matchLimit
  255. * maximum number of results to return. At most this many
  256. * ObjectIds should be added to matches before returning.
  257. * @throws java.io.IOException
  258. * the index cannot be read.
  259. */
  260. public abstract void resolve(Set<ObjectId> matches, AbbreviatedObjectId id,
  261. int matchLimit) throws IOException;
  262. /**
  263. * @return the checksum of the pack; caller must not modify it
  264. * @since 5.5
  265. */
  266. public byte[] getChecksum() {
  267. return packChecksum;
  268. }
  269. /**
  270. * Represent mutable entry of pack index consisting of object id and offset
  271. * in pack (both mutable).
  272. *
  273. */
  274. public static class MutableEntry {
  275. final MutableObjectId idBuffer = new MutableObjectId();
  276. long offset;
  277. /**
  278. * Returns offset for this index object entry
  279. *
  280. * @return offset of this object in a pack file
  281. */
  282. public long getOffset() {
  283. return offset;
  284. }
  285. /** @return hex string describing the object id of this entry. */
  286. public String name() {
  287. ensureId();
  288. return idBuffer.name();
  289. }
  290. /** @return a copy of the object id. */
  291. public ObjectId toObjectId() {
  292. ensureId();
  293. return idBuffer.toObjectId();
  294. }
  295. /** @return a complete copy of this entry, that won't modify */
  296. public MutableEntry cloneEntry() {
  297. final MutableEntry r = new MutableEntry();
  298. ensureId();
  299. r.idBuffer.fromObjectId(idBuffer);
  300. r.offset = offset;
  301. return r;
  302. }
  303. void ensureId() {
  304. // Override in implementations.
  305. }
  306. }
  307. abstract class EntriesIterator implements Iterator<MutableEntry> {
  308. protected final MutableEntry entry = initEntry();
  309. protected long returnedNumber = 0;
  310. protected abstract MutableEntry initEntry();
  311. @Override
  312. public boolean hasNext() {
  313. return returnedNumber < getObjectCount();
  314. }
  315. /**
  316. * Implementation must update {@link #returnedNumber} before returning
  317. * element.
  318. */
  319. @Override
  320. public abstract MutableEntry next();
  321. @Override
  322. public void remove() {
  323. throw new UnsupportedOperationException();
  324. }
  325. }
  326. }