You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

PackIndexWriter.java 9.7KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267
  1. /*
  2. * Copyright (C) 2008, Robin Rosenberg <robin.rosenberg@dewire.com>
  3. * Copyright (C) 2008, Shawn O. Pearce <spearce@spearce.org> and others
  4. *
  5. * This program and the accompanying materials are made available under the
  6. * terms of the Eclipse Distribution License v. 1.0 which is available at
  7. * https://www.eclipse.org/org/documents/edl-v10.php.
  8. *
  9. * SPDX-License-Identifier: BSD-3-Clause
  10. */
  11. package org.eclipse.jgit.internal.storage.file;
  12. import java.io.BufferedOutputStream;
  13. import java.io.IOException;
  14. import java.io.OutputStream;
  15. import java.security.DigestOutputStream;
  16. import java.text.MessageFormat;
  17. import java.util.List;
  18. import org.eclipse.jgit.internal.JGitText;
  19. import org.eclipse.jgit.lib.Constants;
  20. import org.eclipse.jgit.transport.PackedObjectInfo;
  21. import org.eclipse.jgit.util.NB;
  22. /**
  23. * Creates a table of contents to support random access by
  24. * {@link org.eclipse.jgit.internal.storage.file.Pack}.
  25. * <p>
  26. * Pack index files (the <code>.idx</code> suffix in a pack file pair) provides
  27. * random access to any object in the pack by associating an ObjectId to the
  28. * byte offset within the pack where the object's data can be read.
  29. */
  30. public abstract class PackIndexWriter {
  31. /** Magic constant indicating post-version 1 format. */
  32. protected static final byte[] TOC = { -1, 't', 'O', 'c' };
  33. /**
  34. * Create a new writer for the oldest (most widely understood) format.
  35. * <p>
  36. * This method selects an index format that can accurate describe the
  37. * supplied objects and that will be the most compatible format with older
  38. * Git implementations.
  39. * <p>
  40. * Index version 1 is widely recognized by all Git implementations, but
  41. * index version 2 (and later) is not as well recognized as it was
  42. * introduced more than a year later. Index version 1 can only be used if
  43. * the resulting pack file is under 4 gigabytes in size; packs larger than
  44. * that limit must use index version 2.
  45. *
  46. * @param dst
  47. * the stream the index data will be written to. If not already
  48. * buffered it will be automatically wrapped in a buffered
  49. * stream. Callers are always responsible for closing the stream.
  50. * @param objs
  51. * the objects the caller needs to store in the index. Entries
  52. * will be examined until a format can be conclusively selected.
  53. * @return a new writer to output an index file of the requested format to
  54. * the supplied stream.
  55. * @throws java.lang.IllegalArgumentException
  56. * no recognized pack index version can support the supplied
  57. * objects. This is likely a bug in the implementation.
  58. * @see #oldestPossibleFormat(List)
  59. */
  60. public static PackIndexWriter createOldestPossible(final OutputStream dst,
  61. final List<? extends PackedObjectInfo> objs) {
  62. return createVersion(dst, oldestPossibleFormat(objs));
  63. }
  64. /**
  65. * Return the oldest (most widely understood) index format.
  66. * <p>
  67. * This method selects an index format that can accurate describe the
  68. * supplied objects and that will be the most compatible format with older
  69. * Git implementations.
  70. * <p>
  71. * Index version 1 is widely recognized by all Git implementations, but
  72. * index version 2 (and later) is not as well recognized as it was
  73. * introduced more than a year later. Index version 1 can only be used if
  74. * the resulting pack file is under 4 gigabytes in size; packs larger than
  75. * that limit must use index version 2.
  76. *
  77. * @param objs
  78. * the objects the caller needs to store in the index. Entries
  79. * will be examined until a format can be conclusively selected.
  80. * @return the index format.
  81. * @throws java.lang.IllegalArgumentException
  82. * no recognized pack index version can support the supplied
  83. * objects. This is likely a bug in the implementation.
  84. */
  85. public static int oldestPossibleFormat(
  86. final List<? extends PackedObjectInfo> objs) {
  87. for (PackedObjectInfo oe : objs) {
  88. if (!PackIndexWriterV1.canStore(oe))
  89. return 2;
  90. }
  91. return 1;
  92. }
  93. /**
  94. * Create a new writer instance for a specific index format version.
  95. *
  96. * @param dst
  97. * the stream the index data will be written to. If not already
  98. * buffered it will be automatically wrapped in a buffered
  99. * stream. Callers are always responsible for closing the stream.
  100. * @param version
  101. * index format version number required by the caller. Exactly
  102. * this formatted version will be written.
  103. * @return a new writer to output an index file of the requested format to
  104. * the supplied stream.
  105. * @throws java.lang.IllegalArgumentException
  106. * the version requested is not supported by this
  107. * implementation.
  108. */
  109. public static PackIndexWriter createVersion(final OutputStream dst,
  110. final int version) {
  111. switch (version) {
  112. case 1:
  113. return new PackIndexWriterV1(dst);
  114. case 2:
  115. return new PackIndexWriterV2(dst);
  116. default:
  117. throw new IllegalArgumentException(MessageFormat.format(
  118. JGitText.get().unsupportedPackIndexVersion,
  119. Integer.valueOf(version)));
  120. }
  121. }
  122. /** The index data stream we are responsible for creating. */
  123. protected final DigestOutputStream out;
  124. /** A temporary buffer for use during IO to {link #out}. */
  125. protected final byte[] tmp;
  126. /** The entries this writer must pack. */
  127. protected List<? extends PackedObjectInfo> entries;
  128. /** SHA-1 checksum for the entire pack data. */
  129. protected byte[] packChecksum;
  130. /**
  131. * Create a new writer instance.
  132. *
  133. * @param dst
  134. * the stream this instance outputs to. If not already buffered
  135. * it will be automatically wrapped in a buffered stream.
  136. */
  137. protected PackIndexWriter(OutputStream dst) {
  138. out = new DigestOutputStream(dst instanceof BufferedOutputStream ? dst
  139. : new BufferedOutputStream(dst),
  140. Constants.newMessageDigest());
  141. tmp = new byte[4 + Constants.OBJECT_ID_LENGTH];
  142. }
  143. /**
  144. * Write all object entries to the index stream.
  145. * <p>
  146. * After writing the stream passed to the factory is flushed but remains
  147. * open. Callers are always responsible for closing the output stream.
  148. *
  149. * @param toStore
  150. * sorted list of objects to store in the index. The caller must
  151. * have previously sorted the list using
  152. * {@link org.eclipse.jgit.transport.PackedObjectInfo}'s native
  153. * {@link java.lang.Comparable} implementation.
  154. * @param packDataChecksum
  155. * checksum signature of the entire pack data content. This is
  156. * traditionally the last 20 bytes of the pack file's own stream.
  157. * @throws java.io.IOException
  158. * an error occurred while writing to the output stream, or this
  159. * index format cannot store the object data supplied.
  160. */
  161. public void write(final List<? extends PackedObjectInfo> toStore,
  162. final byte[] packDataChecksum) throws IOException {
  163. entries = toStore;
  164. packChecksum = packDataChecksum;
  165. writeImpl();
  166. out.flush();
  167. }
  168. /**
  169. * Writes the index file to {@link #out}.
  170. * <p>
  171. * Implementations should go something like:
  172. *
  173. * <pre>
  174. * writeFanOutTable();
  175. * for (final PackedObjectInfo po : entries)
  176. * writeOneEntry(po);
  177. * writeChecksumFooter();
  178. * </pre>
  179. *
  180. * <p>
  181. * Where the logic for <code>writeOneEntry</code> is specific to the index
  182. * format in use. Additional headers/footers may be used if necessary and
  183. * the {@link #entries} collection may be iterated over more than once if
  184. * necessary. Implementors therefore have complete control over the data.
  185. *
  186. * @throws java.io.IOException
  187. * an error occurred while writing to the output stream, or this
  188. * index format cannot store the object data supplied.
  189. */
  190. protected abstract void writeImpl() throws IOException;
  191. /**
  192. * Output the version 2 (and later) TOC header, with version number.
  193. * <p>
  194. * Post version 1 all index files start with a TOC header that makes the
  195. * file an invalid version 1 file, and then includes the version number.
  196. * This header is necessary to recognize a version 1 from a version 2
  197. * formatted index.
  198. *
  199. * @param version
  200. * version number of this index format being written.
  201. * @throws java.io.IOException
  202. * an error occurred while writing to the output stream.
  203. */
  204. protected void writeTOC(int version) throws IOException {
  205. out.write(TOC);
  206. NB.encodeInt32(tmp, 0, version);
  207. out.write(tmp, 0, 4);
  208. }
  209. /**
  210. * Output the standard 256 entry first-level fan-out table.
  211. * <p>
  212. * The fan-out table is 4 KB in size, holding 256 32-bit unsigned integer
  213. * counts. Each count represents the number of objects within this index
  214. * whose {@link org.eclipse.jgit.lib.ObjectId#getFirstByte()} matches the
  215. * count's position in the fan-out table.
  216. *
  217. * @throws java.io.IOException
  218. * an error occurred while writing to the output stream.
  219. */
  220. protected void writeFanOutTable() throws IOException {
  221. final int[] fanout = new int[256];
  222. for (PackedObjectInfo po : entries)
  223. fanout[po.getFirstByte() & 0xff]++;
  224. for (int i = 1; i < 256; i++)
  225. fanout[i] += fanout[i - 1];
  226. for (int n : fanout) {
  227. NB.encodeInt32(tmp, 0, n);
  228. out.write(tmp, 0, 4);
  229. }
  230. }
  231. /**
  232. * Output the standard two-checksum index footer.
  233. * <p>
  234. * The standard footer contains two checksums (20 byte SHA-1 values):
  235. * <ol>
  236. * <li>Pack data checksum - taken from the last 20 bytes of the pack file.</li>
  237. * <li>Index data checksum - checksum of all index bytes written, including
  238. * the pack data checksum above.</li>
  239. * </ol>
  240. *
  241. * @throws java.io.IOException
  242. * an error occurred while writing to the output stream.
  243. */
  244. protected void writeChecksumFooter() throws IOException {
  245. out.write(packChecksum);
  246. out.on(false);
  247. out.write(out.getMessageDigest().digest());
  248. }
  249. }