You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ChunkIndex.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428
  1. /*
  2. * Copyright (C) 2011, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.storage.dht;
  44. import java.text.MessageFormat;
  45. import java.util.Collections;
  46. import java.util.List;
  47. import org.eclipse.jgit.lib.AnyObjectId;
  48. import static org.eclipse.jgit.lib.Constants.*;
  49. import org.eclipse.jgit.lib.ObjectId;
  50. import org.eclipse.jgit.transport.PackedObjectInfo;
  51. import org.eclipse.jgit.util.NB;
  52. /** Index into a {@link PackChunk}. */
  53. public abstract class ChunkIndex {
  54. private static final int V1 = 0x01;
  55. static ChunkIndex fromBytes(ChunkKey key, byte[] index, int ptr, int len)
  56. throws DhtException {
  57. int v = index[ptr] & 0xff;
  58. switch (v) {
  59. case V1: {
  60. final int offsetFormat = index[ptr + 1] & 7;
  61. switch (offsetFormat) {
  62. case 1:
  63. return new Offset1(index, ptr, len, key);
  64. case 2:
  65. return new Offset2(index, ptr, len, key);
  66. case 3:
  67. return new Offset3(index, ptr, len, key);
  68. case 4:
  69. return new Offset4(index, ptr, len, key);
  70. default:
  71. throw new DhtException(MessageFormat.format(
  72. DhtText.get().unsupportedChunkIndex,
  73. Integer.toHexString(NB.decodeUInt16(index, ptr)), key));
  74. }
  75. }
  76. default:
  77. throw new DhtException(MessageFormat.format(
  78. DhtText.get().unsupportedChunkIndex,
  79. Integer.toHexString(v), key));
  80. }
  81. }
  82. /**
  83. * Format the chunk index and return its binary representation.
  84. *
  85. * @param list
  86. * the list of objects that appear in the chunk. This list will
  87. * be sorted in-place if it has more than 1 element.
  88. * @return binary representation of the chunk's objects and their starting
  89. * offsets. The format is private to this class.
  90. */
  91. @SuppressWarnings("null")
  92. static byte[] create(List<? extends PackedObjectInfo> list) {
  93. int cnt = list.size();
  94. sortObjectList(list);
  95. int fanoutFormat = 0;
  96. int[] buckets = null;
  97. if (64 < cnt) {
  98. buckets = new int[256];
  99. for (PackedObjectInfo oe : list)
  100. buckets[oe.getFirstByte()]++;
  101. fanoutFormat = selectFanoutFormat(buckets);
  102. }
  103. int offsetFormat = selectOffsetFormat(list);
  104. byte[] index = new byte[2 // header
  105. + 256 * fanoutFormat // (optional) fanout
  106. + cnt * OBJECT_ID_LENGTH // ids
  107. + cnt * offsetFormat // offsets
  108. ];
  109. index[0] = V1;
  110. index[1] = (byte) ((fanoutFormat << 3) | offsetFormat);
  111. int ptr = 2;
  112. switch (fanoutFormat) {
  113. case 0:
  114. break;
  115. case 1:
  116. for (int i = 0; i < 256; i++, ptr++)
  117. index[ptr] = (byte) buckets[i];
  118. break;
  119. case 2:
  120. for (int i = 0; i < 256; i++, ptr += 2)
  121. NB.encodeInt16(index, ptr, buckets[i]);
  122. break;
  123. case 3:
  124. for (int i = 0; i < 256; i++, ptr += 3)
  125. encodeUInt24(index, ptr, buckets[i]);
  126. break;
  127. case 4:
  128. for (int i = 0; i < 256; i++, ptr += 4)
  129. NB.encodeInt32(index, ptr, buckets[i]);
  130. break;
  131. }
  132. for (PackedObjectInfo oe : list) {
  133. oe.copyRawTo(index, ptr);
  134. ptr += OBJECT_ID_LENGTH;
  135. }
  136. switch (offsetFormat) {
  137. case 1:
  138. for (PackedObjectInfo oe : list)
  139. index[ptr++] = (byte) oe.getOffset();
  140. break;
  141. case 2:
  142. for (PackedObjectInfo oe : list) {
  143. NB.encodeInt16(index, ptr, (int) oe.getOffset());
  144. ptr += 2;
  145. }
  146. break;
  147. case 3:
  148. for (PackedObjectInfo oe : list) {
  149. encodeUInt24(index, ptr, (int) oe.getOffset());
  150. ptr += 3;
  151. }
  152. break;
  153. case 4:
  154. for (PackedObjectInfo oe : list) {
  155. NB.encodeInt32(index, ptr, (int) oe.getOffset());
  156. ptr += 4;
  157. }
  158. break;
  159. }
  160. return index;
  161. }
  162. private static int selectFanoutFormat(int[] buckets) {
  163. int fmt = 1;
  164. int max = 1 << (8 * fmt);
  165. for (int cnt : buckets) {
  166. while (max <= cnt && fmt < 4) {
  167. if (++fmt == 4)
  168. return fmt;
  169. max = 1 << (8 * fmt);
  170. }
  171. }
  172. return fmt;
  173. }
  174. private static int selectOffsetFormat(List<? extends PackedObjectInfo> list) {
  175. int fmt = 1;
  176. int max = 1 << (8 * fmt);
  177. for (PackedObjectInfo oe : list) {
  178. while (max <= oe.getOffset() && fmt < 4) {
  179. if (++fmt == 4)
  180. return fmt;
  181. max = 1 << (8 * fmt);
  182. }
  183. }
  184. return fmt;
  185. }
  186. private static void sortObjectList(List<? extends PackedObjectInfo> list) {
  187. Collections.sort(list);
  188. }
  189. private final byte[] indexBuf;
  190. private final int indexPtr;
  191. private final int indexLen;
  192. private final int[] fanout;
  193. private final int idTable;
  194. private final int offsetTable;
  195. private final int count;
  196. ChunkIndex(byte[] indexBuf, int ptr, int len, ChunkKey key)
  197. throws DhtException {
  198. final int ctl = indexBuf[ptr + 1];
  199. final int fanoutFormat = (ctl >>> 3) & 7;
  200. final int offsetFormat = ctl & 7;
  201. switch (fanoutFormat) {
  202. case 0:
  203. fanout = null; // no fanout, too small
  204. break;
  205. case 1: {
  206. int last = 0;
  207. fanout = new int[256];
  208. for (int i = 0; i < 256; i++) {
  209. last += indexBuf[ptr + 2 + i] & 0xff;
  210. fanout[i] = last;
  211. }
  212. break;
  213. }
  214. case 2: {
  215. int last = 0;
  216. fanout = new int[256];
  217. for (int i = 0; i < 256; i++) {
  218. last += NB.decodeUInt16(indexBuf, ptr + 2 + i * 2);
  219. fanout[i] = last;
  220. }
  221. break;
  222. }
  223. case 3: {
  224. int last = 0;
  225. fanout = new int[256];
  226. for (int i = 0; i < 256; i++) {
  227. last += decodeUInt24(indexBuf, ptr + 2 + i * 3);
  228. fanout[i] = last;
  229. }
  230. break;
  231. }
  232. case 4: {
  233. int last = 0;
  234. fanout = new int[256];
  235. for (int i = 0; i < 256; i++) {
  236. last += NB.decodeInt32(indexBuf, ptr + 2 + i * 4);
  237. fanout[i] = last;
  238. }
  239. break;
  240. }
  241. default:
  242. throw new DhtException(MessageFormat.format(
  243. DhtText.get().unsupportedChunkIndex,
  244. Integer.toHexString(NB.decodeUInt16(indexBuf, ptr)), key));
  245. }
  246. this.indexBuf = indexBuf;
  247. this.indexPtr = ptr;
  248. this.indexLen = len;
  249. this.idTable = indexPtr + 2 + 256 * fanoutFormat;
  250. int recsz = OBJECT_ID_LENGTH + offsetFormat;
  251. this.count = (indexLen - (idTable - indexPtr)) / recsz;
  252. this.offsetTable = idTable + count * OBJECT_ID_LENGTH;
  253. }
  254. /**
  255. * Get the total number of objects described by this index.
  256. *
  257. * @return number of objects in this index and its associated chunk.
  258. */
  259. public final int getObjectCount() {
  260. return count;
  261. }
  262. /**
  263. * Get an ObjectId from this index.
  264. *
  265. * @param nth
  266. * the object to return. Must be in range [0, getObjectCount).
  267. * @return the object id.
  268. */
  269. public final ObjectId getObjectId(int nth) {
  270. return ObjectId.fromRaw(indexBuf, idPosition(nth));
  271. }
  272. /**
  273. * Get the offset of an object in the chunk.
  274. *
  275. * @param nth
  276. * offset to return. Must be in range [0, getObjectCount).
  277. * @return the offset.
  278. */
  279. public final int getOffset(int nth) {
  280. return getOffset(indexBuf, offsetTable, nth);
  281. }
  282. /** @return the size of this index, in bytes. */
  283. int getIndexSize() {
  284. int sz = indexBuf.length;
  285. if (fanout != null)
  286. sz += 12 + 256 * 4;
  287. return sz;
  288. }
  289. /**
  290. * Search for an object in the index.
  291. *
  292. * @param objId
  293. * the object to locate.
  294. * @return offset of the object in the corresponding chunk; -1 if not found.
  295. */
  296. final int findOffset(AnyObjectId objId) {
  297. int hi, lo;
  298. if (fanout != null) {
  299. int fb = objId.getFirstByte();
  300. lo = fb == 0 ? 0 : fanout[fb - 1];
  301. hi = fanout[fb];
  302. } else {
  303. lo = 0;
  304. hi = count;
  305. }
  306. while (lo < hi) {
  307. final int mid = (lo + hi) >>> 1;
  308. final int cmp = objId.compareTo(indexBuf, idPosition(mid));
  309. if (cmp < 0)
  310. hi = mid;
  311. else if (cmp == 0)
  312. return getOffset(mid);
  313. else
  314. lo = mid + 1;
  315. }
  316. return -1;
  317. }
  318. abstract int getOffset(byte[] indexArray, int offsetTableStart, int nth);
  319. private int idPosition(int nth) {
  320. return idTable + (nth * OBJECT_ID_LENGTH);
  321. }
  322. private static class Offset1 extends ChunkIndex {
  323. Offset1(byte[] index, int ptr, int len, ChunkKey key)
  324. throws DhtException {
  325. super(index, ptr, len, key);
  326. }
  327. int getOffset(byte[] index, int offsetTable, int nth) {
  328. return index[offsetTable + nth] & 0xff;
  329. }
  330. }
  331. private static class Offset2 extends ChunkIndex {
  332. Offset2(byte[] index, int ptr, int len, ChunkKey key)
  333. throws DhtException {
  334. super(index, ptr, len, key);
  335. }
  336. int getOffset(byte[] index, int offsetTable, int nth) {
  337. return NB.decodeUInt16(index, offsetTable + (nth * 2));
  338. }
  339. }
  340. private static class Offset3 extends ChunkIndex {
  341. Offset3(byte[] index, int ptr, int len, ChunkKey key)
  342. throws DhtException {
  343. super(index, ptr, len, key);
  344. }
  345. int getOffset(byte[] index, int offsetTable, int nth) {
  346. return decodeUInt24(index, offsetTable + (nth * 3));
  347. }
  348. }
  349. private static class Offset4 extends ChunkIndex {
  350. Offset4(byte[] index, int ptr, int len, ChunkKey key)
  351. throws DhtException {
  352. super(index, ptr, len, key);
  353. }
  354. int getOffset(byte[] index, int offsetTable, int nth) {
  355. return NB.decodeInt32(index, offsetTable + (nth * 4));
  356. }
  357. }
  358. private static void encodeUInt24(byte[] intbuf, int offset, int v) {
  359. intbuf[offset + 2] = (byte) v;
  360. v >>>= 8;
  361. intbuf[offset + 1] = (byte) v;
  362. v >>>= 8;
  363. intbuf[offset] = (byte) v;
  364. }
  365. private static int decodeUInt24(byte[] intbuf, int offset) {
  366. int r = (intbuf[offset] & 0xff) << 8;
  367. r |= intbuf[offset + 1] & 0xff;
  368. r <<= 8;
  369. r |= intbuf[offset + 2] & 0xff;
  370. return r;
  371. }
  372. }