You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

ChunkIndex.java 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429
  1. /*
  2. * Copyright (C) 2011, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.storage.dht;
  44. import java.text.MessageFormat;
  45. import java.util.Collections;
  46. import java.util.List;
  47. import org.eclipse.jgit.lib.AnyObjectId;
  48. import static org.eclipse.jgit.lib.Constants.*;
  49. import org.eclipse.jgit.lib.ObjectId;
  50. import org.eclipse.jgit.transport.PackedObjectInfo;
  51. import org.eclipse.jgit.util.NB;
  52. /** Index into a {@link PackChunk}. */
  53. public abstract class ChunkIndex {
  54. private static final int V1 = 0x01;
  55. static ChunkIndex fromBytes(ChunkKey key, byte[] index, int ptr, int len)
  56. throws DhtException {
  57. int v = index[ptr] & 0xff;
  58. switch (v) {
  59. case V1: {
  60. final int offsetFormat = index[ptr + 1] & 7;
  61. switch (offsetFormat) {
  62. case 1:
  63. return new Offset1(index, ptr, len, key);
  64. case 2:
  65. return new Offset2(index, ptr, len, key);
  66. case 3:
  67. return new Offset3(index, ptr, len, key);
  68. case 4:
  69. return new Offset4(index, ptr, len, key);
  70. default:
  71. throw new DhtException(MessageFormat.format(
  72. DhtText.get().unsupportedChunkIndex,
  73. Integer.toHexString(NB.decodeUInt16(index, ptr)), key));
  74. }
  75. }
  76. default:
  77. throw new DhtException(MessageFormat.format(
  78. DhtText.get().unsupportedChunkIndex,
  79. Integer.toHexString(v), key));
  80. }
  81. }
  82. /**
  83. * Format the chunk index and return its binary representation.
  84. *
  85. * @param list
  86. * the list of objects that appear in the chunk. This list will
  87. * be sorted in-place if it has more than 1 element.
  88. * @return binary representation of the chunk's objects and their starting
  89. * offsets. The format is private to this class.
  90. */
  91. @SuppressWarnings("null")
  92. static byte[] create(List<? extends PackedObjectInfo> list) {
  93. int cnt = list.size();
  94. sortObjectList(list);
  95. int fanoutFormat = 0;
  96. int[] buckets = null;
  97. if (64 < cnt) {
  98. buckets = new int[256];
  99. for (PackedObjectInfo oe : list)
  100. buckets[oe.getFirstByte()]++;
  101. fanoutFormat = selectFanoutFormat(buckets);
  102. }
  103. int offsetFormat = selectOffsetFormat(list);
  104. byte[] index = new byte[2 // header
  105. + 256 * fanoutFormat // (optional) fanout
  106. + cnt * OBJECT_ID_LENGTH // ids
  107. + cnt * offsetFormat // offsets
  108. ];
  109. index[0] = V1;
  110. index[1] = (byte) ((fanoutFormat << 3) | offsetFormat);
  111. int ptr = 2;
  112. switch (fanoutFormat) {
  113. case 0:
  114. break;
  115. case 1:
  116. for (int i = 0; i < 256; i++, ptr++)
  117. index[ptr] = (byte) buckets[i];
  118. break;
  119. case 2:
  120. for (int i = 0; i < 256; i++, ptr += 2)
  121. NB.encodeInt16(index, ptr, buckets[i]);
  122. break;
  123. case 3:
  124. for (int i = 0; i < 256; i++, ptr += 3)
  125. encodeUInt24(index, ptr, buckets[i]);
  126. break;
  127. case 4:
  128. for (int i = 0; i < 256; i++, ptr += 4)
  129. NB.encodeInt32(index, ptr, buckets[i]);
  130. break;
  131. }
  132. for (PackedObjectInfo oe : list) {
  133. oe.copyRawTo(index, ptr);
  134. ptr += OBJECT_ID_LENGTH;
  135. }
  136. switch (offsetFormat) {
  137. case 1:
  138. for (PackedObjectInfo oe : list)
  139. index[ptr++] = (byte) oe.getOffset();
  140. break;
  141. case 2:
  142. for (PackedObjectInfo oe : list) {
  143. NB.encodeInt16(index, ptr, (int) oe.getOffset());
  144. ptr += 2;
  145. }
  146. break;
  147. case 3:
  148. for (PackedObjectInfo oe : list) {
  149. encodeUInt24(index, ptr, (int) oe.getOffset());
  150. ptr += 3;
  151. }
  152. break;
  153. case 4:
  154. for (PackedObjectInfo oe : list) {
  155. NB.encodeInt32(index, ptr, (int) oe.getOffset());
  156. ptr += 4;
  157. }
  158. break;
  159. }
  160. return index;
  161. }
  162. private static int selectFanoutFormat(int[] buckets) {
  163. int fmt = 1;
  164. int max = 1 << (8 * fmt);
  165. for (int cnt : buckets) {
  166. while (max <= cnt && fmt < 4) {
  167. if (++fmt == 4)
  168. return fmt;
  169. max = 1 << (8 * fmt);
  170. }
  171. }
  172. return fmt;
  173. }
  174. private static int selectOffsetFormat(List<? extends PackedObjectInfo> list) {
  175. int fmt = 1;
  176. int max = 1 << (8 * fmt);
  177. for (PackedObjectInfo oe : list) {
  178. while (max <= oe.getOffset() && fmt < 4) {
  179. if (++fmt == 4)
  180. return fmt;
  181. max = 1 << (8 * fmt);
  182. }
  183. }
  184. return fmt;
  185. }
  186. @SuppressWarnings("unchecked")
  187. private static void sortObjectList(List<? extends PackedObjectInfo> list) {
  188. Collections.sort(list);
  189. }
  190. private final byte[] indexBuf;
  191. private final int indexPtr;
  192. private final int indexLen;
  193. private final int[] fanout;
  194. private final int idTable;
  195. private final int offsetTable;
  196. private final int count;
  197. ChunkIndex(byte[] indexBuf, int ptr, int len, ChunkKey key)
  198. throws DhtException {
  199. final int ctl = indexBuf[ptr + 1];
  200. final int fanoutFormat = (ctl >>> 3) & 7;
  201. final int offsetFormat = ctl & 7;
  202. switch (fanoutFormat) {
  203. case 0:
  204. fanout = null; // no fanout, too small
  205. break;
  206. case 1: {
  207. int last = 0;
  208. fanout = new int[256];
  209. for (int i = 0; i < 256; i++) {
  210. last += indexBuf[ptr + 2 + i] & 0xff;
  211. fanout[i] = last;
  212. }
  213. break;
  214. }
  215. case 2: {
  216. int last = 0;
  217. fanout = new int[256];
  218. for (int i = 0; i < 256; i++) {
  219. last += NB.decodeUInt16(indexBuf, ptr + 2 + i * 2);
  220. fanout[i] = last;
  221. }
  222. break;
  223. }
  224. case 3: {
  225. int last = 0;
  226. fanout = new int[256];
  227. for (int i = 0; i < 256; i++) {
  228. last += decodeUInt24(indexBuf, ptr + 2 + i * 3);
  229. fanout[i] = last;
  230. }
  231. break;
  232. }
  233. case 4: {
  234. int last = 0;
  235. fanout = new int[256];
  236. for (int i = 0; i < 256; i++) {
  237. last += NB.decodeInt32(indexBuf, ptr + 2 + i * 4);
  238. fanout[i] = last;
  239. }
  240. break;
  241. }
  242. default:
  243. throw new DhtException(MessageFormat.format(
  244. DhtText.get().unsupportedChunkIndex,
  245. Integer.toHexString(NB.decodeUInt16(indexBuf, ptr)), key));
  246. }
  247. this.indexBuf = indexBuf;
  248. this.indexPtr = ptr;
  249. this.indexLen = len;
  250. this.idTable = indexPtr + 2 + 256 * fanoutFormat;
  251. int recsz = OBJECT_ID_LENGTH + offsetFormat;
  252. this.count = (indexLen - (idTable - indexPtr)) / recsz;
  253. this.offsetTable = idTable + count * OBJECT_ID_LENGTH;
  254. }
  255. /**
  256. * Get the total number of objects described by this index.
  257. *
  258. * @return number of objects in this index and its associated chunk.
  259. */
  260. public final int getObjectCount() {
  261. return count;
  262. }
  263. /**
  264. * Get an ObjectId from this index.
  265. *
  266. * @param nth
  267. * the object to return. Must be in range [0, getObjectCount).
  268. * @return the object id.
  269. */
  270. public final ObjectId getObjectId(int nth) {
  271. return ObjectId.fromRaw(indexBuf, idPosition(nth));
  272. }
  273. /**
  274. * Get the offset of an object in the chunk.
  275. *
  276. * @param nth
  277. * offset to return. Must be in range [0, getObjectCount).
  278. * @return the offset.
  279. */
  280. public final int getOffset(int nth) {
  281. return getOffset(indexBuf, offsetTable, nth);
  282. }
  283. /** @return the size of this index, in bytes. */
  284. int getIndexSize() {
  285. int sz = indexBuf.length;
  286. if (fanout != null)
  287. sz += 12 + 256 * 4;
  288. return sz;
  289. }
  290. /**
  291. * Search for an object in the index.
  292. *
  293. * @param objId
  294. * the object to locate.
  295. * @return offset of the object in the corresponding chunk; -1 if not found.
  296. */
  297. final int findOffset(AnyObjectId objId) {
  298. int hi, lo;
  299. if (fanout != null) {
  300. int fb = objId.getFirstByte();
  301. lo = fb == 0 ? 0 : fanout[fb - 1];
  302. hi = fanout[fb];
  303. } else {
  304. lo = 0;
  305. hi = count;
  306. }
  307. while (lo < hi) {
  308. final int mid = (lo + hi) >>> 1;
  309. final int cmp = objId.compareTo(indexBuf, idPosition(mid));
  310. if (cmp < 0)
  311. hi = mid;
  312. else if (cmp == 0)
  313. return getOffset(mid);
  314. else
  315. lo = mid + 1;
  316. }
  317. return -1;
  318. }
  319. abstract int getOffset(byte[] indexArray, int offsetTableStart, int nth);
  320. private int idPosition(int nth) {
  321. return idTable + (nth * OBJECT_ID_LENGTH);
  322. }
  323. private static class Offset1 extends ChunkIndex {
  324. Offset1(byte[] index, int ptr, int len, ChunkKey key)
  325. throws DhtException {
  326. super(index, ptr, len, key);
  327. }
  328. int getOffset(byte[] index, int offsetTable, int nth) {
  329. return index[offsetTable + nth] & 0xff;
  330. }
  331. }
  332. private static class Offset2 extends ChunkIndex {
  333. Offset2(byte[] index, int ptr, int len, ChunkKey key)
  334. throws DhtException {
  335. super(index, ptr, len, key);
  336. }
  337. int getOffset(byte[] index, int offsetTable, int nth) {
  338. return NB.decodeUInt16(index, offsetTable + (nth * 2));
  339. }
  340. }
  341. private static class Offset3 extends ChunkIndex {
  342. Offset3(byte[] index, int ptr, int len, ChunkKey key)
  343. throws DhtException {
  344. super(index, ptr, len, key);
  345. }
  346. int getOffset(byte[] index, int offsetTable, int nth) {
  347. return decodeUInt24(index, offsetTable + (nth * 3));
  348. }
  349. }
  350. private static class Offset4 extends ChunkIndex {
  351. Offset4(byte[] index, int ptr, int len, ChunkKey key)
  352. throws DhtException {
  353. super(index, ptr, len, key);
  354. }
  355. int getOffset(byte[] index, int offsetTable, int nth) {
  356. return NB.decodeInt32(index, offsetTable + (nth * 4));
  357. }
  358. }
  359. private static void encodeUInt24(byte[] intbuf, int offset, int v) {
  360. intbuf[offset + 2] = (byte) v;
  361. v >>>= 8;
  362. intbuf[offset + 1] = (byte) v;
  363. v >>>= 8;
  364. intbuf[offset] = (byte) v;
  365. }
  366. private static int decodeUInt24(byte[] intbuf, int offset) {
  367. int r = (intbuf[offset] & 0xff) << 8;
  368. r |= intbuf[offset + 1] & 0xff;
  369. r <<= 8;
  370. r |= intbuf[offset + 2] & 0xff;
  371. return r;
  372. }
  373. }