You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

OffsetCache.java 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539
  1. /*
  2. * Copyright (C) 2009, Google Inc.
  3. * and other copyright owners as documented in the project's IP log.
  4. *
  5. * This program and the accompanying materials are made available
  6. * under the terms of the Eclipse Distribution License v1.0 which
  7. * accompanies this distribution, is reproduced below, and is
  8. * available at http://www.eclipse.org/org/documents/edl-v10.php
  9. *
  10. * All rights reserved.
  11. *
  12. * Redistribution and use in source and binary forms, with or
  13. * without modification, are permitted provided that the following
  14. * conditions are met:
  15. *
  16. * - Redistributions of source code must retain the above copyright
  17. * notice, this list of conditions and the following disclaimer.
  18. *
  19. * - Redistributions in binary form must reproduce the above
  20. * copyright notice, this list of conditions and the following
  21. * disclaimer in the documentation and/or other materials provided
  22. * with the distribution.
  23. *
  24. * - Neither the name of the Eclipse Foundation, Inc. nor the
  25. * names of its contributors may be used to endorse or promote
  26. * products derived from this software without specific prior
  27. * written permission.
  28. *
  29. * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
  30. * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
  31. * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  32. * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  33. * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
  34. * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
  35. * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36. * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
  37. * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  38. * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
  39. * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  40. * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
  41. * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  42. */
  43. package org.eclipse.jgit.lib;
  44. import java.io.IOException;
  45. import java.lang.ref.ReferenceQueue;
  46. import java.lang.ref.SoftReference;
  47. import java.util.Random;
  48. import java.util.concurrent.atomic.AtomicLong;
  49. import java.util.concurrent.atomic.AtomicReferenceArray;
  50. import java.util.concurrent.locks.ReentrantLock;
  51. /**
  52. * Least frequently used cache for objects specified by PackFile positions.
  53. * <p>
  54. * This cache maps a <code>({@link PackFile},position)</code> tuple to an Object.
  55. * <p>
  56. * This cache is suitable for objects that are "relative expensive" to compute
  57. * from the underlying PackFile, given some known position in that file.
  58. * <p>
  59. * Whenever a cache miss occurs, {@link #load(PackFile, long)} is invoked by
  60. * exactly one thread for the given <code>(PackFile,position)</code> key tuple.
  61. * This is ensured by an array of locks, with the tuple hashed to a lock
  62. * instance.
  63. * <p>
  64. * During a miss, older entries are evicted from the cache so long as
  65. * {@link #isFull()} returns true.
  66. * <p>
  67. * Its too expensive during object access to be 100% accurate with a least
  68. * recently used (LRU) algorithm. Strictly ordering every read is a lot of
  69. * overhead that typically doesn't yield a corresponding benefit to the
  70. * application.
  71. * <p>
  72. * This cache implements a loose LRU policy by randomly picking a window
  73. * comprised of roughly 10% of the cache, and evicting the oldest accessed entry
  74. * within that window.
  75. * <p>
  76. * Entities created by the cache are held under SoftReferences, permitting the
  77. * Java runtime's garbage collector to evict entries when heap memory gets low.
  78. * Most JREs implement a loose least recently used algorithm for this eviction.
  79. * <p>
  80. * The internal hash table does not expand at runtime, instead it is fixed in
  81. * size at cache creation time. The internal lock table used to gate load
  82. * invocations is also fixed in size.
  83. * <p>
  84. * The key tuple is passed through to methods as a pair of parameters rather
  85. * than as a single Object, thus reducing the transient memory allocations of
  86. * callers. It is more efficient to avoid the allocation, as we can't be 100%
  87. * sure that a JIT would be able to stack-allocate a key tuple.
  88. * <p>
  89. * This cache has an implementation rule such that:
  90. * <ul>
  91. * <li>{@link #load(PackFile, long)} is invoked by at most one thread at a time
  92. * for a given <code>(PackFile,position)</code> tuple.</li>
  93. * <li>For every <code>load()</code> invocation there is exactly one
  94. * {@link #createRef(PackFile, long, Object)} invocation to wrap a SoftReference
  95. * around the cached entity.</li>
  96. * <li>For every Reference created by <code>createRef()</code> there will be
  97. * exactly one call to {@link #clear(Ref)} to cleanup any resources associated
  98. * with the (now expired) cached entity.</li>
  99. * </ul>
  100. * <p>
  101. * Therefore, it is safe to perform resource accounting increments during the
  102. * {@link #load(PackFile, long)} or {@link #createRef(PackFile, long, Object)}
  103. * methods, and matching decrements during {@link #clear(Ref)}. Implementors may
  104. * need to override {@link #createRef(PackFile, long, Object)} in order to embed
  105. * additional accounting information into an implementation specific
  106. * {@link OffsetCache.Ref} subclass, as the cached entity may have already been
  107. * evicted by the JRE's garbage collector.
  108. * <p>
  109. * To maintain higher concurrency workloads, during eviction only one thread
  110. * performs the eviction work, while other threads can continue to insert new
  111. * objects in parallel. This means that the cache can be temporarily over limit,
  112. * especially if the nominated eviction thread is being starved relative to the
  113. * other threads.
  114. *
  115. * @param <V>
  116. * type of value stored in the cache.
  117. * @param <R>
  118. * type of {@link OffsetCache.Ref} subclass used by the cache.
  119. */
  120. abstract class OffsetCache<V, R extends OffsetCache.Ref<V>> {
  121. private static final Random rng = new Random();
  122. /** ReferenceQueue that {@link #createRef(PackFile, long, Object)} must use. */
  123. protected final ReferenceQueue<V> queue;
  124. /** Number of entries in {@link #table}. */
  125. private final int tableSize;
  126. /** Access clock for loose LRU. */
  127. private final AtomicLong clock;
  128. /** Hash bucket directory; entries are chained below. */
  129. private final AtomicReferenceArray<Entry<V>> table;
  130. /** Locks to prevent concurrent loads for same (PackFile,position). */
  131. private final Lock[] locks;
  132. /** Lock to elect the eviction thread after a load occurs. */
  133. private final ReentrantLock evictLock;
  134. /** Number of {@link #table} buckets to scan for an eviction window. */
  135. private final int evictBatch;
  136. /**
  137. * Create a new cache with a fixed size entry table and lock table.
  138. *
  139. * @param tSize
  140. * number of entries in the entry hash table.
  141. * @param lockCount
  142. * number of entries in the lock table. This is the maximum
  143. * concurrency rate for creation of new objects through
  144. * {@link #load(PackFile, long)} invocations.
  145. */
  146. OffsetCache(final int tSize, final int lockCount) {
  147. if (tSize < 1)
  148. throw new IllegalArgumentException("tSize must be >= 1");
  149. if (lockCount < 1)
  150. throw new IllegalArgumentException("lockCount must be >= 1");
  151. queue = new ReferenceQueue<V>();
  152. tableSize = tSize;
  153. clock = new AtomicLong(1);
  154. table = new AtomicReferenceArray<Entry<V>>(tableSize);
  155. locks = new Lock[lockCount];
  156. for (int i = 0; i < locks.length; i++)
  157. locks[i] = new Lock();
  158. evictLock = new ReentrantLock();
  159. int eb = (int) (tableSize * .1);
  160. if (64 < eb)
  161. eb = 64;
  162. else if (eb < 4)
  163. eb = 4;
  164. if (tableSize < eb)
  165. eb = tableSize;
  166. evictBatch = eb;
  167. }
  168. /**
  169. * Lookup a cached object, creating and loading it if it doesn't exist.
  170. *
  171. * @param pack
  172. * the pack that "contains" the cached object.
  173. * @param position
  174. * offset within <code>pack</code> of the object.
  175. * @return the object reference.
  176. * @throws IOException
  177. * the object reference was not in the cache and could not be
  178. * obtained by {@link #load(PackFile, long)}.
  179. */
  180. V getOrLoad(final PackFile pack, final long position) throws IOException {
  181. final int slot = slot(pack, position);
  182. final Entry<V> e1 = table.get(slot);
  183. V v = scan(e1, pack, position);
  184. if (v != null)
  185. return v;
  186. synchronized (lock(pack, position)) {
  187. Entry<V> e2 = table.get(slot);
  188. if (e2 != e1) {
  189. v = scan(e2, pack, position);
  190. if (v != null)
  191. return v;
  192. }
  193. v = load(pack, position);
  194. final Ref<V> ref = createRef(pack, position, v);
  195. hit(ref);
  196. for (;;) {
  197. final Entry<V> n = new Entry<V>(clean(e2), ref);
  198. if (table.compareAndSet(slot, e2, n))
  199. break;
  200. e2 = table.get(slot);
  201. }
  202. }
  203. if (evictLock.tryLock()) {
  204. try {
  205. gc();
  206. evict();
  207. } finally {
  208. evictLock.unlock();
  209. }
  210. }
  211. return v;
  212. }
  213. private V scan(Entry<V> n, final PackFile pack, final long position) {
  214. for (; n != null; n = n.next) {
  215. final Ref<V> r = n.ref;
  216. if (r.pack == pack && r.position == position) {
  217. final V v = r.get();
  218. if (v != null) {
  219. hit(r);
  220. return v;
  221. }
  222. n.kill();
  223. break;
  224. }
  225. }
  226. return null;
  227. }
  228. private void hit(final Ref<V> r) {
  229. // We don't need to be 100% accurate here. Its sufficient that at least
  230. // one thread performs the increment. Any other concurrent access at
  231. // exactly the same time can simply use the same clock value.
  232. //
  233. // Consequently we attempt the set, but we don't try to recover should
  234. // it fail. This is why we don't use getAndIncrement() here.
  235. //
  236. final long c = clock.get();
  237. clock.compareAndSet(c, c + 1);
  238. r.lastAccess = c;
  239. }
  240. private void evict() {
  241. while (isFull()) {
  242. int ptr = rng.nextInt(tableSize);
  243. Entry<V> old = null;
  244. int slot = 0;
  245. for (int b = evictBatch - 1; b >= 0; b--, ptr++) {
  246. if (tableSize <= ptr)
  247. ptr = 0;
  248. for (Entry<V> e = table.get(ptr); e != null; e = e.next) {
  249. if (e.dead)
  250. continue;
  251. if (old == null || e.ref.lastAccess < old.ref.lastAccess) {
  252. old = e;
  253. slot = ptr;
  254. }
  255. }
  256. }
  257. if (old != null) {
  258. old.kill();
  259. gc();
  260. final Entry<V> e1 = table.get(slot);
  261. table.compareAndSet(slot, e1, clean(e1));
  262. }
  263. }
  264. }
  265. /**
  266. * Clear every entry from the cache.
  267. *<p>
  268. * This is a last-ditch effort to clear out the cache, such as before it
  269. * gets replaced by another cache that is configured differently. This
  270. * method tries to force every cached entry through {@link #clear(Ref)} to
  271. * ensure that resources are correctly accounted for and cleaned up by the
  272. * subclass. A concurrent reader loading entries while this method is
  273. * running may cause resource accounting failures.
  274. */
  275. void removeAll() {
  276. for (int s = 0; s < tableSize; s++) {
  277. Entry<V> e1;
  278. do {
  279. e1 = table.get(s);
  280. for (Entry<V> e = e1; e != null; e = e.next)
  281. e.kill();
  282. } while (!table.compareAndSet(s, e1, null));
  283. }
  284. gc();
  285. }
  286. /**
  287. * Clear all entries related to a single file.
  288. * <p>
  289. * Typically this method is invoked during {@link PackFile#close()}, when we
  290. * know the pack is never going to be useful to us again (for example, it no
  291. * longer exists on disk). A concurrent reader loading an entry from this
  292. * same pack may cause the pack to become stuck in the cache anyway.
  293. *
  294. * @param pack
  295. * the file to purge all entries of.
  296. */
  297. void removeAll(final PackFile pack) {
  298. for (int s = 0; s < tableSize; s++) {
  299. final Entry<V> e1 = table.get(s);
  300. boolean hasDead = false;
  301. for (Entry<V> e = e1; e != null; e = e.next) {
  302. if (e.ref.pack == pack) {
  303. e.kill();
  304. hasDead = true;
  305. } else if (e.dead)
  306. hasDead = true;
  307. }
  308. if (hasDead)
  309. table.compareAndSet(s, e1, clean(e1));
  310. }
  311. gc();
  312. }
  313. /**
  314. * Materialize an object that doesn't yet exist in the cache.
  315. * <p>
  316. * This method is invoked by {@link #getOrLoad(PackFile, long)} when the
  317. * specified entity does not yet exist in the cache. Internal locking
  318. * ensures that at most one thread can call this method for each unique
  319. * <code>(pack,position)</code>, but multiple threads can call this method
  320. * concurrently for different <code>(pack,position)</code> tuples.
  321. *
  322. * @param pack
  323. * the file to materialize the entry from.
  324. * @param position
  325. * offset within the file of the entry.
  326. * @return the materialized object. Must never be null.
  327. * @throws IOException
  328. * the method was unable to materialize the object for this
  329. * input pair. The usual reasons would be file corruption, file
  330. * not found, out of file descriptors, etc.
  331. */
  332. protected abstract V load(PackFile pack, long position) throws IOException;
  333. /**
  334. * Construct a Ref (SoftReference) around a cached entity.
  335. * <p>
  336. * Implementing this is only necessary if the subclass is performing
  337. * resource accounting during {@link #load(PackFile, long)} and
  338. * {@link #clear(Ref)} requires some information to update the accounting.
  339. * <p>
  340. * Implementors <b>MUST</b> ensure that the returned reference uses the
  341. * {@link #queue} ReferenceQueue, otherwise {@link #clear(Ref)} will not be
  342. * invoked at the proper time.
  343. *
  344. * @param pack
  345. * the file to materialize the entry from.
  346. * @param position
  347. * offset within the file of the entry.
  348. * @param v
  349. * the object returned by {@link #load(PackFile, long)}.
  350. * @return a soft reference subclass wrapped around <code>v</code>.
  351. */
  352. @SuppressWarnings("unchecked")
  353. protected R createRef(final PackFile pack, final long position, final V v) {
  354. return (R) new Ref<V>(pack, position, v, queue);
  355. }
  356. /**
  357. * Update accounting information now that an object has left the cache.
  358. * <p>
  359. * This method is invoked exactly once for the combined
  360. * {@link #load(PackFile, long)} and
  361. * {@link #createRef(PackFile, long, Object)} invocation pair that was used
  362. * to construct and insert an object into the cache.
  363. *
  364. * @param ref
  365. * the reference wrapped around the object. Implementations must
  366. * be prepared for <code>ref.get()</code> to return null.
  367. */
  368. protected void clear(final R ref) {
  369. // Do nothing by default.
  370. }
  371. /**
  372. * Determine if the cache is full and requires eviction of entries.
  373. * <p>
  374. * By default this method returns false. Implementors may override to
  375. * consult with the accounting updated by {@link #load(PackFile, long)},
  376. * {@link #createRef(PackFile, long, Object)} and {@link #clear(Ref)}.
  377. *
  378. * @return true if the cache is still over-limit and requires eviction of
  379. * more entries.
  380. */
  381. protected boolean isFull() {
  382. return false;
  383. }
  384. @SuppressWarnings("unchecked")
  385. private void gc() {
  386. R r;
  387. while ((r = (R) queue.poll()) != null) {
  388. // Sun's Java 5 and 6 implementation have a bug where a Reference
  389. // can be enqueued and dequeued twice on the same reference queue
  390. // due to a race condition within ReferenceQueue.enqueue(Reference).
  391. //
  392. // http://bugs.sun.com/bugdatabase/view_bug.do?bug_id=6837858
  393. //
  394. // We CANNOT permit a Reference to come through us twice, as it will
  395. // skew the resource counters we maintain. Our canClear() check here
  396. // provides a way to skip the redundant dequeues, if any.
  397. //
  398. if (r.canClear()) {
  399. clear(r);
  400. boolean found = false;
  401. final int s = slot(r.pack, r.position);
  402. final Entry<V> e1 = table.get(s);
  403. for (Entry<V> n = e1; n != null; n = n.next) {
  404. if (n.ref == r) {
  405. n.dead = true;
  406. found = true;
  407. break;
  408. }
  409. }
  410. if (found)
  411. table.compareAndSet(s, e1, clean(e1));
  412. }
  413. }
  414. }
  415. /**
  416. * Compute the hash code value for a <code>(PackFile,position)</code> tuple.
  417. * <p>
  418. * For example, <code>return packHash + (int) (position >>> 4)</code>.
  419. * Implementors must override with a suitable hash (for example, a different
  420. * right shift on the position).
  421. *
  422. * @param packHash
  423. * hash code for the file being accessed.
  424. * @param position
  425. * position within the file being accessed.
  426. * @return a reasonable hash code mixing the two values.
  427. */
  428. protected abstract int hash(int packHash, long position);
  429. private int slot(final PackFile pack, final long position) {
  430. return (hash(pack.hash, position) >>> 1) % tableSize;
  431. }
  432. private Lock lock(final PackFile pack, final long position) {
  433. return locks[(hash(pack.hash, position) >>> 1) % locks.length];
  434. }
  435. private static <V> Entry<V> clean(Entry<V> top) {
  436. while (top != null && top.dead) {
  437. top.ref.enqueue();
  438. top = top.next;
  439. }
  440. if (top == null)
  441. return null;
  442. final Entry<V> n = clean(top.next);
  443. return n == top.next ? top : new Entry<V>(n, top.ref);
  444. }
  445. private static class Entry<V> {
  446. /** Next entry in the hash table's chain list. */
  447. final Entry<V> next;
  448. /** The referenced object. */
  449. final Ref<V> ref;
  450. /**
  451. * Marked true when ref.get() returns null and the ref is dead.
  452. * <p>
  453. * A true here indicates that the ref is no longer accessible, and that
  454. * we therefore need to eventually purge this Entry object out of the
  455. * bucket's chain.
  456. */
  457. volatile boolean dead;
  458. Entry(final Entry<V> n, final Ref<V> r) {
  459. next = n;
  460. ref = r;
  461. }
  462. final void kill() {
  463. dead = true;
  464. ref.enqueue();
  465. }
  466. }
  467. /**
  468. * A soft reference wrapped around a cached object.
  469. *
  470. * @param <V>
  471. * type of the cached object.
  472. */
  473. protected static class Ref<V> extends SoftReference<V> {
  474. final PackFile pack;
  475. final long position;
  476. long lastAccess;
  477. private boolean cleared;
  478. protected Ref(final PackFile pack, final long position, final V v,
  479. final ReferenceQueue<V> queue) {
  480. super(v, queue);
  481. this.pack = pack;
  482. this.position = position;
  483. }
  484. final synchronized boolean canClear() {
  485. if (cleared)
  486. return false;
  487. cleared = true;
  488. return true;
  489. }
  490. }
  491. private static final class Lock {
  492. // Used only for its implicit monitor.
  493. }
  494. }