
DfsObjDatabase.java 15KB

/*
 * Copyright (C) 2011, Google Inc.
 * and other copyright owners as documented in the project's IP log.
 *
 * This program and the accompanying materials are made available
 * under the terms of the Eclipse Distribution License v1.0 which
 * accompanies this distribution, is reproduced below, and is
 * available at http://www.eclipse.org/org/documents/edl-v10.php
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or
 * without modification, are permitted provided that the following
 * conditions are met:
 *
 * - Redistributions of source code must retain the above copyright
 *   notice, this list of conditions and the following disclaimer.
 *
 * - Redistributions in binary form must reproduce the above
 *   copyright notice, this list of conditions and the following
 *   disclaimer in the documentation and/or other materials provided
 *   with the distribution.
 *
 * - Neither the name of the Eclipse Foundation, Inc. nor the
 *   names of its contributors may be used to endorse or promote
 *   products derived from this software without specific prior
 *   written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
 * CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES,
 * INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
 * CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
 * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */
package org.eclipse.jgit.internal.storage.dfs;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.util.ArrayList;
import java.util.Collection;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.concurrent.atomic.AtomicReference;

import org.eclipse.jgit.internal.storage.pack.PackExt;
import org.eclipse.jgit.lib.AnyObjectId;
import org.eclipse.jgit.lib.ObjectDatabase;
import org.eclipse.jgit.lib.ObjectInserter;
import org.eclipse.jgit.lib.ObjectReader;
/** Manages objects stored in {@link DfsPackFile} on a storage system. */
public abstract class DfsObjDatabase extends ObjectDatabase {
	private static final PackList NO_PACKS = new PackList(new DfsPackFile[0]);

	/** Sources for a pack file. */
	public static enum PackSource {
		/** The pack is created by ObjectInserter due to local activity. */
		INSERT(0),

		/**
		 * The pack is created by PackParser due to a network event.
		 * <p>
		 * A received pack can be from either a push into the repository, or a
		 * fetch into the repository; the direction doesn't matter. A received
		 * pack was built by the remote Git implementation and may not match the
		 * storage layout preferred by this version. Received packs are likely
		 * to be either compacted or garbage collected in the future.
		 */
		RECEIVE(0),

		/**
		 * Pack was created by Git garbage collection by this implementation.
		 * <p>
		 * This source is only used by the {@link DfsGarbageCollector} when it
		 * builds a pack file by traversing the object graph and copying all
		 * reachable objects into a new pack stream.
		 *
		 * @see DfsGarbageCollector
		 */
		GC(1),

		/**
		 * RefTreeGraph pack was created by Git garbage collection.
		 *
		 * @see DfsGarbageCollector
		 */
		GC_TXN(1),

		/**
		 * The pack was created by compacting multiple packs together.
		 * <p>
		 * Packs created by compacting multiple packs together aren't nearly as
		 * efficient as a fully garbage collected repository, but may save disk
		 * space by reducing redundant copies of base objects.
		 *
		 * @see DfsPackCompactor
		 */
		COMPACT(1),

		/**
		 * Pack was created by Git garbage collection.
		 * <p>
		 * This pack contains only unreachable garbage that was found during the
		 * last GC pass. It is retained in a new pack until it is safe to prune
		 * these objects from the repository.
		 */
		UNREACHABLE_GARBAGE(2);
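		/**
		 * Coarse ordering group used by DfsPackDescription's natural sort;
		 * lower categories sort first, keeping UNREACHABLE_GARBAGE last.
		 */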
		final int category;

		PackSource(int category) {
			this.category = category;
		}
	}
	private final AtomicReference<PackList> packList;

	private final DfsRepository repository;

	private DfsReaderOptions readerOptions;

	/**
	 * Initialize an object database for our repository.
	 *
	 * @param repository
	 *            repository owning this object database.
	 * @param options
	 *            how readers should access the object database.
	 */
	protected DfsObjDatabase(DfsRepository repository,
			DfsReaderOptions options) {
		this.repository = repository;
		this.packList = new AtomicReference<PackList>(NO_PACKS);
		this.readerOptions = options;
	}

	/** @return configured reader options, such as read-ahead. */
	public DfsReaderOptions getReaderOptions() {
		return readerOptions;
	}

	@Override
	public ObjectReader newReader() {
		return new DfsReader(this);
	}

	@Override
	public ObjectInserter newInserter() {
		return new DfsInserter(this);
	}

	/**
	 * Scan and list all available pack files in the repository.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 * @throws IOException
	 *             the pack list cannot be initialized.
	 */
	public DfsPackFile[] getPacks() throws IOException {
		return scanPacks(NO_PACKS).packs;
	}

	/** @return repository owning this object database. */
	protected DfsRepository getRepository() {
		return repository;
	}

	/**
	 * List currently known pack files in the repository, without scanning.
	 *
	 * @return list of available packs. The returned array is shared with the
	 *         implementation and must not be modified by the caller.
	 */
	public DfsPackFile[] getCurrentPacks() {
		return packList.get().packs;
	}

	/**
	 * Does the requested object exist in this database?
	 * <p>
	 * This differs from ObjectDatabase's implementation in that we can
	 * selectively ignore unreachable (garbage) objects.
	 *
	 * @param objectId
	 *            identity of the object to test for existence of.
	 * @param avoidUnreachableObjects
	 *            if true, ignore objects that are unreachable.
	 * @return true if the specified object is stored in this database.
	 * @throws IOException
	 *             the object store cannot be accessed.
	 */
	public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects)
			throws IOException {
		try (ObjectReader or = newReader()) {
			or.setAvoidUnreachableObjects(avoidUnreachableObjects);
			return or.has(objectId);
		}
	}

	/**
	 * Generate a new unique name for a pack file.
	 *
	 * @param source
	 *            where the pack stream is created.
	 * @return a unique name for the pack file. Must not collide with any other
	 *         pack file name in the same DFS.
	 * @throws IOException
	 *             a new unique pack description cannot be generated.
	 */
	protected abstract DfsPackDescription newPack(PackSource source)
			throws IOException;

	/**
	 * Commit a pack and index pair that was written to the DFS.
	 * <p>
	 * Committing the pack/index pair makes them visible to readers. The JGit
	 * DFS code always writes the pack, then the index. This allows a simple
	 * commit process to do nothing if readers always look for both files to
	 * exist and the DFS performs atomic creation of the file (e.g. stream to a
	 * temporary file and rename to target on close).
	 * <p>
	 * During pack compaction or GC the new pack file may be replacing other
	 * older files. Implementations should remove those older files (if any) as
	 * part of the commit of the new file.
	 * <p>
	 * This method is a trivial wrapper around
	 * {@link #commitPackImpl(Collection, Collection)} that calls the
	 * implementation and fires events.
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed. On failure a rollback must
	 *             also be attempted by the caller.
	 */
	protected void commitPack(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException {
		commitPackImpl(desc, replaces);
		getRepository().fireEvent(new DfsPacksChangedEvent());
	}
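	/*
	 * Illustrative sketch only (not part of this class): a backend might
	 * implement commitPackImpl by renaming temporary files into place and
	 * then deleting the packs being replaced, e.g.:
	 *
	 *   protected void commitPackImpl(Collection<DfsPackDescription> desc,
	 *       Collection<DfsPackDescription> replaces) throws IOException {
	 *     for (DfsPackDescription d : desc)
	 *       renameTempFilesInto(d); // hypothetical helper
	 *     if (replaces != null)
	 *       for (DfsPackDescription d : replaces)
	 *         deletePackAndIndex(d); // hypothetical helper
	 *   }
	 *
	 * The key property is that a reader sees either both the pack and its
	 * index, or neither.
	 */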
	/**
	 * Implementation of pack commit.
	 *
	 * @see #commitPack(Collection, Collection)
	 *
	 * @param desc
	 *            description of the new packs.
	 * @param replaces
	 *            if not null, list of packs to remove.
	 * @throws IOException
	 *             the packs cannot be committed.
	 */
	protected abstract void commitPackImpl(Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) throws IOException;

	/**
	 * Try to rollback a pack creation.
	 * <p>
	 * JGit DFS always writes the pack first, then the index. If the pack does
	 * not yet exist, then neither does the index. A safe DFS implementation
	 * would try to remove both files to ensure they are really gone.
	 * <p>
	 * A rollback does not support failures, as it only occurs when there is
	 * already a failure in progress. A DFS implementor may wish to log
	 * warnings/error messages when a rollback fails, but should not send new
	 * exceptions up the Java callstack.
	 *
	 * @param desc
	 *            pack to delete.
	 */
	protected abstract void rollbackPack(Collection<DfsPackDescription> desc);
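	/*
	 * Illustrative sketch only (not part of this class): per the contract
	 * above, an implementation should swallow its own failures, e.g.:
	 *
	 *   protected void rollbackPack(Collection<DfsPackDescription> desc) {
	 *     for (DfsPackDescription d : desc) {
	 *       try {
	 *         deletePackAndIndex(d); // hypothetical helper
	 *       } catch (IOException e) {
	 *         log.warn("rollback failed for " + d, e); // hypothetical logger
	 *       }
	 *     }
	 *   }
	 */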
	/**
	 * List the available pack files.
	 * <p>
	 * The returned list must support random access and must be mutable by the
	 * caller. It is sorted in place using the natural sorting of the returned
	 * DfsPackDescription objects.
	 *
	 * @return available packs. May be empty if there are no packs.
	 * @throws IOException
	 *             the packs cannot be listed and the object database is not
	 *             functional to the caller.
	 */
	protected abstract List<DfsPackDescription> listPacks() throws IOException;

	/**
	 * Open a pack, pack index, or other related file for reading.
	 *
	 * @param desc
	 *            description of pack related to the data that will be read.
	 *            This is an instance previously obtained from
	 *            {@link #listPacks()}, but not necessarily from the same
	 *            DfsObjDatabase instance.
	 * @param ext
	 *            file extension that will be read, e.g. "pack" or "idx".
	 * @return channel to read the file.
	 * @throws FileNotFoundException
	 *             the file does not exist.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract ReadableChannel openFile(
			DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException;

	/**
	 * Open a pack, pack index, or other related file for writing.
	 *
	 * @param desc
	 *            description of pack related to the data that will be written.
	 *            This is an instance previously obtained from
	 *            {@link #newPack(PackSource)}.
	 * @param ext
	 *            file extension that will be written, e.g. "pack" or "idx".
	 * @return channel to write the file.
	 * @throws IOException
	 *             the file cannot be opened.
	 */
	protected abstract DfsOutputStream writeFile(
			DfsPackDescription desc, PackExt ext) throws IOException;

	void addPack(DfsPackFile newPack) throws IOException {
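		// Lock-free prepend: build a new array with newPack in front and
		// publish it via compareAndSet, retrying if another thread swapped
		// the list underneath us.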
		PackList o, n;
		do {
			o = packList.get();
			if (o == NO_PACKS) {
				// The repository may not have needed any existing objects to
				// complete the current task of creating a pack (e.g. push of a
				// pack with no external deltas). Because we don't scan for
				// newly added packs on missed object lookups, scan now to
				// make sure all older packs are available in the packList.
				o = scanPacks(o);

				// It's possible the scan identified the pack we were asked to
				// add, as the pack was already committed via commitPack().
				// If this is the case, return without changing the list.
				for (DfsPackFile p : o.packs) {
					if (p == newPack)
						return;
				}
			}

			DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length];
			packs[0] = newPack;
			System.arraycopy(o.packs, 0, packs, 1, o.packs.length);
			n = new PackList(packs);
		} while (!packList.compareAndSet(o, n));
	}
	private PackList scanPacks(final PackList original) throws IOException {
		PackList o, n;
		synchronized (packList) {
			do {
				o = packList.get();
				if (o != original) {
					// Another thread did the scan for us, while we
					// were blocked on the monitor above.
					//
					return o;
				}
				n = scanPacksImpl(o);
				if (n == o)
					return n;
			} while (!packList.compareAndSet(o, n));
		}
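		// The pack list changed; notify listeners outside the monitor.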
		getRepository().fireEvent(new DfsPacksChangedEvent());
		return n;
	}

	private PackList scanPacksImpl(PackList old) throws IOException {
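		// Reconcile the descriptions reported by listPacks() against packs
		// already open in the old list, reusing instances where possible.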
		DfsBlockCache cache = DfsBlockCache.getInstance();
		Map<DfsPackDescription, DfsPackFile> forReuse = reuseMap(old);
		List<DfsPackDescription> scanned = listPacks();
		Collections.sort(scanned);

		List<DfsPackFile> list = new ArrayList<DfsPackFile>(scanned.size());
		boolean foundNew = false;
		for (DfsPackDescription dsc : scanned) {
			DfsPackFile oldPack = forReuse.remove(dsc);
			if (oldPack != null) {
				list.add(oldPack);
			} else {
				list.add(cache.getOrCreate(dsc, null));
				foundNew = true;
			}
		}

		for (DfsPackFile p : forReuse.values())
			p.close();
		if (list.isEmpty())
			return new PackList(NO_PACKS.packs);
		if (!foundNew)
			return old;
		return new PackList(list.toArray(new DfsPackFile[list.size()]));
	}
	private static Map<DfsPackDescription, DfsPackFile> reuseMap(PackList old) {
		Map<DfsPackDescription, DfsPackFile> forReuse
			= new HashMap<DfsPackDescription, DfsPackFile>();
		for (DfsPackFile p : old.packs) {
			if (p.invalid()) {
				// The pack instance is corrupted, and cannot be safely used
				// again. Do not include it in our reuse map.
				//
				p.close();
				continue;
			}

			DfsPackFile prior = forReuse.put(p.getPackDescription(), p);
			if (prior != null) {
				// This should never occur. It should be impossible for us
				// to have two pack files with the same name, as all of them
				// came out of the same directory. If it does, we promised to
				// close any PackFiles we did not reuse, so close the second;
				// readers are likely to be actively using the first.
				//
				forReuse.put(prior.getPackDescription(), prior);
				p.close();
			}
		}
		return forReuse;
	}

	/** Clears the cached list of packs, forcing them to be scanned again. */
	protected void clearCache() {
		packList.set(NO_PACKS);
	}

	@Override
	public void close() {
		// PackList packs = packList.get();
		packList.set(NO_PACKS);

		// TODO Close packs if they aren't cached.
		// for (DfsPackFile p : packs.packs)
		// p.close();
	}

	private static final class PackList {
		/** All known packs, sorted. */
		final DfsPackFile[] packs;

		PackList(final DfsPackFile[] packs) {
			this.packs = packs;
		}
	}
}
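
For orientation, here is a minimal sketch of what a concrete backend has to supply. It is illustrative only, in the spirit of JGit's InMemoryRepository: committed pack descriptions live in a plain list, the data channels are left unimplemented, and it assumes a DfsPackDescription(DfsRepositoryDescription, String) constructor is available in this version; the class name and comments marking stubs are hypothetical.

/** Hypothetical sketch, not part of this file. */
class SketchObjDatabase extends DfsObjDatabase {
	private final DfsRepositoryDescription repoDesc;
	private final List<DfsPackDescription> committed =
			new ArrayList<DfsPackDescription>();
	private int packId;

	SketchObjDatabase(DfsRepository repo, DfsReaderOptions options) {
		super(repo, options);
		this.repoDesc = repo.getDescription();
	}

	@Override
	protected synchronized DfsPackDescription newPack(PackSource source) {
		// The counter keeps names unique within this database.
		return new DfsPackDescription(repoDesc,
				"pack-" + (++packId) + "-" + source.name());
	}

	@Override
	protected synchronized void commitPackImpl(
			Collection<DfsPackDescription> desc,
			Collection<DfsPackDescription> replaces) {
		// Publish the new packs and retire replaced ones in one step.
		committed.addAll(desc);
		if (replaces != null)
			committed.removeAll(replaces);
	}

	@Override
	protected void rollbackPack(Collection<DfsPackDescription> desc) {
		// Nothing was published yet, so there is nothing to undo here; a
		// real backend would best-effort delete the files written so far.
	}

	@Override
	protected synchronized List<DfsPackDescription> listPacks() {
		// Callers sort and mutate the result, so hand out a copy.
		return new ArrayList<DfsPackDescription>(committed);
	}

	@Override
	protected ReadableChannel openFile(DfsPackDescription desc, PackExt ext)
			throws FileNotFoundException, IOException {
		// A real backend returns a channel over the stored pack bytes.
		throw new UnsupportedOperationException();
	}

	@Override
	protected DfsOutputStream writeFile(DfsPackDescription desc, PackExt ext)
			throws IOException {
		// A real backend returns a stream that persists the written bytes.
		throw new UnsupportedOperationException();
	}
}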