/* * Copyright (C) 2011, Google Inc. and others * * This program and the accompanying materials are made available under the * terms of the Eclipse Distribution License v. 1.0 which is available at * https://www.eclipse.org/org/documents/edl-v10.php. * * SPDX-License-Identifier: BSD-3-Clause */ package org.eclipse.jgit.internal.storage.dfs; import static java.util.stream.Collectors.joining; import java.io.FileNotFoundException; import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.HashMap; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; import java.util.concurrent.atomic.AtomicReference; import org.eclipse.jgit.internal.storage.pack.PackExt; import org.eclipse.jgit.lib.AnyObjectId; import org.eclipse.jgit.lib.ObjectDatabase; import org.eclipse.jgit.lib.ObjectInserter; import org.eclipse.jgit.lib.ObjectReader; /** * Manages objects stored in * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackFile} on a storage * system. */ public abstract class DfsObjDatabase extends ObjectDatabase { private static final PackList NO_PACKS = new PackList( new DfsPackFile[0], new DfsReftable[0]) { @Override boolean dirty() { return true; } @Override void clearDirty() { // Always dirty. } @Override public void markDirty() { // Always dirty. } }; /** * Sources for a pack file. *

* Note: When sorting packs by source, do not use the default * comparator based on {@link Enum#compareTo}. Prefer {@link * #DEFAULT_COMPARATOR} or your own {@link ComparatorBuilder}. */ public enum PackSource { /** The pack is created by ObjectInserter due to local activity. */ INSERT, /** * The pack is created by PackParser due to a network event. *

* A received pack can be from either a push into the repository, or a * fetch into the repository, the direction doesn't matter. A received * pack was built by the remote Git implementation and may not match the * storage layout preferred by this version. Received packs are likely * to be either compacted or garbage collected in the future. */ RECEIVE, /** * The pack was created by compacting multiple packs together. *

* Packs created by compacting multiple packs together aren't nearly as * efficient as a fully garbage collected repository, but may save disk * space by reducing redundant copies of base objects. * * @see DfsPackCompactor */ COMPACT, /** * Pack was created by Git garbage collection by this implementation. *

* This source is only used by the {@link DfsGarbageCollector} when it * builds a pack file by traversing the object graph and copying all * reachable objects into a new pack stream. * * @see DfsGarbageCollector */ GC, /** Created from non-heads by {@link DfsGarbageCollector}. */ GC_REST, /** * RefTreeGraph pack was created by Git garbage collection. * * @see DfsGarbageCollector */ GC_TXN, /** * Pack was created by Git garbage collection. *

* This pack contains only unreachable garbage that was found during the * last GC pass. It is retained in a new pack until it is safe to prune * these objects from the repository. */ UNREACHABLE_GARBAGE; /** * Default comparator for sources. *

* Sorts generally newer, smaller types such as {@code INSERT} and {@code * RECEIVE} earlier; older, larger types such as {@code GC} later; and * {@code UNREACHABLE_GARBAGE} at the end. */ public static final Comparator DEFAULT_COMPARATOR = new ComparatorBuilder() .add(INSERT, RECEIVE) .add(COMPACT) .add(GC) .add(GC_REST) .add(GC_TXN) .add(UNREACHABLE_GARBAGE) .build(); /** * Builder for describing {@link PackSource} ordering where some values are * explicitly considered equal to others. */ public static class ComparatorBuilder { private final Map ranks = new HashMap<>(); private int counter; /** * Add a collection of sources that should sort as equal. *

* Sources in the input will sort after sources listed in previous calls * to this method. * * @param sources * sources in this equivalence class. * @return this. */ public ComparatorBuilder add(PackSource... sources) { for (PackSource s : sources) { ranks.put(s, Integer.valueOf(counter)); } counter++; return this; } /** * Build the comparator. * * @return new comparator instance. * @throws IllegalArgumentException * not all {@link PackSource} instances were explicitly assigned * an equivalence class. */ public Comparator build() { return new PackSourceComparator(ranks); } } private static class PackSourceComparator implements Comparator { private final Map ranks; private PackSourceComparator(Map ranks) { if (!ranks.keySet().equals( new HashSet<>(Arrays.asList(PackSource.values())))) { throw new IllegalArgumentException(); } this.ranks = new HashMap<>(ranks); } @Override public int compare(PackSource a, PackSource b) { return ranks.get(a).compareTo(ranks.get(b)); } @Override public String toString() { return Arrays.stream(PackSource.values()) .map(s -> s + "=" + ranks.get(s)) //$NON-NLS-1$ .collect(joining(", ", getClass().getSimpleName() + "{", "}")); //$NON-NLS-1$ //$NON-NLS-2$ //$NON-NLS-3$ } } } private final AtomicReference packList; private final DfsRepository repository; private DfsReaderOptions readerOptions; private Comparator packComparator; /** * Initialize an object database for our repository. * * @param repository * repository owning this object database. * @param options * how readers should access the object database. */ protected DfsObjDatabase(DfsRepository repository, DfsReaderOptions options) { this.repository = repository; this.packList = new AtomicReference<>(NO_PACKS); this.readerOptions = options; this.packComparator = DfsPackDescription.objectLookupComparator(); } /** * Get configured reader options, such as read-ahead. * * @return configured reader options, such as read-ahead. */ public DfsReaderOptions getReaderOptions() { return readerOptions; } /** * Set the comparator used when searching for objects across packs. *

* An optimal comparator will find more objects without having to load large * idx files from storage only to find that they don't contain the object. * See {@link DfsPackDescription#objectLookupComparator()} for the default * heuristics. * * @param packComparator * comparator. */ public void setPackComparator(Comparator packComparator) { this.packComparator = packComparator; } /** {@inheritDoc} */ @Override public DfsReader newReader() { return new DfsReader(this); } /** {@inheritDoc} */ @Override public ObjectInserter newInserter() { return new DfsInserter(this); } /** * Scan and list all available pack files in the repository. * * @return list of available packs. The returned array is shared with the * implementation and must not be modified by the caller. * @throws java.io.IOException * the pack list cannot be initialized. */ public DfsPackFile[] getPacks() throws IOException { return getPackList().packs; } /** * Scan and list all available reftable files in the repository. * * @return list of available reftables. The returned array is shared with * the implementation and must not be modified by the caller. * @throws java.io.IOException * the pack list cannot be initialized. */ public DfsReftable[] getReftables() throws IOException { return getPackList().reftables; } /** * Scan and list all available pack files in the repository. * * @return list of available packs, with some additional metadata. The * returned array is shared with the implementation and must not be * modified by the caller. * @throws java.io.IOException * the pack list cannot be initialized. */ public PackList getPackList() throws IOException { return scanPacks(NO_PACKS); } /** * Get repository owning this object database. * * @return repository owning this object database. */ protected DfsRepository getRepository() { return repository; } /** * List currently known pack files in the repository, without scanning. * * @return list of available packs. The returned array is shared with the * implementation and must not be modified by the caller. */ public DfsPackFile[] getCurrentPacks() { return getCurrentPackList().packs; } /** * List currently known reftable files in the repository, without scanning. * * @return list of available reftables. The returned array is shared with * the implementation and must not be modified by the caller. */ public DfsReftable[] getCurrentReftables() { return getCurrentPackList().reftables; } /** * List currently known pack files in the repository, without scanning. * * @return list of available packs, with some additional metadata. The * returned array is shared with the implementation and must not be * modified by the caller. */ public PackList getCurrentPackList() { return packList.get(); } /** * Does the requested object exist in this database? *

* This differs from ObjectDatabase's implementation in that we can selectively * ignore unreachable (garbage) objects. * * @param objectId * identity of the object to test for existence of. * @param avoidUnreachableObjects * if true, ignore objects that are unreachable. * @return true if the specified object is stored in this database. * @throws java.io.IOException * the object store cannot be accessed. */ public boolean has(AnyObjectId objectId, boolean avoidUnreachableObjects) throws IOException { try (ObjectReader or = newReader()) { or.setAvoidUnreachableObjects(avoidUnreachableObjects); return or.has(objectId); } } /** * Generate a new unique name for a pack file. * * @param source * where the pack stream is created. * @return a unique name for the pack file. Must not collide with any other * pack file name in the same DFS. * @throws java.io.IOException * a new unique pack description cannot be generated. */ protected abstract DfsPackDescription newPack(PackSource source) throws IOException; /** * Generate a new unique name for a pack file. * *

* Default implementation of this method would be equivalent to * {@code newPack(source).setEstimatedPackSize(estimatedPackSize)}. But the * clients can override this method to use the given * {@code estomatedPackSize} value more efficiently in the process of * creating a new * {@link org.eclipse.jgit.internal.storage.dfs.DfsPackDescription} object. * * @param source * where the pack stream is created. * @param estimatedPackSize * the estimated size of the pack. * @return a unique name for the pack file. Must not collide with any other * pack file name in the same DFS. * @throws java.io.IOException * a new unique pack description cannot be generated. */ protected DfsPackDescription newPack(PackSource source, long estimatedPackSize) throws IOException { DfsPackDescription pack = newPack(source); pack.setEstimatedPackSize(estimatedPackSize); return pack; } /** * Commit a pack and index pair that was written to the DFS. *

* Committing the pack/index pair makes them visible to readers. The JGit * DFS code always writes the pack, then the index. This allows a simple * commit process to do nothing if readers always look for both files to * exist and the DFS performs atomic creation of the file (e.g. stream to a * temporary file and rename to target on close). *

* During pack compaction or GC the new pack file may be replacing other * older files. Implementations should remove those older files (if any) as * part of the commit of the new file. *

* This method is a trivial wrapper around * {@link #commitPackImpl(Collection, Collection)} that calls the * implementation and fires events. * * @param desc * description of the new packs. * @param replaces * if not null, list of packs to remove. * @throws java.io.IOException * the packs cannot be committed. On failure a rollback must * also be attempted by the caller. */ protected void commitPack(Collection desc, Collection replaces) throws IOException { commitPackImpl(desc, replaces); getRepository().fireEvent(new DfsPacksChangedEvent()); } /** * Implementation of pack commit. * * @see #commitPack(Collection, Collection) * @param desc * description of the new packs. * @param replaces * if not null, list of packs to remove. * @throws java.io.IOException * the packs cannot be committed. */ protected abstract void commitPackImpl(Collection desc, Collection replaces) throws IOException; /** * Try to rollback a pack creation. *

* JGit DFS always writes the pack first, then the index. If the pack does * not yet exist, then neither does the index. A safe DFS implementation * would try to remove both files to ensure they are really gone. *

* A rollback does not support failures, as it only occurs when there is * already a failure in progress. A DFS implementor may wish to log * warnings/error messages when a rollback fails, but should not send new * exceptions up the Java callstack. * * @param desc * pack to delete. */ protected abstract void rollbackPack(Collection desc); /** * List the available pack files. *

* The returned list must support random access and must be mutable by the * caller. It is sorted in place using the natural sorting of the returned * DfsPackDescription objects. * * @return available packs. May be empty if there are no packs. * @throws java.io.IOException * the packs cannot be listed and the object database is not * functional to the caller. */ protected abstract List listPacks() throws IOException; /** * Open a pack, pack index, or other related file for reading. * * @param desc * description of pack related to the data that will be read. * This is an instance previously obtained from * {@link #listPacks()}, but not necessarily from the same * DfsObjDatabase instance. * @param ext * file extension that will be read i.e "pack" or "idx". * @return channel to read the file. * @throws java.io.FileNotFoundException * the file does not exist. * @throws java.io.IOException * the file cannot be opened. */ protected abstract ReadableChannel openFile( DfsPackDescription desc, PackExt ext) throws FileNotFoundException, IOException; /** * Open a pack, pack index, or other related file for writing. * * @param desc * description of pack related to the data that will be written. * This is an instance previously obtained from * {@link #newPack(PackSource)}. * @param ext * file extension that will be written i.e "pack" or "idx". * @return channel to write the file. * @throws java.io.IOException * the file cannot be opened. */ protected abstract DfsOutputStream writeFile( DfsPackDescription desc, PackExt ext) throws IOException; void addPack(DfsPackFile newPack) throws IOException { PackList o, n; do { o = packList.get(); if (o == NO_PACKS) { // The repository may not have needed any existing objects to // complete the current task of creating a pack (e.g. push of a // pack with no external deltas). Because we don't scan for // newly added packs on missed object lookups, scan now to // make sure all older packs are available in the packList. o = scanPacks(o); // Its possible the scan identified the pack we were asked to // add, as the pack was already committed via commitPack(). // If this is the case return without changing the list. for (DfsPackFile p : o.packs) { if (p.key.equals(newPack.key)) { return; } } } DfsPackFile[] packs = new DfsPackFile[1 + o.packs.length]; packs[0] = newPack; System.arraycopy(o.packs, 0, packs, 1, o.packs.length); n = new PackListImpl(packs, o.reftables); } while (!packList.compareAndSet(o, n)); } void addReftable(DfsPackDescription add, Set remove) throws IOException { PackList o, n; do { o = packList.get(); if (o == NO_PACKS) { o = scanPacks(o); for (DfsReftable t : o.reftables) { if (t.getPackDescription().equals(add)) { return; } } } List tables = new ArrayList<>(1 + o.reftables.length); for (DfsReftable t : o.reftables) { if (!remove.contains(t.getPackDescription())) { tables.add(t); } } tables.add(new DfsReftable(add)); n = new PackListImpl(o.packs, tables.toArray(new DfsReftable[0])); } while (!packList.compareAndSet(o, n)); } PackList scanPacks(PackList original) throws IOException { PackList o, n; synchronized (packList) { do { o = packList.get(); if (o != original) { // Another thread did the scan for us, while we // were blocked on the monitor above. // return o; } n = scanPacksImpl(o); if (n == o) return n; } while (!packList.compareAndSet(o, n)); } getRepository().fireEvent(new DfsPacksChangedEvent()); return n; } private PackList scanPacksImpl(PackList old) throws IOException { DfsBlockCache cache = DfsBlockCache.getInstance(); Map packs = packMap(old); Map reftables = reftableMap(old); List scanned = listPacks(); Collections.sort(scanned, packComparator); List newPacks = new ArrayList<>(scanned.size()); List newReftables = new ArrayList<>(scanned.size()); boolean foundNew = false; for (DfsPackDescription dsc : scanned) { DfsPackFile oldPack = packs.remove(dsc); if (oldPack != null) { newPacks.add(oldPack); } else if (dsc.hasFileExt(PackExt.PACK)) { newPacks.add(new DfsPackFile(cache, dsc)); foundNew = true; } DfsReftable oldReftable = reftables.remove(dsc); if (oldReftable != null) { newReftables.add(oldReftable); } else if (dsc.hasFileExt(PackExt.REFTABLE)) { newReftables.add(new DfsReftable(cache, dsc)); foundNew = true; } } if (newPacks.isEmpty() && newReftables.isEmpty()) return new PackListImpl(NO_PACKS.packs, NO_PACKS.reftables); if (!foundNew) { old.clearDirty(); return old; } Collections.sort(newReftables, reftableComparator()); return new PackListImpl( newPacks.toArray(new DfsPackFile[0]), newReftables.toArray(new DfsReftable[0])); } private static Map packMap(PackList old) { Map forReuse = new HashMap<>(); for (DfsPackFile p : old.packs) { if (!p.invalid()) { forReuse.put(p.desc, p); } } return forReuse; } private static Map reftableMap(PackList old) { Map forReuse = new HashMap<>(); for (DfsReftable p : old.reftables) { if (!p.invalid()) { forReuse.put(p.desc, p); } } return forReuse; } /** * Get comparator to sort {@link DfsReftable} by priority. * * @return comparator to sort {@link DfsReftable} by priority. */ protected Comparator reftableComparator() { return Comparator.comparing( DfsReftable::getPackDescription, DfsPackDescription.reftableComparator()); } /** * Clears the cached list of packs, forcing them to be scanned again. */ protected void clearCache() { packList.set(NO_PACKS); } /** {@inheritDoc} */ @Override public void close() { packList.set(NO_PACKS); } /** Snapshot of packs scanned in a single pass. */ public abstract static class PackList { /** All known packs, sorted. */ public final DfsPackFile[] packs; /** All known reftables, sorted. */ public final DfsReftable[] reftables; private long lastModified = -1; PackList(DfsPackFile[] packs, DfsReftable[] reftables) { this.packs = packs; this.reftables = reftables; } /** @return last modified time of all packs, in milliseconds. */ public long getLastModified() { if (lastModified < 0) { long max = 0; for (DfsPackFile pack : packs) { max = Math.max(max, pack.getPackDescription().getLastModified()); } lastModified = max; } return lastModified; } abstract boolean dirty(); abstract void clearDirty(); /** * Mark pack list as dirty. *

* Used when the caller knows that new data might have been written to the * repository that could invalidate open readers depending on this pack list, * for example if refs are newly scanned. */ public abstract void markDirty(); } private static final class PackListImpl extends PackList { private volatile boolean dirty; PackListImpl(DfsPackFile[] packs, DfsReftable[] reftables) { super(packs, reftables); } @Override boolean dirty() { return dirty; } @Override void clearDirty() { dirty = false; } @Override public void markDirty() { dirty = true; } } }