* Experimental support for git commit graph files and bloom filter index Signed-off-by: Filip Navara <filip.navara@gmail.com> * Force vendor of commitgraph Signed-off-by: Filip Navara <filip.navara@gmail.com> * Remove bloom filter experiment and debug prints * Remove old code for building commit graphs * Remove unused function * Remove mmap usage * gofmt * sort vendor/modules.txt * Add copyright header and log commit-graph error tags/v1.9.0-rc1
@@ -8,6 +8,7 @@ import ( | |||
"github.com/emirpasic/gods/trees/binaryheap" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/plumbing/object" | |||
cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" | |||
) | |||
// GetCommitsInfo gets information of all commits that are corresponding to these entries | |||
@@ -19,7 +20,12 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom | |||
entryPaths[i+1] = entry.Name() | |||
} | |||
c, err := commit.repo.gogitRepo.CommitObject(plumbing.Hash(commit.ID)) | |||
commitNodeIndex, commitGraphFile := commit.repo.CommitNodeIndex() | |||
if commitGraphFile != nil { | |||
defer commitGraphFile.Close() | |||
} | |||
c, err := commitNodeIndex.Get(plumbing.Hash(commit.ID)) | |||
if err != nil { | |||
return nil, nil, err | |||
} | |||
@@ -69,14 +75,14 @@ func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCom | |||
} | |||
type commitAndPaths struct { | |||
commit *object.Commit | |||
commit cgobject.CommitNode | |||
// Paths that are still on the branch represented by commit | |||
paths []string | |||
// Set of hashes for the paths | |||
hashes map[string]plumbing.Hash | |||
} | |||
func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) { | |||
func getCommitTree(c cgobject.CommitNode, treePath string) (*object.Tree, error) { | |||
tree, err := c.Tree() | |||
if err != nil { | |||
return nil, err | |||
@@ -93,7 +99,7 @@ func getCommitTree(c *object.Commit, treePath string) (*object.Tree, error) { | |||
return tree, nil | |||
} | |||
func getFileHashes(c *object.Commit, treePath string, paths []string) (map[string]plumbing.Hash, error) { | |||
func getFileHashes(c cgobject.CommitNode, treePath string, paths []string) (map[string]plumbing.Hash, error) { | |||
tree, err := getCommitTree(c, treePath) | |||
if err == object.ErrDirectoryNotFound { | |||
// The whole tree didn't exist, so return empty map | |||
@@ -118,16 +124,16 @@ func getFileHashes(c *object.Commit, treePath string, paths []string) (map[strin | |||
return hashes, nil | |||
} | |||
func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (map[string]*object.Commit, error) { | |||
func getLastCommitForPaths(c cgobject.CommitNode, treePath string, paths []string) (map[string]*object.Commit, error) { | |||
// We do a tree traversal with nodes sorted by commit time | |||
heap := binaryheap.NewWith(func(a, b interface{}) int { | |||
if a.(*commitAndPaths).commit.Committer.When.Before(b.(*commitAndPaths).commit.Committer.When) { | |||
if a.(*commitAndPaths).commit.CommitTime().Before(b.(*commitAndPaths).commit.CommitTime()) { | |||
return 1 | |||
} | |||
return -1 | |||
}) | |||
result := make(map[string]*object.Commit) | |||
resultNodes := make(map[string]cgobject.CommitNode) | |||
initialHashes, err := getFileHashes(c, treePath, paths) | |||
if err != nil { | |||
return nil, err | |||
@@ -145,9 +151,9 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m | |||
// Load the parent commits for the one we are currently examining | |||
numParents := current.commit.NumParents() | |||
var parents []*object.Commit | |||
var parents []cgobject.CommitNode | |||
for i := 0; i < numParents; i++ { | |||
parent, err := current.commit.Parent(i) | |||
parent, err := current.commit.ParentNode(i) | |||
if err != nil { | |||
break | |||
} | |||
@@ -174,7 +180,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m | |||
for i, path := range current.paths { | |||
// The results could already contain some newer change for the same path, | |||
// so don't override that and bail out on the file early. | |||
if result[path] == nil { | |||
if resultNodes[path] == nil { | |||
if pathUnchanged[i] { | |||
// The path existed with the same hash in at least one parent so it could | |||
// not have been changed in this commit directly. | |||
@@ -188,7 +194,7 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m | |||
// - We are looking at a merge commit and the hash of the file doesn't | |||
// match any of the hashes being merged. This is more common for directories, | |||
// but it can also happen if a file is changed through conflict resolution. | |||
result[path] = current.commit | |||
resultNodes[path] = current.commit | |||
} | |||
} | |||
} | |||
@@ -222,5 +228,15 @@ func getLastCommitForPaths(c *object.Commit, treePath string, paths []string) (m | |||
} | |||
} | |||
// Post-processing | |||
result := make(map[string]*object.Commit) | |||
for path, commitNode := range resultNodes { | |||
var err error | |||
result[path], err = commitNode.Commit() | |||
if err != nil { | |||
return nil, err | |||
} | |||
} | |||
return result, nil | |||
} |
@@ -50,7 +50,17 @@ func GetNote(repo *Repository, commitID string, note *Note) error { | |||
return err | |||
} | |||
lastCommits, err := getLastCommitForPaths(commit, "", []string{commitID}) | |||
commitNodeIndex, commitGraphFile := repo.CommitNodeIndex() | |||
if commitGraphFile != nil { | |||
defer commitGraphFile.Close() | |||
} | |||
commitNode, err := commitNodeIndex.Get(commit.Hash) | |||
if err != nil { | |||
return nil | |||
} | |||
lastCommits, err := getLastCommitForPaths(commitNode, "", []string{commitID}) | |||
if err != nil { | |||
return err | |||
} |
@@ -0,0 +1,35 @@ | |||
// Copyright 2019 The Gitea Authors. | |||
// All rights reserved. | |||
// Use of this source code is governed by a MIT-style | |||
// license that can be found in the LICENSE file. | |||
package git | |||
import ( | |||
"os" | |||
"path" | |||
gitealog "code.gitea.io/gitea/modules/log" | |||
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" | |||
cgobject "gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph" | |||
) | |||
// CommitNodeIndex returns the index for walking commit graph | |||
func (r *Repository) CommitNodeIndex() (cgobject.CommitNodeIndex, *os.File) { | |||
indexPath := path.Join(r.Path, "objects", "info", "commit-graph") | |||
file, err := os.Open(indexPath) | |||
if err == nil { | |||
var index commitgraph.Index | |||
index, err = commitgraph.OpenFileIndex(file) | |||
if err == nil { | |||
return cgobject.NewGraphCommitNodeIndex(index, r.gogitRepo.Storer), file | |||
} | |||
} | |||
if !os.IsNotExist(err) { | |||
gitealog.Warn("Unable to read commit-graph for %s: %v", r.Path, err) | |||
} | |||
return cgobject.NewObjectCommitNodeIndex(r.gogitRepo.Storer), nil | |||
} |
@@ -0,0 +1,35 @@ | |||
package commitgraph | |||
import ( | |||
"time" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
) | |||
// CommitData is a reduced representation of Commit as presented in the commit graph | |||
// file. It is merely useful as an optimization for walking the commit graphs. | |||
type CommitData struct { | |||
// TreeHash is the hash of the root tree of the commit. | |||
TreeHash plumbing.Hash | |||
// ParentIndexes are the indexes of the parent commits of the commit. | |||
ParentIndexes []int | |||
// ParentHashes are the hashes of the parent commits of the commit. | |||
ParentHashes []plumbing.Hash | |||
// Generation number is the pre-computed generation in the commit graph | |||
// or zero if not available | |||
Generation int | |||
// When is the timestamp of the commit. | |||
When time.Time | |||
} | |||
// Index represents a representation of commit graph that allows indexed | |||
// access to the nodes using commit object hash | |||
type Index interface { | |||
// GetIndexByHash gets the index in the commit graph from commit hash, if available | |||
GetIndexByHash(h plumbing.Hash) (int, error) | |||
// GetNodeByIndex gets the commit node from the commit graph using index | |||
// obtained from child node, if available | |||
GetCommitDataByIndex(i int) (*CommitData, error) | |||
// Hashes returns all the hashes that are available in the index | |||
Hashes() []plumbing.Hash | |||
} |
@@ -0,0 +1,103 @@ | |||
// Package commitgraph implements encoding and decoding of commit-graph files. | |||
// | |||
// Git commit graph format | |||
// ======================= | |||
// | |||
// The Git commit graph stores a list of commit OIDs and some associated | |||
// metadata, including: | |||
// | |||
// - The generation number of the commit. Commits with no parents have | |||
// generation number 1; commits with parents have generation number | |||
// one more than the maximum generation number of its parents. We | |||
// reserve zero as special, and can be used to mark a generation | |||
// number invalid or as "not computed". | |||
// | |||
// - The root tree OID. | |||
// | |||
// - The commit date. | |||
// | |||
// - The parents of the commit, stored using positional references within | |||
// the graph file. | |||
// | |||
// These positional references are stored as unsigned 32-bit integers | |||
// corresponding to the array position within the list of commit OIDs. Due | |||
// to some special constants we use to track parents, we can store at most | |||
// (1 << 30) + (1 << 29) + (1 << 28) - 1 (around 1.8 billion) commits. | |||
// | |||
// == Commit graph files have the following format: | |||
// | |||
// In order to allow extensions that add extra data to the graph, we organize | |||
// the body into "chunks" and provide a binary lookup table at the beginning | |||
// of the body. The header includes certain values, such as number of chunks | |||
// and hash type. | |||
// | |||
// All 4-byte numbers are in network order. | |||
// | |||
// HEADER: | |||
// | |||
// 4-byte signature: | |||
// The signature is: {'C', 'G', 'P', 'H'} | |||
// | |||
// 1-byte version number: | |||
// Currently, the only valid version is 1. | |||
// | |||
// 1-byte Hash Version (1 = SHA-1) | |||
// We infer the hash length (H) from this value. | |||
// | |||
// 1-byte number (C) of "chunks" | |||
// | |||
// 1-byte (reserved for later use) | |||
// Current clients should ignore this value. | |||
// | |||
// CHUNK LOOKUP: | |||
// | |||
// (C + 1) * 12 bytes listing the table of contents for the chunks: | |||
// First 4 bytes describe the chunk id. Value 0 is a terminating label. | |||
// Other 8 bytes provide the byte-offset in current file for chunk to | |||
// start. (Chunks are ordered contiguously in the file, so you can infer | |||
// the length using the next chunk position if necessary.) Each chunk | |||
// ID appears at most once. | |||
// | |||
// The remaining data in the body is described one chunk at a time, and | |||
// these chunks may be given in any order. Chunks are required unless | |||
// otherwise specified. | |||
// | |||
// CHUNK DATA: | |||
// | |||
// OID Fanout (ID: {'O', 'I', 'D', 'F'}) (256 * 4 bytes) | |||
// The ith entry, F[i], stores the number of OIDs with first | |||
// byte at most i. Thus F[255] stores the total | |||
// number of commits (N). | |||
// | |||
// OID Lookup (ID: {'O', 'I', 'D', 'L'}) (N * H bytes) | |||
// The OIDs for all commits in the graph, sorted in ascending order. | |||
// | |||
// Commit Data (ID: {'C', 'D', 'A', 'T' }) (N * (H + 16) bytes) | |||
// * The first H bytes are for the OID of the root tree. | |||
// * The next 8 bytes are for the positions of the first two parents | |||
// of the ith commit. Stores value 0x70000000 if no parent in that | |||
// position. If there are more than two parents, the second value | |||
// has its most-significant bit on and the other bits store an array | |||
// position into the Extra Edge List chunk. | |||
// * The next 8 bytes store the generation number of the commit and | |||
// the commit time in seconds since EPOCH. The generation number | |||
// uses the higher 30 bits of the first 4 bytes, while the commit | |||
// time uses the 32 bits of the second 4 bytes, along with the lowest | |||
// 2 bits of the lowest byte, storing the 33rd and 34th bit of the | |||
// commit time. | |||
// | |||
// Extra Edge List (ID: {'E', 'D', 'G', 'E'}) [Optional] | |||
// This list of 4-byte values store the second through nth parents for | |||
// all octopus merges. The second parent value in the commit data stores | |||
// an array position within this list along with the most-significant bit | |||
// on. Starting at that array position, iterate through this list of commit | |||
// positions for the parents until reaching a value with the most-significant | |||
// bit on. The other bits correspond to the position of the last parent. | |||
// | |||
// TRAILER: | |||
// | |||
// H-byte HASH-checksum of all of the above. | |||
// | |||
// Source: | |||
// https://raw.githubusercontent.com/git/git/master/Documentation/technical/commit-graph-format.txt | |||
package commitgraph |
@@ -0,0 +1,190 @@ | |||
package commitgraph | |||
import ( | |||
"crypto/sha1" | |||
"hash" | |||
"io" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/utils/binary" | |||
) | |||
// Encoder serializes an Index in the commit-graph file format. Everything
// written through the embedded Writer is also fed into hash, so that the
// trailing checksum can be produced once all the data has been written.
type Encoder struct {
	io.Writer
	hash hash.Hash
}

// NewEncoder returns an Encoder that writes to w while computing a SHA-1
// digest of all the bytes it emits.
func NewEncoder(w io.Writer) *Encoder {
	digest := sha1.New()
	return &Encoder{
		Writer: io.MultiWriter(w, digest),
		hash:   digest,
	}
}
// Encode writes an index into the commit-graph file | |||
func (e *Encoder) Encode(idx Index) error { | |||
var err error | |||
// Get all the hashes in the input index | |||
hashes := idx.Hashes() | |||
// Sort the inout and prepare helper structures we'll need for encoding | |||
hashToIndex, fanout, extraEdgesCount := e.prepare(idx, hashes) | |||
chunkSignatures := [][]byte{oidFanoutSignature, oidLookupSignature, commitDataSignature} | |||
chunkSizes := []uint64{4 * 256, uint64(len(hashes)) * 20, uint64(len(hashes)) * 36} | |||
if extraEdgesCount > 0 { | |||
chunkSignatures = append(chunkSignatures, extraEdgeListSignature) | |||
chunkSizes = append(chunkSizes, uint64(extraEdgesCount)*4) | |||
} | |||
if err = e.encodeFileHeader(len(chunkSignatures)); err != nil { | |||
return err | |||
} | |||
if err = e.encodeChunkHeaders(chunkSignatures, chunkSizes); err != nil { | |||
return err | |||
} | |||
if err = e.encodeFanout(fanout); err != nil { | |||
return err | |||
} | |||
if err = e.encodeOidLookup(hashes); err != nil { | |||
return err | |||
} | |||
if extraEdges, err := e.encodeCommitData(hashes, hashToIndex, idx); err == nil { | |||
if err = e.encodeExtraEdges(extraEdges); err != nil { | |||
return err | |||
} | |||
} | |||
if err != nil { | |||
return err | |||
} | |||
return e.encodeChecksum() | |||
} | |||
func (e *Encoder) prepare(idx Index, hashes []plumbing.Hash) (hashToIndex map[plumbing.Hash]uint32, fanout []uint32, extraEdgesCount uint32) { | |||
// Sort the hashes and build our index | |||
plumbing.HashesSort(hashes) | |||
hashToIndex = make(map[plumbing.Hash]uint32) | |||
fanout = make([]uint32, 256) | |||
for i, hash := range hashes { | |||
hashToIndex[hash] = uint32(i) | |||
fanout[hash[0]]++ | |||
} | |||
// Convert the fanout to cumulative values | |||
for i := 1; i <= 0xff; i++ { | |||
fanout[i] += fanout[i-1] | |||
} | |||
// Find out if we will need extra edge table | |||
for i := 0; i < len(hashes); i++ { | |||
v, _ := idx.GetCommitDataByIndex(i) | |||
if len(v.ParentHashes) > 2 { | |||
extraEdgesCount += uint32(len(v.ParentHashes) - 1) | |||
break | |||
} | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeFileHeader(chunkCount int) (err error) { | |||
if _, err = e.Write(commitFileSignature); err == nil { | |||
_, err = e.Write([]byte{1, 1, byte(chunkCount), 0}) | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeChunkHeaders(chunkSignatures [][]byte, chunkSizes []uint64) (err error) { | |||
// 8 bytes of file header, 12 bytes for each chunk header and 12 byte for terminator | |||
offset := uint64(8 + len(chunkSignatures)*12 + 12) | |||
for i, signature := range chunkSignatures { | |||
if _, err = e.Write(signature); err == nil { | |||
err = binary.WriteUint64(e, offset) | |||
} | |||
if err != nil { | |||
return | |||
} | |||
offset += chunkSizes[i] | |||
} | |||
if _, err = e.Write(lastSignature); err == nil { | |||
err = binary.WriteUint64(e, offset) | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeFanout(fanout []uint32) (err error) { | |||
for i := 0; i <= 0xff; i++ { | |||
if err = binary.WriteUint32(e, fanout[i]); err != nil { | |||
return | |||
} | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeOidLookup(hashes []plumbing.Hash) (err error) { | |||
for _, hash := range hashes { | |||
if _, err = e.Write(hash[:]); err != nil { | |||
return err | |||
} | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeCommitData(hashes []plumbing.Hash, hashToIndex map[plumbing.Hash]uint32, idx Index) (extraEdges []uint32, err error) { | |||
for _, hash := range hashes { | |||
origIndex, _ := idx.GetIndexByHash(hash) | |||
commitData, _ := idx.GetCommitDataByIndex(origIndex) | |||
if _, err = e.Write(commitData.TreeHash[:]); err != nil { | |||
return | |||
} | |||
var parent1, parent2 uint32 | |||
if len(commitData.ParentHashes) == 0 { | |||
parent1 = parentNone | |||
parent2 = parentNone | |||
} else if len(commitData.ParentHashes) == 1 { | |||
parent1 = hashToIndex[commitData.ParentHashes[0]] | |||
parent2 = parentNone | |||
} else if len(commitData.ParentHashes) == 2 { | |||
parent1 = hashToIndex[commitData.ParentHashes[0]] | |||
parent2 = hashToIndex[commitData.ParentHashes[1]] | |||
} else if len(commitData.ParentHashes) > 2 { | |||
parent1 = hashToIndex[commitData.ParentHashes[0]] | |||
parent2 = uint32(len(extraEdges)) | parentOctopusUsed | |||
for _, parentHash := range commitData.ParentHashes[1:] { | |||
extraEdges = append(extraEdges, hashToIndex[parentHash]) | |||
} | |||
extraEdges[len(extraEdges)-1] |= parentLast | |||
} | |||
if err = binary.WriteUint32(e, parent1); err == nil { | |||
err = binary.WriteUint32(e, parent2) | |||
} | |||
if err != nil { | |||
return | |||
} | |||
unixTime := uint64(commitData.When.Unix()) | |||
unixTime |= uint64(commitData.Generation) << 34 | |||
if err = binary.WriteUint64(e, unixTime); err != nil { | |||
return | |||
} | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeExtraEdges(extraEdges []uint32) (err error) { | |||
for _, parent := range extraEdges { | |||
if err = binary.WriteUint32(e, parent); err != nil { | |||
return | |||
} | |||
} | |||
return | |||
} | |||
func (e *Encoder) encodeChecksum() error { | |||
_, err := e.Write(e.hash.Sum(nil)[:20]) | |||
return err | |||
} |
@@ -0,0 +1,259 @@ | |||
package commitgraph | |||
import ( | |||
"bytes" | |||
encbin "encoding/binary" | |||
"errors" | |||
"io" | |||
"time" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/utils/binary" | |||
) | |||
var (
	// ErrUnsupportedVersion is returned by OpenFileIndex when the commit graph
	// file version is not supported.
	ErrUnsupportedVersion = errors.New("Unsupported version")
	// ErrUnsupportedHash is returned by OpenFileIndex when the commit graph
	// hash function is not supported. Currently only SHA-1 is defined and
	// supported.
	ErrUnsupportedHash = errors.New("Unsupported hash algorithm")
	// ErrMalformedCommitGraphFile is returned by OpenFileIndex when the commit
	// graph file is corrupted.
	ErrMalformedCommitGraphFile = errors.New("Malformed commit graph file")
)

// Four-byte identifiers: the signature at the start of the file, the
// signatures of the individual chunks and the all-zero signature that
// terminates the chunk table.
var (
	commitFileSignature    = []byte("CGPH")
	oidFanoutSignature     = []byte("OIDF")
	oidLookupSignature     = []byte("OIDL")
	commitDataSignature    = []byte("CDAT")
	extraEdgeListSignature = []byte("EDGE")
	lastSignature          = []byte{0, 0, 0, 0}
)

// Special values and bit masks used in the parent slots of the commit data
// and in the extra edge list.
var (
	// parentNone marks a parent slot that holds no parent.
	parentNone uint32 = 0x70000000
	// parentOctopusUsed is set on the second parent slot when it refers to
	// the extra edge list instead of a commit.
	parentOctopusUsed uint32 = 0x80000000
	// parentOctopusMask extracts the position from a value carrying a flag
	// in its most significant bit.
	parentOctopusMask uint32 = 0x7fffffff
	// parentLast is set on the final extra edge list entry of a commit.
	parentLast uint32 = 0x80000000
)
// fileIndex is an Index backed by a serialized commit graph file that is read
// on demand through reader.
type fileIndex struct {
	// reader gives random access to the bytes of the commit graph file.
	reader io.ReaderAt
	// fanout caches the OID fanout table; its last entry is the number of
	// commits in the file.
	fanout [256]int
	// File offsets of the chunks as listed in the chunk table. A zero offset
	// means the chunk was not listed.
	oidFanoutOffset     int64
	oidLookupOffset     int64
	commitDataOffset    int64
	extraEdgeListOffset int64
}
// OpenFileIndex opens a serialized commit graph file in the format described at | |||
// https://github.com/git/git/blob/master/Documentation/technical/commit-graph-format.txt | |||
func OpenFileIndex(reader io.ReaderAt) (Index, error) { | |||
fi := &fileIndex{reader: reader} | |||
if err := fi.verifyFileHeader(); err != nil { | |||
return nil, err | |||
} | |||
if err := fi.readChunkHeaders(); err != nil { | |||
return nil, err | |||
} | |||
if err := fi.readFanout(); err != nil { | |||
return nil, err | |||
} | |||
return fi, nil | |||
} | |||
func (fi *fileIndex) verifyFileHeader() error { | |||
// Verify file signature | |||
var signature = make([]byte, 4) | |||
if _, err := fi.reader.ReadAt(signature, 0); err != nil { | |||
return err | |||
} | |||
if !bytes.Equal(signature, commitFileSignature) { | |||
return ErrMalformedCommitGraphFile | |||
} | |||
// Read and verify the file header | |||
var header = make([]byte, 4) | |||
if _, err := fi.reader.ReadAt(header, 4); err != nil { | |||
return err | |||
} | |||
if header[0] != 1 { | |||
return ErrUnsupportedVersion | |||
} | |||
if header[1] != 1 { | |||
return ErrUnsupportedHash | |||
} | |||
return nil | |||
} | |||
func (fi *fileIndex) readChunkHeaders() error { | |||
var chunkID = make([]byte, 4) | |||
for i := 0; ; i++ { | |||
chunkHeader := io.NewSectionReader(fi.reader, 8+(int64(i)*12), 12) | |||
if _, err := io.ReadAtLeast(chunkHeader, chunkID, 4); err != nil { | |||
return err | |||
} | |||
chunkOffset, err := binary.ReadUint64(chunkHeader) | |||
if err != nil { | |||
return err | |||
} | |||
if bytes.Equal(chunkID, oidFanoutSignature) { | |||
fi.oidFanoutOffset = int64(chunkOffset) | |||
} else if bytes.Equal(chunkID, oidLookupSignature) { | |||
fi.oidLookupOffset = int64(chunkOffset) | |||
} else if bytes.Equal(chunkID, commitDataSignature) { | |||
fi.commitDataOffset = int64(chunkOffset) | |||
} else if bytes.Equal(chunkID, extraEdgeListSignature) { | |||
fi.extraEdgeListOffset = int64(chunkOffset) | |||
} else if bytes.Equal(chunkID, lastSignature) { | |||
break | |||
} | |||
} | |||
if fi.oidFanoutOffset <= 0 || fi.oidLookupOffset <= 0 || fi.commitDataOffset <= 0 { | |||
return ErrMalformedCommitGraphFile | |||
} | |||
return nil | |||
} | |||
func (fi *fileIndex) readFanout() error { | |||
fanoutReader := io.NewSectionReader(fi.reader, fi.oidFanoutOffset, 256*4) | |||
for i := 0; i < 256; i++ { | |||
fanoutValue, err := binary.ReadUint32(fanoutReader) | |||
if err != nil { | |||
return err | |||
} | |||
if fanoutValue > 0x7fffffff { | |||
return ErrMalformedCommitGraphFile | |||
} | |||
fi.fanout[i] = int(fanoutValue) | |||
} | |||
return nil | |||
} | |||
func (fi *fileIndex) GetIndexByHash(h plumbing.Hash) (int, error) { | |||
var oid plumbing.Hash | |||
// Find the hash in the oid lookup table | |||
var low int | |||
if h[0] == 0 { | |||
low = 0 | |||
} else { | |||
low = fi.fanout[h[0]-1] | |||
} | |||
high := fi.fanout[h[0]] | |||
for low < high { | |||
mid := (low + high) >> 1 | |||
offset := fi.oidLookupOffset + int64(mid)*20 | |||
if _, err := fi.reader.ReadAt(oid[:], offset); err != nil { | |||
return 0, err | |||
} | |||
cmp := bytes.Compare(h[:], oid[:]) | |||
if cmp < 0 { | |||
high = mid | |||
} else if cmp == 0 { | |||
return mid, nil | |||
} else { | |||
low = mid + 1 | |||
} | |||
} | |||
return 0, plumbing.ErrObjectNotFound | |||
} | |||
func (fi *fileIndex) GetCommitDataByIndex(idx int) (*CommitData, error) { | |||
if idx >= fi.fanout[0xff] { | |||
return nil, plumbing.ErrObjectNotFound | |||
} | |||
offset := fi.commitDataOffset + int64(idx)*36 | |||
commitDataReader := io.NewSectionReader(fi.reader, offset, 36) | |||
treeHash, err := binary.ReadHash(commitDataReader) | |||
if err != nil { | |||
return nil, err | |||
} | |||
parent1, err := binary.ReadUint32(commitDataReader) | |||
if err != nil { | |||
return nil, err | |||
} | |||
parent2, err := binary.ReadUint32(commitDataReader) | |||
if err != nil { | |||
return nil, err | |||
} | |||
genAndTime, err := binary.ReadUint64(commitDataReader) | |||
if err != nil { | |||
return nil, err | |||
} | |||
var parentIndexes []int | |||
if parent2&parentOctopusUsed == parentOctopusUsed { | |||
// Octopus merge | |||
parentIndexes = []int{int(parent1 & parentOctopusMask)} | |||
offset := fi.extraEdgeListOffset + 4*int64(parent2&parentOctopusMask) | |||
buf := make([]byte, 4) | |||
for { | |||
_, err := fi.reader.ReadAt(buf, offset) | |||
if err != nil { | |||
return nil, err | |||
} | |||
parent := encbin.BigEndian.Uint32(buf) | |||
offset += 4 | |||
parentIndexes = append(parentIndexes, int(parent&parentOctopusMask)) | |||
if parent&parentLast == parentLast { | |||
break | |||
} | |||
} | |||
} else if parent2 != parentNone { | |||
parentIndexes = []int{int(parent1 & parentOctopusMask), int(parent2 & parentOctopusMask)} | |||
} else if parent1 != parentNone { | |||
parentIndexes = []int{int(parent1 & parentOctopusMask)} | |||
} | |||
parentHashes, err := fi.getHashesFromIndexes(parentIndexes) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &CommitData{ | |||
TreeHash: treeHash, | |||
ParentIndexes: parentIndexes, | |||
ParentHashes: parentHashes, | |||
Generation: int(genAndTime >> 34), | |||
When: time.Unix(int64(genAndTime&0x3FFFFFFFF), 0), | |||
}, nil | |||
} | |||
func (fi *fileIndex) getHashesFromIndexes(indexes []int) ([]plumbing.Hash, error) { | |||
hashes := make([]plumbing.Hash, len(indexes)) | |||
for i, idx := range indexes { | |||
if idx >= fi.fanout[0xff] { | |||
return nil, ErrMalformedCommitGraphFile | |||
} | |||
offset := fi.oidLookupOffset + int64(idx)*20 | |||
if _, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil { | |||
return nil, err | |||
} | |||
} | |||
return hashes, nil | |||
} | |||
// Hashes returns all the hashes that are available in the index | |||
func (fi *fileIndex) Hashes() []plumbing.Hash { | |||
hashes := make([]plumbing.Hash, fi.fanout[0xff]) | |||
for i := 0; i < int(fi.fanout[0xff]); i++ { | |||
offset := fi.oidLookupOffset + int64(i)*20 | |||
if n, err := fi.reader.ReadAt(hashes[i][:], offset); err != nil || n < 20 { | |||
return nil | |||
} | |||
} | |||
return hashes | |||
} |
@@ -0,0 +1,72 @@ | |||
package commitgraph | |||
import ( | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
) | |||
// MemoryIndex provides a way to build the commit-graph in memory | |||
// for later encoding to file. | |||
type MemoryIndex struct { | |||
commitData []*CommitData | |||
indexMap map[plumbing.Hash]int | |||
} | |||
// NewMemoryIndex creates in-memory commit graph representation | |||
func NewMemoryIndex() *MemoryIndex { | |||
return &MemoryIndex{ | |||
indexMap: make(map[plumbing.Hash]int), | |||
} | |||
} | |||
// GetIndexByHash gets the index in the commit graph from commit hash, if available | |||
func (mi *MemoryIndex) GetIndexByHash(h plumbing.Hash) (int, error) { | |||
i, ok := mi.indexMap[h] | |||
if ok { | |||
return i, nil | |||
} | |||
return 0, plumbing.ErrObjectNotFound | |||
} | |||
// GetCommitDataByIndex gets the commit node from the commit graph using index | |||
// obtained from child node, if available | |||
func (mi *MemoryIndex) GetCommitDataByIndex(i int) (*CommitData, error) { | |||
if int(i) >= len(mi.commitData) { | |||
return nil, plumbing.ErrObjectNotFound | |||
} | |||
commitData := mi.commitData[i] | |||
// Map parent hashes to parent indexes | |||
if commitData.ParentIndexes == nil { | |||
parentIndexes := make([]int, len(commitData.ParentHashes)) | |||
for i, parentHash := range commitData.ParentHashes { | |||
var err error | |||
if parentIndexes[i], err = mi.GetIndexByHash(parentHash); err != nil { | |||
return nil, err | |||
} | |||
} | |||
commitData.ParentIndexes = parentIndexes | |||
} | |||
return commitData, nil | |||
} | |||
// Hashes returns all the hashes that are available in the index | |||
func (mi *MemoryIndex) Hashes() []plumbing.Hash { | |||
hashes := make([]plumbing.Hash, 0, len(mi.indexMap)) | |||
for k := range mi.indexMap { | |||
hashes = append(hashes, k) | |||
} | |||
return hashes | |||
} | |||
// Add adds new node to the memory index | |||
func (mi *MemoryIndex) Add(hash plumbing.Hash, commitData *CommitData) { | |||
// The parent indexes are calculated lazily in GetNodeByIndex | |||
// which allows adding nodes out of order as long as all parents | |||
// are eventually resolved | |||
commitData.ParentIndexes = nil | |||
mi.indexMap[hash] = len(mi.commitData) | |||
mi.commitData = append(mi.commitData, commitData) | |||
} |
@@ -0,0 +1,98 @@ | |||
package commitgraph | |||
import ( | |||
"io" | |||
"time" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/plumbing/object" | |||
"gopkg.in/src-d/go-git.v4/plumbing/storer" | |||
) | |||
// CommitNode is generic interface encapsulating a lightweight commit object retrieved | |||
// from CommitNodeIndex | |||
type CommitNode interface { | |||
// ID returns the Commit object id referenced by the commit graph node. | |||
ID() plumbing.Hash | |||
// Tree returns the Tree referenced by the commit graph node. | |||
Tree() (*object.Tree, error) | |||
// CommitTime returns the Commiter.When time of the Commit referenced by the commit graph node. | |||
CommitTime() time.Time | |||
// NumParents returns the number of parents in a commit. | |||
NumParents() int | |||
// ParentNodes return a CommitNodeIter for parents of specified node. | |||
ParentNodes() CommitNodeIter | |||
// ParentNode returns the ith parent of a commit. | |||
ParentNode(i int) (CommitNode, error) | |||
// ParentHashes returns hashes of the parent commits for a specified node | |||
ParentHashes() []plumbing.Hash | |||
// Generation returns the generation of the commit for reachability analysis. | |||
// Objects with newer generation are not reachable from objects of older generation. | |||
Generation() uint64 | |||
// Commit returns the full commit object from the node | |||
Commit() (*object.Commit, error) | |||
} | |||
// CommitNodeIndex is generic interface encapsulating an index of CommitNode objects | |||
type CommitNodeIndex interface { | |||
// Get returns a commit node from a commit hash | |||
Get(hash plumbing.Hash) (CommitNode, error) | |||
} | |||
// CommitNodeIter is a generic closable interface for iterating over commit nodes. | |||
type CommitNodeIter interface { | |||
Next() (CommitNode, error) | |||
ForEach(func(CommitNode) error) error | |||
Close() | |||
} | |||
// parentCommitNodeIter provides an iterator for parent commits from associated CommitNodeIndex. | |||
type parentCommitNodeIter struct { | |||
node CommitNode | |||
i int | |||
} | |||
func newParentgraphCommitNodeIter(node CommitNode) CommitNodeIter { | |||
return &parentCommitNodeIter{node, 0} | |||
} | |||
// Next moves the iterator to the next commit and returns a pointer to it. If | |||
// there are no more commits, it returns io.EOF. | |||
func (iter *parentCommitNodeIter) Next() (CommitNode, error) { | |||
obj, err := iter.node.ParentNode(iter.i) | |||
if err == object.ErrParentNotFound { | |||
return nil, io.EOF | |||
} | |||
if err == nil { | |||
iter.i++ | |||
} | |||
return obj, err | |||
} | |||
// ForEach call the cb function for each commit contained on this iter until | |||
// an error appends or the end of the iter is reached. If ErrStop is sent | |||
// the iteration is stopped but no error is returned. The iterator is closed. | |||
func (iter *parentCommitNodeIter) ForEach(cb func(CommitNode) error) error { | |||
for { | |||
obj, err := iter.Next() | |||
if err != nil { | |||
if err == io.EOF { | |||
return nil | |||
} | |||
return err | |||
} | |||
if err := cb(obj); err != nil { | |||
if err == storer.ErrStop { | |||
return nil | |||
} | |||
return err | |||
} | |||
} | |||
} | |||
func (iter *parentCommitNodeIter) Close() { | |||
} |
@@ -0,0 +1,131 @@ | |||
package commitgraph | |||
import ( | |||
"fmt" | |||
"time" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph" | |||
"gopkg.in/src-d/go-git.v4/plumbing/object" | |||
"gopkg.in/src-d/go-git.v4/plumbing/storer" | |||
) | |||
// graphCommitNode is a reduced representation of Commit as presented in the commit | |||
// graph file (commitgraph.Node). It is merely useful as an optimization for walking | |||
// the commit graphs. | |||
// | |||
// graphCommitNode implements the CommitNode interface. | |||
type graphCommitNode struct { | |||
// Hash for the Commit object | |||
hash plumbing.Hash | |||
// Index of the node in the commit graph file | |||
index int | |||
commitData *commitgraph.CommitData | |||
gci *graphCommitNodeIndex | |||
} | |||
// graphCommitNodeIndex is an index that can load CommitNode objects from both the commit | |||
// graph files and the object store. | |||
// | |||
// graphCommitNodeIndex implements the CommitNodeIndex interface | |||
type graphCommitNodeIndex struct { | |||
commitGraph commitgraph.Index | |||
s storer.EncodedObjectStorer | |||
} | |||
// NewGraphCommitNodeIndex returns CommitNodeIndex implementation that uses commit-graph | |||
// files as backing storage and falls back to object storage when necessary | |||
func NewGraphCommitNodeIndex(commitGraph commitgraph.Index, s storer.EncodedObjectStorer) CommitNodeIndex { | |||
return &graphCommitNodeIndex{commitGraph, s} | |||
} | |||
func (gci *graphCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { | |||
// Check the commit graph first | |||
parentIndex, err := gci.commitGraph.GetIndexByHash(hash) | |||
if err == nil { | |||
parent, err := gci.commitGraph.GetCommitDataByIndex(parentIndex) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &graphCommitNode{ | |||
hash: hash, | |||
index: parentIndex, | |||
commitData: parent, | |||
gci: gci, | |||
}, nil | |||
} | |||
// Fallback to loading full commit object | |||
commit, err := object.GetCommit(gci.s, hash) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &objectCommitNode{ | |||
nodeIndex: gci, | |||
commit: commit, | |||
}, nil | |||
} | |||
func (c *graphCommitNode) ID() plumbing.Hash { | |||
return c.hash | |||
} | |||
func (c *graphCommitNode) Tree() (*object.Tree, error) { | |||
return object.GetTree(c.gci.s, c.commitData.TreeHash) | |||
} | |||
func (c *graphCommitNode) CommitTime() time.Time { | |||
return c.commitData.When | |||
} | |||
func (c *graphCommitNode) NumParents() int { | |||
return len(c.commitData.ParentIndexes) | |||
} | |||
func (c *graphCommitNode) ParentNodes() CommitNodeIter { | |||
return newParentgraphCommitNodeIter(c) | |||
} | |||
func (c *graphCommitNode) ParentNode(i int) (CommitNode, error) { | |||
if i < 0 || i >= len(c.commitData.ParentIndexes) { | |||
return nil, object.ErrParentNotFound | |||
} | |||
parent, err := c.gci.commitGraph.GetCommitDataByIndex(c.commitData.ParentIndexes[i]) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &graphCommitNode{ | |||
hash: c.commitData.ParentHashes[i], | |||
index: c.commitData.ParentIndexes[i], | |||
commitData: parent, | |||
gci: c.gci, | |||
}, nil | |||
} | |||
func (c *graphCommitNode) ParentHashes() []plumbing.Hash { | |||
return c.commitData.ParentHashes | |||
} | |||
func (c *graphCommitNode) Generation() uint64 { | |||
// If the commit-graph file was generated with older Git version that | |||
// set the generation to zero for every commit the generation assumption | |||
// is still valid. It is just less useful. | |||
return uint64(c.commitData.Generation) | |||
} | |||
func (c *graphCommitNode) Commit() (*object.Commit, error) { | |||
return object.GetCommit(c.gci.s, c.hash) | |||
} | |||
func (c *graphCommitNode) String() string { | |||
return fmt.Sprintf( | |||
"%s %s\nDate: %s", | |||
plumbing.CommitObject, c.ID(), | |||
c.CommitTime().Format(object.DateFormat), | |||
) | |||
} |
@@ -0,0 +1,90 @@ | |||
package commitgraph | |||
import ( | |||
"math" | |||
"time" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/plumbing/object" | |||
"gopkg.in/src-d/go-git.v4/plumbing/storer" | |||
) | |||
// objectCommitNode is a representation of Commit as presented in the GIT object format. | |||
// | |||
// objectCommitNode implements the CommitNode interface. | |||
type objectCommitNode struct { | |||
nodeIndex CommitNodeIndex | |||
commit *object.Commit | |||
} | |||
// NewObjectCommitNodeIndex returns CommitNodeIndex implementation that uses | |||
// only object storage to load the nodes | |||
func NewObjectCommitNodeIndex(s storer.EncodedObjectStorer) CommitNodeIndex { | |||
return &objectCommitNodeIndex{s} | |||
} | |||
func (oci *objectCommitNodeIndex) Get(hash plumbing.Hash) (CommitNode, error) { | |||
commit, err := object.GetCommit(oci.s, hash) | |||
if err != nil { | |||
return nil, err | |||
} | |||
return &objectCommitNode{ | |||
nodeIndex: oci, | |||
commit: commit, | |||
}, nil | |||
} | |||
// objectCommitNodeIndex is an index that can load CommitNode objects only from the | |||
// object store. | |||
// | |||
// objectCommitNodeIndex implements the CommitNodeIndex interface | |||
type objectCommitNodeIndex struct { | |||
s storer.EncodedObjectStorer | |||
} | |||
func (c *objectCommitNode) CommitTime() time.Time { | |||
return c.commit.Committer.When | |||
} | |||
func (c *objectCommitNode) ID() plumbing.Hash { | |||
return c.commit.ID() | |||
} | |||
func (c *objectCommitNode) Tree() (*object.Tree, error) { | |||
return c.commit.Tree() | |||
} | |||
func (c *objectCommitNode) NumParents() int { | |||
return c.commit.NumParents() | |||
} | |||
func (c *objectCommitNode) ParentNodes() CommitNodeIter { | |||
return newParentgraphCommitNodeIter(c) | |||
} | |||
func (c *objectCommitNode) ParentNode(i int) (CommitNode, error) { | |||
if i < 0 || i >= len(c.commit.ParentHashes) { | |||
return nil, object.ErrParentNotFound | |||
} | |||
// Note: It's necessary to go through CommitNodeIndex here to ensure | |||
// that if the commit-graph file covers only part of the history we | |||
// start using it when that part is reached. | |||
return c.nodeIndex.Get(c.commit.ParentHashes[i]) | |||
} | |||
func (c *objectCommitNode) ParentHashes() []plumbing.Hash { | |||
return c.commit.ParentHashes | |||
} | |||
func (c *objectCommitNode) Generation() uint64 { | |||
// Commit nodes representing objects outside of the commit graph can never | |||
// be reached by objects from the commit-graph thus we return the highest | |||
// possible value. | |||
return math.MaxUint64 | |||
} | |||
func (c *objectCommitNode) Commit() (*object.Commit, error) { | |||
return c.commit, nil | |||
} |
@@ -0,0 +1,105 @@ | |||
package commitgraph | |||
import ( | |||
"io" | |||
"github.com/emirpasic/gods/trees/binaryheap" | |||
"gopkg.in/src-d/go-git.v4/plumbing" | |||
"gopkg.in/src-d/go-git.v4/plumbing/storer" | |||
) | |||
type commitNodeIteratorByCTime struct { | |||
heap *binaryheap.Heap | |||
seenExternal map[plumbing.Hash]bool | |||
seen map[plumbing.Hash]bool | |||
} | |||
// NewCommitNodeIterCTime returns a CommitNodeIter that walks the commit history, | |||
// starting at the given commit and visiting its parents while preserving Committer Time order. | |||
// this appears to be the closest order to `git log` | |||
// The given callback will be called for each visited commit. Each commit will | |||
// be visited only once. If the callback returns an error, walking will stop | |||
// and will return the error. Other errors might be returned if the history | |||
// cannot be traversed (e.g. missing objects). Ignore allows to skip some | |||
// commits from being iterated. | |||
func NewCommitNodeIterCTime( | |||
c CommitNode, | |||
seenExternal map[plumbing.Hash]bool, | |||
ignore []plumbing.Hash, | |||
) CommitNodeIter { | |||
seen := make(map[plumbing.Hash]bool) | |||
for _, h := range ignore { | |||
seen[h] = true | |||
} | |||
heap := binaryheap.NewWith(func(a, b interface{}) int { | |||
if a.(CommitNode).CommitTime().Before(b.(CommitNode).CommitTime()) { | |||
return 1 | |||
} | |||
return -1 | |||
}) | |||
heap.Push(c) | |||
return &commitNodeIteratorByCTime{ | |||
heap: heap, | |||
seenExternal: seenExternal, | |||
seen: seen, | |||
} | |||
} | |||
func (w *commitNodeIteratorByCTime) Next() (CommitNode, error) { | |||
var c CommitNode | |||
for { | |||
cIn, ok := w.heap.Pop() | |||
if !ok { | |||
return nil, io.EOF | |||
} | |||
c = cIn.(CommitNode) | |||
cID := c.ID() | |||
if w.seen[cID] || w.seenExternal[cID] { | |||
continue | |||
} | |||
w.seen[cID] = true | |||
for i, h := range c.ParentHashes() { | |||
if w.seen[h] || w.seenExternal[h] { | |||
continue | |||
} | |||
pc, err := c.ParentNode(i) | |||
if err != nil { | |||
return nil, err | |||
} | |||
w.heap.Push(pc) | |||
} | |||
return c, nil | |||
} | |||
} | |||
func (w *commitNodeIteratorByCTime) ForEach(cb func(CommitNode) error) error { | |||
for { | |||
c, err := w.Next() | |||
if err == io.EOF { | |||
break | |||
} | |||
if err != nil { | |||
return err | |||
} | |||
err = cb(c) | |||
if err == storer.ErrStop { | |||
break | |||
} | |||
if err != nil { | |||
return err | |||
} | |||
} | |||
return nil | |||
} | |||
func (w *commitNodeIteratorByCTime) Close() {} |
@@ -0,0 +1,7 @@ | |||
// Package commitgraph provides an interface for efficient traversal over Git | |||
// commit graph either through the regular object storage, or optionally with | |||
// the index stored in commit-graph file (Git 2.18+). | |||
// | |||
// The API and functionality of this package are considered EXPERIMENTAL and are
// not considered stable or production ready.
package commitgraph |
@@ -432,7 +432,9 @@ gopkg.in/src-d/go-git.v4/config | |||
gopkg.in/src-d/go-git.v4/plumbing | |||
gopkg.in/src-d/go-git.v4/plumbing/cache | |||
gopkg.in/src-d/go-git.v4/plumbing/filemode | |||
gopkg.in/src-d/go-git.v4/plumbing/format/commitgraph | |||
gopkg.in/src-d/go-git.v4/plumbing/object | |||
gopkg.in/src-d/go-git.v4/plumbing/object/commitgraph | |||
gopkg.in/src-d/go-git.v4/storage/filesystem | |||
gopkg.in/src-d/go-git.v4/internal/revision | |||
gopkg.in/src-d/go-git.v4/plumbing/format/gitignore | |||
@@ -455,8 +457,8 @@ gopkg.in/src-d/go-git.v4/utils/merkletrie/index | |||
gopkg.in/src-d/go-git.v4/utils/merkletrie/noder | |||
gopkg.in/src-d/go-git.v4/internal/url | |||
gopkg.in/src-d/go-git.v4/plumbing/format/config | |||
gopkg.in/src-d/go-git.v4/plumbing/format/diff | |||
gopkg.in/src-d/go-git.v4/utils/binary | |||
gopkg.in/src-d/go-git.v4/plumbing/format/diff | |||
gopkg.in/src-d/go-git.v4/plumbing/format/idxfile | |||
gopkg.in/src-d/go-git.v4/plumbing/format/objfile | |||
gopkg.in/src-d/go-git.v4/storage/filesystem/dotgit |