// Copyright 2021 The Gitea Authors. All rights reserved.
// SPDX-License-Identifier: MIT
-
- package git
-
- import (
- "bufio"
- "bytes"
- "context"
- "errors"
- "io"
- "path"
- "sort"
- "strings"
-
- "code.gitea.io/gitea/modules/container"
-
- "github.com/djherbis/buffer"
- "github.com/djherbis/nio/v3"
- )
-
- // LogNameStatusRepo opens git log --raw in the provided repo and returns a stdin pipe, a stdout reader and cancel function
- func LogNameStatusRepo(ctx context.Context, repository, head, treepath string, paths ...string) (*bufio.Reader, func()) {
- // We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
- // so let's create a batch stdin and stdout
- stdoutReader, stdoutWriter := nio.Pipe(buffer.New(32 * 1024))
-
- // Lets also create a context so that we can absolutely ensure that the command should die when we're done
- ctx, ctxCancel := context.WithCancel(ctx)
-
- cancel := func() {
- ctxCancel()
- _ = stdoutReader.Close()
- _ = stdoutWriter.Close()
- }
-
- cmd := NewCommand(ctx)
- cmd.AddArguments("log", "--name-status", "-c", "--format=commit%x00%H %P%x00", "--parents", "--no-renames", "-t", "-z").AddDynamicArguments(head)
-
- var files []string
- if len(paths) < 70 {
- if treepath != "" {
- files = append(files, treepath)
- for _, pth := range paths {
- if pth != "" {
- files = append(files, path.Join(treepath, pth))
- }
- }
- } else {
- for _, pth := range paths {
- if pth != "" {
- files = append(files, pth)
- }
- }
- }
- } else if treepath != "" {
- files = append(files, treepath)
- }
- // Use the :(literal) pathspec magic to handle edge cases with files named like ":file.txt" or "*.jpg"
- for i, file := range files {
- files[i] = ":(literal)" + file
- }
- cmd.AddDashesAndList(files...)
-
- go func() {
- stderr := strings.Builder{}
- err := cmd.Run(&RunOpts{
- Dir: repository,
- Stdout: stdoutWriter,
- Stderr: &stderr,
- })
- if err != nil {
- _ = stdoutWriter.CloseWithError(ConcatenateError(err, (&stderr).String()))
- return
- }
-
- _ = stdoutWriter.Close()
- }()
-
- // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
- bufReader := bufio.NewReaderSize(stdoutReader, 32*1024)
-
- return bufReader, cancel
- }
-
// LogNameStatusRepoParser parses the git log --name-status output produced by LogNameStatusRepo.
type LogNameStatusRepoParser struct {
	// treepath and paths are the values the parser was created with.
	treepath string
	paths    []string

	// next is the most recently read field that has not been consumed yet.
	next []byte

	// buffull records that the last read filled the reader's buffer before a NUL was found.
	buffull bool

	// rd is the buffered reader over the command's stdout.
	rd *bufio.Reader

	// cancel is the cancel function returned by LogNameStatusRepo; Close calls it.
	cancel func()
}
-
- // NewLogNameStatusRepoParser returns a new parser for a git log raw output
- func NewLogNameStatusRepoParser(ctx context.Context, repository, head, treepath string, paths ...string) *LogNameStatusRepoParser {
- rd, cancel := LogNameStatusRepo(ctx, repository, head, treepath, paths...)
- return &LogNameStatusRepoParser{
- treepath: treepath,
- paths: paths,
- rd: rd,
- cancel: cancel,
- }
- }
-
// LogNameStatusCommitData represents one commit parsed from the git log --name-status output.
type LogNameStatusCommitData struct {
	// CommitID is the first ID on the commit line.
	CommitID string

	// ParentIDs are the remaining IDs on the commit line, if any.
	ParentIDs []string

	// Paths is the changed slice handed to Next, set only when at least one
	// tracked path was touched by this commit; otherwise it is nil.
	Paths []bool
}
-
- // Next returns the next LogStatusCommitData
- func (g *LogNameStatusRepoParser) Next(treepath string, paths2ids map[string]int, changed []bool, maxpathlen int) (*LogNameStatusCommitData, error) {
- var err error
- if g.next == nil || len(g.next) == 0 {
- g.buffull = false
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err == io.EOF {
- return nil, nil
- } else {
- return nil, err
- }
- }
- }
-
- ret := LogNameStatusCommitData{}
- if bytes.Equal(g.next, []byte("commit\000")) {
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err == io.EOF {
- return nil, nil
- } else {
- return nil, err
- }
- }
- }
-
- // Our "line" must look like: <commitid> SP (<parent> SP) * NUL
- commitIDs := string(g.next)
- if g.buffull {
- more, err := g.rd.ReadString('\x00')
- if err != nil {
- return nil, err
- }
- commitIDs += more
- }
- commitIDs = commitIDs[:len(commitIDs)-1]
- splitIDs := strings.Split(commitIDs, " ")
- ret.CommitID = splitIDs[0]
- if len(splitIDs) > 1 {
- ret.ParentIDs = splitIDs[1:]
- }
-
- // now read the next "line"
- g.buffull = false
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err != io.EOF {
- return nil, err
- }
- }
-
- if err == io.EOF || !(g.next[0] == '\n' || g.next[0] == '\000') {
- return &ret, nil
- }
-
- // Ok we have some changes.
- // This line will look like: NL <fname> NUL
- //
- // Subsequent lines will not have the NL - so drop it here - g.bufffull must also be false at this point too.
- if g.next[0] == '\n' {
- g.next = g.next[1:]
- } else {
- g.buffull = false
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err != io.EOF {
- return nil, err
- }
- }
- if len(g.next) == 0 {
- return &ret, nil
- }
- if g.next[0] == '\x00' {
- g.buffull = false
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err != io.EOF {
- return nil, err
- }
- }
- }
- }
-
- fnameBuf := make([]byte, 4096)
-
- diffloop:
- for {
- if err == io.EOF || bytes.Equal(g.next, []byte("commit\000")) {
- return &ret, nil
- }
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err == io.EOF {
- return &ret, nil
- } else {
- return nil, err
- }
- }
- copy(fnameBuf, g.next)
- if len(fnameBuf) < len(g.next) {
- fnameBuf = append(fnameBuf, g.next[len(fnameBuf):]...)
- } else {
- fnameBuf = fnameBuf[:len(g.next)]
- }
- if err != nil {
- if err != bufio.ErrBufferFull {
- return nil, err
- }
- more, err := g.rd.ReadBytes('\x00')
- if err != nil {
- return nil, err
- }
- fnameBuf = append(fnameBuf, more...)
- }
-
- // read the next line
- g.buffull = false
- g.next, err = g.rd.ReadSlice('\x00')
- if err != nil {
- if err == bufio.ErrBufferFull {
- g.buffull = true
- } else if err != io.EOF {
- return nil, err
- }
- }
-
- if treepath != "" {
- if !bytes.HasPrefix(fnameBuf, []byte(treepath)) {
- fnameBuf = fnameBuf[:cap(fnameBuf)]
- continue diffloop
- }
- }
- fnameBuf = fnameBuf[len(treepath) : len(fnameBuf)-1]
- if len(fnameBuf) > maxpathlen {
- fnameBuf = fnameBuf[:cap(fnameBuf)]
- continue diffloop
- }
- if len(fnameBuf) > 0 {
- if len(treepath) > 0 {
- if fnameBuf[0] != '/' || bytes.IndexByte(fnameBuf[1:], '/') >= 0 {
- fnameBuf = fnameBuf[:cap(fnameBuf)]
- continue diffloop
- }
- fnameBuf = fnameBuf[1:]
- } else if bytes.IndexByte(fnameBuf, '/') >= 0 {
- fnameBuf = fnameBuf[:cap(fnameBuf)]
- continue diffloop
- }
- }
-
- idx, ok := paths2ids[string(fnameBuf)]
- if !ok {
- fnameBuf = fnameBuf[:cap(fnameBuf)]
- continue diffloop
- }
- if ret.Paths == nil {
- ret.Paths = changed
- }
- changed[idx] = true
- }
- }
-
- // Close closes the parser
- func (g *LogNameStatusRepoParser) Close() {
- g.cancel()
- }
-
- // WalkGitLog walks the git log --name-status for the head commit in the provided treepath and files
- func WalkGitLog(ctx context.Context, repo *Repository, head *Commit, treepath string, paths ...string) (map[string]string, error) {
- headRef := head.ID.String()
-
- tree, err := head.SubTree(treepath)
- if err != nil {
- return nil, err
- }
-
- entries, err := tree.ListEntries()
- if err != nil {
- return nil, err
- }
-
- if len(paths) == 0 {
- paths = make([]string, 0, len(entries)+1)
- paths = append(paths, "")
- for _, entry := range entries {
- paths = append(paths, entry.Name())
- }
- } else {
- sort.Strings(paths)
- if paths[0] != "" {
- paths = append([]string{""}, paths...)
- }
- // remove duplicates
- for i := len(paths) - 1; i > 0; i-- {
- if paths[i] == paths[i-1] {
- paths = append(paths[:i-1], paths[i:]...)
- }
- }
- }
-
- path2idx := map[string]int{}
- maxpathlen := len(treepath)
-
- for i := range paths {
- path2idx[paths[i]] = i
- pthlen := len(paths[i]) + len(treepath) + 1
- if pthlen > maxpathlen {
- maxpathlen = pthlen
- }
- }
-
- g := NewLogNameStatusRepoParser(ctx, repo.Path, head.ID.String(), treepath, paths...)
- // don't use defer g.Close() here as g may change its value - instead wrap in a func
- defer func() {
- g.Close()
- }()
-
- results := make([]string, len(paths))
- remaining := len(paths)
- nextRestart := (len(paths) * 3) / 4
- if nextRestart > 70 {
- nextRestart = 70
- }
- lastEmptyParent := head.ID.String()
- commitSinceLastEmptyParent := uint64(0)
- commitSinceNextRestart := uint64(0)
- parentRemaining := make(container.Set[string])
-
- changed := make([]bool, len(paths))
-
- heaploop:
- for {
- select {
- case <-ctx.Done():
- if ctx.Err() == context.DeadlineExceeded {
- break heaploop
- }
- g.Close()
- return nil, ctx.Err()
- default:
- }
- current, err := g.Next(treepath, path2idx, changed, maxpathlen)
- if err != nil {
- if errors.Is(err, context.DeadlineExceeded) {
- break heaploop
- }
- g.Close()
- return nil, err
- }
- if current == nil {
- break heaploop
- }
- parentRemaining.Remove(current.CommitID)
- for i, found := range current.Paths {
- if !found {
- continue
- }
- changed[i] = false
- if results[i] == "" {
- results[i] = current.CommitID
- if err := repo.LastCommitCache.Put(headRef, path.Join(treepath, paths[i]), current.CommitID); err != nil {
- return nil, err
- }
- delete(path2idx, paths[i])
- remaining--
- if results[0] == "" {
- results[0] = current.CommitID
- if err := repo.LastCommitCache.Put(headRef, treepath, current.CommitID); err != nil {
- return nil, err
- }
- delete(path2idx, "")
- remaining--
- }
- }
- }
-
- if remaining <= 0 {
- break heaploop
- }
- commitSinceLastEmptyParent++
- if len(parentRemaining) == 0 {
- lastEmptyParent = current.CommitID
- commitSinceLastEmptyParent = 0
- }
- if remaining <= nextRestart {
- commitSinceNextRestart++
- if 4*commitSinceNextRestart > 3*commitSinceLastEmptyParent {
- g.Close()
- remainingPaths := make([]string, 0, len(paths))
- for i, pth := range paths {
- if results[i] == "" {
- remainingPaths = append(remainingPaths, pth)
- }
- }
- g = NewLogNameStatusRepoParser(ctx, repo.Path, lastEmptyParent, treepath, remainingPaths...)
- parentRemaining = make(container.Set[string])
- nextRestart = (remaining * 3) / 4
- continue heaploop
- }
- }
- parentRemaining.AddMultiple(current.ParentIDs...)
- }
- g.Close()
-
- resultsMap := map[string]string{}
- for i, pth := range paths {
- resultsMap[pth] = results[i]
- }
-
- return resultsMap, nil
- }
|