Use git log name-status in get last commit (#16059)

* Improve get last commit using `git log --name-status`

  `git log --name-status -c` provides information about the diff between a commit and its parents. Using this, and adjusting the algorithm to use the first change to a path, allows for a much faster generation of commit info.

  There is a subtle change in the results generated, but this will cause the results to more closely match those from elsewhere.

  Signed-off-by: Andrew Thornton <art27@cantab.net>
  Co-authored-by: 6543 <6543@obermui.de>
  Co-authored-by: techknowlogick <techknowlogick@gitea.io>
  Co-authored-by: Lauris BH <lauris@nix.lv>
author: zeripath <art27@cantab.net> 2021-06-20 23:00:46 +0100
committer: GitHub <noreply@github.com> 2021-06-21 01:00:46 +0300
commit: 23358bc55de67be132e3858a5d40f25dbdd0a769 (patch)
tree: 914386734d5dafc0bcf84253c55c67d6590092dc /modules/git/batch_reader.go
parent: 8fa3bbc42450fe34cc0cee3de566b17fa131d1c6 (diff)
download: gitea-23358bc55de67be132e3858a5d40f25dbdd0a769.tar.gz
gitea-23358bc55de67be132e3858a5d40f25dbdd0a769.zip
1 files changed, 40 insertions, 71 deletions
diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go
index d6ee0ce8e0..678b184708 100644
--- a/modules/git/batch_reader.go
+++ b/modules/git/batch_reader.go
@@ -11,6 +11,9 @@ import (
 	"math"
 	"strconv"
 	"strings"
+
+	"github.com/djherbis/buffer"
+	"github.com/djherbis/nio/v3"
 )
 
 // WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
@@ -42,7 +45,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
 		}
 	}()
 
-	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
+	// For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
 	batchReader := bufio.NewReader(batchStdoutReader)
 
 	return batchStdinWriter, batchReader, cancel
@@ -53,7 +56,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 	// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
 	// so let's create a batch stdin and stdout
 	batchStdinReader, batchStdinWriter := io.Pipe()
-	batchStdoutReader, batchStdoutWriter := io.Pipe()
+	batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
 	cancel := func() {
 		_ = batchStdinReader.Close()
 		_ = batchStdinWriter.Close()
@@ -74,7 +77,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 	}()
 
 	// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
-	batchReader := bufio.NewReader(batchStdoutReader)
+	batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
 
 	return batchStdinWriter, batchReader, cancel
 }
@@ -84,22 +87,31 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
 // <sha> SP <type> SP <size> LF
 // sha is a 40byte not 20byte here
 func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
-	sha, err = rd.ReadBytes(' ')
+	typ, err = rd.ReadString('\n')
 	if err != nil {
 		return
 	}
-	sha = sha[:len(sha)-1]
-
-	typ, err = rd.ReadString('\n')
-	if err != nil {
+	if len(typ) == 1 {
+		typ, err = rd.ReadString('\n')
+		if err != nil {
+			return
+		}
+	}
+	idx := strings.IndexByte(typ, ' ')
+	if idx < 0 {
+		log("missing space typ: %s", typ)
+		err = ErrNotExist{ID: string(sha)}
 		return
 	}
+	sha = []byte(typ[:idx])
+	typ = typ[idx+1:]
 
-	idx := strings.Index(typ, " ")
+	idx = strings.IndexByte(typ, ' ')
 	if idx < 0 {
 		err = ErrNotExist{ID: string(sha)}
 		return
 	}
+
 	sizeStr := typ[idx+1 : len(typ)-1]
 	typ = typ[:idx]
 
@@ -130,7 +142,7 @@ headerLoop:
 	}
 
 	// Discard the rest of the tag
-	discard := size - n
+	discard := size - n + 1
 	for discard > math.MaxInt32 {
 		_, err := rd.Discard(math.MaxInt32)
 		if err != nil {
@@ -200,85 +212,42 @@ func To40ByteSHA(sha, out []byte) []byte {
 	return out
 }
 
-// ParseTreeLineSkipMode reads an entry from a tree in a cat-file --batch stream
-// This simply skips the mode - saving a substantial amount of time and carefully avoids allocations - except where fnameBuf is too small.
+// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
+// This carefully avoids allocations - except where fnameBuf is too small.
 // It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
 //
 // Each line is composed of:
 // <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
 //
 // We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
-func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sha []byte, n int, err error) {
+func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
 	var readBytes []byte
-	// Skip the Mode
-	readBytes, err = rd.ReadSlice(' ') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
-	if err != nil {
-		return
-	}
-	n += len(readBytes)
 
-	// Deal with the fname
+	// Read the Mode & fname
 	readBytes, err = rd.ReadSlice('\x00')
-	copy(fnameBuf, readBytes)
-	if len(fnameBuf) > len(readBytes) {
-		fnameBuf = fnameBuf[:len(readBytes)] // cut the buf the correct size
-	} else {
-		fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) // extend the buf and copy in the missing bits
-	}
-	for err == bufio.ErrBufferFull { // Then we need to read more
-		readBytes, err = rd.ReadSlice('\x00')
-		fnameBuf = append(fnameBuf, readBytes...) // there is little point attempting to avoid allocations here so just extend
-	}
-	n += len(fnameBuf)
 	if err != nil {
 		return
 	}
-	fnameBuf = fnameBuf[:len(fnameBuf)-1] // Drop the terminal NUL
-	fname = fnameBuf                      // set the returnable fname to the slice
-
-	// Now deal with the 20-byte SHA
-	idx := 0
-	for idx < 20 {
-		read := 0
-		read, err = rd.Read(shaBuf[idx:20])
-		n += read
-		if err != nil {
-			return
-		}
-		idx += read
-	}
-	sha = shaBuf
-	return
-}
-
-// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
-// This carefully avoids allocations - except where fnameBuf is too small.
-// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
-//
-// Each line is composed of:
-// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
-//
-// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
-func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
-	var readBytes []byte
+	idx := bytes.IndexByte(readBytes, ' ')
+	if idx < 0 {
+		log("missing space in readBytes ParseTreeLine: %s", readBytes)
 
-	// Read the Mode
-	readBytes, err = rd.ReadSlice(' ')
-	if err != nil {
+		err = &ErrNotExist{}
 		return
 	}
-	n += len(readBytes)
-	copy(modeBuf, readBytes)
-	if len(modeBuf) > len(readBytes) {
-		modeBuf = modeBuf[:len(readBytes)]
-	} else {
-		modeBuf = append(modeBuf, readBytes[len(modeBuf):]...)
 
+	n += idx + 1
+	copy(modeBuf, readBytes[:idx])
+	if len(modeBuf) >= idx {
+		modeBuf = modeBuf[:idx]
+	} else {
+		modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
 	}
-	mode = modeBuf[:len(modeBuf)-1] // Drop the SP
+	mode = modeBuf
+
+	readBytes = readBytes[idx+1:]
 
 	// Deal with the fname
-	readBytes, err = rd.ReadSlice('\x00')
 	copy(fnameBuf, readBytes)
 	if len(fnameBuf) > len(readBytes) {
 		fnameBuf = fnameBuf[:len(readBytes)]
@@ -297,7 +266,7 @@ func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fn
 	fname = fnameBuf
 
 	// Deal with the 20-byte SHA
-	idx := 0
+	idx = 0
 	for idx < 20 {
 		read := 0
 		read, err = rd.Read(shaBuf[idx:20])
author	zeripath <art27@cantab.net>	2021-06-20 23:00:46 +0100
committer	GitHub <noreply@github.com>	2021-06-21 01:00:46 +0300
commit	23358bc55de67be132e3858a5d40f25dbdd0a769 (patch)
tree	914386734d5dafc0bcf84253c55c67d6590092dc /modules/git/batch_reader.go
parent	8fa3bbc42450fe34cc0cee3de566b17fa131d1c6 (diff)
download	gitea-23358bc55de67be132e3858a5d40f25dbdd0a769.tar.gz gitea-23358bc55de67be132e3858a5d40f25dbdd0a769.zip