summary | refs | log | tree | commit | diff | stats
path: root/modules/git/batch_reader.go
diff options
context:
space:
mode:
author: zeripath <art27@cantab.net> 2021-06-20 23:00:46 +0100
committer: GitHub <noreply@github.com> 2021-06-21 01:00:46 +0300
commit: 23358bc55de67be132e3858a5d40f25dbdd0a769 (patch)
tree: 914386734d5dafc0bcf84253c55c67d6590092dc /modules/git/batch_reader.go
parent: 8fa3bbc42450fe34cc0cee3de566b17fa131d1c6 (diff)
download: gitea-23358bc55de67be132e3858a5d40f25dbdd0a769.tar.gz
download: gitea-23358bc55de67be132e3858a5d40f25dbdd0a769.zip
Use git log name-status in get last commit (#16059)
* Improve get last commit using git log --name-status

git log --name-status -c provides information about the diff between a commit and its parents. Using this and adjusting the algorithm to use the first change to a path allows for a much faster generation of commit info.

There is a subtle change in the results generated but this will cause the results to more closely match those from elsewhere.

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: 6543 <6543@obermui.de>
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Co-authored-by: Lauris BH <lauris@nix.lv>
Diffstat (limited to 'modules/git/batch_reader.go')
-rw-r--r-- modules/git/batch_reader.go 111
1 files changed, 40 insertions, 71 deletions
diff --git a/modules/git/batch_reader.go b/modules/git/batch_reader.go
index d6ee0ce8e0..678b184708 100644
--- a/modules/git/batch_reader.go
+++ b/modules/git/batch_reader.go
@@ -11,6 +11,9 @@ import (
"math"
"strconv"
"strings"
+
+ "github.com/djherbis/buffer"
+ "github.com/djherbis/nio/v3"
)
// WriteCloserError wraps an io.WriteCloser with an additional CloseWithError function
@@ -42,7 +45,7 @@ func CatFileBatchCheck(repoPath string) (WriteCloserError, *bufio.Reader, func()
}
}()
- // For simplicities sake we'll us a buffered reader to read from the cat-file --batch
+ // For simplicities sake we'll use a buffered reader to read from the cat-file --batch-check
batchReader := bufio.NewReader(batchStdoutReader)
return batchStdinWriter, batchReader, cancel
@@ -53,7 +56,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// We often want to feed the commits in order into cat-file --batch, followed by their trees and sub trees as necessary.
// so let's create a batch stdin and stdout
batchStdinReader, batchStdinWriter := io.Pipe()
- batchStdoutReader, batchStdoutWriter := io.Pipe()
+ batchStdoutReader, batchStdoutWriter := nio.Pipe(buffer.New(32 * 1024))
cancel := func() {
_ = batchStdinReader.Close()
_ = batchStdinWriter.Close()
@@ -74,7 +77,7 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
}()
// For simplicities sake we'll us a buffered reader to read from the cat-file --batch
- batchReader := bufio.NewReader(batchStdoutReader)
+ batchReader := bufio.NewReaderSize(batchStdoutReader, 32*1024)
return batchStdinWriter, batchReader, cancel
}
@@ -84,22 +87,31 @@ func CatFileBatch(repoPath string) (WriteCloserError, *bufio.Reader, func()) {
// <sha> SP <type> SP <size> LF
// sha is a 40byte not 20byte here
func ReadBatchLine(rd *bufio.Reader) (sha []byte, typ string, size int64, err error) {
- sha, err = rd.ReadBytes(' ')
+ typ, err = rd.ReadString('\n')
if err != nil {
return
}
- sha = sha[:len(sha)-1]
-
- typ, err = rd.ReadString('\n')
- if err != nil {
+ if len(typ) == 1 {
+ typ, err = rd.ReadString('\n')
+ if err != nil {
+ return
+ }
+ }
+ idx := strings.IndexByte(typ, ' ')
+ if idx < 0 {
+ log("missing space typ: %s", typ)
+ err = ErrNotExist{ID: string(sha)}
return
}
+ sha = []byte(typ[:idx])
+ typ = typ[idx+1:]
- idx := strings.Index(typ, " ")
+ idx = strings.IndexByte(typ, ' ')
if idx < 0 {
err = ErrNotExist{ID: string(sha)}
return
}
+
sizeStr := typ[idx+1 : len(typ)-1]
typ = typ[:idx]
@@ -130,7 +142,7 @@ headerLoop:
}
// Discard the rest of the tag
- discard := size - n
+ discard := size - n + 1
for discard > math.MaxInt32 {
_, err := rd.Discard(math.MaxInt32)
if err != nil {
@@ -200,85 +212,42 @@ func To40ByteSHA(sha, out []byte) []byte {
return out
}
-// ParseTreeLineSkipMode reads an entry from a tree in a cat-file --batch stream
-// This simply skips the mode - saving a substantial amount of time and carefully avoids allocations - except where fnameBuf is too small.
+// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
+// This carefully avoids allocations - except where fnameBuf is too small.
// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
//
// Each line is composed of:
// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
//
// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
-func ParseTreeLineSkipMode(rd *bufio.Reader, fnameBuf, shaBuf []byte) (fname, sha []byte, n int, err error) {
+func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
var readBytes []byte
- // Skip the Mode
- readBytes, err = rd.ReadSlice(' ') // NB: DOES NOT ALLOCATE SIMPLY RETURNS SLICE WITHIN READER BUFFER
- if err != nil {
- return
- }
- n += len(readBytes)
- // Deal with the fname
+ // Read the Mode & fname
readBytes, err = rd.ReadSlice('\x00')
- copy(fnameBuf, readBytes)
- if len(fnameBuf) > len(readBytes) {
- fnameBuf = fnameBuf[:len(readBytes)] // cut the buf the correct size
- } else {
- fnameBuf = append(fnameBuf, readBytes[len(fnameBuf):]...) // extend the buf and copy in the missing bits
- }
- for err == bufio.ErrBufferFull { // Then we need to read more
- readBytes, err = rd.ReadSlice('\x00')
- fnameBuf = append(fnameBuf, readBytes...) // there is little point attempting to avoid allocations here so just extend
- }
- n += len(fnameBuf)
if err != nil {
return
}
- fnameBuf = fnameBuf[:len(fnameBuf)-1] // Drop the terminal NUL
- fname = fnameBuf // set the returnable fname to the slice
-
- // Now deal with the 20-byte SHA
- idx := 0
- for idx < 20 {
- read := 0
- read, err = rd.Read(shaBuf[idx:20])
- n += read
- if err != nil {
- return
- }
- idx += read
- }
- sha = shaBuf
- return
-}
-
-// ParseTreeLine reads an entry from a tree in a cat-file --batch stream
-// This carefully avoids allocations - except where fnameBuf is too small.
-// It is recommended therefore to pass in an fnameBuf large enough to avoid almost all allocations
-//
-// Each line is composed of:
-// <mode-in-ascii-dropping-initial-zeros> SP <fname> NUL <20-byte SHA>
-//
-// We don't attempt to convert the 20-byte SHA to 40-byte SHA to save a lot of time
-func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fname, sha []byte, n int, err error) {
- var readBytes []byte
+ idx := bytes.IndexByte(readBytes, ' ')
+ if idx < 0 {
+ log("missing space in readBytes ParseTreeLine: %s", readBytes)
- // Read the Mode
- readBytes, err = rd.ReadSlice(' ')
- if err != nil {
+ err = &ErrNotExist{}
return
}
- n += len(readBytes)
- copy(modeBuf, readBytes)
- if len(modeBuf) > len(readBytes) {
- modeBuf = modeBuf[:len(readBytes)]
- } else {
- modeBuf = append(modeBuf, readBytes[len(modeBuf):]...)
+ n += idx + 1
+ copy(modeBuf, readBytes[:idx])
+ if len(modeBuf) >= idx {
+ modeBuf = modeBuf[:idx]
+ } else {
+ modeBuf = append(modeBuf, readBytes[len(modeBuf):idx]...)
}
- mode = modeBuf[:len(modeBuf)-1] // Drop the SP
+ mode = modeBuf
+
+ readBytes = readBytes[idx+1:]
// Deal with the fname
- readBytes, err = rd.ReadSlice('\x00')
copy(fnameBuf, readBytes)
if len(fnameBuf) > len(readBytes) {
fnameBuf = fnameBuf[:len(readBytes)]
@@ -297,7 +266,7 @@ func ParseTreeLine(rd *bufio.Reader, modeBuf, fnameBuf, shaBuf []byte) (mode, fn
fname = fnameBuf
// Deal with the 20-byte SHA
- idx := 0
+ idx = 0
for idx < 20 {
read := 0
read, err = rd.Read(shaBuf[idx:20])