You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

commit_info.go 9.4KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329
  1. // Copyright 2017 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package git
  5. import (
  6. "bufio"
  7. "context"
  8. "fmt"
  9. "os/exec"
  10. "path"
  11. "runtime"
  12. "strconv"
  13. "strings"
  14. "sync"
  15. "time"
  16. )
  // Thresholds that decide when the untargeted search for commit infos stops
  // and leaves the remaining entries to the targeted-searching goroutines, so
  // that it does not interfere with them. Both conditions must hold.
  const (
  	// deferToTargetedSearchColdStreak is the number of most recent commits
  	// in which the untargeted search must have found no entries.
  	deferToTargetedSearchColdStreak = 5

  	// deferToTargetedSearchNumRemainingEntries is the largest number of
  	// unresolved entries at which the untargeted search may be stopped.
  	deferToTargetedSearchNumRemainingEntries = 12
  )
  25. // getCommitsInfoState shared state while getting commit info for entries
  26. type getCommitsInfoState struct {
  27. lock sync.Mutex
  28. /* read-only fields, can be read without the mutex */
  29. // entries and entryPaths are read-only after initialization, so they can
  30. // safely be read without the mutex
  31. entries []*TreeEntry
  32. // set of filepaths to get info for
  33. entryPaths map[string]struct{}
  34. treePath string
  35. headCommit *Commit
  36. /* mutable fields, must hold mutex to read or write */
  37. // map from filepath to commit
  38. commits map[string]*Commit
  39. // set of filepaths that have been or are being searched for in a target search
  40. targetedPaths map[string]struct{}
  41. }
  42. func (state *getCommitsInfoState) numRemainingEntries() int {
  43. state.lock.Lock()
  44. defer state.lock.Unlock()
  45. return len(state.entries) - len(state.commits)
  46. }
  47. // getTargetEntryPath Returns the next path for a targeted-searching thread to
  48. // search for, or returns the empty string if nothing left to search for
  49. func (state *getCommitsInfoState) getTargetedEntryPath() string {
  50. var targetedEntryPath string
  51. state.lock.Lock()
  52. defer state.lock.Unlock()
  53. for _, entry := range state.entries {
  54. entryPath := path.Join(state.treePath, entry.Name())
  55. if _, ok := state.commits[entryPath]; ok {
  56. continue
  57. } else if _, ok = state.targetedPaths[entryPath]; ok {
  58. continue
  59. }
  60. targetedEntryPath = entryPath
  61. state.targetedPaths[entryPath] = struct{}{}
  62. break
  63. }
  64. return targetedEntryPath
  65. }
  66. // repeatedly perform targeted searches for unpopulated entries
  67. func targetedSearch(state *getCommitsInfoState, done chan error, cache LastCommitCache) {
  68. for {
  69. entryPath := state.getTargetedEntryPath()
  70. if len(entryPath) == 0 {
  71. done <- nil
  72. return
  73. }
  74. if cache != nil {
  75. commit, err := cache.Get(state.headCommit.repo.Path, state.headCommit.ID.String(), entryPath)
  76. if err == nil && commit != nil {
  77. state.update(entryPath, commit)
  78. continue
  79. }
  80. }
  81. command := NewCommand("rev-list", "-1", state.headCommit.ID.String(), "--", entryPath)
  82. output, err := command.RunInDir(state.headCommit.repo.Path)
  83. if err != nil {
  84. done <- err
  85. return
  86. }
  87. id, err := NewIDFromString(strings.TrimSpace(output))
  88. if err != nil {
  89. done <- err
  90. return
  91. }
  92. commit, err := state.headCommit.repo.getCommit(id)
  93. if err != nil {
  94. done <- err
  95. return
  96. }
  97. state.update(entryPath, commit)
  98. if cache != nil {
  99. cache.Put(state.headCommit.repo.Path, state.headCommit.ID.String(), entryPath, commit)
  100. }
  101. }
  102. }
  103. func initGetCommitInfoState(entries Entries, headCommit *Commit, treePath string) *getCommitsInfoState {
  104. entryPaths := make(map[string]struct{}, len(entries))
  105. for _, entry := range entries {
  106. entryPaths[path.Join(treePath, entry.Name())] = struct{}{}
  107. }
  108. if treePath = path.Clean(treePath); treePath == "." {
  109. treePath = ""
  110. }
  111. return &getCommitsInfoState{
  112. entries: entries,
  113. entryPaths: entryPaths,
  114. commits: make(map[string]*Commit, len(entries)),
  115. targetedPaths: make(map[string]struct{}, len(entries)),
  116. treePath: treePath,
  117. headCommit: headCommit,
  118. }
  119. }
  120. // GetCommitsInfo gets information of all commits that are corresponding to these entries
  121. func (tes Entries) GetCommitsInfo(commit *Commit, treePath string, cache LastCommitCache) ([][]interface{}, error) {
  122. state := initGetCommitInfoState(tes, commit, treePath)
  123. if err := getCommitsInfo(state, cache); err != nil {
  124. return nil, err
  125. }
  126. if len(state.commits) < len(state.entryPaths) {
  127. return nil, fmt.Errorf("could not find commits for all entries")
  128. }
  129. commitsInfo := make([][]interface{}, len(tes))
  130. for i, entry := range tes {
  131. commit, ok := state.commits[path.Join(treePath, entry.Name())]
  132. if !ok {
  133. return nil, fmt.Errorf("could not find commit for %s", entry.Name())
  134. }
  135. switch entry.Type {
  136. case ObjectCommit:
  137. subModuleURL := ""
  138. if subModule, err := state.headCommit.GetSubModule(entry.Name()); err != nil {
  139. return nil, err
  140. } else if subModule != nil {
  141. subModuleURL = subModule.URL
  142. }
  143. subModuleFile := NewSubModuleFile(commit, subModuleURL, entry.ID.String())
  144. commitsInfo[i] = []interface{}{entry, subModuleFile}
  145. default:
  146. commitsInfo[i] = []interface{}{entry, commit}
  147. }
  148. }
  149. return commitsInfo, nil
  150. }
  151. func (state *getCommitsInfoState) cleanEntryPath(rawEntryPath string) (string, error) {
  152. if rawEntryPath[0] == '"' {
  153. var err error
  154. rawEntryPath, err = strconv.Unquote(rawEntryPath)
  155. if err != nil {
  156. return rawEntryPath, err
  157. }
  158. }
  159. var entryNameStartIndex int
  160. if len(state.treePath) > 0 {
  161. entryNameStartIndex = len(state.treePath) + 1
  162. }
  163. if index := strings.IndexByte(rawEntryPath[entryNameStartIndex:], '/'); index >= 0 {
  164. return rawEntryPath[:entryNameStartIndex+index], nil
  165. }
  166. return rawEntryPath, nil
  167. }
  168. // update report that the given path was last modified by the given commit.
  169. // Returns whether state.commits was updated
  170. func (state *getCommitsInfoState) update(entryPath string, commit *Commit) bool {
  171. if _, ok := state.entryPaths[entryPath]; !ok {
  172. return false
  173. }
  174. var updated bool
  175. state.lock.Lock()
  176. defer state.lock.Unlock()
  177. if _, ok := state.commits[entryPath]; !ok {
  178. state.commits[entryPath] = commit
  179. updated = true
  180. }
  181. return updated
  182. }
  183. const getCommitsInfoPretty = "--pretty=format:%H %ct %s"
  184. func getCommitsInfo(state *getCommitsInfoState, cache LastCommitCache) error {
  185. ctx, cancel := context.WithTimeout(context.Background(), 1*time.Minute)
  186. defer cancel()
  187. args := []string{"log", state.headCommit.ID.String(), getCommitsInfoPretty, "--name-status", "-c"}
  188. if len(state.treePath) > 0 {
  189. args = append(args, "--", state.treePath)
  190. }
  191. cmd := exec.CommandContext(ctx, "git", args...)
  192. cmd.Dir = state.headCommit.repo.Path
  193. readCloser, err := cmd.StdoutPipe()
  194. if err != nil {
  195. return err
  196. }
  197. if err := cmd.Start(); err != nil {
  198. return err
  199. }
  200. // it's okay to ignore the error returned by cmd.Wait(); we expect the
  201. // subprocess to sometimes have a non-zero exit status, since we may
  202. // prematurely close stdout, resulting in a broken pipe.
  203. defer cmd.Wait()
  204. numThreads := runtime.NumCPU()
  205. done := make(chan error, numThreads)
  206. for i := 0; i < numThreads; i++ {
  207. go targetedSearch(state, done, cache)
  208. }
  209. scanner := bufio.NewScanner(readCloser)
  210. err = state.processGitLogOutput(scanner)
  211. // it is important that we close stdout here; if we do not close
  212. // stdout, the subprocess will keep running, and the deffered call
  213. // cmd.Wait() may block for a long time.
  214. if closeErr := readCloser.Close(); closeErr != nil && err == nil {
  215. err = closeErr
  216. }
  217. for i := 0; i < numThreads; i++ {
  218. doneErr := <-done
  219. if doneErr != nil && err == nil {
  220. err = doneErr
  221. }
  222. }
  223. return err
  224. }
  225. func (state *getCommitsInfoState) processGitLogOutput(scanner *bufio.Scanner) error {
  226. // keep a local cache of seen paths to avoid acquiring a lock for paths
  227. // we've already seen
  228. seenPaths := make(map[string]struct{}, len(state.entryPaths))
  229. // number of consecutive commits without any finds
  230. coldStreak := 0
  231. var commit *Commit
  232. var err error
  233. for scanner.Scan() {
  234. line := scanner.Text()
  235. if len(line) == 0 { // in-between commits
  236. numRemainingEntries := state.numRemainingEntries()
  237. if numRemainingEntries == 0 {
  238. break
  239. }
  240. if coldStreak >= deferToTargetedSearchColdStreak &&
  241. numRemainingEntries <= deferToTargetedSearchNumRemainingEntries {
  242. // stop this untargeted search, and let the targeted-search threads
  243. // finish the work
  244. break
  245. }
  246. continue
  247. }
  248. if line[0] >= 'A' && line[0] <= 'X' { // a file was changed by the current commit
  249. // look for the last tab, since for copies (C) and renames (R) two
  250. // filenames are printed: src, then dest
  251. tabIndex := strings.LastIndexByte(line, '\t')
  252. if tabIndex < 1 {
  253. return fmt.Errorf("misformatted line: %s", line)
  254. }
  255. entryPath, err := state.cleanEntryPath(line[tabIndex+1:])
  256. if err != nil {
  257. return err
  258. }
  259. if _, ok := seenPaths[entryPath]; !ok {
  260. if state.update(entryPath, commit) {
  261. coldStreak = 0
  262. }
  263. seenPaths[entryPath] = struct{}{}
  264. }
  265. continue
  266. }
  267. // a new commit
  268. commit, err = parseCommitInfo(line)
  269. if err != nil {
  270. return err
  271. }
  272. coldStreak++
  273. }
  274. return scanner.Err()
  275. }
  276. // parseCommitInfo parse a commit from a line of `git log` output. Expects the
  277. // line to be formatted according to getCommitsInfoPretty.
  278. func parseCommitInfo(line string) (*Commit, error) {
  279. if len(line) < 43 {
  280. return nil, fmt.Errorf("invalid git output: %s", line)
  281. }
  282. ref, err := NewIDFromString(line[:40])
  283. if err != nil {
  284. return nil, err
  285. }
  286. spaceIndex := strings.IndexByte(line[41:], ' ')
  287. if spaceIndex < 0 {
  288. return nil, fmt.Errorf("invalid git output: %s", line)
  289. }
  290. unixSeconds, err := strconv.Atoi(line[41 : 41+spaceIndex])
  291. if err != nil {
  292. return nil, err
  293. }
  294. message := line[spaceIndex+42:]
  295. return &Commit{
  296. ID: ref,
  297. CommitMessage: message,
  298. Committer: &Signature{
  299. When: time.Unix(int64(unixSeconds), 0),
  300. },
  301. }, nil
  302. }