summaryrefslogtreecommitdiffstats
path: root/vendor/gopkg.in/src-d/go-git.v4/blame.go
diff options
context:
space:
mode:
Diffstat (limited to 'vendor/gopkg.in/src-d/go-git.v4/blame.go')
-rw-r--r--vendor/gopkg.in/src-d/go-git.v4/blame.go302
1 files changed, 302 insertions, 0 deletions
diff --git a/vendor/gopkg.in/src-d/go-git.v4/blame.go b/vendor/gopkg.in/src-d/go-git.v4/blame.go
new file mode 100644
index 0000000000..adb72d5745
--- /dev/null
+++ b/vendor/gopkg.in/src-d/go-git.v4/blame.go
@@ -0,0 +1,302 @@
+package git
+
+import (
+ "bytes"
+ "errors"
+ "fmt"
+ "strconv"
+ "strings"
+ "time"
+ "unicode/utf8"
+
+ "gopkg.in/src-d/go-git.v4/plumbing"
+ "gopkg.in/src-d/go-git.v4/plumbing/object"
+ "gopkg.in/src-d/go-git.v4/utils/diff"
+)
+
+// BlameResult represents the result of a Blame operation.
+type BlameResult struct {
+ // Path is the path of the File that we're blaming.
+ Path string
+ // Rev (Revision) is the hash of the specified Commit used to generate this result.
+ Rev plumbing.Hash
+ // Lines contains every line with its authorship.
+ Lines []*Line
+}
+
+// Blame returns a BlameResult with the information about the last author of
+// each line from file `path` at commit `c`.
+func Blame(c *object.Commit, path string) (*BlameResult, error) {
+ // The file to blame is identified by the input arguments:
+ // commit and path. commit is a Commit object obtained from a Repository. Path
+ // represents a path to a specific file contained into the repository.
+ //
+ // Blaming a file is a two step process:
+ //
+ // 1. Create a linear history of the commits affecting a file. We use
+ // revlist.New for that.
+ //
+ // 2. Then build a graph with a node for every line in every file in
+ // the history of the file.
+ //
+ // Each node is assigned a commit: Start by the nodes in the first
+ // commit. Assign that commit as the creator of all its lines.
+ //
+ // Then jump to the nodes in the next commit, and calculate the diff
+ // between the two files. Newly created lines get
+ // assigned the new commit as its origin. Modified lines also get
+ // this new commit. Untouched lines retain the old commit.
+ //
+ // All this work is done in the assignOrigin function which holds all
+ // the internal relevant data in a "blame" struct, that is not
+ // exported.
+ //
+ // TODO: ways to improve the efficiency of this function:
+ // 1. Improve revlist
+ // 2. Improve how to traverse the history (example a backward traversal will
+ // be much more efficient)
+ //
+ // TODO: ways to improve the function in general:
+ // 1. Add memoization between revlist and assign.
+ // 2. It is using much more memory than needed, see the TODOs below.
+
+ b := new(blame)
+ b.fRev = c
+ b.path = path
+
+ // get all the file revisions
+ if err := b.fillRevs(); err != nil {
+ return nil, err
+ }
+
+ // calculate the line tracking graph and fill in
+ // file contents in data.
+ if err := b.fillGraphAndData(); err != nil {
+ return nil, err
+ }
+
+ file, err := b.fRev.File(b.path)
+ if err != nil {
+ return nil, err
+ }
+ finalLines, err := file.Lines()
+ if err != nil {
+ return nil, err
+ }
+
+ // Each node (line) holds the commit where it was introduced or
+ // last modified. To achieve that we use the FORWARD algorithm
+ // described in Zimmermann, et al. "Mining Version Archives for
+ // Co-changed Lines", in proceedings of the Mining Software
+ // Repositories workshop, Shanghai, May 22-23, 2006.
+ lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
+ if err != nil {
+ return nil, err
+ }
+
+ return &BlameResult{
+ Path: path,
+ Rev: c.Hash,
+ Lines: lines,
+ }, nil
+}
+
+// Line values represent the contents and author of a line in BlamedResult values.
+type Line struct {
+ // Author is the email address of the last author that modified the line.
+ Author string
+ // Text is the original text of the line.
+ Text string
+ // Date is when the original text of the line was introduced
+ Date time.Time
+ // Hash is the commit hash that introduced the original line
+ Hash plumbing.Hash
+}
+
+func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line {
+ return &Line{
+ Author: author,
+ Text: text,
+ Hash: hash,
+ Date: date,
+ }
+}
+
+func newLines(contents []string, commits []*object.Commit) ([]*Line, error) {
+ lcontents := len(contents)
+ lcommits := len(commits)
+
+ if lcontents != lcommits {
+ if lcontents == lcommits-1 && contents[lcontents-1] != "\n" {
+ contents = append(contents, "\n")
+ } else {
+ return nil, errors.New("contents and commits have different length")
+ }
+ }
+
+ result := make([]*Line, 0, lcontents)
+ for i := range contents {
+ result = append(result, newLine(
+ commits[i].Author.Email, contents[i],
+ commits[i].Author.When, commits[i].Hash,
+ ))
+ }
+
+ return result, nil
+}
+
+// this struct is internally used by the blame function to hold its
+// inputs, outputs and state.
+type blame struct {
+ // the path of the file to blame
+ path string
+ // the commit of the final revision of the file to blame
+ fRev *object.Commit
+ // the chain of revisions affecting the the file to blame
+ revs []*object.Commit
+ // the contents of the file across all its revisions
+ data []string
+ // the graph of the lines in the file across all the revisions
+ graph [][]*object.Commit
+}
+
+// calculate the history of a file "path", starting from commit "from", sorted by commit date.
+func (b *blame) fillRevs() error {
+ var err error
+
+ b.revs, err = references(b.fRev, b.path)
+ return err
+}
+
+// build graph of a file from its revision history
+func (b *blame) fillGraphAndData() error {
+ //TODO: not all commits are needed, only the current rev and the prev
+ b.graph = make([][]*object.Commit, len(b.revs))
+ b.data = make([]string, len(b.revs)) // file contents in all the revisions
+ // for every revision of the file, starting with the first
+ // one...
+ for i, rev := range b.revs {
+ // get the contents of the file
+ file, err := rev.File(b.path)
+ if err != nil {
+ return nil
+ }
+ b.data[i], err = file.Contents()
+ if err != nil {
+ return err
+ }
+ nLines := countLines(b.data[i])
+ // create a node for each line
+ b.graph[i] = make([]*object.Commit, nLines)
+ // assign a commit to each node
+ // if this is the first revision, then the node is assigned to
+ // this first commit.
+ if i == 0 {
+ for j := 0; j < nLines; j++ {
+ b.graph[i][j] = (*object.Commit)(b.revs[i])
+ }
+ } else {
+ // if this is not the first commit, then assign to the old
+ // commit or to the new one, depending on what the diff
+ // says.
+ b.assignOrigin(i, i-1)
+ }
+ }
+ return nil
+}
+
+// sliceGraph returns a slice of commits (one per line) for a particular
+// revision of a file (0=first revision).
+func (b *blame) sliceGraph(i int) []*object.Commit {
+ fVs := b.graph[i]
+ result := make([]*object.Commit, 0, len(fVs))
+ for _, v := range fVs {
+ c := object.Commit(*v)
+ result = append(result, &c)
+ }
+ return result
+}
+
+// Assigns origin to vertexes in current (c) rev from data in its previous (p)
+// revision
+func (b *blame) assignOrigin(c, p int) {
+ // assign origin based on diff info
+ hunks := diff.Do(b.data[p], b.data[c])
+ sl := -1 // source line
+ dl := -1 // destination line
+ for h := range hunks {
+ hLines := countLines(hunks[h].Text)
+ for hl := 0; hl < hLines; hl++ {
+ switch {
+ case hunks[h].Type == 0:
+ sl++
+ dl++
+ b.graph[c][dl] = b.graph[p][sl]
+ case hunks[h].Type == 1:
+ dl++
+ b.graph[c][dl] = (*object.Commit)(b.revs[c])
+ case hunks[h].Type == -1:
+ sl++
+ default:
+ panic("unreachable")
+ }
+ }
+ }
+}
+
+// GoString prints the results of a Blame using git-blame's style.
+func (b *blame) GoString() string {
+ var buf bytes.Buffer
+
+ file, err := b.fRev.File(b.path)
+ if err != nil {
+ panic("PrettyPrint: internal error in repo.Data")
+ }
+ contents, err := file.Contents()
+ if err != nil {
+ panic("PrettyPrint: internal error in repo.Data")
+ }
+
+ lines := strings.Split(contents, "\n")
+ // max line number length
+ mlnl := len(strconv.Itoa(len(lines)))
+ // max author length
+ mal := b.maxAuthorLength()
+ format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
+ mal, mlnl)
+
+ fVs := b.graph[len(b.graph)-1]
+ for ln, v := range fVs {
+ fmt.Fprintf(&buf, format, v.Hash.String()[:8],
+ prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
+ }
+ return buf.String()
+}
+
+// utility function to pretty print the author.
+func prettyPrintAuthor(c *object.Commit) string {
+ return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
+}
+
+// utility function to calculate the number of runes needed
+// to print the longest author name in the blame of a file.
+func (b *blame) maxAuthorLength() int {
+ memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1)
+ fVs := b.graph[len(b.graph)-1]
+ m := 0
+ for ln := range fVs {
+ if _, ok := memo[fVs[ln].Hash]; ok {
+ continue
+ }
+ memo[fVs[ln].Hash] = struct{}{}
+ m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
+ }
+ return m
+}
+
+func max(a, b int) int {
+ if a > b {
+ return a
+ }
+ return b
+}