summaryrefslogtreecommitdiffstats
path: root/vendor/gopkg.in/src-d/go-git.v4/blame.go
blob: adb72d5745f1d9c4490aeba1fa9267ac00a6af66 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
package git

import (
	"bytes"
	"errors"
	"fmt"
	"strconv"
	"strings"
	"time"
	"unicode/utf8"

	"gopkg.in/src-d/go-git.v4/plumbing"
	"gopkg.in/src-d/go-git.v4/plumbing/object"
	"gopkg.in/src-d/go-git.v4/utils/diff"
)

// BlameResult represents the result of a Blame operation.
type BlameResult struct {
	// Path is the path of the File that we're blaming.
	Path string
	// Rev (Revision) is the hash of the specified Commit used to generate this result.
	Rev plumbing.Hash
	// Lines contains every line with its authorship.
	Lines []*Line
}

// Blame returns a BlameResult with the information about the last author of
// each line from file `path` at commit `c`.
func Blame(c *object.Commit, path string) (*BlameResult, error) {
	// The file to blame is identified by the input arguments:
	// commit and path. commit is a Commit object obtained from a Repository. Path
	// represents a path to a specific file contained into the repository.
	//
	// Blaming a file is a two step process:
	//
	// 1. Create a linear history of the commits affecting a file. We use
	// revlist.New for that.
	//
	// 2. Then build a graph with a node for every line in every file in
	// the history of the file.
	//
	// Each node is assigned a commit: Start by the nodes in the first
	// commit. Assign that commit as the creator of all its lines.
	//
	// Then jump to the nodes in the next commit, and calculate the diff
	// between the two files. Newly created lines get
	// assigned the new commit as its origin. Modified lines also get
	// this new commit. Untouched lines retain the old commit.
	//
	// All this work is done in the assignOrigin function which holds all
	// the internal relevant data in a "blame" struct, that is not
	// exported.
	//
	// TODO: ways to improve the efficiency of this function:
	// 1. Improve revlist
	// 2. Improve how to traverse the history (example a backward traversal will
	// be much more efficient)
	//
	// TODO: ways to improve the function in general:
	// 1. Add memoization between revlist and assign.
	// 2. It is using much more memory than needed, see the TODOs below.

	b := new(blame)
	b.fRev = c
	b.path = path

	// get all the file revisions
	if err := b.fillRevs(); err != nil {
		return nil, err
	}

	// calculate the line tracking graph and fill in
	// file contents in data.
	if err := b.fillGraphAndData(); err != nil {
		return nil, err
	}

	file, err := b.fRev.File(b.path)
	if err != nil {
		return nil, err
	}
	finalLines, err := file.Lines()
	if err != nil {
		return nil, err
	}

	// Each node (line) holds the commit where it was introduced or
	// last modified. To achieve that we use the FORWARD algorithm
	// described in Zimmermann, et al. "Mining Version Archives for
	// Co-changed Lines", in proceedings of the Mining Software
	// Repositories workshop, Shanghai, May 22-23, 2006.
	lines, err := newLines(finalLines, b.sliceGraph(len(b.graph)-1))
	if err != nil {
		return nil, err
	}

	return &BlameResult{
		Path:  path,
		Rev:   c.Hash,
		Lines: lines,
	}, nil
}

// Line values represent the contents and author of a line in BlamedResult values.
type Line struct {
	// Author is the email address of the last author that modified the line.
	Author string
	// Text is the original text of the line.
	Text string
	// Date is when the original text of the line was introduced
	Date time.Time
	// Hash is the commit hash that introduced the original line
	Hash plumbing.Hash
}

func newLine(author, text string, date time.Time, hash plumbing.Hash) *Line {
	return &Line{
		Author: author,
		Text:   text,
		Hash:   hash,
		Date:   date,
	}
}

func newLines(contents []string, commits []*object.Commit) ([]*Line, error) {
	lcontents := len(contents)
	lcommits := len(commits)

	if lcontents != lcommits {
		if lcontents == lcommits-1 && contents[lcontents-1] != "\n" {
			contents = append(contents, "\n")
		} else {
			return nil, errors.New("contents and commits have different length")
		}
	}

	result := make([]*Line, 0, lcontents)
	for i := range contents {
		result = append(result, newLine(
			commits[i].Author.Email, contents[i],
			commits[i].Author.When, commits[i].Hash,
		))
	}

	return result, nil
}

// this struct is internally used by the blame function to hold its
// inputs, outputs and state.
type blame struct {
	// the path of the file to blame
	path string
	// the commit of the final revision of the file to blame
	fRev *object.Commit
	// the chain of revisions affecting the the file to blame
	revs []*object.Commit
	// the contents of the file across all its revisions
	data []string
	// the graph of the lines in the file across all the revisions
	graph [][]*object.Commit
}

// calculate the history of a file "path", starting from commit "from", sorted by commit date.
func (b *blame) fillRevs() error {
	var err error

	b.revs, err = references(b.fRev, b.path)
	return err
}

// build graph of a file from its revision history
func (b *blame) fillGraphAndData() error {
	//TODO: not all commits are needed, only the current rev and the prev
	b.graph = make([][]*object.Commit, len(b.revs))
	b.data = make([]string, len(b.revs)) // file contents in all the revisions
	// for every revision of the file, starting with the first
	// one...
	for i, rev := range b.revs {
		// get the contents of the file
		file, err := rev.File(b.path)
		if err != nil {
			return nil
		}
		b.data[i], err = file.Contents()
		if err != nil {
			return err
		}
		nLines := countLines(b.data[i])
		// create a node for each line
		b.graph[i] = make([]*object.Commit, nLines)
		// assign a commit to each node
		// if this is the first revision, then the node is assigned to
		// this first commit.
		if i == 0 {
			for j := 0; j < nLines; j++ {
				b.graph[i][j] = (*object.Commit)(b.revs[i])
			}
		} else {
			// if this is not the first commit, then assign to the old
			// commit or to the new one, depending on what the diff
			// says.
			b.assignOrigin(i, i-1)
		}
	}
	return nil
}

// sliceGraph returns a slice of commits (one per line) for a particular
// revision of a file (0=first revision).
func (b *blame) sliceGraph(i int) []*object.Commit {
	fVs := b.graph[i]
	result := make([]*object.Commit, 0, len(fVs))
	for _, v := range fVs {
		c := object.Commit(*v)
		result = append(result, &c)
	}
	return result
}

// Assigns origin to vertexes in current (c) rev from data in its previous (p)
// revision
func (b *blame) assignOrigin(c, p int) {
	// assign origin based on diff info
	hunks := diff.Do(b.data[p], b.data[c])
	sl := -1 // source line
	dl := -1 // destination line
	for h := range hunks {
		hLines := countLines(hunks[h].Text)
		for hl := 0; hl < hLines; hl++ {
			switch {
			case hunks[h].Type == 0:
				sl++
				dl++
				b.graph[c][dl] = b.graph[p][sl]
			case hunks[h].Type == 1:
				dl++
				b.graph[c][dl] = (*object.Commit)(b.revs[c])
			case hunks[h].Type == -1:
				sl++
			default:
				panic("unreachable")
			}
		}
	}
}

// GoString prints the results of a Blame using git-blame's style.
func (b *blame) GoString() string {
	var buf bytes.Buffer

	file, err := b.fRev.File(b.path)
	if err != nil {
		panic("PrettyPrint: internal error in repo.Data")
	}
	contents, err := file.Contents()
	if err != nil {
		panic("PrettyPrint: internal error in repo.Data")
	}

	lines := strings.Split(contents, "\n")
	// max line number length
	mlnl := len(strconv.Itoa(len(lines)))
	// max author length
	mal := b.maxAuthorLength()
	format := fmt.Sprintf("%%s (%%-%ds %%%dd) %%s\n",
		mal, mlnl)

	fVs := b.graph[len(b.graph)-1]
	for ln, v := range fVs {
		fmt.Fprintf(&buf, format, v.Hash.String()[:8],
			prettyPrintAuthor(fVs[ln]), ln+1, lines[ln])
	}
	return buf.String()
}

// utility function to pretty print the author.
func prettyPrintAuthor(c *object.Commit) string {
	return fmt.Sprintf("%s %s", c.Author.Name, c.Author.When.Format("2006-01-02"))
}

// utility function to calculate the number of runes needed
// to print the longest author name in the blame of a file.
func (b *blame) maxAuthorLength() int {
	memo := make(map[plumbing.Hash]struct{}, len(b.graph)-1)
	fVs := b.graph[len(b.graph)-1]
	m := 0
	for ln := range fVs {
		if _, ok := memo[fVs[ln].Hash]; ok {
			continue
		}
		memo[fVs[ln].Hash] = struct{}{}
		m = max(m, utf8.RuneCountInString(prettyPrintAuthor(fVs[ln])))
	}
	return m
}

func max(a, b int) int {
	if a > b {
		return a
	}
	return b
}