You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

git_diff.go 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package models
  5. import (
  6. "bufio"
  7. "bytes"
  8. "fmt"
  9. "html"
  10. "html/template"
  11. "io"
  12. "io/ioutil"
  13. "os"
  14. "os/exec"
  15. "strings"
  16. "code.gitea.io/git"
  17. "code.gitea.io/gitea/modules/base"
  18. "code.gitea.io/gitea/modules/highlight"
  19. "code.gitea.io/gitea/modules/log"
  20. "code.gitea.io/gitea/modules/process"
  21. "code.gitea.io/gitea/modules/setting"
  22. "github.com/Unknwon/com"
  23. "github.com/sergi/go-diff/diffmatchpatch"
  24. "golang.org/x/net/html/charset"
  25. "golang.org/x/text/transform"
  26. )
  27. // DiffLineType represents the type of a DiffLine.
  28. type DiffLineType uint8
  29. // DiffLineType possible values.
  30. const (
  31. DiffLinePlain DiffLineType = iota + 1
  32. DiffLineAdd
  33. DiffLineDel
  34. DiffLineSection
  35. )
  36. // DiffFileType represents the type of a DiffFile.
  37. type DiffFileType uint8
  38. // DiffFileType possible values.
  39. const (
  40. DiffFileAdd DiffFileType = iota + 1
  41. DiffFileChange
  42. DiffFileDel
  43. DiffFileRename
  44. )
  45. // DiffLine represents a line difference in a DiffSection.
  46. type DiffLine struct {
  47. LeftIdx int
  48. RightIdx int
  49. Type DiffLineType
  50. Content string
  51. }
  52. // GetType returns the type of a DiffLine.
  53. func (d *DiffLine) GetType() int {
  54. return int(d.Type)
  55. }
// DiffSection represents a section of a DiffFile.
type DiffSection struct {
	// Name is a label for the section. ParsePatch does not populate it.
	Name string

	// Lines holds the section's lines in patch order. A section created
	// by ParsePatch for a hunk starts with its DiffLineSection header.
	Lines []*DiffLine
}
  61. var (
  62. addedCodePrefix = []byte("<span class=\"added-code\">")
  63. removedCodePrefix = []byte("<span class=\"removed-code\">")
  64. codeTagSuffix = []byte("</span>")
  65. )
  66. func diffToHTML(diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
  67. buf := bytes.NewBuffer(nil)
  68. // Reproduce signs which are cutted for inline diff before.
  69. switch lineType {
  70. case DiffLineAdd:
  71. buf.WriteByte('+')
  72. case DiffLineDel:
  73. buf.WriteByte('-')
  74. }
  75. for i := range diffs {
  76. switch {
  77. case diffs[i].Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
  78. buf.Write(addedCodePrefix)
  79. buf.WriteString(html.EscapeString(diffs[i].Text))
  80. buf.Write(codeTagSuffix)
  81. case diffs[i].Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
  82. buf.Write(removedCodePrefix)
  83. buf.WriteString(html.EscapeString(diffs[i].Text))
  84. buf.Write(codeTagSuffix)
  85. case diffs[i].Type == diffmatchpatch.DiffEqual:
  86. buf.WriteString(html.EscapeString(diffs[i].Text))
  87. }
  88. }
  89. return template.HTML(buf.Bytes())
  90. }
  91. // GetLine gets a specific line by type (add or del) and file line number
  92. func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
  93. var (
  94. difference = 0
  95. addCount = 0
  96. delCount = 0
  97. matchDiffLine *DiffLine
  98. )
  99. LOOP:
  100. for _, diffLine := range diffSection.Lines {
  101. switch diffLine.Type {
  102. case DiffLineAdd:
  103. addCount++
  104. case DiffLineDel:
  105. delCount++
  106. default:
  107. if matchDiffLine != nil {
  108. break LOOP
  109. }
  110. difference = diffLine.RightIdx - diffLine.LeftIdx
  111. addCount = 0
  112. delCount = 0
  113. }
  114. switch lineType {
  115. case DiffLineDel:
  116. if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
  117. matchDiffLine = diffLine
  118. }
  119. case DiffLineAdd:
  120. if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
  121. matchDiffLine = diffLine
  122. }
  123. }
  124. }
  125. if addCount == delCount {
  126. return matchDiffLine
  127. }
  128. return nil
  129. }
  130. var diffMatchPatch = diffmatchpatch.New()
  131. func init() {
  132. diffMatchPatch.DiffEditCost = 100
  133. }
  134. // GetComputedInlineDiffFor computes inline diff for the given line.
  135. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
  136. if setting.Git.DisableDiffHighlight {
  137. return template.HTML(html.EscapeString(diffLine.Content[1:]))
  138. }
  139. var (
  140. compareDiffLine *DiffLine
  141. diff1 string
  142. diff2 string
  143. )
  144. // try to find equivalent diff line. ignore, otherwise
  145. switch diffLine.Type {
  146. case DiffLineAdd:
  147. compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
  148. if compareDiffLine == nil {
  149. return template.HTML(html.EscapeString(diffLine.Content))
  150. }
  151. diff1 = compareDiffLine.Content
  152. diff2 = diffLine.Content
  153. case DiffLineDel:
  154. compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
  155. if compareDiffLine == nil {
  156. return template.HTML(html.EscapeString(diffLine.Content))
  157. }
  158. diff1 = diffLine.Content
  159. diff2 = compareDiffLine.Content
  160. default:
  161. return template.HTML(html.EscapeString(diffLine.Content))
  162. }
  163. diffRecord := diffMatchPatch.DiffMain(diff1[1:], diff2[1:], true)
  164. diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
  165. return diffToHTML(diffRecord, diffLine.Type)
  166. }
  167. // DiffFile represents a file diff.
  168. type DiffFile struct {
  169. Name string
  170. OldName string
  171. Index int
  172. Addition, Deletion int
  173. Type DiffFileType
  174. IsCreated bool
  175. IsDeleted bool
  176. IsBin bool
  177. IsLFSFile bool
  178. IsRenamed bool
  179. IsSubmodule bool
  180. Sections []*DiffSection
  181. IsIncomplete bool
  182. }
  183. // GetType returns type of diff file.
  184. func (diffFile *DiffFile) GetType() int {
  185. return int(diffFile.Type)
  186. }
  187. // GetHighlightClass returns highlight class for a filename.
  188. func (diffFile *DiffFile) GetHighlightClass() string {
  189. return highlight.FileNameToHighlightClass(diffFile.Name)
  190. }
  191. // Diff represents a difference between two git trees.
  192. type Diff struct {
  193. TotalAddition, TotalDeletion int
  194. Files []*DiffFile
  195. IsIncomplete bool
  196. }
  197. // NumFiles returns number of files changes in a diff.
  198. func (diff *Diff) NumFiles() int {
  199. return len(diff.Files)
  200. }
  201. const cmdDiffHead = "diff --git "
  202. // ParsePatch builds a Diff object from a io.Reader and some
  203. // parameters.
  204. // TODO: move this function to gogits/git-module
  205. func ParsePatch(maxLines, maxLineCharacteres, maxFiles int, reader io.Reader) (*Diff, error) {
  206. var (
  207. diff = &Diff{Files: make([]*DiffFile, 0)}
  208. curFile *DiffFile
  209. curSection = &DiffSection{
  210. Lines: make([]*DiffLine, 0, 10),
  211. }
  212. leftLine, rightLine int
  213. lineCount int
  214. curFileLinesCount int
  215. curFileLFSPrefix bool
  216. )
  217. input := bufio.NewReader(reader)
  218. isEOF := false
  219. for !isEOF {
  220. line, err := input.ReadString('\n')
  221. if err != nil {
  222. if err == io.EOF {
  223. isEOF = true
  224. } else {
  225. return nil, fmt.Errorf("ReadString: %v", err)
  226. }
  227. }
  228. if len(line) > 0 && line[len(line)-1] == '\n' {
  229. // Remove line break.
  230. line = line[:len(line)-1]
  231. }
  232. if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 {
  233. continue
  234. }
  235. trimLine := strings.Trim(line, "+- ")
  236. if trimLine == LFSMetaFileIdentifier {
  237. curFileLFSPrefix = true
  238. }
  239. if curFileLFSPrefix && strings.HasPrefix(trimLine, LFSMetaFileOidPrefix) {
  240. oid := strings.TrimPrefix(trimLine, LFSMetaFileOidPrefix)
  241. if len(oid) == 64 {
  242. m := &LFSMetaObject{Oid: oid}
  243. count, err := x.Count(m)
  244. if err == nil && count > 0 {
  245. curFile.IsBin = true
  246. curFile.IsLFSFile = true
  247. curSection.Lines = nil
  248. break
  249. }
  250. }
  251. }
  252. curFileLinesCount++
  253. lineCount++
  254. // Diff data too large, we only show the first about maxlines lines
  255. if curFileLinesCount >= maxLines || len(line) >= maxLineCharacteres {
  256. curFile.IsIncomplete = true
  257. }
  258. switch {
  259. case line[0] == ' ':
  260. diffLine := &DiffLine{Type: DiffLinePlain, Content: line, LeftIdx: leftLine, RightIdx: rightLine}
  261. leftLine++
  262. rightLine++
  263. curSection.Lines = append(curSection.Lines, diffLine)
  264. continue
  265. case line[0] == '@':
  266. curSection = &DiffSection{}
  267. curFile.Sections = append(curFile.Sections, curSection)
  268. ss := strings.Split(line, "@@")
  269. diffLine := &DiffLine{Type: DiffLineSection, Content: line}
  270. curSection.Lines = append(curSection.Lines, diffLine)
  271. // Parse line number.
  272. ranges := strings.Split(ss[1][1:], " ")
  273. leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int()
  274. if len(ranges) > 1 {
  275. rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int()
  276. } else {
  277. log.Warn("Parse line number failed: %v", line)
  278. rightLine = leftLine
  279. }
  280. continue
  281. case line[0] == '+':
  282. curFile.Addition++
  283. diff.TotalAddition++
  284. diffLine := &DiffLine{Type: DiffLineAdd, Content: line, RightIdx: rightLine}
  285. rightLine++
  286. curSection.Lines = append(curSection.Lines, diffLine)
  287. continue
  288. case line[0] == '-':
  289. curFile.Deletion++
  290. diff.TotalDeletion++
  291. diffLine := &DiffLine{Type: DiffLineDel, Content: line, LeftIdx: leftLine}
  292. if leftLine > 0 {
  293. leftLine++
  294. }
  295. curSection.Lines = append(curSection.Lines, diffLine)
  296. case strings.HasPrefix(line, "Binary"):
  297. curFile.IsBin = true
  298. continue
  299. }
  300. // Get new file.
  301. if strings.HasPrefix(line, cmdDiffHead) {
  302. middle := -1
  303. // Note: In case file name is surrounded by double quotes (it happens only in git-shell).
  304. // e.g. diff --git "a/xxx" "b/xxx"
  305. hasQuote := line[len(cmdDiffHead)] == '"'
  306. if hasQuote {
  307. middle = strings.Index(line, ` "b/`)
  308. } else {
  309. middle = strings.Index(line, " b/")
  310. }
  311. beg := len(cmdDiffHead)
  312. a := line[beg+2 : middle]
  313. b := line[middle+3:]
  314. if hasQuote {
  315. a = string(git.UnescapeChars([]byte(a[1 : len(a)-1])))
  316. b = string(git.UnescapeChars([]byte(b[1 : len(b)-1])))
  317. }
  318. curFile = &DiffFile{
  319. Name: a,
  320. Index: len(diff.Files) + 1,
  321. Type: DiffFileChange,
  322. Sections: make([]*DiffSection, 0, 10),
  323. }
  324. diff.Files = append(diff.Files, curFile)
  325. if len(diff.Files) >= maxFiles {
  326. diff.IsIncomplete = true
  327. io.Copy(ioutil.Discard, reader)
  328. break
  329. }
  330. curFileLinesCount = 0
  331. curFileLFSPrefix = false
  332. // Check file diff type and is submodule.
  333. for {
  334. line, err := input.ReadString('\n')
  335. if err != nil {
  336. if err == io.EOF {
  337. isEOF = true
  338. } else {
  339. return nil, fmt.Errorf("ReadString: %v", err)
  340. }
  341. }
  342. switch {
  343. case strings.HasPrefix(line, "new file"):
  344. curFile.Type = DiffFileAdd
  345. curFile.IsCreated = true
  346. case strings.HasPrefix(line, "deleted"):
  347. curFile.Type = DiffFileDel
  348. curFile.IsDeleted = true
  349. case strings.HasPrefix(line, "index"):
  350. curFile.Type = DiffFileChange
  351. case strings.HasPrefix(line, "similarity index 100%"):
  352. curFile.Type = DiffFileRename
  353. curFile.IsRenamed = true
  354. curFile.OldName = curFile.Name
  355. curFile.Name = b
  356. }
  357. if curFile.Type > 0 {
  358. if strings.HasSuffix(line, " 160000\n") {
  359. curFile.IsSubmodule = true
  360. }
  361. break
  362. }
  363. }
  364. }
  365. }
  366. // FIXME: detect encoding while parsing.
  367. var buf bytes.Buffer
  368. for _, f := range diff.Files {
  369. buf.Reset()
  370. for _, sec := range f.Sections {
  371. for _, l := range sec.Lines {
  372. buf.WriteString(l.Content)
  373. buf.WriteString("\n")
  374. }
  375. }
  376. charsetLabel, err := base.DetectEncoding(buf.Bytes())
  377. if charsetLabel != "UTF-8" && err == nil {
  378. encoding, _ := charset.Lookup(charsetLabel)
  379. if encoding != nil {
  380. d := encoding.NewDecoder()
  381. for _, sec := range f.Sections {
  382. for _, l := range sec.Lines {
  383. if c, _, err := transform.String(d, l.Content); err == nil {
  384. l.Content = c
  385. }
  386. }
  387. }
  388. }
  389. }
  390. }
  391. return diff, nil
  392. }
  393. // GetDiffRange builds a Diff between two commits of a repository.
  394. // passing the empty string as beforeCommitID returns a diff from the
  395. // parent commit.
  396. func GetDiffRange(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
  397. gitRepo, err := git.OpenRepository(repoPath)
  398. if err != nil {
  399. return nil, err
  400. }
  401. commit, err := gitRepo.GetCommit(afterCommitID)
  402. if err != nil {
  403. return nil, err
  404. }
  405. var cmd *exec.Cmd
  406. // if "after" commit given
  407. if len(beforeCommitID) == 0 {
  408. // First commit of repository.
  409. if commit.ParentCount() == 0 {
  410. cmd = exec.Command("git", "show", afterCommitID)
  411. } else {
  412. c, _ := commit.Parent(0)
  413. cmd = exec.Command("git", "diff", "-M", c.ID.String(), afterCommitID)
  414. }
  415. } else {
  416. cmd = exec.Command("git", "diff", "-M", beforeCommitID, afterCommitID)
  417. }
  418. cmd.Dir = repoPath
  419. cmd.Stderr = os.Stderr
  420. stdout, err := cmd.StdoutPipe()
  421. if err != nil {
  422. return nil, fmt.Errorf("StdoutPipe: %v", err)
  423. }
  424. if err = cmd.Start(); err != nil {
  425. return nil, fmt.Errorf("Start: %v", err)
  426. }
  427. pid := process.Add(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath), cmd)
  428. defer process.Remove(pid)
  429. diff, err := ParsePatch(maxLines, maxLineCharacteres, maxFiles, stdout)
  430. if err != nil {
  431. return nil, fmt.Errorf("ParsePatch: %v", err)
  432. }
  433. if err = cmd.Wait(); err != nil {
  434. return nil, fmt.Errorf("Wait: %v", err)
  435. }
  436. return diff, nil
  437. }
  438. // RawDiffType type of a raw diff.
  439. type RawDiffType string
  440. // RawDiffType possible values.
  441. const (
  442. RawDiffNormal RawDiffType = "diff"
  443. RawDiffPatch RawDiffType = "patch"
  444. )
  445. // GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
  446. // TODO: move this function to gogits/git-module
  447. func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error {
  448. repo, err := git.OpenRepository(repoPath)
  449. if err != nil {
  450. return fmt.Errorf("OpenRepository: %v", err)
  451. }
  452. commit, err := repo.GetCommit(commitID)
  453. if err != nil {
  454. return fmt.Errorf("GetCommit: %v", err)
  455. }
  456. var cmd *exec.Cmd
  457. switch diffType {
  458. case RawDiffNormal:
  459. if commit.ParentCount() == 0 {
  460. cmd = exec.Command("git", "show", commitID)
  461. } else {
  462. c, _ := commit.Parent(0)
  463. cmd = exec.Command("git", "diff", "-M", c.ID.String(), commitID)
  464. }
  465. case RawDiffPatch:
  466. if commit.ParentCount() == 0 {
  467. cmd = exec.Command("git", "format-patch", "--no-signature", "--stdout", "--root", commitID)
  468. } else {
  469. c, _ := commit.Parent(0)
  470. query := fmt.Sprintf("%s...%s", commitID, c.ID.String())
  471. cmd = exec.Command("git", "format-patch", "--no-signature", "--stdout", query)
  472. }
  473. default:
  474. return fmt.Errorf("invalid diffType: %s", diffType)
  475. }
  476. stderr := new(bytes.Buffer)
  477. cmd.Dir = repoPath
  478. cmd.Stdout = writer
  479. cmd.Stderr = stderr
  480. if err = cmd.Run(); err != nil {
  481. return fmt.Errorf("Run: %v - %s", err, stderr)
  482. }
  483. return nil
  484. }
  485. // GetDiffCommit builds a Diff representing the given commitID.
  486. func GetDiffCommit(repoPath, commitID string, maxLines, maxLineCharacteres, maxFiles int) (*Diff, error) {
  487. return GetDiffRange(repoPath, "", commitID, maxLines, maxLineCharacteres, maxFiles)
  488. }