You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

git_diff.go 14KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package models
  5. import (
  6. "bufio"
  7. "bytes"
  8. "fmt"
  9. "html"
  10. "html/template"
  11. "io"
  12. "io/ioutil"
  13. "os"
  14. "os/exec"
  15. "strconv"
  16. "strings"
  17. "code.gitea.io/git"
  18. "code.gitea.io/gitea/modules/base"
  19. "code.gitea.io/gitea/modules/highlight"
  20. "code.gitea.io/gitea/modules/log"
  21. "code.gitea.io/gitea/modules/process"
  22. "code.gitea.io/gitea/modules/setting"
  23. "github.com/Unknwon/com"
  24. "github.com/sergi/go-diff/diffmatchpatch"
  25. "golang.org/x/net/html/charset"
  26. "golang.org/x/text/transform"
  27. )
  28. // DiffLineType represents the type of a DiffLine.
  29. type DiffLineType uint8
  30. // DiffLineType possible values.
  31. const (
  32. DiffLinePlain DiffLineType = iota + 1
  33. DiffLineAdd
  34. DiffLineDel
  35. DiffLineSection
  36. )
  37. // DiffFileType represents the type of a DiffFile.
  38. type DiffFileType uint8
  39. // DiffFileType possible values.
  40. const (
  41. DiffFileAdd DiffFileType = iota + 1
  42. DiffFileChange
  43. DiffFileDel
  44. DiffFileRename
  45. )
  46. // DiffLine represents a line difference in a DiffSection.
  47. type DiffLine struct {
  48. LeftIdx int
  49. RightIdx int
  50. Type DiffLineType
  51. Content string
  52. }
  53. // GetType returns the type of a DiffLine.
  54. func (d *DiffLine) GetType() int {
  55. return int(d.Type)
  56. }
  57. // DiffSection represents a section of a DiffFile.
  58. type DiffSection struct {
  59. Name string
  60. Lines []*DiffLine
  61. }
  62. var (
  63. addedCodePrefix = []byte("<span class=\"added-code\">")
  64. removedCodePrefix = []byte("<span class=\"removed-code\">")
  65. codeTagSuffix = []byte("</span>")
  66. )
  67. func diffToHTML(diffs []diffmatchpatch.Diff, lineType DiffLineType) template.HTML {
  68. buf := bytes.NewBuffer(nil)
  69. // Reproduce signs which are cut for inline diff before.
  70. switch lineType {
  71. case DiffLineAdd:
  72. buf.WriteByte('+')
  73. case DiffLineDel:
  74. buf.WriteByte('-')
  75. }
  76. for i := range diffs {
  77. switch {
  78. case diffs[i].Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
  79. buf.Write(addedCodePrefix)
  80. buf.WriteString(html.EscapeString(diffs[i].Text))
  81. buf.Write(codeTagSuffix)
  82. case diffs[i].Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
  83. buf.Write(removedCodePrefix)
  84. buf.WriteString(html.EscapeString(diffs[i].Text))
  85. buf.Write(codeTagSuffix)
  86. case diffs[i].Type == diffmatchpatch.DiffEqual:
  87. buf.WriteString(html.EscapeString(diffs[i].Text))
  88. }
  89. }
  90. return template.HTML(buf.Bytes())
  91. }
  92. // GetLine gets a specific line by type (add or del) and file line number
  93. func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
  94. var (
  95. difference = 0
  96. addCount = 0
  97. delCount = 0
  98. matchDiffLine *DiffLine
  99. )
  100. LOOP:
  101. for _, diffLine := range diffSection.Lines {
  102. switch diffLine.Type {
  103. case DiffLineAdd:
  104. addCount++
  105. case DiffLineDel:
  106. delCount++
  107. default:
  108. if matchDiffLine != nil {
  109. break LOOP
  110. }
  111. difference = diffLine.RightIdx - diffLine.LeftIdx
  112. addCount = 0
  113. delCount = 0
  114. }
  115. switch lineType {
  116. case DiffLineDel:
  117. if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
  118. matchDiffLine = diffLine
  119. }
  120. case DiffLineAdd:
  121. if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
  122. matchDiffLine = diffLine
  123. }
  124. }
  125. }
  126. if addCount == delCount {
  127. return matchDiffLine
  128. }
  129. return nil
  130. }
  131. var diffMatchPatch = diffmatchpatch.New()
  132. func init() {
  133. diffMatchPatch.DiffEditCost = 100
  134. }
  135. // GetComputedInlineDiffFor computes inline diff for the given line.
  136. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine) template.HTML {
  137. if setting.Git.DisableDiffHighlight {
  138. return template.HTML(html.EscapeString(diffLine.Content[1:]))
  139. }
  140. var (
  141. compareDiffLine *DiffLine
  142. diff1 string
  143. diff2 string
  144. )
  145. // try to find equivalent diff line. ignore, otherwise
  146. switch diffLine.Type {
  147. case DiffLineAdd:
  148. compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
  149. if compareDiffLine == nil {
  150. return template.HTML(html.EscapeString(diffLine.Content))
  151. }
  152. diff1 = compareDiffLine.Content
  153. diff2 = diffLine.Content
  154. case DiffLineDel:
  155. compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
  156. if compareDiffLine == nil {
  157. return template.HTML(html.EscapeString(diffLine.Content))
  158. }
  159. diff1 = diffLine.Content
  160. diff2 = compareDiffLine.Content
  161. default:
  162. return template.HTML(html.EscapeString(diffLine.Content))
  163. }
  164. diffRecord := diffMatchPatch.DiffMain(diff1[1:], diff2[1:], true)
  165. diffRecord = diffMatchPatch.DiffCleanupEfficiency(diffRecord)
  166. return diffToHTML(diffRecord, diffLine.Type)
  167. }
  168. // DiffFile represents a file diff.
  169. type DiffFile struct {
  170. Name string
  171. OldName string
  172. Index int
  173. Addition, Deletion int
  174. Type DiffFileType
  175. IsCreated bool
  176. IsDeleted bool
  177. IsBin bool
  178. IsLFSFile bool
  179. IsRenamed bool
  180. IsSubmodule bool
  181. Sections []*DiffSection
  182. IsIncomplete bool
  183. }
  184. // GetType returns type of diff file.
  185. func (diffFile *DiffFile) GetType() int {
  186. return int(diffFile.Type)
  187. }
  188. // GetHighlightClass returns highlight class for a filename.
  189. func (diffFile *DiffFile) GetHighlightClass() string {
  190. return highlight.FileNameToHighlightClass(diffFile.Name)
  191. }
  192. // Diff represents a difference between two git trees.
  193. type Diff struct {
  194. TotalAddition, TotalDeletion int
  195. Files []*DiffFile
  196. IsIncomplete bool
  197. }
  198. // NumFiles returns number of files changes in a diff.
  199. func (diff *Diff) NumFiles() int {
  200. return len(diff.Files)
  201. }
  202. const cmdDiffHead = "diff --git "
  203. // ParsePatch builds a Diff object from a io.Reader and some
  204. // parameters.
  205. // TODO: move this function to gogits/git-module
  206. func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader) (*Diff, error) {
  207. var (
  208. diff = &Diff{Files: make([]*DiffFile, 0)}
  209. curFile = &DiffFile{}
  210. curSection = &DiffSection{
  211. Lines: make([]*DiffLine, 0, 10),
  212. }
  213. leftLine, rightLine int
  214. lineCount int
  215. curFileLinesCount int
  216. curFileLFSPrefix bool
  217. )
  218. input := bufio.NewReader(reader)
  219. isEOF := false
  220. for !isEOF {
  221. var linebuf bytes.Buffer
  222. for {
  223. b, err := input.ReadByte()
  224. if err != nil {
  225. if err == io.EOF {
  226. isEOF = true
  227. break
  228. } else {
  229. return nil, fmt.Errorf("ReadByte: %v", err)
  230. }
  231. }
  232. if b == '\n' {
  233. break
  234. }
  235. if linebuf.Len() < maxLineCharacters {
  236. linebuf.WriteByte(b)
  237. } else if linebuf.Len() == maxLineCharacters {
  238. curFile.IsIncomplete = true
  239. }
  240. }
  241. line := linebuf.String()
  242. if strings.HasPrefix(line, "+++ ") || strings.HasPrefix(line, "--- ") || len(line) == 0 {
  243. continue
  244. }
  245. trimLine := strings.Trim(line, "+- ")
  246. if trimLine == LFSMetaFileIdentifier {
  247. curFileLFSPrefix = true
  248. }
  249. if curFileLFSPrefix && strings.HasPrefix(trimLine, LFSMetaFileOidPrefix) {
  250. oid := strings.TrimPrefix(trimLine, LFSMetaFileOidPrefix)
  251. if len(oid) == 64 {
  252. m := &LFSMetaObject{Oid: oid}
  253. count, err := x.Count(m)
  254. if err == nil && count > 0 {
  255. curFile.IsBin = true
  256. curFile.IsLFSFile = true
  257. curSection.Lines = nil
  258. }
  259. }
  260. }
  261. curFileLinesCount++
  262. lineCount++
  263. // Diff data too large, we only show the first about maxLines lines
  264. if curFileLinesCount >= maxLines {
  265. curFile.IsIncomplete = true
  266. }
  267. switch {
  268. case line[0] == ' ':
  269. diffLine := &DiffLine{Type: DiffLinePlain, Content: line, LeftIdx: leftLine, RightIdx: rightLine}
  270. leftLine++
  271. rightLine++
  272. curSection.Lines = append(curSection.Lines, diffLine)
  273. continue
  274. case line[0] == '@':
  275. curSection = &DiffSection{}
  276. curFile.Sections = append(curFile.Sections, curSection)
  277. ss := strings.Split(line, "@@")
  278. diffLine := &DiffLine{Type: DiffLineSection, Content: line}
  279. curSection.Lines = append(curSection.Lines, diffLine)
  280. // Parse line number.
  281. ranges := strings.Split(ss[1][1:], " ")
  282. leftLine, _ = com.StrTo(strings.Split(ranges[0], ",")[0][1:]).Int()
  283. if len(ranges) > 1 {
  284. rightLine, _ = com.StrTo(strings.Split(ranges[1], ",")[0]).Int()
  285. } else {
  286. log.Warn("Parse line number failed: %v", line)
  287. rightLine = leftLine
  288. }
  289. continue
  290. case line[0] == '+':
  291. curFile.Addition++
  292. diff.TotalAddition++
  293. diffLine := &DiffLine{Type: DiffLineAdd, Content: line, RightIdx: rightLine}
  294. rightLine++
  295. curSection.Lines = append(curSection.Lines, diffLine)
  296. continue
  297. case line[0] == '-':
  298. curFile.Deletion++
  299. diff.TotalDeletion++
  300. diffLine := &DiffLine{Type: DiffLineDel, Content: line, LeftIdx: leftLine}
  301. if leftLine > 0 {
  302. leftLine++
  303. }
  304. curSection.Lines = append(curSection.Lines, diffLine)
  305. case strings.HasPrefix(line, "Binary"):
  306. curFile.IsBin = true
  307. continue
  308. }
  309. // Get new file.
  310. if strings.HasPrefix(line, cmdDiffHead) {
  311. middle := -1
  312. // Note: In case file name is surrounded by double quotes (it happens only in git-shell).
  313. // e.g. diff --git "a/xxx" "b/xxx"
  314. hasQuote := line[len(cmdDiffHead)] == '"'
  315. if hasQuote {
  316. middle = strings.Index(line, ` "b/`)
  317. } else {
  318. middle = strings.Index(line, " b/")
  319. }
  320. beg := len(cmdDiffHead)
  321. a := line[beg+2 : middle]
  322. b := line[middle+3:]
  323. if hasQuote {
  324. var err error
  325. a, err = strconv.Unquote(a)
  326. if err != nil {
  327. return nil, fmt.Errorf("Unquote: %v", err)
  328. }
  329. b, err = strconv.Unquote(b)
  330. if err != nil {
  331. return nil, fmt.Errorf("Unquote: %v", err)
  332. }
  333. }
  334. curFile = &DiffFile{
  335. Name: b,
  336. OldName: a,
  337. Index: len(diff.Files) + 1,
  338. Type: DiffFileChange,
  339. Sections: make([]*DiffSection, 0, 10),
  340. IsRenamed: a != b,
  341. }
  342. diff.Files = append(diff.Files, curFile)
  343. if len(diff.Files) >= maxFiles {
  344. diff.IsIncomplete = true
  345. io.Copy(ioutil.Discard, reader)
  346. break
  347. }
  348. curFileLinesCount = 0
  349. curFileLFSPrefix = false
  350. // Check file diff type and is submodule.
  351. for {
  352. line, err := input.ReadString('\n')
  353. if err != nil {
  354. if err == io.EOF {
  355. isEOF = true
  356. } else {
  357. return nil, fmt.Errorf("ReadString: %v", err)
  358. }
  359. }
  360. switch {
  361. case strings.HasPrefix(line, "new file"):
  362. curFile.Type = DiffFileAdd
  363. curFile.IsCreated = true
  364. case strings.HasPrefix(line, "deleted"):
  365. curFile.Type = DiffFileDel
  366. curFile.IsDeleted = true
  367. case strings.HasPrefix(line, "index"):
  368. curFile.Type = DiffFileChange
  369. case strings.HasPrefix(line, "similarity index 100%"):
  370. curFile.Type = DiffFileRename
  371. }
  372. if curFile.Type > 0 {
  373. if strings.HasSuffix(line, " 160000\n") {
  374. curFile.IsSubmodule = true
  375. }
  376. break
  377. }
  378. }
  379. }
  380. }
  381. // FIXME: detect encoding while parsing.
  382. var buf bytes.Buffer
  383. for _, f := range diff.Files {
  384. buf.Reset()
  385. for _, sec := range f.Sections {
  386. for _, l := range sec.Lines {
  387. buf.WriteString(l.Content)
  388. buf.WriteString("\n")
  389. }
  390. }
  391. charsetLabel, err := base.DetectEncoding(buf.Bytes())
  392. if charsetLabel != "UTF-8" && err == nil {
  393. encoding, _ := charset.Lookup(charsetLabel)
  394. if encoding != nil {
  395. d := encoding.NewDecoder()
  396. for _, sec := range f.Sections {
  397. for _, l := range sec.Lines {
  398. if c, _, err := transform.String(d, l.Content); err == nil {
  399. l.Content = c
  400. }
  401. }
  402. }
  403. }
  404. }
  405. }
  406. return diff, nil
  407. }
  408. // GetDiffRange builds a Diff between two commits of a repository.
  409. // passing the empty string as beforeCommitID returns a diff from the
  410. // parent commit.
  411. func GetDiffRange(repoPath, beforeCommitID, afterCommitID string, maxLines, maxLineCharacters, maxFiles int) (*Diff, error) {
  412. gitRepo, err := git.OpenRepository(repoPath)
  413. if err != nil {
  414. return nil, err
  415. }
  416. commit, err := gitRepo.GetCommit(afterCommitID)
  417. if err != nil {
  418. return nil, err
  419. }
  420. var cmd *exec.Cmd
  421. // if "after" commit given
  422. if len(beforeCommitID) == 0 {
  423. // First commit of repository.
  424. if commit.ParentCount() == 0 {
  425. cmd = exec.Command("git", "show", afterCommitID)
  426. } else {
  427. c, _ := commit.Parent(0)
  428. cmd = exec.Command("git", "diff", "-M", c.ID.String(), afterCommitID)
  429. }
  430. } else {
  431. cmd = exec.Command("git", "diff", "-M", beforeCommitID, afterCommitID)
  432. }
  433. cmd.Dir = repoPath
  434. cmd.Stderr = os.Stderr
  435. stdout, err := cmd.StdoutPipe()
  436. if err != nil {
  437. return nil, fmt.Errorf("StdoutPipe: %v", err)
  438. }
  439. if err = cmd.Start(); err != nil {
  440. return nil, fmt.Errorf("Start: %v", err)
  441. }
  442. pid := process.GetManager().Add(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath), cmd)
  443. defer process.GetManager().Remove(pid)
  444. diff, err := ParsePatch(maxLines, maxLineCharacters, maxFiles, stdout)
  445. if err != nil {
  446. return nil, fmt.Errorf("ParsePatch: %v", err)
  447. }
  448. if err = cmd.Wait(); err != nil {
  449. return nil, fmt.Errorf("Wait: %v", err)
  450. }
  451. return diff, nil
  452. }
  453. // RawDiffType type of a raw diff.
  454. type RawDiffType string
  455. // RawDiffType possible values.
  456. const (
  457. RawDiffNormal RawDiffType = "diff"
  458. RawDiffPatch RawDiffType = "patch"
  459. )
  460. // GetRawDiff dumps diff results of repository in given commit ID to io.Writer.
  461. // TODO: move this function to gogits/git-module
  462. func GetRawDiff(repoPath, commitID string, diffType RawDiffType, writer io.Writer) error {
  463. repo, err := git.OpenRepository(repoPath)
  464. if err != nil {
  465. return fmt.Errorf("OpenRepository: %v", err)
  466. }
  467. commit, err := repo.GetCommit(commitID)
  468. if err != nil {
  469. return fmt.Errorf("GetCommit: %v", err)
  470. }
  471. var cmd *exec.Cmd
  472. switch diffType {
  473. case RawDiffNormal:
  474. if commit.ParentCount() == 0 {
  475. cmd = exec.Command("git", "show", commitID)
  476. } else {
  477. c, _ := commit.Parent(0)
  478. cmd = exec.Command("git", "diff", "-M", c.ID.String(), commitID)
  479. }
  480. case RawDiffPatch:
  481. if commit.ParentCount() == 0 {
  482. cmd = exec.Command("git", "format-patch", "--no-signature", "--stdout", "--root", commitID)
  483. } else {
  484. c, _ := commit.Parent(0)
  485. query := fmt.Sprintf("%s...%s", commitID, c.ID.String())
  486. cmd = exec.Command("git", "format-patch", "--no-signature", "--stdout", query)
  487. }
  488. default:
  489. return fmt.Errorf("invalid diffType: %s", diffType)
  490. }
  491. stderr := new(bytes.Buffer)
  492. cmd.Dir = repoPath
  493. cmd.Stdout = writer
  494. cmd.Stderr = stderr
  495. if err = cmd.Run(); err != nil {
  496. return fmt.Errorf("Run: %v - %s", err, stderr)
  497. }
  498. return nil
  499. }
  500. // GetDiffCommit builds a Diff representing the given commitID.
  501. func GetDiffCommit(repoPath, commitID string, maxLines, maxLineCharacters, maxFiles int) (*Diff, error) {
  502. return GetDiffRange(repoPath, "", commitID, maxLines, maxLineCharacters, maxFiles)
  503. }