You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

gitdiff.go 43KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Copyright 2019 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package gitdiff
  5. import (
  6. "bufio"
  7. "bytes"
  8. "context"
  9. "fmt"
  10. "html"
  11. "html/template"
  12. "io"
  13. "net/url"
  14. "sort"
  15. "strings"
  16. "time"
  17. "code.gitea.io/gitea/models/db"
  18. git_model "code.gitea.io/gitea/models/git"
  19. issues_model "code.gitea.io/gitea/models/issues"
  20. pull_model "code.gitea.io/gitea/models/pull"
  21. user_model "code.gitea.io/gitea/models/user"
  22. "code.gitea.io/gitea/modules/analyze"
  23. "code.gitea.io/gitea/modules/base"
  24. "code.gitea.io/gitea/modules/charset"
  25. "code.gitea.io/gitea/modules/git"
  26. "code.gitea.io/gitea/modules/highlight"
  27. "code.gitea.io/gitea/modules/lfs"
  28. "code.gitea.io/gitea/modules/log"
  29. "code.gitea.io/gitea/modules/setting"
  30. "code.gitea.io/gitea/modules/translation"
  31. "github.com/sergi/go-diff/diffmatchpatch"
  32. stdcharset "golang.org/x/net/html/charset"
  33. "golang.org/x/text/encoding"
  34. "golang.org/x/text/transform"
  35. )
  36. // DiffLineType represents the type of DiffLine.
  37. type DiffLineType uint8
  38. // DiffLineType possible values.
  39. const (
  40. DiffLinePlain DiffLineType = iota + 1
  41. DiffLineAdd
  42. DiffLineDel
  43. DiffLineSection
  44. )
  45. // DiffFileType represents the type of DiffFile.
  46. type DiffFileType uint8
  47. // DiffFileType possible values.
  48. const (
  49. DiffFileAdd DiffFileType = iota + 1
  50. DiffFileChange
  51. DiffFileDel
  52. DiffFileRename
  53. DiffFileCopy
  54. )
  55. // DiffLineExpandDirection represents the DiffLineSection expand direction
  56. type DiffLineExpandDirection uint8
  57. // DiffLineExpandDirection possible values.
  58. const (
  59. DiffLineExpandNone DiffLineExpandDirection = iota + 1
  60. DiffLineExpandSingle
  61. DiffLineExpandUpDown
  62. DiffLineExpandUp
  63. DiffLineExpandDown
  64. )
  65. // DiffLine represents a line difference in a DiffSection.
  66. type DiffLine struct {
  67. LeftIdx int
  68. RightIdx int
  69. Match int
  70. Type DiffLineType
  71. Content string
  72. Comments []*issues_model.Comment
  73. SectionInfo *DiffLineSectionInfo
  74. }
  75. // DiffLineSectionInfo represents diff line section meta data
  76. type DiffLineSectionInfo struct {
  77. Path string
  78. LastLeftIdx int
  79. LastRightIdx int
  80. LeftIdx int
  81. RightIdx int
  82. LeftHunkSize int
  83. RightHunkSize int
  84. }
  85. // BlobExcerptChunkSize represent max lines of excerpt
  86. const BlobExcerptChunkSize = 20
  87. // GetType returns the type of DiffLine.
  88. func (d *DiffLine) GetType() int {
  89. return int(d.Type)
  90. }
  91. // GetHTMLDiffLineType returns the diff line type name for HTML
  92. func (d *DiffLine) GetHTMLDiffLineType() string {
  93. switch d.Type {
  94. case DiffLineAdd:
  95. return "add"
  96. case DiffLineDel:
  97. return "del"
  98. case DiffLineSection:
  99. return "tag"
  100. }
  101. return "same"
  102. }
  103. // CanComment returns whether a line can get commented
  104. func (d *DiffLine) CanComment() bool {
  105. return len(d.Comments) == 0 && d.Type != DiffLineSection
  106. }
  107. // GetCommentSide returns the comment side of the first comment, if not set returns empty string
  108. func (d *DiffLine) GetCommentSide() string {
  109. if len(d.Comments) == 0 {
  110. return ""
  111. }
  112. return d.Comments[0].DiffSide()
  113. }
  114. // GetLineTypeMarker returns the line type marker
  115. func (d *DiffLine) GetLineTypeMarker() string {
  116. if strings.IndexByte(" +-", d.Content[0]) > -1 {
  117. return d.Content[0:1]
  118. }
  119. return ""
  120. }
  121. // GetBlobExcerptQuery builds query string to get blob excerpt
  122. func (d *DiffLine) GetBlobExcerptQuery() string {
  123. query := fmt.Sprintf(
  124. "last_left=%d&last_right=%d&"+
  125. "left=%d&right=%d&"+
  126. "left_hunk_size=%d&right_hunk_size=%d&"+
  127. "path=%s",
  128. d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx,
  129. d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx,
  130. d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize,
  131. url.QueryEscape(d.SectionInfo.Path))
  132. return query
  133. }
  134. // GetExpandDirection gets DiffLineExpandDirection
  135. func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection {
  136. if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 {
  137. return DiffLineExpandNone
  138. }
  139. if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 {
  140. return DiffLineExpandUp
  141. } else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 {
  142. return DiffLineExpandUpDown
  143. } else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 {
  144. return DiffLineExpandDown
  145. }
  146. return DiffLineExpandSingle
  147. }
  148. func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo {
  149. leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line)
  150. return &DiffLineSectionInfo{
  151. Path: treePath,
  152. LastLeftIdx: lastLeftIdx,
  153. LastRightIdx: lastRightIdx,
  154. LeftIdx: leftLine,
  155. RightIdx: rightLine,
  156. LeftHunkSize: leftHunk,
  157. RightHunkSize: righHunk,
  158. }
  159. }
  160. // escape a line's content or return <br> needed for copy/paste purposes
  161. func getLineContent(content string, locale translation.Locale) DiffInline {
  162. if len(content) > 0 {
  163. return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale)
  164. }
  165. return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "<br>"}
  166. }
  167. // DiffSection represents a section of a DiffFile.
  168. type DiffSection struct {
  169. file *DiffFile
  170. FileName string
  171. Name string
  172. Lines []*DiffLine
  173. }
  174. var (
  175. addedCodePrefix = []byte(`<span class="added-code">`)
  176. removedCodePrefix = []byte(`<span class="removed-code">`)
  177. codeTagSuffix = []byte(`</span>`)
  178. )
  179. func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
  180. buf := bytes.NewBuffer(nil)
  181. // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
  182. for _, tag := range lineWrapperTags {
  183. buf.WriteString(tag)
  184. }
  185. for _, diff := range diffs {
  186. switch {
  187. case diff.Type == diffmatchpatch.DiffEqual:
  188. buf.WriteString(diff.Text)
  189. case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
  190. buf.Write(addedCodePrefix)
  191. buf.WriteString(diff.Text)
  192. buf.Write(codeTagSuffix)
  193. case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
  194. buf.Write(removedCodePrefix)
  195. buf.WriteString(diff.Text)
  196. buf.Write(codeTagSuffix)
  197. }
  198. }
  199. for range lineWrapperTags {
  200. buf.WriteString("</span>")
  201. }
  202. return buf.String()
  203. }
  204. // GetLine gets a specific line by type (add or del) and file line number
  205. func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
  206. var (
  207. difference = 0
  208. addCount = 0
  209. delCount = 0
  210. matchDiffLine *DiffLine
  211. )
  212. LOOP:
  213. for _, diffLine := range diffSection.Lines {
  214. switch diffLine.Type {
  215. case DiffLineAdd:
  216. addCount++
  217. case DiffLineDel:
  218. delCount++
  219. default:
  220. if matchDiffLine != nil {
  221. break LOOP
  222. }
  223. difference = diffLine.RightIdx - diffLine.LeftIdx
  224. addCount = 0
  225. delCount = 0
  226. }
  227. switch lineType {
  228. case DiffLineDel:
  229. if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
  230. matchDiffLine = diffLine
  231. }
  232. case DiffLineAdd:
  233. if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
  234. matchDiffLine = diffLine
  235. }
  236. }
  237. }
  238. if addCount == delCount {
  239. return matchDiffLine
  240. }
  241. return nil
  242. }
  243. var diffMatchPatch = diffmatchpatch.New()
  244. func init() {
  245. diffMatchPatch.DiffEditCost = 100
  246. }
  247. // DiffInline is a struct that has a content and escape status
  248. type DiffInline struct {
  249. EscapeStatus *charset.EscapeStatus
  250. Content template.HTML
  251. }
  252. // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
  253. func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
  254. status, content := charset.EscapeControlHTML(s, locale)
  255. return DiffInline{EscapeStatus: status, Content: content}
  256. }
  257. // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
  258. func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
  259. highlighted, _ := highlight.Code(fileName, language, code)
  260. status, content := charset.EscapeControlHTML(highlighted, locale)
  261. return DiffInline{EscapeStatus: status, Content: content}
  262. }
  263. // GetComputedInlineDiffFor computes inline diff for the given line.
  264. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
  265. if setting.Git.DisableDiffHighlight {
  266. return getLineContent(diffLine.Content[1:], locale)
  267. }
  268. var (
  269. compareDiffLine *DiffLine
  270. diff1 string
  271. diff2 string
  272. )
  273. language := ""
  274. if diffSection.file != nil {
  275. language = diffSection.file.Language
  276. }
  277. // try to find equivalent diff line. ignore, otherwise
  278. switch diffLine.Type {
  279. case DiffLineSection:
  280. return getLineContent(diffLine.Content[1:], locale)
  281. case DiffLineAdd:
  282. compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
  283. if compareDiffLine == nil {
  284. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
  285. }
  286. diff1 = compareDiffLine.Content
  287. diff2 = diffLine.Content
  288. case DiffLineDel:
  289. compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
  290. if compareDiffLine == nil {
  291. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
  292. }
  293. diff1 = diffLine.Content
  294. diff2 = compareDiffLine.Content
  295. default:
  296. if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
  297. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
  298. }
  299. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale)
  300. }
  301. hcd := newHighlightCodeDiff()
  302. diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
  303. // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
  304. // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
  305. diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
  306. return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale)
  307. }
  308. // DiffFile represents a file diff.
  309. type DiffFile struct {
  310. Name string
  311. NameHash string
  312. OldName string
  313. Index int
  314. Addition, Deletion int
  315. Type DiffFileType
  316. IsCreated bool
  317. IsDeleted bool
  318. IsBin bool
  319. IsLFSFile bool
  320. IsRenamed bool
  321. IsAmbiguous bool
  322. IsSubmodule bool
  323. Sections []*DiffSection
  324. IsIncomplete bool
  325. IsIncompleteLineTooLong bool
  326. IsProtected bool
  327. IsGenerated bool
  328. IsVendored bool
  329. IsViewed bool // User specific
  330. HasChangedSinceLastReview bool // User specific
  331. Language string
  332. Mode string
  333. OldMode string
  334. }
  335. // GetType returns type of diff file.
  336. func (diffFile *DiffFile) GetType() int {
  337. return int(diffFile.Type)
  338. }
  339. // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
  340. func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
  341. if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
  342. return nil
  343. }
  344. leftCommit, err := gitRepo.GetCommit(leftCommitID)
  345. if err != nil {
  346. return nil
  347. }
  348. rightCommit, err := gitRepo.GetCommit(rightCommitID)
  349. if err != nil {
  350. return nil
  351. }
  352. lastSection := diffFile.Sections[len(diffFile.Sections)-1]
  353. lastLine := lastSection.Lines[len(lastSection.Lines)-1]
  354. leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
  355. rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name)
  356. if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx {
  357. return nil
  358. }
  359. tailDiffLine := &DiffLine{
  360. Type: DiffLineSection,
  361. Content: " ",
  362. SectionInfo: &DiffLineSectionInfo{
  363. Path: diffFile.Name,
  364. LastLeftIdx: lastLine.LeftIdx,
  365. LastRightIdx: lastLine.RightIdx,
  366. LeftIdx: leftLineCount,
  367. RightIdx: rightLineCount,
  368. },
  369. }
  370. tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}}
  371. return tailSection
  372. }
  373. // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
  374. func (diffFile *DiffFile) GetDiffFileName() string {
  375. if diffFile.Name == "" {
  376. return diffFile.OldName
  377. }
  378. return diffFile.Name
  379. }
  380. func (diffFile *DiffFile) ShouldBeHidden() bool {
  381. return diffFile.IsGenerated || diffFile.IsViewed
  382. }
  383. func (diffFile *DiffFile) ModeTranslationKey(mode string) string {
  384. switch mode {
  385. case "040000":
  386. return "git.filemode.directory"
  387. case "100644":
  388. return "git.filemode.normal_file"
  389. case "100755":
  390. return "git.filemode.executable_file"
  391. case "120000":
  392. return "git.filemode.symbolic_link"
  393. case "160000":
  394. return "git.filemode.submodule"
  395. default:
  396. return mode
  397. }
  398. }
  399. func getCommitFileLineCount(commit *git.Commit, filePath string) int {
  400. blob, err := commit.GetBlobByPath(filePath)
  401. if err != nil {
  402. return 0
  403. }
  404. lineCount, err := blob.GetBlobLineCount()
  405. if err != nil {
  406. return 0
  407. }
  408. return lineCount
  409. }
  410. // Diff represents a difference between two git trees.
  411. type Diff struct {
  412. Start, End string
  413. NumFiles int
  414. TotalAddition, TotalDeletion int
  415. Files []*DiffFile
  416. IsIncomplete bool
  417. NumViewedFiles int // user-specific
  418. }
  419. // LoadComments loads comments into each line
  420. func (diff *Diff) LoadComments(ctx context.Context, issue *issues_model.Issue, currentUser *user_model.User, showOutdatedComments bool) error {
  421. allComments, err := issues_model.FetchCodeComments(ctx, issue, currentUser, showOutdatedComments)
  422. if err != nil {
  423. return err
  424. }
  425. for _, file := range diff.Files {
  426. if lineCommits, ok := allComments[file.Name]; ok {
  427. for _, section := range file.Sections {
  428. for _, line := range section.Lines {
  429. if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok {
  430. line.Comments = append(line.Comments, comments...)
  431. }
  432. if comments, ok := lineCommits[int64(line.RightIdx)]; ok {
  433. line.Comments = append(line.Comments, comments...)
  434. }
  435. sort.SliceStable(line.Comments, func(i, j int) bool {
  436. return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix
  437. })
  438. }
  439. }
  440. }
  441. }
  442. return nil
  443. }
  444. const cmdDiffHead = "diff --git "
  445. // ParsePatch builds a Diff object from a io.Reader and some parameters.
  446. func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) {
  447. log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile)
  448. var curFile *DiffFile
  449. skipping := skipToFile != ""
  450. diff := &Diff{Files: make([]*DiffFile, 0)}
  451. sb := strings.Builder{}
  452. // OK let's set a reasonable buffer size.
  453. // This should be at least the size of maxLineCharacters or 4096 whichever is larger.
  454. readerSize := maxLineCharacters
  455. if readerSize < 4096 {
  456. readerSize = 4096
  457. }
  458. input := bufio.NewReaderSize(reader, readerSize)
  459. line, err := input.ReadString('\n')
  460. if err != nil {
  461. if err == io.EOF {
  462. return diff, nil
  463. }
  464. return diff, err
  465. }
  466. prepareValue := func(s, p string) string {
  467. return strings.TrimSpace(strings.TrimPrefix(s, p))
  468. }
  469. parsingLoop:
  470. for {
  471. // 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
  472. // if it does not we have bad input!
  473. if !strings.HasPrefix(line, cmdDiffHead) {
  474. return diff, fmt.Errorf("invalid first file line: %s", line)
  475. }
  476. if maxFiles > -1 && len(diff.Files) >= maxFiles {
  477. lastFile := createDiffFile(diff, line)
  478. diff.End = lastFile.Name
  479. diff.IsIncomplete = true
  480. _, err := io.Copy(io.Discard, reader)
  481. if err != nil {
  482. // By the definition of io.Copy this never returns io.EOF
  483. return diff, fmt.Errorf("error during io.Copy: %w", err)
  484. }
  485. break parsingLoop
  486. }
  487. curFile = createDiffFile(diff, line)
  488. if skipping {
  489. if curFile.Name != skipToFile {
  490. line, err = skipToNextDiffHead(input)
  491. if err != nil {
  492. if err == io.EOF {
  493. return diff, nil
  494. }
  495. return diff, err
  496. }
  497. continue
  498. }
  499. skipping = false
  500. }
  501. diff.Files = append(diff.Files, curFile)
  502. // 2. It is followed by one or more extended header lines:
  503. //
  504. // old mode <mode>
  505. // new mode <mode>
  506. // deleted file mode <mode>
  507. // new file mode <mode>
  508. // copy from <path>
  509. // copy to <path>
  510. // rename from <path>
  511. // rename to <path>
  512. // similarity index <number>
  513. // dissimilarity index <number>
  514. // index <hash>..<hash> <mode>
  515. //
  516. // * <mode> 6-digit octal numbers including the file type and file permission bits.
  517. // * <path> does not include the a/ and b/ prefixes
  518. // * <number> percentage of unchanged lines for similarity, percentage of changed
  519. // lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
  520. // * The index line includes the blob object names before and after the change.
  521. // The <mode> is included if the file mode does not change; otherwise, separate
  522. // lines indicate the old and the new mode.
  523. // 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
  524. //
  525. // --- a/<path>
  526. // +++ b/<path>
  527. //
  528. // With multiple hunks
  529. //
  530. // @@ <hunk descriptor> @@
  531. // +added line
  532. // -removed line
  533. // unchanged line
  534. //
  535. // 4. Binary files get:
  536. //
  537. // Binary files a/<path> and b/<path> differ
  538. //
  539. // but one of a/<path> and b/<path> could be /dev/null.
  540. curFileLoop:
  541. for {
  542. line, err = input.ReadString('\n')
  543. if err != nil {
  544. if err != io.EOF {
  545. return diff, err
  546. }
  547. break parsingLoop
  548. }
  549. switch {
  550. case strings.HasPrefix(line, cmdDiffHead):
  551. break curFileLoop
  552. case strings.HasPrefix(line, "old mode ") ||
  553. strings.HasPrefix(line, "new mode "):
  554. if strings.HasPrefix(line, "old mode ") {
  555. curFile.OldMode = prepareValue(line, "old mode ")
  556. }
  557. if strings.HasPrefix(line, "new mode ") {
  558. curFile.Mode = prepareValue(line, "new mode ")
  559. }
  560. if strings.HasSuffix(line, " 160000\n") {
  561. curFile.IsSubmodule = true
  562. }
  563. case strings.HasPrefix(line, "rename from "):
  564. curFile.IsRenamed = true
  565. curFile.Type = DiffFileRename
  566. if curFile.IsAmbiguous {
  567. curFile.OldName = prepareValue(line, "rename from ")
  568. }
  569. case strings.HasPrefix(line, "rename to "):
  570. curFile.IsRenamed = true
  571. curFile.Type = DiffFileRename
  572. if curFile.IsAmbiguous {
  573. curFile.Name = prepareValue(line, "rename to ")
  574. curFile.IsAmbiguous = false
  575. }
  576. case strings.HasPrefix(line, "copy from "):
  577. curFile.IsRenamed = true
  578. curFile.Type = DiffFileCopy
  579. if curFile.IsAmbiguous {
  580. curFile.OldName = prepareValue(line, "copy from ")
  581. }
  582. case strings.HasPrefix(line, "copy to "):
  583. curFile.IsRenamed = true
  584. curFile.Type = DiffFileCopy
  585. if curFile.IsAmbiguous {
  586. curFile.Name = prepareValue(line, "copy to ")
  587. curFile.IsAmbiguous = false
  588. }
  589. case strings.HasPrefix(line, "new file"):
  590. curFile.Type = DiffFileAdd
  591. curFile.IsCreated = true
  592. if strings.HasPrefix(line, "new file mode ") {
  593. curFile.Mode = prepareValue(line, "new file mode ")
  594. }
  595. if strings.HasSuffix(line, " 160000\n") {
  596. curFile.IsSubmodule = true
  597. }
  598. case strings.HasPrefix(line, "deleted"):
  599. curFile.Type = DiffFileDel
  600. curFile.IsDeleted = true
  601. if strings.HasSuffix(line, " 160000\n") {
  602. curFile.IsSubmodule = true
  603. }
  604. case strings.HasPrefix(line, "index"):
  605. if strings.HasSuffix(line, " 160000\n") {
  606. curFile.IsSubmodule = true
  607. }
  608. case strings.HasPrefix(line, "similarity index 100%"):
  609. curFile.Type = DiffFileRename
  610. case strings.HasPrefix(line, "Binary"):
  611. curFile.IsBin = true
  612. case strings.HasPrefix(line, "--- "):
  613. // Handle ambiguous filenames
  614. if curFile.IsAmbiguous {
  615. // The shortest string that can end up here is:
  616. // "--- a\t\n" without the quotes.
  617. // This line has a len() of 7 but doesn't contain a oldName.
  618. // So the amount that the line need is at least 8 or more.
  619. // The code will otherwise panic for a out-of-bounds.
  620. if len(line) > 7 && line[4] == 'a' {
  621. curFile.OldName = line[6 : len(line)-1]
  622. if line[len(line)-2] == '\t' {
  623. curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
  624. }
  625. } else {
  626. curFile.OldName = ""
  627. }
  628. }
  629. // Otherwise do nothing with this line
  630. case strings.HasPrefix(line, "+++ "):
  631. // Handle ambiguous filenames
  632. if curFile.IsAmbiguous {
  633. if len(line) > 6 && line[4] == 'b' {
  634. curFile.Name = line[6 : len(line)-1]
  635. if line[len(line)-2] == '\t' {
  636. curFile.Name = curFile.Name[:len(curFile.Name)-1]
  637. }
  638. if curFile.OldName == "" {
  639. curFile.OldName = curFile.Name
  640. }
  641. } else {
  642. curFile.Name = curFile.OldName
  643. }
  644. curFile.IsAmbiguous = false
  645. }
  646. // Otherwise do nothing with this line, but now switch to parsing hunks
  647. lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input)
  648. diff.TotalAddition += curFile.Addition
  649. diff.TotalDeletion += curFile.Deletion
  650. if err != nil {
  651. if err != io.EOF {
  652. return diff, err
  653. }
  654. break parsingLoop
  655. }
  656. sb.Reset()
  657. _, _ = sb.Write(lineBytes)
  658. for isFragment {
  659. lineBytes, isFragment, err = input.ReadLine()
  660. if err != nil {
  661. // Now by the definition of ReadLine this cannot be io.EOF
  662. return diff, fmt.Errorf("unable to ReadLine: %w", err)
  663. }
  664. _, _ = sb.Write(lineBytes)
  665. }
  666. line = sb.String()
  667. sb.Reset()
  668. break curFileLoop
  669. }
  670. }
  671. }
  672. // TODO: There are numerous issues with this:
  673. // - we might want to consider detecting encoding while parsing but...
  674. // - we're likely to fail to get the correct encoding here anyway as we won't have enough information
  675. diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3)
  676. diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3)
  677. diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer)
  678. diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer)
  679. diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer)
  680. for _, f := range diff.Files {
  681. f.NameHash = base.EncodeSha1(f.Name)
  682. for _, buffer := range diffLineTypeBuffers {
  683. buffer.Reset()
  684. }
  685. for _, sec := range f.Sections {
  686. for _, l := range sec.Lines {
  687. if l.Type == DiffLineSection {
  688. continue
  689. }
  690. diffLineTypeBuffers[l.Type].WriteString(l.Content[1:])
  691. diffLineTypeBuffers[l.Type].WriteString("\n")
  692. }
  693. }
  694. for lineType, buffer := range diffLineTypeBuffers {
  695. diffLineTypeDecoders[lineType] = nil
  696. if buffer.Len() == 0 {
  697. continue
  698. }
  699. charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
  700. if charsetLabel != "UTF-8" && err == nil {
  701. encoding, _ := stdcharset.Lookup(charsetLabel)
  702. if encoding != nil {
  703. diffLineTypeDecoders[lineType] = encoding.NewDecoder()
  704. }
  705. }
  706. }
  707. for _, sec := range f.Sections {
  708. for _, l := range sec.Lines {
  709. decoder := diffLineTypeDecoders[l.Type]
  710. if decoder != nil {
  711. if c, _, err := transform.String(decoder, l.Content[1:]); err == nil {
  712. l.Content = l.Content[0:1] + c
  713. }
  714. }
  715. }
  716. }
  717. }
  718. diff.NumFiles = len(diff.Files)
  719. return diff, nil
  720. }
  721. func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
  722. // need to skip until the next cmdDiffHead
  723. var isFragment, wasFragment bool
  724. var lineBytes []byte
  725. for {
  726. lineBytes, isFragment, err = input.ReadLine()
  727. if err != nil {
  728. return "", err
  729. }
  730. if wasFragment {
  731. wasFragment = isFragment
  732. continue
  733. }
  734. if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) {
  735. break
  736. }
  737. wasFragment = isFragment
  738. }
  739. line = string(lineBytes)
  740. if isFragment {
  741. var tail string
  742. tail, err = input.ReadString('\n')
  743. if err != nil {
  744. return "", err
  745. }
  746. line += tail
  747. }
  748. return line, err
  749. }
  750. func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) {
  751. sb := strings.Builder{}
  752. var (
  753. curSection *DiffSection
  754. curFileLinesCount int
  755. curFileLFSPrefix bool
  756. )
  757. lastLeftIdx := -1
  758. leftLine, rightLine := 1, 1
  759. for {
  760. for isFragment {
  761. curFile.IsIncomplete = true
  762. curFile.IsIncompleteLineTooLong = true
  763. _, isFragment, err = input.ReadLine()
  764. if err != nil {
  765. // Now by the definition of ReadLine this cannot be io.EOF
  766. return nil, false, fmt.Errorf("unable to ReadLine: %w", err)
  767. }
  768. }
  769. sb.Reset()
  770. lineBytes, isFragment, err = input.ReadLine()
  771. if err != nil {
  772. if err == io.EOF {
  773. return lineBytes, isFragment, err
  774. }
  775. err = fmt.Errorf("unable to ReadLine: %w", err)
  776. return nil, false, err
  777. }
  778. if lineBytes[0] == 'd' {
  779. // End of hunks
  780. return lineBytes, isFragment, err
  781. }
  782. switch lineBytes[0] {
  783. case '@':
  784. if maxLines > -1 && curFileLinesCount >= maxLines {
  785. curFile.IsIncomplete = true
  786. continue
  787. }
  788. _, _ = sb.Write(lineBytes)
  789. for isFragment {
  790. // This is very odd indeed - we're in a section header and the line is too long
  791. // This really shouldn't happen...
  792. lineBytes, isFragment, err = input.ReadLine()
  793. if err != nil {
  794. // Now by the definition of ReadLine this cannot be io.EOF
  795. return nil, false, fmt.Errorf("unable to ReadLine: %w", err)
  796. }
  797. _, _ = sb.Write(lineBytes)
  798. }
  799. line := sb.String()
  800. // Create a new section to represent this hunk
  801. curSection = &DiffSection{file: curFile}
  802. lastLeftIdx = -1
  803. curFile.Sections = append(curFile.Sections, curSection)
  804. lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1)
  805. diffLine := &DiffLine{
  806. Type: DiffLineSection,
  807. Content: line,
  808. SectionInfo: lineSectionInfo,
  809. }
  810. curSection.Lines = append(curSection.Lines, diffLine)
  811. curSection.FileName = curFile.Name
  812. // update line number.
  813. leftLine = lineSectionInfo.LeftIdx
  814. rightLine = lineSectionInfo.RightIdx
  815. continue
  816. case '\\':
  817. if maxLines > -1 && curFileLinesCount >= maxLines {
  818. curFile.IsIncomplete = true
  819. continue
  820. }
  821. // This is used only to indicate that the current file does not have a terminal newline
  822. if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) {
  823. return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
  824. }
  825. // Technically this should be the end the file!
  826. // FIXME: we should be putting a marker at the end of the file if there is no terminal new line
  827. continue
  828. case '+':
  829. curFileLinesCount++
  830. curFile.Addition++
  831. if maxLines > -1 && curFileLinesCount >= maxLines {
  832. curFile.IsIncomplete = true
  833. continue
  834. }
  835. diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1}
  836. rightLine++
  837. if curSection == nil {
  838. // Create a new section to represent this hunk
  839. curSection = &DiffSection{file: curFile}
  840. curFile.Sections = append(curFile.Sections, curSection)
  841. lastLeftIdx = -1
  842. }
  843. if lastLeftIdx > -1 {
  844. diffLine.Match = lastLeftIdx
  845. curSection.Lines[lastLeftIdx].Match = len(curSection.Lines)
  846. lastLeftIdx++
  847. if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel {
  848. lastLeftIdx = -1
  849. }
  850. }
  851. curSection.Lines = append(curSection.Lines, diffLine)
  852. case '-':
  853. curFileLinesCount++
  854. curFile.Deletion++
  855. if maxLines > -1 && curFileLinesCount >= maxLines {
  856. curFile.IsIncomplete = true
  857. continue
  858. }
  859. diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1}
  860. if leftLine > 0 {
  861. leftLine++
  862. }
  863. if curSection == nil {
  864. // Create a new section to represent this hunk
  865. curSection = &DiffSection{file: curFile}
  866. curFile.Sections = append(curFile.Sections, curSection)
  867. lastLeftIdx = -1
  868. }
  869. if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel {
  870. lastLeftIdx = len(curSection.Lines)
  871. }
  872. curSection.Lines = append(curSection.Lines, diffLine)
  873. case ' ':
  874. curFileLinesCount++
  875. if maxLines > -1 && curFileLinesCount >= maxLines {
  876. curFile.IsIncomplete = true
  877. continue
  878. }
  879. diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine}
  880. leftLine++
  881. rightLine++
  882. lastLeftIdx = -1
  883. if curSection == nil {
  884. // Create a new section to represent this hunk
  885. curSection = &DiffSection{file: curFile}
  886. curFile.Sections = append(curFile.Sections, curSection)
  887. }
  888. curSection.Lines = append(curSection.Lines, diffLine)
  889. default:
  890. // This is unexpected
  891. return nil, false, fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
  892. }
  893. line := string(lineBytes)
  894. if isFragment {
  895. curFile.IsIncomplete = true
  896. curFile.IsIncompleteLineTooLong = true
  897. for isFragment {
  898. lineBytes, isFragment, err = input.ReadLine()
  899. if err != nil {
  900. // Now by the definition of ReadLine this cannot be io.EOF
  901. return lineBytes, isFragment, fmt.Errorf("unable to ReadLine: %w", err)
  902. }
  903. }
  904. }
  905. if len(line) > maxLineCharacters {
  906. curFile.IsIncomplete = true
  907. curFile.IsIncompleteLineTooLong = true
  908. line = line[:maxLineCharacters]
  909. }
  910. curSection.Lines[len(curSection.Lines)-1].Content = line
  911. // handle LFS
  912. if line[1:] == lfs.MetaFileIdentifier {
  913. curFileLFSPrefix = true
  914. } else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) {
  915. oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix)
  916. if len(oid) == 64 {
  917. m := &git_model.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}
  918. count, err := db.CountByBean(db.DefaultContext, m)
  919. if err == nil && count > 0 {
  920. curFile.IsBin = true
  921. curFile.IsLFSFile = true
  922. curSection.Lines = nil
  923. lastLeftIdx = -1
  924. }
  925. }
  926. }
  927. }
  928. }
  929. func createDiffFile(diff *Diff, line string) *DiffFile {
  930. // The a/ and b/ filenames are the same unless rename/copy is involved.
  931. // Especially, even for a creation or a deletion, /dev/null is not used
  932. // in place of the a/ or b/ filenames.
  933. //
  934. // When rename/copy is involved, file1 and file2 show the name of the
  935. // source file of the rename/copy and the name of the file that rename/copy
  936. // produces, respectively.
  937. //
  938. // Path names are quoted if necessary.
  939. //
  940. // This means that you should always be able to determine the file name even when there
  941. // there is potential ambiguity...
  942. //
  943. // but we can be simpler with our heuristics by just forcing git to prefix things nicely
  944. curFile := &DiffFile{
  945. Index: len(diff.Files) + 1,
  946. Type: DiffFileChange,
  947. Sections: make([]*DiffSection, 0, 10),
  948. }
  949. rd := strings.NewReader(line[len(cmdDiffHead):] + " ")
  950. curFile.Type = DiffFileChange
  951. var oldNameAmbiguity, newNameAmbiguity bool
  952. curFile.OldName, oldNameAmbiguity = readFileName(rd)
  953. curFile.Name, newNameAmbiguity = readFileName(rd)
  954. if oldNameAmbiguity && newNameAmbiguity {
  955. curFile.IsAmbiguous = true
  956. // OK we should bet that the oldName and the newName are the same if they can be made to be same
  957. // So we need to start again ...
  958. if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
  959. // diff --git a/b b/b b/b b/b b/b b/b
  960. //
  961. midpoint := (len(line) + len(cmdDiffHead) - 1) / 2
  962. new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:]
  963. if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] {
  964. curFile.OldName = old[2:]
  965. curFile.Name = old[2:]
  966. }
  967. }
  968. }
  969. curFile.IsRenamed = curFile.Name != curFile.OldName
  970. return curFile
  971. }
  972. func readFileName(rd *strings.Reader) (string, bool) {
  973. ambiguity := false
  974. var name string
  975. char, _ := rd.ReadByte()
  976. _ = rd.UnreadByte()
  977. if char == '"' {
  978. fmt.Fscanf(rd, "%q ", &name)
  979. if len(name) == 0 {
  980. log.Error("Reader has no file name: reader=%+v", rd)
  981. return "", true
  982. }
  983. if name[0] == '\\' {
  984. name = name[1:]
  985. }
  986. } else {
  987. // This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
  988. ambiguity = true
  989. fmt.Fscanf(rd, "%s ", &name)
  990. char, _ := rd.ReadByte()
  991. _ = rd.UnreadByte()
  992. for !(char == 0 || char == '"' || char == 'b') {
  993. var suffix string
  994. fmt.Fscanf(rd, "%s ", &suffix)
  995. name += " " + suffix
  996. char, _ = rd.ReadByte()
  997. _ = rd.UnreadByte()
  998. }
  999. }
  1000. if len(name) < 2 {
  1001. log.Error("Unable to determine name from reader: reader=%+v", rd)
  1002. return "", true
  1003. }
  1004. return name[2:], ambiguity
  1005. }
// DiffOptions represents the options for a DiffRange
type DiffOptions struct {
	// BeforeCommitID is the commit to diff from. When empty, GetDiff uses the
	// first parent of AfterCommitID (or the empty tree if it has no parent).
	BeforeCommitID string
	// AfterCommitID is the commit to diff to.
	AfterCommitID string
	// SkipTo, when not empty, names the file at which the diff should start.
	SkipTo string
	// MaxLines, MaxLineCharacters and MaxFiles are handed to ParsePatch as its limits.
	MaxLines          int
	MaxLineCharacters int
	MaxFiles          int
	// WhitespaceBehavior holds extra arguments added to the git diff command.
	WhitespaceBehavior git.TrustedCmdArgs
	// DirectComparison selects ".." instead of "..." as the separator between
	// the two commits when the diff statistics are computed.
	DirectComparison bool
}
  1017. // GetDiff builds a Diff between two commits of a repository.
  1018. // Passing the empty string as beforeCommitID returns a diff from the parent commit.
  1019. // The whitespaceBehavior is either an empty string or a git flag
  1020. func GetDiff(gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1021. repoPath := gitRepo.Path
  1022. commit, err := gitRepo.GetCommit(opts.AfterCommitID)
  1023. if err != nil {
  1024. return nil, err
  1025. }
  1026. cmdDiff := git.NewCommand(gitRepo.Ctx)
  1027. if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA) && commit.ParentCount() == 0 {
  1028. cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
  1029. AddArguments(opts.WhitespaceBehavior...).
  1030. AddArguments("4b825dc642cb6eb9a060e54bf8d69288fbee4904"). // append empty tree ref
  1031. AddDynamicArguments(opts.AfterCommitID)
  1032. } else {
  1033. actualBeforeCommitID := opts.BeforeCommitID
  1034. if len(actualBeforeCommitID) == 0 {
  1035. parentCommit, _ := commit.Parent(0)
  1036. actualBeforeCommitID = parentCommit.ID.String()
  1037. }
  1038. cmdDiff.AddArguments("diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M").
  1039. AddArguments(opts.WhitespaceBehavior...).
  1040. AddDynamicArguments(actualBeforeCommitID, opts.AfterCommitID)
  1041. opts.BeforeCommitID = actualBeforeCommitID
  1042. }
  1043. // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
  1044. // so if we are using at least this version of git we don't have to tell ParsePatch to do
  1045. // the skipping for us
  1046. parsePatchSkipToFile := opts.SkipTo
  1047. if opts.SkipTo != "" && git.CheckGitVersionAtLeast("2.31") == nil {
  1048. cmdDiff.AddOptionFormat("--skip-to=%s", opts.SkipTo)
  1049. parsePatchSkipToFile = ""
  1050. }
  1051. cmdDiff.AddDashesAndList(files...)
  1052. reader, writer := io.Pipe()
  1053. defer func() {
  1054. _ = reader.Close()
  1055. _ = writer.Close()
  1056. }()
  1057. go func() {
  1058. stderr := &bytes.Buffer{}
  1059. cmdDiff.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath))
  1060. if err := cmdDiff.Run(&git.RunOpts{
  1061. Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second,
  1062. Dir: repoPath,
  1063. Stdout: writer,
  1064. Stderr: stderr,
  1065. }); err != nil {
  1066. log.Error("error during GetDiff(git diff dir: %s): %v, stderr: %s", repoPath, err, stderr.String())
  1067. }
  1068. _ = writer.Close()
  1069. }()
  1070. diff, err := ParsePatch(opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
  1071. if err != nil {
  1072. return nil, fmt.Errorf("unable to ParsePatch: %w", err)
  1073. }
  1074. diff.Start = opts.SkipTo
  1075. checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
  1076. defer deferable()
  1077. for _, diffFile := range diff.Files {
  1078. gotVendor := false
  1079. gotGenerated := false
  1080. if checker != nil {
  1081. attrs, err := checker.CheckPath(diffFile.Name)
  1082. if err == nil {
  1083. if vendored, has := attrs["linguist-vendored"]; has {
  1084. if vendored == "set" || vendored == "true" {
  1085. diffFile.IsVendored = true
  1086. gotVendor = true
  1087. } else {
  1088. gotVendor = vendored == "false"
  1089. }
  1090. }
  1091. if generated, has := attrs["linguist-generated"]; has {
  1092. if generated == "set" || generated == "true" {
  1093. diffFile.IsGenerated = true
  1094. gotGenerated = true
  1095. } else {
  1096. gotGenerated = generated == "false"
  1097. }
  1098. }
  1099. if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
  1100. diffFile.Language = language
  1101. } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
  1102. diffFile.Language = language
  1103. }
  1104. }
  1105. }
  1106. if !gotVendor {
  1107. diffFile.IsVendored = analyze.IsVendor(diffFile.Name)
  1108. }
  1109. if !gotGenerated {
  1110. diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
  1111. }
  1112. tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
  1113. if tailSection != nil {
  1114. diffFile.Sections = append(diffFile.Sections, tailSection)
  1115. }
  1116. }
  1117. separator := "..."
  1118. if opts.DirectComparison {
  1119. separator = ".."
  1120. }
  1121. diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
  1122. if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA {
  1123. diffPaths = []string{git.EmptyTreeSHA, opts.AfterCommitID}
  1124. }
  1125. diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1126. if err != nil && strings.Contains(err.Error(), "no merge base") {
  1127. // git >= 2.28 now returns an error if base and head have become unrelated.
  1128. // previously it would return the results of git diff --shortstat base head so let's try that...
  1129. diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
  1130. diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1131. }
  1132. if err != nil {
  1133. return nil, err
  1134. }
  1135. return diff, nil
  1136. }
// PullDiffStats holds the addition and deletion totals of a diff as returned
// by GetPullDiffStats.
type PullDiffStats struct {
	TotalAddition, TotalDeletion int
}
  1140. // GetPullDiffStats
  1141. func GetPullDiffStats(gitRepo *git.Repository, opts *DiffOptions) (*PullDiffStats, error) {
  1142. repoPath := gitRepo.Path
  1143. diff := &PullDiffStats{}
  1144. separator := "..."
  1145. if opts.DirectComparison {
  1146. separator = ".."
  1147. }
  1148. diffPaths := []string{opts.BeforeCommitID + separator + opts.AfterCommitID}
  1149. if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA {
  1150. diffPaths = []string{git.EmptyTreeSHA, opts.AfterCommitID}
  1151. }
  1152. var err error
  1153. _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1154. if err != nil && strings.Contains(err.Error(), "no merge base") {
  1155. // git >= 2.28 now returns an error if base and head have become unrelated.
  1156. // previously it would return the results of git diff --shortstat base head so let's try that...
  1157. diffPaths = []string{opts.BeforeCommitID, opts.AfterCommitID}
  1158. _, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, nil, diffPaths...)
  1159. }
  1160. if err != nil {
  1161. return nil, err
  1162. }
  1163. return diff, nil
  1164. }
  1165. // SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
  1166. // Additionally, the database asynchronously is updated if files have changed since the last review
  1167. func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1168. diff, err := GetDiff(gitRepo, opts, files...)
  1169. if err != nil {
  1170. return nil, err
  1171. }
  1172. review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID)
  1173. if err != nil || review == nil || review.UpdatedFiles == nil {
  1174. return diff, err
  1175. }
  1176. latestCommit := opts.AfterCommitID
  1177. if latestCommit == "" {
  1178. latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't
  1179. }
  1180. changedFiles, err := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit)
  1181. // There are way too many possible errors.
  1182. // Examples are various git errors such as the commit the review was based on was gc'ed and hence doesn't exist anymore as well as unrecoverable errors where we should serve a 500 response
  1183. // Due to the current architecture and physical limitation of needing to compare explicit error messages, we can only choose one approach without the code getting ugly
  1184. // For SOME of the errors such as the gc'ed commit, it would be best to mark all files as changed
  1185. // But as that does not work for all potential errors, we simply mark all files as unchanged and drop the error which always works, even if not as good as possible
  1186. if err != nil {
  1187. log.Error("Could not get changed files between %s and %s for pull request %d in repo with path %s. Assuming no changes. Error: %w", review.CommitSHA, latestCommit, pull.Index, gitRepo.Path, err)
  1188. }
  1189. filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState)
  1190. outer:
  1191. for _, diffFile := range diff.Files {
  1192. fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()]
  1193. // Check whether it was previously detected that the file has changed since the last review
  1194. if fileViewedState == pull_model.HasChanged {
  1195. diffFile.HasChangedSinceLastReview = true
  1196. continue
  1197. }
  1198. filename := diffFile.GetDiffFileName()
  1199. // Check explicitly whether the file has changed since the last review
  1200. for _, changedFile := range changedFiles {
  1201. diffFile.HasChangedSinceLastReview = filename == changedFile
  1202. if diffFile.HasChangedSinceLastReview {
  1203. filesChangedSinceLastDiff[filename] = pull_model.HasChanged
  1204. continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted
  1205. }
  1206. }
  1207. // Check whether the file has already been viewed
  1208. if fileViewedState == pull_model.Viewed {
  1209. diffFile.IsViewed = true
  1210. diff.NumViewedFiles++
  1211. }
  1212. }
  1213. // Explicitly store files that have changed in the database, if any is present at all.
  1214. // This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed.
  1215. // On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed.
  1216. if len(filesChangedSinceLastDiff) > 0 {
  1217. err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff)
  1218. if err != nil {
  1219. log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err)
  1220. return nil, err
  1221. }
  1222. }
  1223. return diff, nil
  1224. }
  1225. // CommentAsDiff returns c.Patch as *Diff
  1226. func CommentAsDiff(c *issues_model.Comment) (*Diff, error) {
  1227. diff, err := ParsePatch(setting.Git.MaxGitDiffLines,
  1228. setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "")
  1229. if err != nil {
  1230. log.Error("Unable to parse patch: %v", err)
  1231. return nil, err
  1232. }
  1233. if len(diff.Files) == 0 {
  1234. return nil, fmt.Errorf("no file found for comment ID: %d", c.ID)
  1235. }
  1236. secs := diff.Files[0].Sections
  1237. if len(secs) == 0 {
  1238. return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID)
  1239. }
  1240. return diff, nil
  1241. }
  1242. // CommentMustAsDiff executes AsDiff and logs the error instead of returning
  1243. func CommentMustAsDiff(c *issues_model.Comment) *Diff {
  1244. if c == nil {
  1245. return nil
  1246. }
  1247. defer func() {
  1248. if err := recover(); err != nil {
  1249. log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2))
  1250. }
  1251. }()
  1252. diff, err := CommentAsDiff(c)
  1253. if err != nil {
  1254. log.Warn("CommentMustAsDiff: %v", err)
  1255. }
  1256. return diff
  1257. }
  1258. // GetWhitespaceFlag returns git diff flag for treating whitespaces
  1259. func GetWhitespaceFlag(whitespaceBehavior string) git.TrustedCmdArgs {
  1260. whitespaceFlags := map[string]git.TrustedCmdArgs{
  1261. "ignore-all": {"-w"},
  1262. "ignore-change": {"-b"},
  1263. "ignore-eol": {"--ignore-space-at-eol"},
  1264. "show-all": nil,
  1265. }
  1266. if flag, ok := whitespaceFlags[whitespaceBehavior]; ok {
  1267. return flag
  1268. }
  1269. log.Warn("unknown whitespace behavior: %q, default to 'show-all'", whitespaceBehavior)
  1270. return nil
  1271. }