You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

gitdiff.go 41KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340
  1. // Copyright 2014 The Gogs Authors. All rights reserved.
  2. // Copyright 2019 The Gitea Authors. All rights reserved.
  3. // SPDX-License-Identifier: MIT
  4. package gitdiff
  5. import (
  6. "bufio"
  7. "bytes"
  8. "context"
  9. "fmt"
  10. "html"
  11. "html/template"
  12. "io"
  13. "net/url"
  14. "os"
  15. "sort"
  16. "strings"
  17. "time"
  18. "code.gitea.io/gitea/models/db"
  19. git_model "code.gitea.io/gitea/models/git"
  20. issues_model "code.gitea.io/gitea/models/issues"
  21. pull_model "code.gitea.io/gitea/models/pull"
  22. user_model "code.gitea.io/gitea/models/user"
  23. "code.gitea.io/gitea/modules/analyze"
  24. "code.gitea.io/gitea/modules/base"
  25. "code.gitea.io/gitea/modules/charset"
  26. "code.gitea.io/gitea/modules/git"
  27. "code.gitea.io/gitea/modules/highlight"
  28. "code.gitea.io/gitea/modules/lfs"
  29. "code.gitea.io/gitea/modules/log"
  30. "code.gitea.io/gitea/modules/setting"
  31. "code.gitea.io/gitea/modules/translation"
  32. "github.com/sergi/go-diff/diffmatchpatch"
  33. stdcharset "golang.org/x/net/html/charset"
  34. "golang.org/x/text/encoding"
  35. "golang.org/x/text/transform"
  36. )
  37. // DiffLineType represents the type of DiffLine.
  38. type DiffLineType uint8
  39. // DiffLineType possible values.
  40. const (
  41. DiffLinePlain DiffLineType = iota + 1
  42. DiffLineAdd
  43. DiffLineDel
  44. DiffLineSection
  45. )
  46. // DiffFileType represents the type of DiffFile.
  47. type DiffFileType uint8
  48. // DiffFileType possible values.
  49. const (
  50. DiffFileAdd DiffFileType = iota + 1
  51. DiffFileChange
  52. DiffFileDel
  53. DiffFileRename
  54. DiffFileCopy
  55. )
  56. // DiffLineExpandDirection represents the DiffLineSection expand direction
  57. type DiffLineExpandDirection uint8
  58. // DiffLineExpandDirection possible values.
  59. const (
  60. DiffLineExpandNone DiffLineExpandDirection = iota + 1
  61. DiffLineExpandSingle
  62. DiffLineExpandUpDown
  63. DiffLineExpandUp
  64. DiffLineExpandDown
  65. )
  66. // DiffLine represents a line difference in a DiffSection.
  67. type DiffLine struct {
  68. LeftIdx int
  69. RightIdx int
  70. Match int
  71. Type DiffLineType
  72. Content string
  73. Comments []*issues_model.Comment
  74. SectionInfo *DiffLineSectionInfo
  75. }
  76. // DiffLineSectionInfo represents diff line section meta data
  77. type DiffLineSectionInfo struct {
  78. Path string
  79. LastLeftIdx int
  80. LastRightIdx int
  81. LeftIdx int
  82. RightIdx int
  83. LeftHunkSize int
  84. RightHunkSize int
  85. }
  86. // BlobExcerptChunkSize represent max lines of excerpt
  87. const BlobExcerptChunkSize = 20
  88. // GetType returns the type of DiffLine.
  89. func (d *DiffLine) GetType() int {
  90. return int(d.Type)
  91. }
  92. // CanComment returns whether a line can get commented
  93. func (d *DiffLine) CanComment() bool {
  94. return len(d.Comments) == 0 && d.Type != DiffLineSection
  95. }
  96. // GetCommentSide returns the comment side of the first comment, if not set returns empty string
  97. func (d *DiffLine) GetCommentSide() string {
  98. if len(d.Comments) == 0 {
  99. return ""
  100. }
  101. return d.Comments[0].DiffSide()
  102. }
  103. // GetLineTypeMarker returns the line type marker
  104. func (d *DiffLine) GetLineTypeMarker() string {
  105. if strings.IndexByte(" +-", d.Content[0]) > -1 {
  106. return d.Content[0:1]
  107. }
  108. return ""
  109. }
  110. // GetBlobExcerptQuery builds query string to get blob excerpt
  111. func (d *DiffLine) GetBlobExcerptQuery() string {
  112. query := fmt.Sprintf(
  113. "last_left=%d&last_right=%d&"+
  114. "left=%d&right=%d&"+
  115. "left_hunk_size=%d&right_hunk_size=%d&"+
  116. "path=%s",
  117. d.SectionInfo.LastLeftIdx, d.SectionInfo.LastRightIdx,
  118. d.SectionInfo.LeftIdx, d.SectionInfo.RightIdx,
  119. d.SectionInfo.LeftHunkSize, d.SectionInfo.RightHunkSize,
  120. url.QueryEscape(d.SectionInfo.Path))
  121. return query
  122. }
  123. // GetExpandDirection gets DiffLineExpandDirection
  124. func (d *DiffLine) GetExpandDirection() DiffLineExpandDirection {
  125. if d.Type != DiffLineSection || d.SectionInfo == nil || d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx <= 1 {
  126. return DiffLineExpandNone
  127. }
  128. if d.SectionInfo.LastLeftIdx <= 0 && d.SectionInfo.LastRightIdx <= 0 {
  129. return DiffLineExpandUp
  130. } else if d.SectionInfo.RightIdx-d.SectionInfo.LastRightIdx > BlobExcerptChunkSize && d.SectionInfo.RightHunkSize > 0 {
  131. return DiffLineExpandUpDown
  132. } else if d.SectionInfo.LeftHunkSize <= 0 && d.SectionInfo.RightHunkSize <= 0 {
  133. return DiffLineExpandDown
  134. }
  135. return DiffLineExpandSingle
  136. }
  137. func getDiffLineSectionInfo(treePath, line string, lastLeftIdx, lastRightIdx int) *DiffLineSectionInfo {
  138. leftLine, leftHunk, rightLine, righHunk := git.ParseDiffHunkString(line)
  139. return &DiffLineSectionInfo{
  140. Path: treePath,
  141. LastLeftIdx: lastLeftIdx,
  142. LastRightIdx: lastRightIdx,
  143. LeftIdx: leftLine,
  144. RightIdx: rightLine,
  145. LeftHunkSize: leftHunk,
  146. RightHunkSize: righHunk,
  147. }
  148. }
  149. // escape a line's content or return <br> needed for copy/paste purposes
  150. func getLineContent(content string, locale translation.Locale) DiffInline {
  151. if len(content) > 0 {
  152. return DiffInlineWithUnicodeEscape(template.HTML(html.EscapeString(content)), locale)
  153. }
  154. return DiffInline{EscapeStatus: &charset.EscapeStatus{}, Content: "<br>"}
  155. }
  156. // DiffSection represents a section of a DiffFile.
  157. type DiffSection struct {
  158. file *DiffFile
  159. FileName string
  160. Name string
  161. Lines []*DiffLine
  162. }
  163. var (
  164. addedCodePrefix = []byte(`<span class="added-code">`)
  165. removedCodePrefix = []byte(`<span class="removed-code">`)
  166. codeTagSuffix = []byte(`</span>`)
  167. )
  168. func diffToHTML(lineWrapperTags []string, diffs []diffmatchpatch.Diff, lineType DiffLineType) string {
  169. buf := bytes.NewBuffer(nil)
  170. // restore the line wrapper tags <span class="line"> and <span class="cl">, if necessary
  171. for _, tag := range lineWrapperTags {
  172. buf.WriteString(tag)
  173. }
  174. for _, diff := range diffs {
  175. switch {
  176. case diff.Type == diffmatchpatch.DiffEqual:
  177. buf.WriteString(diff.Text)
  178. case diff.Type == diffmatchpatch.DiffInsert && lineType == DiffLineAdd:
  179. buf.Write(addedCodePrefix)
  180. buf.WriteString(diff.Text)
  181. buf.Write(codeTagSuffix)
  182. case diff.Type == diffmatchpatch.DiffDelete && lineType == DiffLineDel:
  183. buf.Write(removedCodePrefix)
  184. buf.WriteString(diff.Text)
  185. buf.Write(codeTagSuffix)
  186. }
  187. }
  188. for range lineWrapperTags {
  189. buf.WriteString("</span>")
  190. }
  191. return buf.String()
  192. }
  193. // GetLine gets a specific line by type (add or del) and file line number
  194. func (diffSection *DiffSection) GetLine(lineType DiffLineType, idx int) *DiffLine {
  195. var (
  196. difference = 0
  197. addCount = 0
  198. delCount = 0
  199. matchDiffLine *DiffLine
  200. )
  201. LOOP:
  202. for _, diffLine := range diffSection.Lines {
  203. switch diffLine.Type {
  204. case DiffLineAdd:
  205. addCount++
  206. case DiffLineDel:
  207. delCount++
  208. default:
  209. if matchDiffLine != nil {
  210. break LOOP
  211. }
  212. difference = diffLine.RightIdx - diffLine.LeftIdx
  213. addCount = 0
  214. delCount = 0
  215. }
  216. switch lineType {
  217. case DiffLineDel:
  218. if diffLine.RightIdx == 0 && diffLine.LeftIdx == idx-difference {
  219. matchDiffLine = diffLine
  220. }
  221. case DiffLineAdd:
  222. if diffLine.LeftIdx == 0 && diffLine.RightIdx == idx+difference {
  223. matchDiffLine = diffLine
  224. }
  225. }
  226. }
  227. if addCount == delCount {
  228. return matchDiffLine
  229. }
  230. return nil
  231. }
  232. var diffMatchPatch = diffmatchpatch.New()
  233. func init() {
  234. diffMatchPatch.DiffEditCost = 100
  235. }
  236. // DiffInline is a struct that has a content and escape status
  237. type DiffInline struct {
  238. EscapeStatus *charset.EscapeStatus
  239. Content template.HTML
  240. }
  241. // DiffInlineWithUnicodeEscape makes a DiffInline with hidden unicode characters escaped
  242. func DiffInlineWithUnicodeEscape(s template.HTML, locale translation.Locale) DiffInline {
  243. status, content := charset.EscapeControlHTML(string(s), locale)
  244. return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
  245. }
  246. // DiffInlineWithHighlightCode makes a DiffInline with code highlight and hidden unicode characters escaped
  247. func DiffInlineWithHighlightCode(fileName, language, code string, locale translation.Locale) DiffInline {
  248. highlighted, _ := highlight.Code(fileName, language, code)
  249. status, content := charset.EscapeControlHTML(highlighted, locale)
  250. return DiffInline{EscapeStatus: status, Content: template.HTML(content)}
  251. }
  252. // GetComputedInlineDiffFor computes inline diff for the given line.
  253. func (diffSection *DiffSection) GetComputedInlineDiffFor(diffLine *DiffLine, locale translation.Locale) DiffInline {
  254. if setting.Git.DisableDiffHighlight {
  255. return getLineContent(diffLine.Content[1:], locale)
  256. }
  257. var (
  258. compareDiffLine *DiffLine
  259. diff1 string
  260. diff2 string
  261. )
  262. language := ""
  263. if diffSection.file != nil {
  264. language = diffSection.file.Language
  265. }
  266. // try to find equivalent diff line. ignore, otherwise
  267. switch diffLine.Type {
  268. case DiffLineSection:
  269. return getLineContent(diffLine.Content[1:], locale)
  270. case DiffLineAdd:
  271. compareDiffLine = diffSection.GetLine(DiffLineDel, diffLine.RightIdx)
  272. if compareDiffLine == nil {
  273. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
  274. }
  275. diff1 = compareDiffLine.Content
  276. diff2 = diffLine.Content
  277. case DiffLineDel:
  278. compareDiffLine = diffSection.GetLine(DiffLineAdd, diffLine.LeftIdx)
  279. if compareDiffLine == nil {
  280. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
  281. }
  282. diff1 = diffLine.Content
  283. diff2 = compareDiffLine.Content
  284. default:
  285. if strings.IndexByte(" +-", diffLine.Content[0]) > -1 {
  286. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content[1:], locale)
  287. }
  288. return DiffInlineWithHighlightCode(diffSection.FileName, language, diffLine.Content, locale)
  289. }
  290. hcd := newHighlightCodeDiff()
  291. diffRecord := hcd.diffWithHighlight(diffSection.FileName, language, diff1[1:], diff2[1:])
  292. // it seems that Gitea doesn't need the line wrapper of Chroma, so do not add them back
  293. // if the line wrappers are still needed in the future, it can be added back by "diffToHTML(hcd.lineWrapperTags. ...)"
  294. diffHTML := diffToHTML(nil, diffRecord, diffLine.Type)
  295. return DiffInlineWithUnicodeEscape(template.HTML(diffHTML), locale)
  296. }
  297. // DiffFile represents a file diff.
  298. type DiffFile struct {
  299. Name string
  300. NameHash string
  301. OldName string
  302. Index int
  303. Addition, Deletion int
  304. Type DiffFileType
  305. IsCreated bool
  306. IsDeleted bool
  307. IsBin bool
  308. IsLFSFile bool
  309. IsRenamed bool
  310. IsAmbiguous bool
  311. IsSubmodule bool
  312. Sections []*DiffSection
  313. IsIncomplete bool
  314. IsIncompleteLineTooLong bool
  315. IsProtected bool
  316. IsGenerated bool
  317. IsVendored bool
  318. IsViewed bool // User specific
  319. HasChangedSinceLastReview bool // User specific
  320. Language string
  321. }
  322. // GetType returns type of diff file.
  323. func (diffFile *DiffFile) GetType() int {
  324. return int(diffFile.Type)
  325. }
  326. // GetTailSection creates a fake DiffLineSection if the last section is not the end of the file
  327. func (diffFile *DiffFile) GetTailSection(gitRepo *git.Repository, leftCommitID, rightCommitID string) *DiffSection {
  328. if len(diffFile.Sections) == 0 || diffFile.Type != DiffFileChange || diffFile.IsBin || diffFile.IsLFSFile {
  329. return nil
  330. }
  331. leftCommit, err := gitRepo.GetCommit(leftCommitID)
  332. if err != nil {
  333. return nil
  334. }
  335. rightCommit, err := gitRepo.GetCommit(rightCommitID)
  336. if err != nil {
  337. return nil
  338. }
  339. lastSection := diffFile.Sections[len(diffFile.Sections)-1]
  340. lastLine := lastSection.Lines[len(lastSection.Lines)-1]
  341. leftLineCount := getCommitFileLineCount(leftCommit, diffFile.Name)
  342. rightLineCount := getCommitFileLineCount(rightCommit, diffFile.Name)
  343. if leftLineCount <= lastLine.LeftIdx || rightLineCount <= lastLine.RightIdx {
  344. return nil
  345. }
  346. tailDiffLine := &DiffLine{
  347. Type: DiffLineSection,
  348. Content: " ",
  349. SectionInfo: &DiffLineSectionInfo{
  350. Path: diffFile.Name,
  351. LastLeftIdx: lastLine.LeftIdx,
  352. LastRightIdx: lastLine.RightIdx,
  353. LeftIdx: leftLineCount,
  354. RightIdx: rightLineCount,
  355. },
  356. }
  357. tailSection := &DiffSection{FileName: diffFile.Name, Lines: []*DiffLine{tailDiffLine}}
  358. return tailSection
  359. }
  360. // GetDiffFileName returns the name of the diff file, or its old name in case it was deleted
  361. func (diffFile *DiffFile) GetDiffFileName() string {
  362. if diffFile.Name == "" {
  363. return diffFile.OldName
  364. }
  365. return diffFile.Name
  366. }
  367. func (diffFile *DiffFile) ShouldBeHidden() bool {
  368. return diffFile.IsGenerated || diffFile.IsViewed
  369. }
  370. func getCommitFileLineCount(commit *git.Commit, filePath string) int {
  371. blob, err := commit.GetBlobByPath(filePath)
  372. if err != nil {
  373. return 0
  374. }
  375. lineCount, err := blob.GetBlobLineCount()
  376. if err != nil {
  377. return 0
  378. }
  379. return lineCount
  380. }
  381. // Diff represents a difference between two git trees.
  382. type Diff struct {
  383. Start, End string
  384. NumFiles int
  385. TotalAddition, TotalDeletion int
  386. Files []*DiffFile
  387. IsIncomplete bool
  388. NumViewedFiles int // user-specific
  389. }
  390. // LoadComments loads comments into each line
  391. func (diff *Diff) LoadComments(ctx context.Context, issue *issues_model.Issue, currentUser *user_model.User) error {
  392. allComments, err := issues_model.FetchCodeComments(ctx, issue, currentUser)
  393. if err != nil {
  394. return err
  395. }
  396. for _, file := range diff.Files {
  397. if lineCommits, ok := allComments[file.Name]; ok {
  398. for _, section := range file.Sections {
  399. for _, line := range section.Lines {
  400. if comments, ok := lineCommits[int64(line.LeftIdx*-1)]; ok {
  401. line.Comments = append(line.Comments, comments...)
  402. }
  403. if comments, ok := lineCommits[int64(line.RightIdx)]; ok {
  404. line.Comments = append(line.Comments, comments...)
  405. }
  406. sort.SliceStable(line.Comments, func(i, j int) bool {
  407. return line.Comments[i].CreatedUnix < line.Comments[j].CreatedUnix
  408. })
  409. }
  410. }
  411. }
  412. }
  413. return nil
  414. }
  415. const cmdDiffHead = "diff --git "
  416. // ParsePatch builds a Diff object from a io.Reader and some parameters.
  417. func ParsePatch(maxLines, maxLineCharacters, maxFiles int, reader io.Reader, skipToFile string) (*Diff, error) {
  418. log.Debug("ParsePatch(%d, %d, %d, ..., %s)", maxLines, maxLineCharacters, maxFiles, skipToFile)
  419. var curFile *DiffFile
  420. skipping := skipToFile != ""
  421. diff := &Diff{Files: make([]*DiffFile, 0)}
  422. sb := strings.Builder{}
  423. // OK let's set a reasonable buffer size.
  424. // This should be let's say at least the size of maxLineCharacters or 4096 whichever is larger.
  425. readerSize := maxLineCharacters
  426. if readerSize < 4096 {
  427. readerSize = 4096
  428. }
  429. input := bufio.NewReaderSize(reader, readerSize)
  430. line, err := input.ReadString('\n')
  431. if err != nil {
  432. if err == io.EOF {
  433. return diff, nil
  434. }
  435. return diff, err
  436. }
  437. parsingLoop:
  438. for {
  439. // 1. A patch file always begins with `diff --git ` + `a/path b/path` (possibly quoted)
  440. // if it does not we have bad input!
  441. if !strings.HasPrefix(line, cmdDiffHead) {
  442. return diff, fmt.Errorf("invalid first file line: %s", line)
  443. }
  444. if maxFiles > -1 && len(diff.Files) >= maxFiles {
  445. lastFile := createDiffFile(diff, line)
  446. diff.End = lastFile.Name
  447. diff.IsIncomplete = true
  448. _, err := io.Copy(io.Discard, reader)
  449. if err != nil {
  450. // By the definition of io.Copy this never returns io.EOF
  451. return diff, fmt.Errorf("error during io.Copy: %w", err)
  452. }
  453. break parsingLoop
  454. }
  455. curFile = createDiffFile(diff, line)
  456. if skipping {
  457. if curFile.Name != skipToFile {
  458. line, err = skipToNextDiffHead(input)
  459. if err != nil {
  460. if err == io.EOF {
  461. return diff, nil
  462. }
  463. return diff, err
  464. }
  465. continue
  466. }
  467. skipping = false
  468. }
  469. diff.Files = append(diff.Files, curFile)
  470. // 2. It is followed by one or more extended header lines:
  471. //
  472. // old mode <mode>
  473. // new mode <mode>
  474. // deleted file mode <mode>
  475. // new file mode <mode>
  476. // copy from <path>
  477. // copy to <path>
  478. // rename from <path>
  479. // rename to <path>
  480. // similarity index <number>
  481. // dissimilarity index <number>
  482. // index <hash>..<hash> <mode>
  483. //
  484. // * <mode> 6-digit octal numbers including the file type and file permission bits.
  485. // * <path> does not include the a/ and b/ prefixes
  486. // * <number> percentage of unchanged lines for similarity, percentage of changed
  487. // lines dissimilarity as integer rounded down with terminal %. 100% => equal files.
  488. // * The index line includes the blob object names before and after the change.
  489. // The <mode> is included if the file mode does not change; otherwise, separate
  490. // lines indicate the old and the new mode.
  491. // 3. Following this header the "standard unified" diff format header may be encountered: (but not for every case...)
  492. //
  493. // --- a/<path>
  494. // +++ b/<path>
  495. //
  496. // With multiple hunks
  497. //
  498. // @@ <hunk descriptor> @@
  499. // +added line
  500. // -removed line
  501. // unchanged line
  502. //
  503. // 4. Binary files get:
  504. //
  505. // Binary files a/<path> and b/<path> differ
  506. //
  507. // but one of a/<path> and b/<path> could be /dev/null.
  508. curFileLoop:
  509. for {
  510. line, err = input.ReadString('\n')
  511. if err != nil {
  512. if err != io.EOF {
  513. return diff, err
  514. }
  515. break parsingLoop
  516. }
  517. switch {
  518. case strings.HasPrefix(line, cmdDiffHead):
  519. break curFileLoop
  520. case strings.HasPrefix(line, "old mode ") ||
  521. strings.HasPrefix(line, "new mode "):
  522. if strings.HasSuffix(line, " 160000\n") {
  523. curFile.IsSubmodule = true
  524. }
  525. case strings.HasPrefix(line, "rename from "):
  526. curFile.IsRenamed = true
  527. curFile.Type = DiffFileRename
  528. if curFile.IsAmbiguous {
  529. curFile.OldName = line[len("rename from ") : len(line)-1]
  530. }
  531. case strings.HasPrefix(line, "rename to "):
  532. curFile.IsRenamed = true
  533. curFile.Type = DiffFileRename
  534. if curFile.IsAmbiguous {
  535. curFile.Name = line[len("rename to ") : len(line)-1]
  536. curFile.IsAmbiguous = false
  537. }
  538. case strings.HasPrefix(line, "copy from "):
  539. curFile.IsRenamed = true
  540. curFile.Type = DiffFileCopy
  541. if curFile.IsAmbiguous {
  542. curFile.OldName = line[len("copy from ") : len(line)-1]
  543. }
  544. case strings.HasPrefix(line, "copy to "):
  545. curFile.IsRenamed = true
  546. curFile.Type = DiffFileCopy
  547. if curFile.IsAmbiguous {
  548. curFile.Name = line[len("copy to ") : len(line)-1]
  549. curFile.IsAmbiguous = false
  550. }
  551. case strings.HasPrefix(line, "new file"):
  552. curFile.Type = DiffFileAdd
  553. curFile.IsCreated = true
  554. if strings.HasSuffix(line, " 160000\n") {
  555. curFile.IsSubmodule = true
  556. }
  557. case strings.HasPrefix(line, "deleted"):
  558. curFile.Type = DiffFileDel
  559. curFile.IsDeleted = true
  560. if strings.HasSuffix(line, " 160000\n") {
  561. curFile.IsSubmodule = true
  562. }
  563. case strings.HasPrefix(line, "index"):
  564. if strings.HasSuffix(line, " 160000\n") {
  565. curFile.IsSubmodule = true
  566. }
  567. case strings.HasPrefix(line, "similarity index 100%"):
  568. curFile.Type = DiffFileRename
  569. case strings.HasPrefix(line, "Binary"):
  570. curFile.IsBin = true
  571. case strings.HasPrefix(line, "--- "):
  572. // Handle ambiguous filenames
  573. if curFile.IsAmbiguous {
  574. // The shortest string that can end up here is:
  575. // "--- a\t\n" without the quotes.
  576. // This line has a len() of 7 but doesn't contain a oldName.
  577. // So the amount that the line need is at least 8 or more.
  578. // The code will otherwise panic for a out-of-bounds.
  579. if len(line) > 7 && line[4] == 'a' {
  580. curFile.OldName = line[6 : len(line)-1]
  581. if line[len(line)-2] == '\t' {
  582. curFile.OldName = curFile.OldName[:len(curFile.OldName)-1]
  583. }
  584. } else {
  585. curFile.OldName = ""
  586. }
  587. }
  588. // Otherwise do nothing with this line
  589. case strings.HasPrefix(line, "+++ "):
  590. // Handle ambiguous filenames
  591. if curFile.IsAmbiguous {
  592. if len(line) > 6 && line[4] == 'b' {
  593. curFile.Name = line[6 : len(line)-1]
  594. if line[len(line)-2] == '\t' {
  595. curFile.Name = curFile.Name[:len(curFile.Name)-1]
  596. }
  597. if curFile.OldName == "" {
  598. curFile.OldName = curFile.Name
  599. }
  600. } else {
  601. curFile.Name = curFile.OldName
  602. }
  603. curFile.IsAmbiguous = false
  604. }
  605. // Otherwise do nothing with this line, but now switch to parsing hunks
  606. lineBytes, isFragment, err := parseHunks(curFile, maxLines, maxLineCharacters, input)
  607. diff.TotalAddition += curFile.Addition
  608. diff.TotalDeletion += curFile.Deletion
  609. if err != nil {
  610. if err != io.EOF {
  611. return diff, err
  612. }
  613. break parsingLoop
  614. }
  615. sb.Reset()
  616. _, _ = sb.Write(lineBytes)
  617. for isFragment {
  618. lineBytes, isFragment, err = input.ReadLine()
  619. if err != nil {
  620. // Now by the definition of ReadLine this cannot be io.EOF
  621. return diff, fmt.Errorf("unable to ReadLine: %w", err)
  622. }
  623. _, _ = sb.Write(lineBytes)
  624. }
  625. line = sb.String()
  626. sb.Reset()
  627. break curFileLoop
  628. }
  629. }
  630. }
  631. // TODO: There are numerous issues with this:
  632. // - we might want to consider detecting encoding while parsing but...
  633. // - we're likely to fail to get the correct encoding here anyway as we won't have enough information
  634. diffLineTypeBuffers := make(map[DiffLineType]*bytes.Buffer, 3)
  635. diffLineTypeDecoders := make(map[DiffLineType]*encoding.Decoder, 3)
  636. diffLineTypeBuffers[DiffLinePlain] = new(bytes.Buffer)
  637. diffLineTypeBuffers[DiffLineAdd] = new(bytes.Buffer)
  638. diffLineTypeBuffers[DiffLineDel] = new(bytes.Buffer)
  639. for _, f := range diff.Files {
  640. f.NameHash = base.EncodeSha1(f.Name)
  641. for _, buffer := range diffLineTypeBuffers {
  642. buffer.Reset()
  643. }
  644. for _, sec := range f.Sections {
  645. for _, l := range sec.Lines {
  646. if l.Type == DiffLineSection {
  647. continue
  648. }
  649. diffLineTypeBuffers[l.Type].WriteString(l.Content[1:])
  650. diffLineTypeBuffers[l.Type].WriteString("\n")
  651. }
  652. }
  653. for lineType, buffer := range diffLineTypeBuffers {
  654. diffLineTypeDecoders[lineType] = nil
  655. if buffer.Len() == 0 {
  656. continue
  657. }
  658. charsetLabel, err := charset.DetectEncoding(buffer.Bytes())
  659. if charsetLabel != "UTF-8" && err == nil {
  660. encoding, _ := stdcharset.Lookup(charsetLabel)
  661. if encoding != nil {
  662. diffLineTypeDecoders[lineType] = encoding.NewDecoder()
  663. }
  664. }
  665. }
  666. for _, sec := range f.Sections {
  667. for _, l := range sec.Lines {
  668. decoder := diffLineTypeDecoders[l.Type]
  669. if decoder != nil {
  670. if c, _, err := transform.String(decoder, l.Content[1:]); err == nil {
  671. l.Content = l.Content[0:1] + c
  672. }
  673. }
  674. }
  675. }
  676. }
  677. diff.NumFiles = len(diff.Files)
  678. return diff, nil
  679. }
  680. func skipToNextDiffHead(input *bufio.Reader) (line string, err error) {
  681. // need to skip until the next cmdDiffHead
  682. var isFragment, wasFragment bool
  683. var lineBytes []byte
  684. for {
  685. lineBytes, isFragment, err = input.ReadLine()
  686. if err != nil {
  687. return
  688. }
  689. if wasFragment {
  690. wasFragment = isFragment
  691. continue
  692. }
  693. if bytes.HasPrefix(lineBytes, []byte(cmdDiffHead)) {
  694. break
  695. }
  696. wasFragment = isFragment
  697. }
  698. line = string(lineBytes)
  699. if isFragment {
  700. var tail string
  701. tail, err = input.ReadString('\n')
  702. if err != nil {
  703. return
  704. }
  705. line += tail
  706. }
  707. return line, err
  708. }
  709. func parseHunks(curFile *DiffFile, maxLines, maxLineCharacters int, input *bufio.Reader) (lineBytes []byte, isFragment bool, err error) {
  710. sb := strings.Builder{}
  711. var (
  712. curSection *DiffSection
  713. curFileLinesCount int
  714. curFileLFSPrefix bool
  715. )
  716. lastLeftIdx := -1
  717. leftLine, rightLine := 1, 1
  718. for {
  719. for isFragment {
  720. curFile.IsIncomplete = true
  721. curFile.IsIncompleteLineTooLong = true
  722. _, isFragment, err = input.ReadLine()
  723. if err != nil {
  724. // Now by the definition of ReadLine this cannot be io.EOF
  725. err = fmt.Errorf("unable to ReadLine: %w", err)
  726. return
  727. }
  728. }
  729. sb.Reset()
  730. lineBytes, isFragment, err = input.ReadLine()
  731. if err != nil {
  732. if err == io.EOF {
  733. return
  734. }
  735. err = fmt.Errorf("unable to ReadLine: %w", err)
  736. return
  737. }
  738. if lineBytes[0] == 'd' {
  739. // End of hunks
  740. return
  741. }
  742. switch lineBytes[0] {
  743. case '@':
  744. if maxLines > -1 && curFileLinesCount >= maxLines {
  745. curFile.IsIncomplete = true
  746. continue
  747. }
  748. _, _ = sb.Write(lineBytes)
  749. for isFragment {
  750. // This is very odd indeed - we're in a section header and the line is too long
  751. // This really shouldn't happen...
  752. lineBytes, isFragment, err = input.ReadLine()
  753. if err != nil {
  754. // Now by the definition of ReadLine this cannot be io.EOF
  755. err = fmt.Errorf("unable to ReadLine: %w", err)
  756. return
  757. }
  758. _, _ = sb.Write(lineBytes)
  759. }
  760. line := sb.String()
  761. // Create a new section to represent this hunk
  762. curSection = &DiffSection{file: curFile}
  763. lastLeftIdx = -1
  764. curFile.Sections = append(curFile.Sections, curSection)
  765. lineSectionInfo := getDiffLineSectionInfo(curFile.Name, line, leftLine-1, rightLine-1)
  766. diffLine := &DiffLine{
  767. Type: DiffLineSection,
  768. Content: line,
  769. SectionInfo: lineSectionInfo,
  770. }
  771. curSection.Lines = append(curSection.Lines, diffLine)
  772. curSection.FileName = curFile.Name
  773. // update line number.
  774. leftLine = lineSectionInfo.LeftIdx
  775. rightLine = lineSectionInfo.RightIdx
  776. continue
  777. case '\\':
  778. if maxLines > -1 && curFileLinesCount >= maxLines {
  779. curFile.IsIncomplete = true
  780. continue
  781. }
  782. // This is used only to indicate that the current file does not have a terminal newline
  783. if !bytes.Equal(lineBytes, []byte("\\ No newline at end of file")) {
  784. err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
  785. return
  786. }
  787. // Technically this should be the end the file!
  788. // FIXME: we should be putting a marker at the end of the file if there is no terminal new line
  789. continue
  790. case '+':
  791. curFileLinesCount++
  792. curFile.Addition++
  793. if maxLines > -1 && curFileLinesCount >= maxLines {
  794. curFile.IsIncomplete = true
  795. continue
  796. }
  797. diffLine := &DiffLine{Type: DiffLineAdd, RightIdx: rightLine, Match: -1}
  798. rightLine++
  799. if curSection == nil {
  800. // Create a new section to represent this hunk
  801. curSection = &DiffSection{file: curFile}
  802. curFile.Sections = append(curFile.Sections, curSection)
  803. lastLeftIdx = -1
  804. }
  805. if lastLeftIdx > -1 {
  806. diffLine.Match = lastLeftIdx
  807. curSection.Lines[lastLeftIdx].Match = len(curSection.Lines)
  808. lastLeftIdx++
  809. if lastLeftIdx >= len(curSection.Lines) || curSection.Lines[lastLeftIdx].Type != DiffLineDel {
  810. lastLeftIdx = -1
  811. }
  812. }
  813. curSection.Lines = append(curSection.Lines, diffLine)
  814. case '-':
  815. curFileLinesCount++
  816. curFile.Deletion++
  817. if maxLines > -1 && curFileLinesCount >= maxLines {
  818. curFile.IsIncomplete = true
  819. continue
  820. }
  821. diffLine := &DiffLine{Type: DiffLineDel, LeftIdx: leftLine, Match: -1}
  822. if leftLine > 0 {
  823. leftLine++
  824. }
  825. if curSection == nil {
  826. // Create a new section to represent this hunk
  827. curSection = &DiffSection{file: curFile}
  828. curFile.Sections = append(curFile.Sections, curSection)
  829. lastLeftIdx = -1
  830. }
  831. if len(curSection.Lines) == 0 || curSection.Lines[len(curSection.Lines)-1].Type != DiffLineDel {
  832. lastLeftIdx = len(curSection.Lines)
  833. }
  834. curSection.Lines = append(curSection.Lines, diffLine)
  835. case ' ':
  836. curFileLinesCount++
  837. if maxLines > -1 && curFileLinesCount >= maxLines {
  838. curFile.IsIncomplete = true
  839. continue
  840. }
  841. diffLine := &DiffLine{Type: DiffLinePlain, LeftIdx: leftLine, RightIdx: rightLine}
  842. leftLine++
  843. rightLine++
  844. lastLeftIdx = -1
  845. if curSection == nil {
  846. // Create a new section to represent this hunk
  847. curSection = &DiffSection{file: curFile}
  848. curFile.Sections = append(curFile.Sections, curSection)
  849. }
  850. curSection.Lines = append(curSection.Lines, diffLine)
  851. default:
  852. // This is unexpected
  853. err = fmt.Errorf("unexpected line in hunk: %s", string(lineBytes))
  854. return
  855. }
  856. line := string(lineBytes)
  857. if isFragment {
  858. curFile.IsIncomplete = true
  859. curFile.IsIncompleteLineTooLong = true
  860. for isFragment {
  861. lineBytes, isFragment, err = input.ReadLine()
  862. if err != nil {
  863. // Now by the definition of ReadLine this cannot be io.EOF
  864. err = fmt.Errorf("unable to ReadLine: %w", err)
  865. return
  866. }
  867. }
  868. }
  869. if len(line) > maxLineCharacters {
  870. curFile.IsIncomplete = true
  871. curFile.IsIncompleteLineTooLong = true
  872. line = line[:maxLineCharacters]
  873. }
  874. curSection.Lines[len(curSection.Lines)-1].Content = line
  875. // handle LFS
  876. if line[1:] == lfs.MetaFileIdentifier {
  877. curFileLFSPrefix = true
  878. } else if curFileLFSPrefix && strings.HasPrefix(line[1:], lfs.MetaFileOidPrefix) {
  879. oid := strings.TrimPrefix(line[1:], lfs.MetaFileOidPrefix)
  880. if len(oid) == 64 {
  881. m := &git_model.LFSMetaObject{Pointer: lfs.Pointer{Oid: oid}}
  882. count, err := db.CountByBean(db.DefaultContext, m)
  883. if err == nil && count > 0 {
  884. curFile.IsBin = true
  885. curFile.IsLFSFile = true
  886. curSection.Lines = nil
  887. lastLeftIdx = -1
  888. }
  889. }
  890. }
  891. }
  892. }
  893. func createDiffFile(diff *Diff, line string) *DiffFile {
  894. // The a/ and b/ filenames are the same unless rename/copy is involved.
  895. // Especially, even for a creation or a deletion, /dev/null is not used
  896. // in place of the a/ or b/ filenames.
  897. //
  898. // When rename/copy is involved, file1 and file2 show the name of the
  899. // source file of the rename/copy and the name of the file that rename/copy
  900. // produces, respectively.
  901. //
  902. // Path names are quoted if necessary.
  903. //
  904. // This means that you should always be able to determine the file name even when there
  905. // there is potential ambiguity...
  906. //
  907. // but we can be simpler with our heuristics by just forcing git to prefix things nicely
  908. curFile := &DiffFile{
  909. Index: len(diff.Files) + 1,
  910. Type: DiffFileChange,
  911. Sections: make([]*DiffSection, 0, 10),
  912. }
  913. rd := strings.NewReader(line[len(cmdDiffHead):] + " ")
  914. curFile.Type = DiffFileChange
  915. var oldNameAmbiguity, newNameAmbiguity bool
  916. curFile.OldName, oldNameAmbiguity = readFileName(rd)
  917. curFile.Name, newNameAmbiguity = readFileName(rd)
  918. if oldNameAmbiguity && newNameAmbiguity {
  919. curFile.IsAmbiguous = true
  920. // OK we should bet that the oldName and the newName are the same if they can be made to be same
  921. // So we need to start again ...
  922. if (len(line)-len(cmdDiffHead)-1)%2 == 0 {
  923. // diff --git a/b b/b b/b b/b b/b b/b
  924. //
  925. midpoint := (len(line) + len(cmdDiffHead) - 1) / 2
  926. new, old := line[len(cmdDiffHead):midpoint], line[midpoint+1:]
  927. if len(new) > 2 && len(old) > 2 && new[2:] == old[2:] {
  928. curFile.OldName = old[2:]
  929. curFile.Name = old[2:]
  930. }
  931. }
  932. }
  933. curFile.IsRenamed = curFile.Name != curFile.OldName
  934. return curFile
  935. }
  936. func readFileName(rd *strings.Reader) (string, bool) {
  937. ambiguity := false
  938. var name string
  939. char, _ := rd.ReadByte()
  940. _ = rd.UnreadByte()
  941. if char == '"' {
  942. fmt.Fscanf(rd, "%q ", &name)
  943. if len(name) == 0 {
  944. log.Error("Reader has no file name: reader=%+v", rd)
  945. return "", true
  946. }
  947. if name[0] == '\\' {
  948. name = name[1:]
  949. }
  950. } else {
  951. // This technique is potentially ambiguous it may not be possible to uniquely identify the filenames from the diff line alone
  952. ambiguity = true
  953. fmt.Fscanf(rd, "%s ", &name)
  954. char, _ := rd.ReadByte()
  955. _ = rd.UnreadByte()
  956. for !(char == 0 || char == '"' || char == 'b') {
  957. var suffix string
  958. fmt.Fscanf(rd, "%s ", &suffix)
  959. name += " " + suffix
  960. char, _ = rd.ReadByte()
  961. _ = rd.UnreadByte()
  962. }
  963. }
  964. if len(name) < 2 {
  965. log.Error("Unable to determine name from reader: reader=%+v", rd)
  966. return "", true
  967. }
  968. return name[2:], ambiguity
  969. }
  // DiffOptions represents the options for a DiffRange
  type DiffOptions struct {
  	// BeforeCommitID is the commit to diff from. GetDiff replaces an empty
  	// value with the ID of the first parent of AfterCommitID when one exists.
  	BeforeCommitID string
  	// AfterCommitID is the commit to diff to.
  	AfterCommitID string
  	// SkipTo, when non-empty, names the file at which the diff should start.
  	SkipTo string
  	// MaxLines, MaxLineCharacters and MaxFiles are handed to ParsePatch as limits.
  	MaxLines          int
  	MaxLineCharacters int
  	MaxFiles          int
  	// WhitespaceBehavior, when non-empty, is passed to git diff as an extra argument.
  	WhitespaceBehavior git.CmdArg
  	// DirectComparison selects ".." instead of "..." between the two commit IDs
  	// when the short statistics are computed.
  	DirectComparison bool
  }
  981. // GetDiff builds a Diff between two commits of a repository.
  982. // Passing the empty string as beforeCommitID returns a diff from the parent commit.
  983. // The whitespaceBehavior is either an empty string or a git flag
  984. func GetDiff(gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  985. repoPath := gitRepo.Path
  986. commit, err := gitRepo.GetCommit(opts.AfterCommitID)
  987. if err != nil {
  988. return nil, err
  989. }
  990. argsLength := 6
  991. if len(opts.WhitespaceBehavior) > 0 {
  992. argsLength++
  993. }
  994. if len(opts.SkipTo) > 0 {
  995. argsLength++
  996. }
  997. if len(files) > 0 {
  998. argsLength += len(files) + 1
  999. }
  1000. diffArgs := make([]git.CmdArg, 0, argsLength)
  1001. if (len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA) && commit.ParentCount() == 0 {
  1002. diffArgs = append(diffArgs, "diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M")
  1003. if len(opts.WhitespaceBehavior) != 0 {
  1004. diffArgs = append(diffArgs, opts.WhitespaceBehavior)
  1005. }
  1006. // append empty tree ref
  1007. diffArgs = append(diffArgs, "4b825dc642cb6eb9a060e54bf8d69288fbee4904")
  1008. diffArgs = append(diffArgs, git.CmdArgCheck(opts.AfterCommitID))
  1009. } else {
  1010. actualBeforeCommitID := opts.BeforeCommitID
  1011. if len(actualBeforeCommitID) == 0 {
  1012. parentCommit, _ := commit.Parent(0)
  1013. actualBeforeCommitID = parentCommit.ID.String()
  1014. }
  1015. diffArgs = append(diffArgs, "diff", "--src-prefix=\\a/", "--dst-prefix=\\b/", "-M")
  1016. if len(opts.WhitespaceBehavior) != 0 {
  1017. diffArgs = append(diffArgs, opts.WhitespaceBehavior)
  1018. }
  1019. diffArgs = append(diffArgs, git.CmdArgCheck(actualBeforeCommitID))
  1020. diffArgs = append(diffArgs, git.CmdArgCheck(opts.AfterCommitID))
  1021. opts.BeforeCommitID = actualBeforeCommitID
  1022. }
  1023. // In git 2.31, git diff learned --skip-to which we can use to shortcut skip to file
  1024. // so if we are using at least this version of git we don't have to tell ParsePatch to do
  1025. // the skipping for us
  1026. parsePatchSkipToFile := opts.SkipTo
  1027. if opts.SkipTo != "" && git.CheckGitVersionAtLeast("2.31") == nil {
  1028. diffArgs = append(diffArgs, git.CmdArg("--skip-to="+opts.SkipTo))
  1029. parsePatchSkipToFile = ""
  1030. }
  1031. if len(files) > 0 {
  1032. diffArgs = append(diffArgs, "--")
  1033. for _, file := range files {
  1034. diffArgs = append(diffArgs, git.CmdArg(file)) // it's safe to cast it to CmdArg because there is a "--" before
  1035. }
  1036. }
  1037. reader, writer := io.Pipe()
  1038. defer func() {
  1039. _ = reader.Close()
  1040. _ = writer.Close()
  1041. }()
  1042. go func(ctx context.Context, diffArgs []git.CmdArg, repoPath string, writer *io.PipeWriter) {
  1043. cmd := git.NewCommand(ctx, diffArgs...)
  1044. cmd.SetDescription(fmt.Sprintf("GetDiffRange [repo_path: %s]", repoPath))
  1045. if err := cmd.Run(&git.RunOpts{
  1046. Timeout: time.Duration(setting.Git.Timeout.Default) * time.Second,
  1047. Dir: repoPath,
  1048. Stderr: os.Stderr,
  1049. Stdout: writer,
  1050. }); err != nil {
  1051. log.Error("error during RunWithContext: %w", err)
  1052. }
  1053. _ = writer.Close()
  1054. }(gitRepo.Ctx, diffArgs, repoPath, writer)
  1055. diff, err := ParsePatch(opts.MaxLines, opts.MaxLineCharacters, opts.MaxFiles, reader, parsePatchSkipToFile)
  1056. if err != nil {
  1057. return nil, fmt.Errorf("unable to ParsePatch: %w", err)
  1058. }
  1059. diff.Start = opts.SkipTo
  1060. checker, deferable := gitRepo.CheckAttributeReader(opts.AfterCommitID)
  1061. defer deferable()
  1062. for _, diffFile := range diff.Files {
  1063. gotVendor := false
  1064. gotGenerated := false
  1065. if checker != nil {
  1066. attrs, err := checker.CheckPath(diffFile.Name)
  1067. if err == nil {
  1068. if vendored, has := attrs["linguist-vendored"]; has {
  1069. if vendored == "set" || vendored == "true" {
  1070. diffFile.IsVendored = true
  1071. gotVendor = true
  1072. } else {
  1073. gotVendor = vendored == "false"
  1074. }
  1075. }
  1076. if generated, has := attrs["linguist-generated"]; has {
  1077. if generated == "set" || generated == "true" {
  1078. diffFile.IsGenerated = true
  1079. gotGenerated = true
  1080. } else {
  1081. gotGenerated = generated == "false"
  1082. }
  1083. }
  1084. if language, has := attrs["linguist-language"]; has && language != "unspecified" && language != "" {
  1085. diffFile.Language = language
  1086. } else if language, has := attrs["gitlab-language"]; has && language != "unspecified" && language != "" {
  1087. diffFile.Language = language
  1088. }
  1089. }
  1090. }
  1091. if !gotVendor {
  1092. diffFile.IsVendored = analyze.IsVendor(diffFile.Name)
  1093. }
  1094. if !gotGenerated {
  1095. diffFile.IsGenerated = analyze.IsGenerated(diffFile.Name)
  1096. }
  1097. tailSection := diffFile.GetTailSection(gitRepo, opts.BeforeCommitID, opts.AfterCommitID)
  1098. if tailSection != nil {
  1099. diffFile.Sections = append(diffFile.Sections, tailSection)
  1100. }
  1101. }
  1102. separator := "..."
  1103. if opts.DirectComparison {
  1104. separator = ".."
  1105. }
  1106. shortstatArgs := []git.CmdArg{git.CmdArgCheck(opts.BeforeCommitID + separator + opts.AfterCommitID)}
  1107. if len(opts.BeforeCommitID) == 0 || opts.BeforeCommitID == git.EmptySHA {
  1108. shortstatArgs = []git.CmdArg{git.EmptyTreeSHA, git.CmdArgCheck(opts.AfterCommitID)}
  1109. }
  1110. diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, shortstatArgs...)
  1111. if err != nil && strings.Contains(err.Error(), "no merge base") {
  1112. // git >= 2.28 now returns an error if base and head have become unrelated.
  1113. // previously it would return the results of git diff --shortstat base head so let's try that...
  1114. shortstatArgs = []git.CmdArg{git.CmdArgCheck(opts.BeforeCommitID), git.CmdArgCheck(opts.AfterCommitID)}
  1115. diff.NumFiles, diff.TotalAddition, diff.TotalDeletion, err = git.GetDiffShortStat(gitRepo.Ctx, repoPath, shortstatArgs...)
  1116. }
  1117. if err != nil {
  1118. return nil, err
  1119. }
  1120. return diff, nil
  1121. }
  1122. // SyncAndGetUserSpecificDiff is like GetDiff, except that user specific data such as which files the given user has already viewed on the given PR will also be set
  1123. // Additionally, the database asynchronously is updated if files have changed since the last review
  1124. func SyncAndGetUserSpecificDiff(ctx context.Context, userID int64, pull *issues_model.PullRequest, gitRepo *git.Repository, opts *DiffOptions, files ...string) (*Diff, error) {
  1125. diff, err := GetDiff(gitRepo, opts, files...)
  1126. if err != nil {
  1127. return nil, err
  1128. }
  1129. review, err := pull_model.GetNewestReviewState(ctx, userID, pull.ID)
  1130. if err != nil || review == nil || review.UpdatedFiles == nil {
  1131. return diff, err
  1132. }
  1133. latestCommit := opts.AfterCommitID
  1134. if latestCommit == "" {
  1135. latestCommit = pull.HeadBranch // opts.AfterCommitID is preferred because it handles PRs from forks correctly and the branch name doesn't
  1136. }
  1137. changedFiles, err := gitRepo.GetFilesChangedBetween(review.CommitSHA, latestCommit)
  1138. // There are way too many possible errors.
  1139. // Examples are various git errors such as the commit the review was based on was gc'ed and hence doesn't exist anymore as well as unrecoverable errors where we should serve a 500 response
  1140. // Due to the current architecture and physical limitation of needing to compare explicit error messages, we can only choose one approach without the code getting ugly
  1141. // For SOME of the errors such as the gc'ed commit, it would be best to mark all files as changed
  1142. // But as that does not work for all potential errors, we simply mark all files as unchanged and drop the error which always works, even if not as good as possible
  1143. if err != nil {
  1144. log.Error("Could not get changed files between %s and %s for pull request %d in repo with path %s. Assuming no changes. Error: %w", review.CommitSHA, latestCommit, pull.Index, gitRepo.Path, err)
  1145. }
  1146. filesChangedSinceLastDiff := make(map[string]pull_model.ViewedState)
  1147. outer:
  1148. for _, diffFile := range diff.Files {
  1149. fileViewedState := review.UpdatedFiles[diffFile.GetDiffFileName()]
  1150. // Check whether it was previously detected that the file has changed since the last review
  1151. if fileViewedState == pull_model.HasChanged {
  1152. diffFile.HasChangedSinceLastReview = true
  1153. continue
  1154. }
  1155. filename := diffFile.GetDiffFileName()
  1156. // Check explicitly whether the file has changed since the last review
  1157. for _, changedFile := range changedFiles {
  1158. diffFile.HasChangedSinceLastReview = filename == changedFile
  1159. if diffFile.HasChangedSinceLastReview {
  1160. filesChangedSinceLastDiff[filename] = pull_model.HasChanged
  1161. continue outer // We don't want to check if the file is viewed here as that would fold the file, which is in this case unwanted
  1162. }
  1163. }
  1164. // Check whether the file has already been viewed
  1165. if fileViewedState == pull_model.Viewed {
  1166. diffFile.IsViewed = true
  1167. diff.NumViewedFiles++
  1168. }
  1169. }
  1170. // Explicitly store files that have changed in the database, if any is present at all.
  1171. // This has the benefit that the "Has Changed" attribute will be present as long as the user does not explicitly mark this file as viewed, so it will even survive a page reload after marking another file as viewed.
  1172. // On the other hand, this means that even if a commit reverting an unseen change is committed, the file will still be seen as changed.
  1173. if len(filesChangedSinceLastDiff) > 0 {
  1174. err := pull_model.UpdateReviewState(ctx, review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff)
  1175. if err != nil {
  1176. log.Warn("Could not update review for user %d, pull %d, commit %s and the changed files %v: %v", review.UserID, review.PullID, review.CommitSHA, filesChangedSinceLastDiff, err)
  1177. return nil, err
  1178. }
  1179. }
  1180. return diff, err
  1181. }
  1182. // CommentAsDiff returns c.Patch as *Diff
  1183. func CommentAsDiff(c *issues_model.Comment) (*Diff, error) {
  1184. diff, err := ParsePatch(setting.Git.MaxGitDiffLines,
  1185. setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.Patch), "")
  1186. if err != nil {
  1187. log.Error("Unable to parse patch: %v", err)
  1188. return nil, err
  1189. }
  1190. if len(diff.Files) == 0 {
  1191. return nil, fmt.Errorf("no file found for comment ID: %d", c.ID)
  1192. }
  1193. secs := diff.Files[0].Sections
  1194. if len(secs) == 0 {
  1195. return nil, fmt.Errorf("no sections found for comment ID: %d", c.ID)
  1196. }
  1197. return diff, nil
  1198. }
  1199. // CommentMustAsDiff executes AsDiff and logs the error instead of returning
  1200. func CommentMustAsDiff(c *issues_model.Comment) *Diff {
  1201. if c == nil {
  1202. return nil
  1203. }
  1204. defer func() {
  1205. if err := recover(); err != nil {
  1206. log.Error("PANIC whilst retrieving diff for comment[%d] Error: %v\nStack: %s", c.ID, err, log.Stack(2))
  1207. }
  1208. }()
  1209. diff, err := CommentAsDiff(c)
  1210. if err != nil {
  1211. log.Warn("CommentMustAsDiff: %v", err)
  1212. }
  1213. return diff
  1214. }
  1215. // GetWhitespaceFlag returns git diff flag for treating whitespaces
  1216. func GetWhitespaceFlag(whitespaceBehavior string) git.CmdArg {
  1217. whitespaceFlags := map[string]string{
  1218. "ignore-all": "-w",
  1219. "ignore-change": "-b",
  1220. "ignore-eol": "--ignore-space-at-eol",
  1221. "show-all": "",
  1222. }
  1223. if flag, ok := whitespaceFlags[whitespaceBehavior]; ok {
  1224. return git.CmdArg(flag)
  1225. }
  1226. log.Warn("unknown whitespace behavior: %q, default to 'show-all'", whitespaceBehavior)
  1227. return ""
  1228. }