Vous ne pouvez pas sélectionner plus de 25 sujets Les noms de sujets doivent commencer par une lettre ou un nombre, peuvent contenir des tirets ('-') et peuvent comporter jusqu'à 35 caractères.

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package gitdiff
  4. import (
  5. "encoding/csv"
  6. "errors"
  7. "io"
  8. )
  9. const (
  10. unmappedColumn = -1 // value stored in a column map for a column that has no counterpart in the other file
  11. maxRowsToInspect int = 10 // number of leading rows buffered per file; also the upper bound on rows sampled when matching columns by content
  12. minRatioToMatch float32 = 0.8 // share of sampled rows that must be equal (strictly exceeded) before two columns are mapped by content
  13. )
  14. // TableDiffCellType represents the type of a TableDiffCell.
  15. type TableDiffCellType uint8
  16. // TableDiffCellType possible values. They start at 1, so the zero value is none of them.
  17. const (
  18. TableDiffCellUnchanged TableDiffCellType = iota + 1 // both sides present and equal, column position unchanged
  19. TableDiffCellChanged // both sides present but different, column position unchanged
  20. TableDiffCellAdd // added: used when there is no base row, or the column only exists in head
  21. TableDiffCellDel // deleted: used when there is no head row, or the column only exists in base
  22. TableDiffCellMovedUnchanged // both sides present and equal, column position differs
  23. TableDiffCellMovedChanged // both sides present but different, column position differs
  24. )
  25. // TableDiffCell represents a cell of a TableDiffRow. It carries the base and head values plus the kind of change.
  26. type TableDiffCell struct {
  27. LeftCell string // value from the base CSV; left empty when both sides hold the same value
  28. RightCell string // value from the head CSV
  29. Type TableDiffCellType // kind of change this cell represents
  30. }
  31. // TableDiffRow represents a row of a TableDiffSection.
  32. type TableDiffRow struct {
  33. RowIdx int // 1-based row number; in a two-sided diff this is the head line number (0 when the row has no head line)
  34. Cells []*TableDiffCell // one entry per column of the diff table
  35. }
  36. // TableDiffSection represents a section of a DiffFile.
  37. type TableDiffSection struct {
  38. Rows []*TableDiffRow // rows of this section, in the order they were generated
  39. }
  40. // csvReader wraps a csv.Reader which buffers the first rows.
  41. type csvReader struct {
  42. reader *csv.Reader // underlying reader; rows are only ever read forwards
  43. buffer [][]string // the first rows of the input, addressable by index; slots past the end of the input stay nil
  44. line int // row counter: set to the buffer size after buffering, then incremented for every further row read
  45. eof bool // set once the underlying reader has reported io.EOF
  46. }
  47. // ErrorUndefinedCell is returned by getCell when the requested column does not exist in the given row.
  48. var ErrorUndefinedCell = errors.New("undefined cell")
  49. // createCsvReader creates a csvReader and fills the buffer
  50. func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
  51. csv := &csvReader{reader: reader}
  52. csv.buffer = make([][]string, bufferRowCount)
  53. for i := 0; i < bufferRowCount && !csv.eof; i++ {
  54. row, err := csv.readNextRow()
  55. if err != nil {
  56. return nil, err
  57. }
  58. csv.buffer[i] = row
  59. }
  60. csv.line = bufferRowCount
  61. return csv, nil
  62. }
  63. // GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
  64. func (csv *csvReader) GetRow(row int) ([]string, error) {
  65. if row < len(csv.buffer) && row >= 0 {
  66. return csv.buffer[row], nil
  67. }
  68. if csv.eof {
  69. return nil, nil
  70. }
  71. for {
  72. fields, err := csv.readNextRow()
  73. if err != nil {
  74. return nil, err
  75. }
  76. if csv.eof {
  77. return nil, nil
  78. }
  79. csv.line++
  80. if csv.line-1 == row {
  81. return fields, nil
  82. }
  83. }
  84. }
  85. func (csv *csvReader) readNextRow() ([]string, error) {
  86. if csv.eof {
  87. return nil, nil
  88. }
  89. row, err := csv.reader.Read()
  90. if err != nil {
  91. if err != io.EOF {
  92. return nil, err
  93. }
  94. csv.eof = true
  95. }
  96. return row, nil
  97. }
  98. // CreateCsvDiff creates a tabular diff based on two CSV readers.
  99. func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
  100. if baseReader != nil && headReader != nil {
  101. return createCsvDiff(diffFile, baseReader, headReader)
  102. }
  103. if baseReader != nil {
  104. return createCsvDiffSingle(baseReader, TableDiffCellDel)
  105. }
  106. return createCsvDiffSingle(headReader, TableDiffCellAdd)
  107. }
  108. // createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
  109. func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
  110. var rows []*TableDiffRow
  111. i := 1
  112. for {
  113. row, err := reader.Read()
  114. if err != nil {
  115. if err == io.EOF {
  116. break
  117. }
  118. return nil, err
  119. }
  120. cells := make([]*TableDiffCell, len(row))
  121. for j := 0; j < len(row); j++ {
  122. if celltype == TableDiffCellDel {
  123. cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
  124. } else {
  125. cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype}
  126. }
  127. }
  128. rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
  129. i++
  130. }
  131. return []*TableDiffSection{{Rows: rows}}, nil
  132. }
  133. func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
  134. // Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively
  135. baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
  136. if err != nil {
  137. return nil, err
  138. }
  139. headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
  140. if err != nil {
  141. return nil, err
  142. }
  143. // Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
  144. a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
  145. // Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
  146. numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
  147. if len(a2bColMap) < len(b2aColMap) {
  148. numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
  149. }
  150. // createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
  151. // in the base or head respectively.
  152. // Returns a TableDiffRow which has the row index
  153. createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
  154. // diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either
  155. // the same size as the head table or bigger
  156. diffTableCells := make([]*TableDiffCell, numDiffTableCols)
  157. var bRow *[]string
  158. if bLineNum > 0 {
  159. row, err := headCSVReader.GetRow(bLineNum - 1)
  160. if err != nil {
  161. return nil, err
  162. }
  163. bRow = &row
  164. }
  165. var aRow *[]string
  166. if aLineNum > 0 {
  167. row, err := baseCSVReader.GetRow(aLineNum - 1)
  168. if err != nil {
  169. return nil, err
  170. }
  171. aRow = &row
  172. }
  173. if aRow == nil && bRow == nil {
  174. // No content
  175. return nil, nil
  176. }
  177. aIndex := 0 // tracks where we are in the a2bColMap
  178. bIndex := 0 // tracks where we are in the b2aColMap
  179. colsAdded := 0 // incremented whenever we found a column was added
  180. colsDeleted := 0 // incrememted whenever a column was deleted
  181. // We loop until both the aIndex and bIndex are greater than their col map, which then we are done
  182. for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
  183. // Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex
  184. for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
  185. var aCell string
  186. if aRow != nil {
  187. if cell, err := getCell(*aRow, aIndex); err != nil {
  188. if err != ErrorUndefinedCell {
  189. return nil, err
  190. }
  191. } else {
  192. aCell = cell
  193. }
  194. }
  195. diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
  196. aIndex++
  197. colsDeleted++
  198. }
  199. // aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
  200. // we can just increment aIndex until it points to a -1 column or one greater than the current bIndex
  201. for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
  202. aIndex++
  203. }
  204. // Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex
  205. for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
  206. var bCell string
  207. cellType := TableDiffCellAdd
  208. if bRow != nil {
  209. if cell, err := getCell(*bRow, bIndex); err != nil {
  210. if err != ErrorUndefinedCell {
  211. return nil, err
  212. }
  213. } else {
  214. bCell = cell
  215. }
  216. } else {
  217. cellType = TableDiffCellDel
  218. }
  219. diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
  220. bIndex++
  221. colsAdded++
  222. }
  223. // aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
  224. // we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
  225. for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
  226. var diffTableCell TableDiffCell
  227. var aCell *string
  228. // get the aCell value if the aRow exists
  229. if aRow != nil {
  230. if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
  231. if err != ErrorUndefinedCell {
  232. return nil, err
  233. }
  234. } else {
  235. aCell = &cell
  236. diffTableCell.LeftCell = cell
  237. }
  238. } else {
  239. diffTableCell.Type = TableDiffCellAdd
  240. }
  241. var bCell *string
  242. // get the bCell value if the bRow exists
  243. if bRow != nil {
  244. if cell, err := getCell(*bRow, bIndex); err != nil {
  245. if err != ErrorUndefinedCell {
  246. return nil, err
  247. }
  248. } else {
  249. bCell = &cell
  250. diffTableCell.RightCell = cell
  251. }
  252. } else {
  253. diffTableCell.Type = TableDiffCellDel
  254. }
  255. // if both a and b have a row that exists, compare the value and determine if the row has moved
  256. if aCell != nil && bCell != nil {
  257. moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
  258. if *aCell != *bCell {
  259. if moved {
  260. diffTableCell.Type = TableDiffCellMovedChanged
  261. } else {
  262. diffTableCell.Type = TableDiffCellChanged
  263. }
  264. } else {
  265. if moved {
  266. diffTableCell.Type = TableDiffCellMovedUnchanged
  267. } else {
  268. diffTableCell.Type = TableDiffCellUnchanged
  269. }
  270. diffTableCell.LeftCell = ""
  271. }
  272. }
  273. // Add the diff column to the diff row
  274. diffTableCells[bIndex+colsDeleted] = &diffTableCell
  275. bIndex++
  276. }
  277. }
  278. return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
  279. }
  280. // diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
  281. var diffTableSections []*TableDiffSection
  282. for i, section := range diffFile.Sections {
  283. // Each section has multiple diffTableRows
  284. var diffTableRows []*TableDiffRow
  285. lines := tryMergeLines(section.Lines)
  286. // Loop through the merged lines to get each row of the CSV diff table for this section
  287. for j, line := range lines {
  288. if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
  289. diffTableRow, err := createDiffTableRow(1, 1)
  290. if err != nil {
  291. return nil, err
  292. }
  293. if diffTableRow != nil {
  294. diffTableRows = append(diffTableRows, diffTableRow)
  295. }
  296. }
  297. diffTableRow, err := createDiffTableRow(line[0], line[1])
  298. if err != nil {
  299. return nil, err
  300. }
  301. if diffTableRow != nil {
  302. diffTableRows = append(diffTableRows, diffTableRow)
  303. }
  304. }
  305. if len(diffTableRows) > 0 {
  306. diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
  307. }
  308. }
  309. return diffTableSections, nil
  310. }
  311. // getColumnMapping creates a mapping of columns between a and b
  312. func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
  313. baseRow, _ := baseCSVReader.GetRow(0)
  314. headRow, _ := headCSVReader.GetRow(0)
  315. base2HeadColMap := []int{}
  316. head2BaseColMap := []int{}
  317. if baseRow != nil {
  318. base2HeadColMap = make([]int, len(baseRow))
  319. }
  320. if headRow != nil {
  321. head2BaseColMap = make([]int, len(headRow))
  322. }
  323. // Initializes all head2base mappings to be unmappedColumn (-1)
  324. for i := 0; i < len(head2BaseColMap); i++ {
  325. head2BaseColMap[i] = unmappedColumn
  326. }
  327. // Loops through the baseRow and see if there is a match in the head row
  328. for i := 0; i < len(baseRow); i++ {
  329. base2HeadColMap[i] = unmappedColumn
  330. baseCell, err := getCell(baseRow, i)
  331. if err == nil {
  332. for j := 0; j < len(headRow); j++ {
  333. if head2BaseColMap[j] == -1 {
  334. headCell, err := getCell(headRow, j)
  335. if err == nil && baseCell == headCell {
  336. base2HeadColMap[i] = j
  337. head2BaseColMap[j] = i
  338. break
  339. }
  340. }
  341. }
  342. }
  343. }
  344. tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap)
  345. tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap)
  346. return base2HeadColMap, head2BaseColMap
  347. }
  348. // tryMapColumnsByContent tries to map missing columns by the content of the first lines.
  349. func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
  350. for i := 0; i < len(base2HeadColMap); i++ {
  351. headStart := 0
  352. for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) {
  353. if head2BaseColMap[headStart] == unmappedColumn {
  354. rows := min(maxRowsToInspect, max(0, min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1))
  355. same := 0
  356. for j := 1; j <= rows; j++ {
  357. baseCell, baseErr := getCell(baseCSVReader.buffer[j], i)
  358. headCell, headErr := getCell(headCSVReader.buffer[j], headStart)
  359. if baseErr == nil && headErr == nil && baseCell == headCell {
  360. same++
  361. }
  362. }
  363. if (float32(same) / float32(rows)) > minRatioToMatch {
  364. base2HeadColMap[i] = headStart
  365. head2BaseColMap[headStart] = i
  366. }
  367. }
  368. headStart++
  369. }
  370. }
  371. }
  372. // getCell returns the specific cell or nil if not present.
  373. func getCell(row []string, column int) (string, error) {
  374. if column < len(row) {
  375. return row[column], nil
  376. }
  377. return "", ErrorUndefinedCell
  378. }
  379. // countUnmappedColumns returns the count of unmapped columns.
  380. func countUnmappedColumns(mapping []int) int {
  381. count := 0
  382. for i := 0; i < len(mapping); i++ {
  383. if mapping[i] == unmappedColumn {
  384. count++
  385. }
  386. }
  387. return count
  388. }
  389. // tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
  390. func tryMergeLines(lines []*DiffLine) [][2]int {
  391. ids := make([][2]int, len(lines))
  392. i := 0
  393. for _, line := range lines {
  394. if line.Type != DiffLineSection {
  395. ids[i][0] = line.LeftIdx
  396. ids[i][1] = line.RightIdx
  397. i++
  398. }
  399. }
  400. ids = ids[:i]
  401. result := make([][2]int, len(ids))
  402. j := 0
  403. for i = 0; i < len(ids); i++ {
  404. if ids[i][0] == 0 {
  405. if j > 0 && result[j-1][1] == 0 {
  406. temp := j
  407. for temp > 0 && result[temp-1][1] == 0 {
  408. temp--
  409. }
  410. result[temp][1] = ids[i][1]
  411. continue
  412. }
  413. }
  414. result[j] = ids[i]
  415. j++
  416. }
  417. return result[:j]
  418. }