You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

csv.go 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471
  1. // Copyright 2021 The Gitea Authors. All rights reserved.
  2. // SPDX-License-Identifier: MIT
  3. package gitdiff
  4. import (
  5. "encoding/csv"
  6. "errors"
  7. "io"
  8. "code.gitea.io/gitea/modules/util"
  9. )
  10. const (
  11. unmappedColumn = -1
  12. maxRowsToInspect int = 10
  13. minRatioToMatch float32 = 0.8
  14. )
  15. // TableDiffCellType represents the type of a TableDiffCell.
  16. type TableDiffCellType uint8
  17. // TableDiffCellType possible values.
  18. const (
  19. TableDiffCellUnchanged TableDiffCellType = iota + 1
  20. TableDiffCellChanged
  21. TableDiffCellAdd
  22. TableDiffCellDel
  23. TableDiffCellMovedUnchanged
  24. TableDiffCellMovedChanged
  25. )
  26. // TableDiffCell represents a cell of a TableDiffRow
  27. type TableDiffCell struct {
  28. LeftCell string
  29. RightCell string
  30. Type TableDiffCellType
  31. }
  32. // TableDiffRow represents a row of a TableDiffSection.
  33. type TableDiffRow struct {
  34. RowIdx int
  35. Cells []*TableDiffCell
  36. }
  37. // TableDiffSection represents a section of a DiffFile.
  38. type TableDiffSection struct {
  39. Rows []*TableDiffRow
  40. }
  41. // csvReader wraps a csv.Reader which buffers the first rows.
  42. type csvReader struct {
  43. reader *csv.Reader
  44. buffer [][]string
  45. line int
  46. eof bool
  47. }
  48. // ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV
  49. var ErrorUndefinedCell = errors.New("undefined cell")
  50. // createCsvReader creates a csvReader and fills the buffer
  51. func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
  52. csv := &csvReader{reader: reader}
  53. csv.buffer = make([][]string, bufferRowCount)
  54. for i := 0; i < bufferRowCount && !csv.eof; i++ {
  55. row, err := csv.readNextRow()
  56. if err != nil {
  57. return nil, err
  58. }
  59. csv.buffer[i] = row
  60. }
  61. csv.line = bufferRowCount
  62. return csv, nil
  63. }
  64. // GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
  65. func (csv *csvReader) GetRow(row int) ([]string, error) {
  66. if row < len(csv.buffer) && row >= 0 {
  67. return csv.buffer[row], nil
  68. }
  69. if csv.eof {
  70. return nil, nil
  71. }
  72. for {
  73. fields, err := csv.readNextRow()
  74. if err != nil {
  75. return nil, err
  76. }
  77. if csv.eof {
  78. return nil, nil
  79. }
  80. csv.line++
  81. if csv.line-1 == row {
  82. return fields, nil
  83. }
  84. }
  85. }
  86. func (csv *csvReader) readNextRow() ([]string, error) {
  87. if csv.eof {
  88. return nil, nil
  89. }
  90. row, err := csv.reader.Read()
  91. if err != nil {
  92. if err != io.EOF {
  93. return nil, err
  94. }
  95. csv.eof = true
  96. }
  97. return row, nil
  98. }
  99. // CreateCsvDiff creates a tabular diff based on two CSV readers.
  100. func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
  101. if baseReader != nil && headReader != nil {
  102. return createCsvDiff(diffFile, baseReader, headReader)
  103. }
  104. if baseReader != nil {
  105. return createCsvDiffSingle(baseReader, TableDiffCellDel)
  106. }
  107. return createCsvDiffSingle(headReader, TableDiffCellAdd)
  108. }
  109. // createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
  110. func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
  111. var rows []*TableDiffRow
  112. i := 1
  113. for {
  114. row, err := reader.Read()
  115. if err != nil {
  116. if err == io.EOF {
  117. break
  118. }
  119. return nil, err
  120. }
  121. cells := make([]*TableDiffCell, len(row))
  122. for j := 0; j < len(row); j++ {
  123. if celltype == TableDiffCellDel {
  124. cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
  125. } else {
  126. cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype}
  127. }
  128. }
  129. rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
  130. i++
  131. }
  132. return []*TableDiffSection{{Rows: rows}}, nil
  133. }
  134. func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
  135. // Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively
  136. baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
  137. if err != nil {
  138. return nil, err
  139. }
  140. headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
  141. if err != nil {
  142. return nil, err
  143. }
  144. // Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
  145. a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
  146. // Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
  147. numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
  148. if len(a2bColMap) < len(b2aColMap) {
  149. numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
  150. }
  151. // createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
  152. // in the base or head respectively.
  153. // Returns a TableDiffRow which has the row index
  154. createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
  155. // diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either
  156. // the same size as the head table or bigger
  157. diffTableCells := make([]*TableDiffCell, numDiffTableCols)
  158. var bRow *[]string
  159. if bLineNum > 0 {
  160. row, err := headCSVReader.GetRow(bLineNum - 1)
  161. if err != nil {
  162. return nil, err
  163. }
  164. bRow = &row
  165. }
  166. var aRow *[]string
  167. if aLineNum > 0 {
  168. row, err := baseCSVReader.GetRow(aLineNum - 1)
  169. if err != nil {
  170. return nil, err
  171. }
  172. aRow = &row
  173. }
  174. if aRow == nil && bRow == nil {
  175. // No content
  176. return nil, nil
  177. }
  178. aIndex := 0 // tracks where we are in the a2bColMap
  179. bIndex := 0 // tracks where we are in the b2aColMap
  180. colsAdded := 0 // incremented whenever we found a column was added
  181. colsDeleted := 0 // incrememted whenever a column was deleted
  182. // We loop until both the aIndex and bIndex are greater than their col map, which then we are done
  183. for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
  184. // Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex
  185. for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
  186. var aCell string
  187. if aRow != nil {
  188. if cell, err := getCell(*aRow, aIndex); err != nil {
  189. if err != ErrorUndefinedCell {
  190. return nil, err
  191. }
  192. } else {
  193. aCell = cell
  194. }
  195. }
  196. diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
  197. aIndex++
  198. colsDeleted++
  199. }
  200. // aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
  201. // we can just increment aIndex until it points to a -1 column or one greater than the current bIndex
  202. for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
  203. aIndex++
  204. }
  205. // Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex
  206. for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
  207. var bCell string
  208. cellType := TableDiffCellAdd
  209. if bRow != nil {
  210. if cell, err := getCell(*bRow, bIndex); err != nil {
  211. if err != ErrorUndefinedCell {
  212. return nil, err
  213. }
  214. } else {
  215. bCell = cell
  216. }
  217. } else {
  218. cellType = TableDiffCellDel
  219. }
  220. diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
  221. bIndex++
  222. colsAdded++
  223. }
  224. // aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
  225. // we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
  226. for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
  227. var diffTableCell TableDiffCell
  228. var aCell *string
  229. // get the aCell value if the aRow exists
  230. if aRow != nil {
  231. if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
  232. if err != ErrorUndefinedCell {
  233. return nil, err
  234. }
  235. } else {
  236. aCell = &cell
  237. diffTableCell.LeftCell = cell
  238. }
  239. } else {
  240. diffTableCell.Type = TableDiffCellAdd
  241. }
  242. var bCell *string
  243. // get the bCell value if the bRow exists
  244. if bRow != nil {
  245. if cell, err := getCell(*bRow, bIndex); err != nil {
  246. if err != ErrorUndefinedCell {
  247. return nil, err
  248. }
  249. } else {
  250. bCell = &cell
  251. diffTableCell.RightCell = cell
  252. }
  253. } else {
  254. diffTableCell.Type = TableDiffCellDel
  255. }
  256. // if both a and b have a row that exists, compare the value and determine if the row has moved
  257. if aCell != nil && bCell != nil {
  258. moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
  259. if *aCell != *bCell {
  260. if moved {
  261. diffTableCell.Type = TableDiffCellMovedChanged
  262. } else {
  263. diffTableCell.Type = TableDiffCellChanged
  264. }
  265. } else {
  266. if moved {
  267. diffTableCell.Type = TableDiffCellMovedUnchanged
  268. } else {
  269. diffTableCell.Type = TableDiffCellUnchanged
  270. }
  271. diffTableCell.LeftCell = ""
  272. }
  273. }
  274. // Add the diff column to the diff row
  275. diffTableCells[bIndex+colsDeleted] = &diffTableCell
  276. bIndex++
  277. }
  278. }
  279. return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
  280. }
  281. // diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
  282. var diffTableSections []*TableDiffSection
  283. for i, section := range diffFile.Sections {
  284. // Each section has multiple diffTableRows
  285. var diffTableRows []*TableDiffRow
  286. lines := tryMergeLines(section.Lines)
  287. // Loop through the merged lines to get each row of the CSV diff table for this section
  288. for j, line := range lines {
  289. if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
  290. diffTableRow, err := createDiffTableRow(1, 1)
  291. if err != nil {
  292. return nil, err
  293. }
  294. if diffTableRow != nil {
  295. diffTableRows = append(diffTableRows, diffTableRow)
  296. }
  297. }
  298. diffTableRow, err := createDiffTableRow(line[0], line[1])
  299. if err != nil {
  300. return nil, err
  301. }
  302. if diffTableRow != nil {
  303. diffTableRows = append(diffTableRows, diffTableRow)
  304. }
  305. }
  306. if len(diffTableRows) > 0 {
  307. diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
  308. }
  309. }
  310. return diffTableSections, nil
  311. }
  312. // getColumnMapping creates a mapping of columns between a and b
  313. func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
  314. baseRow, _ := baseCSVReader.GetRow(0)
  315. headRow, _ := headCSVReader.GetRow(0)
  316. base2HeadColMap := []int{}
  317. head2BaseColMap := []int{}
  318. if baseRow != nil {
  319. base2HeadColMap = make([]int, len(baseRow))
  320. }
  321. if headRow != nil {
  322. head2BaseColMap = make([]int, len(headRow))
  323. }
  324. // Initializes all head2base mappings to be unmappedColumn (-1)
  325. for i := 0; i < len(head2BaseColMap); i++ {
  326. head2BaseColMap[i] = unmappedColumn
  327. }
  328. // Loops through the baseRow and see if there is a match in the head row
  329. for i := 0; i < len(baseRow); i++ {
  330. base2HeadColMap[i] = unmappedColumn
  331. baseCell, err := getCell(baseRow, i)
  332. if err == nil {
  333. for j := 0; j < len(headRow); j++ {
  334. if head2BaseColMap[j] == -1 {
  335. headCell, err := getCell(headRow, j)
  336. if err == nil && baseCell == headCell {
  337. base2HeadColMap[i] = j
  338. head2BaseColMap[j] = i
  339. break
  340. }
  341. }
  342. }
  343. }
  344. }
  345. tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap)
  346. tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap)
  347. return base2HeadColMap, head2BaseColMap
  348. }
  349. // tryMapColumnsByContent tries to map missing columns by the content of the first lines.
  350. func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
  351. for i := 0; i < len(base2HeadColMap); i++ {
  352. headStart := 0
  353. for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) {
  354. if head2BaseColMap[headStart] == unmappedColumn {
  355. rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1))
  356. same := 0
  357. for j := 1; j <= rows; j++ {
  358. baseCell, baseErr := getCell(baseCSVReader.buffer[j], i)
  359. headCell, headErr := getCell(headCSVReader.buffer[j], headStart)
  360. if baseErr == nil && headErr == nil && baseCell == headCell {
  361. same++
  362. }
  363. }
  364. if (float32(same) / float32(rows)) > minRatioToMatch {
  365. base2HeadColMap[i] = headStart
  366. head2BaseColMap[headStart] = i
  367. }
  368. }
  369. headStart++
  370. }
  371. }
  372. }
  373. // getCell returns the specific cell or nil if not present.
  374. func getCell(row []string, column int) (string, error) {
  375. if column < len(row) {
  376. return row[column], nil
  377. }
  378. return "", ErrorUndefinedCell
  379. }
  380. // countUnmappedColumns returns the count of unmapped columns.
  381. func countUnmappedColumns(mapping []int) int {
  382. count := 0
  383. for i := 0; i < len(mapping); i++ {
  384. if mapping[i] == unmappedColumn {
  385. count++
  386. }
  387. }
  388. return count
  389. }
  390. // tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
  391. func tryMergeLines(lines []*DiffLine) [][2]int {
  392. ids := make([][2]int, len(lines))
  393. i := 0
  394. for _, line := range lines {
  395. if line.Type != DiffLineSection {
  396. ids[i][0] = line.LeftIdx
  397. ids[i][1] = line.RightIdx
  398. i++
  399. }
  400. }
  401. ids = ids[:i]
  402. result := make([][2]int, len(ids))
  403. j := 0
  404. for i = 0; i < len(ids); i++ {
  405. if ids[i][0] == 0 {
  406. if j > 0 && result[j-1][1] == 0 {
  407. temp := j
  408. for temp > 0 && result[temp-1][1] == 0 {
  409. temp--
  410. }
  411. result[temp][1] = ids[i][1]
  412. continue
  413. }
  414. }
  415. result[j] = ids[i]
  416. j++
  417. }
  418. return result[:j]
  419. }