123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471 |
- // Copyright 2021 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package gitdiff
-
- import (
- "encoding/csv"
- "errors"
- "io"
-
- "code.gitea.io/gitea/modules/util"
- )
-
- const (
- unmappedColumn = -1
- maxRowsToInspect int = 10
- minRatioToMatch float32 = 0.8
- )
-
- // TableDiffCellType represents the type of a TableDiffCell.
- type TableDiffCellType uint8
-
- // TableDiffCellType possible values.
- const (
- TableDiffCellUnchanged TableDiffCellType = iota + 1
- TableDiffCellChanged
- TableDiffCellAdd
- TableDiffCellDel
- TableDiffCellMovedUnchanged
- TableDiffCellMovedChanged
- )
-
- // TableDiffCell represents a cell of a TableDiffRow
- type TableDiffCell struct {
- LeftCell string
- RightCell string
- Type TableDiffCellType
- }
-
- // TableDiffRow represents a row of a TableDiffSection.
- type TableDiffRow struct {
- RowIdx int
- Cells []*TableDiffCell
- }
-
- // TableDiffSection represents a section of a DiffFile.
- type TableDiffSection struct {
- Rows []*TableDiffRow
- }
-
- // csvReader wraps a csv.Reader which buffers the first rows.
- type csvReader struct {
- reader *csv.Reader
- buffer [][]string
- line int
- eof bool
- }
-
- // ErrorUndefinedCell is for when a row, column coordinates do not exist in the CSV
- var ErrorUndefinedCell = errors.New("undefined cell")
-
- // createCsvReader creates a csvReader and fills the buffer
- func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
- csv := &csvReader{reader: reader}
- csv.buffer = make([][]string, bufferRowCount)
- for i := 0; i < bufferRowCount && !csv.eof; i++ {
- row, err := csv.readNextRow()
- if err != nil {
- return nil, err
- }
- csv.buffer[i] = row
- }
- csv.line = bufferRowCount
- return csv, nil
- }
-
- // GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned.
- func (csv *csvReader) GetRow(row int) ([]string, error) {
- if row < len(csv.buffer) && row >= 0 {
- return csv.buffer[row], nil
- }
- if csv.eof {
- return nil, nil
- }
- for {
- fields, err := csv.readNextRow()
- if err != nil {
- return nil, err
- }
- if csv.eof {
- return nil, nil
- }
- csv.line++
- if csv.line-1 == row {
- return fields, nil
- }
- }
- }
-
- func (csv *csvReader) readNextRow() ([]string, error) {
- if csv.eof {
- return nil, nil
- }
- row, err := csv.reader.Read()
- if err != nil {
- if err != io.EOF {
- return nil, err
- }
- csv.eof = true
- }
- return row, nil
- }
-
- // CreateCsvDiff creates a tabular diff based on two CSV readers.
- func CreateCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
- if baseReader != nil && headReader != nil {
- return createCsvDiff(diffFile, baseReader, headReader)
- }
-
- if baseReader != nil {
- return createCsvDiffSingle(baseReader, TableDiffCellDel)
- }
- return createCsvDiffSingle(headReader, TableDiffCellAdd)
- }
-
- // createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted.
- func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
- var rows []*TableDiffRow
- i := 1
- for {
- row, err := reader.Read()
- if err != nil {
- if err == io.EOF {
- break
- }
- return nil, err
- }
- cells := make([]*TableDiffCell, len(row))
- for j := 0; j < len(row); j++ {
- if celltype == TableDiffCellDel {
- cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype}
- } else {
- cells[j] = &TableDiffCell{RightCell: row[j], Type: celltype}
- }
- }
- rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells})
- i++
- }
-
- return []*TableDiffSection{{Rows: rows}}, nil
- }
-
- func createCsvDiff(diffFile *DiffFile, baseReader, headReader *csv.Reader) ([]*TableDiffSection, error) {
- // Given the baseReader and headReader, we are going to create CSV Reader for each, baseCSVReader and b respectively
- baseCSVReader, err := createCsvReader(baseReader, maxRowsToInspect)
- if err != nil {
- return nil, err
- }
- headCSVReader, err := createCsvReader(headReader, maxRowsToInspect)
- if err != nil {
- return nil, err
- }
-
- // Initializing the mappings of base to head (a2bColMap) and head to base (b2aColMap) columns
- a2bColMap, b2aColMap := getColumnMapping(baseCSVReader, headCSVReader)
-
- // Determines how many cols there will be in the diff table, which includes deleted columns from base and added columns to base
- numDiffTableCols := len(a2bColMap) + countUnmappedColumns(b2aColMap)
- if len(a2bColMap) < len(b2aColMap) {
- numDiffTableCols = len(b2aColMap) + countUnmappedColumns(a2bColMap)
- }
-
- // createDiffTableRow takes the row # of the `a` line and `b` line of a diff (starting from 1), 0 if the line doesn't exist (undefined)
- // in the base or head respectively.
- // Returns a TableDiffRow which has the row index
- createDiffTableRow := func(aLineNum, bLineNum int) (*TableDiffRow, error) {
- // diffTableCells is a row of the diff table. It will have a cells for added, deleted, changed, and unchanged content, thus either
- // the same size as the head table or bigger
- diffTableCells := make([]*TableDiffCell, numDiffTableCols)
- var bRow *[]string
- if bLineNum > 0 {
- row, err := headCSVReader.GetRow(bLineNum - 1)
- if err != nil {
- return nil, err
- }
- bRow = &row
- }
- var aRow *[]string
- if aLineNum > 0 {
- row, err := baseCSVReader.GetRow(aLineNum - 1)
- if err != nil {
- return nil, err
- }
- aRow = &row
- }
- if aRow == nil && bRow == nil {
- // No content
- return nil, nil
- }
-
- aIndex := 0 // tracks where we are in the a2bColMap
- bIndex := 0 // tracks where we are in the b2aColMap
- colsAdded := 0 // incremented whenever we found a column was added
- colsDeleted := 0 // incrememted whenever a column was deleted
-
- // We loop until both the aIndex and bIndex are greater than their col map, which then we are done
- for aIndex < len(a2bColMap) || bIndex < len(b2aColMap) {
- // Starting from where aIndex is currently pointing, we see if the map is -1 (dleeted) and if is, create column to note that, increment, and look at the next aIndex
- for aIndex < len(a2bColMap) && a2bColMap[aIndex] == -1 && (bIndex >= len(b2aColMap) || aIndex <= bIndex) {
- var aCell string
- if aRow != nil {
- if cell, err := getCell(*aRow, aIndex); err != nil {
- if err != ErrorUndefinedCell {
- return nil, err
- }
- } else {
- aCell = cell
- }
- }
- diffTableCells[bIndex+colsDeleted] = &TableDiffCell{LeftCell: aCell, Type: TableDiffCellDel}
- aIndex++
- colsDeleted++
- }
-
- // aIndex is now pointing to a column that also exists in b, or is at the end of a2bColMap. If the former,
- // we can just increment aIndex until it points to a -1 column or one greater than the current bIndex
- for aIndex < len(a2bColMap) && a2bColMap[aIndex] != -1 {
- aIndex++
- }
-
- // Starting from where bIndex is currently pointing, we see if the map is -1 (added) and if is, create column to note that, increment, and look at the next aIndex
- for bIndex < len(b2aColMap) && b2aColMap[bIndex] == -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
- var bCell string
- cellType := TableDiffCellAdd
- if bRow != nil {
- if cell, err := getCell(*bRow, bIndex); err != nil {
- if err != ErrorUndefinedCell {
- return nil, err
- }
- } else {
- bCell = cell
- }
- } else {
- cellType = TableDiffCellDel
- }
- diffTableCells[bIndex+colsDeleted] = &TableDiffCell{RightCell: bCell, Type: cellType}
- bIndex++
- colsAdded++
- }
-
- // aIndex is now pointing to a column that also exists in a, or is at the end of b2aColMap. If the former,
- // we get the a col and b col values (if they exist), figure out if they are the same or not, and if the column moved, and add it to the diff table
- for bIndex < len(b2aColMap) && b2aColMap[bIndex] != -1 && (aIndex >= len(a2bColMap) || bIndex < aIndex) {
- var diffTableCell TableDiffCell
-
- var aCell *string
- // get the aCell value if the aRow exists
- if aRow != nil {
- if cell, err := getCell(*aRow, b2aColMap[bIndex]); err != nil {
- if err != ErrorUndefinedCell {
- return nil, err
- }
- } else {
- aCell = &cell
- diffTableCell.LeftCell = cell
- }
- } else {
- diffTableCell.Type = TableDiffCellAdd
- }
-
- var bCell *string
- // get the bCell value if the bRow exists
- if bRow != nil {
- if cell, err := getCell(*bRow, bIndex); err != nil {
- if err != ErrorUndefinedCell {
- return nil, err
- }
- } else {
- bCell = &cell
- diffTableCell.RightCell = cell
- }
- } else {
- diffTableCell.Type = TableDiffCellDel
- }
-
- // if both a and b have a row that exists, compare the value and determine if the row has moved
- if aCell != nil && bCell != nil {
- moved := ((bIndex + colsDeleted) != (b2aColMap[bIndex] + colsAdded))
- if *aCell != *bCell {
- if moved {
- diffTableCell.Type = TableDiffCellMovedChanged
- } else {
- diffTableCell.Type = TableDiffCellChanged
- }
- } else {
- if moved {
- diffTableCell.Type = TableDiffCellMovedUnchanged
- } else {
- diffTableCell.Type = TableDiffCellUnchanged
- }
- diffTableCell.LeftCell = ""
- }
- }
-
- // Add the diff column to the diff row
- diffTableCells[bIndex+colsDeleted] = &diffTableCell
- bIndex++
- }
- }
-
- return &TableDiffRow{RowIdx: bLineNum, Cells: diffTableCells}, nil
- }
-
- // diffTableSections are TableDiffSections which represent the diffTableSections we get when doing a diff, each will be its own table in the view
- var diffTableSections []*TableDiffSection
-
- for i, section := range diffFile.Sections {
- // Each section has multiple diffTableRows
- var diffTableRows []*TableDiffRow
- lines := tryMergeLines(section.Lines)
- // Loop through the merged lines to get each row of the CSV diff table for this section
- for j, line := range lines {
- if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
- diffTableRow, err := createDiffTableRow(1, 1)
- if err != nil {
- return nil, err
- }
- if diffTableRow != nil {
- diffTableRows = append(diffTableRows, diffTableRow)
- }
- }
- diffTableRow, err := createDiffTableRow(line[0], line[1])
- if err != nil {
- return nil, err
- }
- if diffTableRow != nil {
- diffTableRows = append(diffTableRows, diffTableRow)
- }
- }
-
- if len(diffTableRows) > 0 {
- diffTableSections = append(diffTableSections, &TableDiffSection{Rows: diffTableRows})
- }
- }
-
- return diffTableSections, nil
- }
-
- // getColumnMapping creates a mapping of columns between a and b
- func getColumnMapping(baseCSVReader, headCSVReader *csvReader) ([]int, []int) {
- baseRow, _ := baseCSVReader.GetRow(0)
- headRow, _ := headCSVReader.GetRow(0)
-
- base2HeadColMap := []int{}
- head2BaseColMap := []int{}
-
- if baseRow != nil {
- base2HeadColMap = make([]int, len(baseRow))
- }
- if headRow != nil {
- head2BaseColMap = make([]int, len(headRow))
- }
-
- // Initializes all head2base mappings to be unmappedColumn (-1)
- for i := 0; i < len(head2BaseColMap); i++ {
- head2BaseColMap[i] = unmappedColumn
- }
-
- // Loops through the baseRow and see if there is a match in the head row
- for i := 0; i < len(baseRow); i++ {
- base2HeadColMap[i] = unmappedColumn
- baseCell, err := getCell(baseRow, i)
- if err == nil {
- for j := 0; j < len(headRow); j++ {
- if head2BaseColMap[j] == -1 {
- headCell, err := getCell(headRow, j)
- if err == nil && baseCell == headCell {
- base2HeadColMap[i] = j
- head2BaseColMap[j] = i
- break
- }
- }
- }
- }
- }
-
- tryMapColumnsByContent(baseCSVReader, base2HeadColMap, headCSVReader, head2BaseColMap)
- tryMapColumnsByContent(headCSVReader, head2BaseColMap, baseCSVReader, base2HeadColMap)
-
- return base2HeadColMap, head2BaseColMap
- }
-
- // tryMapColumnsByContent tries to map missing columns by the content of the first lines.
- func tryMapColumnsByContent(baseCSVReader *csvReader, base2HeadColMap []int, headCSVReader *csvReader, head2BaseColMap []int) {
- for i := 0; i < len(base2HeadColMap); i++ {
- headStart := 0
- for base2HeadColMap[i] == unmappedColumn && headStart < len(head2BaseColMap) {
- if head2BaseColMap[headStart] == unmappedColumn {
- rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(baseCSVReader.buffer), len(headCSVReader.buffer))-1))
- same := 0
- for j := 1; j <= rows; j++ {
- baseCell, baseErr := getCell(baseCSVReader.buffer[j], i)
- headCell, headErr := getCell(headCSVReader.buffer[j], headStart)
- if baseErr == nil && headErr == nil && baseCell == headCell {
- same++
- }
- }
- if (float32(same) / float32(rows)) > minRatioToMatch {
- base2HeadColMap[i] = headStart
- head2BaseColMap[headStart] = i
- }
- }
- headStart++
- }
- }
- }
-
- // getCell returns the specific cell or nil if not present.
- func getCell(row []string, column int) (string, error) {
- if column < len(row) {
- return row[column], nil
- }
- return "", ErrorUndefinedCell
- }
-
- // countUnmappedColumns returns the count of unmapped columns.
- func countUnmappedColumns(mapping []int) int {
- count := 0
- for i := 0; i < len(mapping); i++ {
- if mapping[i] == unmappedColumn {
- count++
- }
- }
- return count
- }
-
- // tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered.
- func tryMergeLines(lines []*DiffLine) [][2]int {
- ids := make([][2]int, len(lines))
-
- i := 0
- for _, line := range lines {
- if line.Type != DiffLineSection {
- ids[i][0] = line.LeftIdx
- ids[i][1] = line.RightIdx
- i++
- }
- }
-
- ids = ids[:i]
-
- result := make([][2]int, len(ids))
-
- j := 0
- for i = 0; i < len(ids); i++ {
- if ids[i][0] == 0 {
- if j > 0 && result[j-1][1] == 0 {
- temp := j
- for temp > 0 && result[temp-1][1] == 0 {
- temp--
- }
- result[temp][1] = ids[i][1]
- continue
- }
- }
- result[j] = ids[i]
- j++
- }
-
- return result[:j]
- }
|