diff options
author | KN4CK3R <KN4CK3R@users.noreply.github.com> | 2021-03-29 22:44:28 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-03-29 22:44:28 +0200 |
commit | 0c6137617fbf41ee6cb315f96a2acc2dd91203e8 (patch) | |
tree | 27c8d1304334f1783232166927093419079ecd2a /services | |
parent | d3b8127ad372bbce8d891d8893ffe6e834590751 (diff) | |
download | gitea-0c6137617fbf41ee6cb315f96a2acc2dd91203e8.tar.gz gitea-0c6137617fbf41ee6cb315f96a2acc2dd91203e8.zip |
Add Tabular Diff for CSV files (#14661)
Implements request #14320. The rendering of CSV files now matches the diff style.
* Moved CSV logic into base package.
* Added method to create a tabular diff.
* Added CSV compare context.
* Added CSV diff template.
* Use new table style in CSV markup.
* Added file size limit for CSV rendering.
* Display CSV parser errors in diff.
* Lazy read single file.
* Lazy read rows for full diff.
* Added unit tests for various CSV changes.
Diffstat (limited to 'services')
-rw-r--r-- | services/gitdiff/csv.go | 379 | ||||
-rw-r--r-- | services/gitdiff/csv_test.go | 119 |
2 files changed, 498 insertions, 0 deletions
diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go new file mode 100644 index 0000000000..f4310d8772 --- /dev/null +++ b/services/gitdiff/csv.go @@ -0,0 +1,379 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "encoding/csv" + "errors" + "io" + + "code.gitea.io/gitea/modules/util" +) + +const unmappedColumn = -1 +const maxRowsToInspect int = 10 +const minRatioToMatch float32 = 0.8 + +// TableDiffCellType represents the type of a TableDiffCell. +type TableDiffCellType uint8 + +// TableDiffCellType possible values. +const ( + TableDiffCellEqual TableDiffCellType = iota + 1 + TableDiffCellChanged + TableDiffCellAdd + TableDiffCellDel +) + +// TableDiffCell represents a cell of a TableDiffRow +type TableDiffCell struct { + LeftCell string + RightCell string + Type TableDiffCellType +} + +// TableDiffRow represents a row of a TableDiffSection. +type TableDiffRow struct { + RowIdx int + Cells []*TableDiffCell +} + +// TableDiffSection represents a section of a DiffFile. +type TableDiffSection struct { + Rows []*TableDiffRow +} + +// csvReader wraps a csv.Reader which buffers the first rows. +type csvReader struct { + reader *csv.Reader + buffer [][]string + line int + eof bool +} + +// createCsvReader creates a csvReader and fills the buffer +func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) { + csv := &csvReader{reader: reader} + csv.buffer = make([][]string, bufferRowCount) + for i := 0; i < bufferRowCount && !csv.eof; i++ { + row, err := csv.readNextRow() + if err != nil { + return nil, err + } + csv.buffer[i] = row + } + csv.line = bufferRowCount + return csv, nil +} + +// GetRow gets a row from the buffer if present or advances the reader to the requested row. On the end of the file only nil gets returned. 
+func (csv *csvReader) GetRow(row int) ([]string, error) { + if row < len(csv.buffer) { + return csv.buffer[row], nil + } + if csv.eof { + return nil, nil + } + for { + fields, err := csv.readNextRow() + if err != nil { + return nil, err + } + if csv.eof { + return nil, nil + } + csv.line++ + if csv.line-1 == row { + return fields, nil + } + } +} + +func (csv *csvReader) readNextRow() ([]string, error) { + if csv.eof { + return nil, nil + } + row, err := csv.reader.Read() + if err != nil { + if err != io.EOF { + return nil, err + } + csv.eof = true + } + return row, nil +} + +// CreateCsvDiff creates a tabular diff based on two CSV readers. +func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { + if baseReader != nil && headReader != nil { + return createCsvDiff(diffFile, baseReader, headReader) + } + + if baseReader != nil { + return createCsvDiffSingle(baseReader, TableDiffCellDel) + } + return createCsvDiffSingle(headReader, TableDiffCellAdd) +} + +// createCsvDiffSingle creates a tabular diff based on a single CSV reader. All cells are added or deleted. 
+func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) { + var rows []*TableDiffRow + i := 1 + for { + row, err := reader.Read() + if err != nil { + if err == io.EOF { + break + } + return nil, err + } + cells := make([]*TableDiffCell, len(row)) + for j := 0; j < len(row); j++ { + cells[j] = &TableDiffCell{LeftCell: row[j], Type: celltype} + } + rows = append(rows, &TableDiffRow{RowIdx: i, Cells: cells}) + i++ + } + + return []*TableDiffSection{{Rows: rows}}, nil +} + +func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) { + a, err := createCsvReader(baseReader, maxRowsToInspect) + if err != nil { + return nil, err + } + + b, err := createCsvReader(headReader, maxRowsToInspect) + if err != nil { + return nil, err + } + + a2b, b2a := getColumnMapping(a, b) + + columns := len(a2b) + countUnmappedColumns(b2a) + if len(a2b) < len(b2a) { + columns = len(b2a) + countUnmappedColumns(a2b) + } + + createDiffRow := func(aline int, bline int) (*TableDiffRow, error) { + cells := make([]*TableDiffCell, columns) + + if aline == 0 || bline == 0 { + var ( + row []string + celltype TableDiffCellType + err error + ) + if bline == 0 { + row, err = a.GetRow(aline - 1) + celltype = TableDiffCellDel + } else { + row, err = b.GetRow(bline - 1) + celltype = TableDiffCellAdd + } + if err != nil { + return nil, err + } + if row == nil { + return nil, nil + } + for i := 0; i < len(row); i++ { + cells[i] = &TableDiffCell{LeftCell: row[i], Type: celltype} + } + return &TableDiffRow{RowIdx: bline, Cells: cells}, nil + } + + arow, err := a.GetRow(aline - 1) + if err != nil { + return nil, err + } + brow, err := b.GetRow(bline - 1) + if err != nil { + return nil, err + } + if len(arow) == 0 && len(brow) == 0 { + return nil, nil + } + + for i := 0; i < len(a2b); i++ { + acell, _ := getCell(arow, i) + if a2b[i] == unmappedColumn { + cells[i] = &TableDiffCell{LeftCell: acell, Type: 
TableDiffCellDel} + } else { + bcell, _ := getCell(brow, a2b[i]) + + celltype := TableDiffCellChanged + if acell == bcell { + celltype = TableDiffCellEqual + } + + cells[i] = &TableDiffCell{LeftCell: acell, RightCell: bcell, Type: celltype} + } + } + for i := 0; i < len(b2a); i++ { + if b2a[i] == unmappedColumn { + bcell, _ := getCell(brow, i) + cells[i] = &TableDiffCell{LeftCell: bcell, Type: TableDiffCellAdd} + } + } + + return &TableDiffRow{RowIdx: bline, Cells: cells}, nil + } + + var sections []*TableDiffSection + + for i, section := range diffFile.Sections { + var rows []*TableDiffRow + lines := tryMergeLines(section.Lines) + for j, line := range lines { + if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) { + diffRow, err := createDiffRow(1, 1) + if err != nil { + return nil, err + } + if diffRow != nil { + rows = append(rows, diffRow) + } + } + diffRow, err := createDiffRow(line[0], line[1]) + if err != nil { + return nil, err + } + if diffRow != nil { + rows = append(rows, diffRow) + } + } + + if len(rows) > 0 { + sections = append(sections, &TableDiffSection{Rows: rows}) + } + } + + return sections, nil +} + +// getColumnMapping creates a mapping of columns between a and b +func getColumnMapping(a *csvReader, b *csvReader) ([]int, []int) { + arow, _ := a.GetRow(0) + brow, _ := b.GetRow(0) + + a2b := []int{} + b2a := []int{} + + if arow != nil { + a2b = make([]int, len(arow)) + } + if brow != nil { + b2a = make([]int, len(brow)) + } + + for i := 0; i < len(b2a); i++ { + b2a[i] = unmappedColumn + } + + bcol := 0 + for i := 0; i < len(a2b); i++ { + a2b[i] = unmappedColumn + + acell, ea := getCell(arow, i) + if ea == nil { + for j := bcol; j < len(b2a); j++ { + bcell, eb := getCell(brow, j) + if eb == nil && acell == bcell { + a2b[i] = j + b2a[j] = i + bcol = j + 1 + break + } + } + } + } + + tryMapColumnsByContent(a, a2b, b, b2a) + tryMapColumnsByContent(b, b2a, a, a2b) + + return a2b, b2a +} + +// tryMapColumnsByContent tries to map missing columns by 
the content of the first lines. +func tryMapColumnsByContent(a *csvReader, a2b []int, b *csvReader, b2a []int) { + start := 0 + for i := 0; i < len(a2b); i++ { + if a2b[i] == unmappedColumn { + if b2a[start] == unmappedColumn { + rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(a.buffer), len(b.buffer))-1)) + same := 0 + for j := 1; j <= rows; j++ { + acell, ea := getCell(a.buffer[j], i) + bcell, eb := getCell(b.buffer[j], start+1) + if ea == nil && eb == nil && acell == bcell { + same++ + } + } + if (float32(same) / float32(rows)) > minRatioToMatch { + a2b[i] = start + 1 + b2a[start+1] = i + } + } + } + start = a2b[i] + } +} + +// getCell returns the specific cell or nil if not present. +func getCell(row []string, column int) (string, error) { + if column < len(row) { + return row[column], nil + } + return "", errors.New("Undefined column") +} + +// countUnmappedColumns returns the count of unmapped columns. +func countUnmappedColumns(mapping []int) int { + count := 0 + for i := 0; i < len(mapping); i++ { + if mapping[i] == unmappedColumn { + count++ + } + } + return count +} + +// tryMergeLines maps the separated line numbers of a git diff. The result is assumed to be ordered. 
+func tryMergeLines(lines []*DiffLine) [][2]int { + ids := make([][2]int, len(lines)) + + i := 0 + for _, line := range lines { + if line.Type != DiffLineSection { + ids[i][0] = line.LeftIdx + ids[i][1] = line.RightIdx + i++ + } + } + + ids = ids[:i] + + result := make([][2]int, len(ids)) + + j := 0 + for i = 0; i < len(ids); i++ { + if ids[i][0] == 0 { + if j > 0 && result[j-1][1] == 0 { + temp := j + for temp > 0 && result[temp-1][1] == 0 { + temp-- + } + result[temp][1] = ids[i][1] + continue + } + } + result[j] = ids[i] + j++ + } + + return result[:j] +} diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go new file mode 100644 index 0000000000..17edea582c --- /dev/null +++ b/services/gitdiff/csv_test.go @@ -0,0 +1,119 @@ +// Copyright 2021 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package gitdiff + +import ( + "encoding/csv" + "strings" + "testing" + + csv_module "code.gitea.io/gitea/modules/csv" + "code.gitea.io/gitea/modules/setting" + "github.com/stretchr/testify/assert" +) + +func TestCSVDiff(t *testing.T) { + var cases = []struct { + diff string + base string + head string + cells [][2]TableDiffCellType + }{ + // case 0 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -0,0 +1,2 @@ ++col1,col2 ++a,a`, + base: "", + head: "col1,col2\na,a", + cells: [][2]TableDiffCellType{{TableDiffCellAdd, TableDiffCellAdd}, {TableDiffCellAdd, TableDiffCellAdd}}, + }, + // case 1 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,3 @@ + col1,col2 +-a,a ++a,a ++b,b`, + base: "col1,col2\na,a", + head: "col1,col2\na,a\nb,b", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellAdd, TableDiffCellAdd}}, + }, + // case 2 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- 
a/unittest.csv ++++ b/unittest.csv +@@ -1,3 +1,2 @@ + col1,col2 +-a,a + b,b`, + base: "col1,col2\na,a\nb,b", + head: "col1,col2\nb,b", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellDel, TableDiffCellDel}, {TableDiffCellEqual, TableDiffCellEqual}}, + }, + // case 3 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +1,2 @@ + col1,col2 +-b,b ++b,c`, + base: "col1,col2\nb,b", + head: "col1,col2\nb,c", + cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellChanged}}, + }, + // case 4 + { + diff: `diff --git a/unittest.csv b/unittest.csv +--- a/unittest.csv ++++ b/unittest.csv +@@ -1,2 +0,0 @@ +-col1,col2 +-b,c`, + base: "col1,col2\nb,c", + head: "", + cells: [][2]TableDiffCellType{{TableDiffCellDel, TableDiffCellDel}, {TableDiffCellDel, TableDiffCellDel}}, + }, + } + + for n, c := range cases { + diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff)) + if err != nil { + t.Errorf("ParsePatch failed: %s", err) + } + + var baseReader *csv.Reader + if len(c.base) > 0 { + baseReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.base)) + } + var headReader *csv.Reader + if len(c.head) > 0 { + headReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.head)) + } + + result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader) + assert.NoError(t, err) + assert.Equal(t, 1, len(result), "case %d: should be one section", n) + + section := result[0] + assert.Equal(t, len(c.cells), len(section.Rows), "case %d: should be %d rows", n, len(c.cells)) + + for i, row := range section.Rows { + assert.Equal(t, 2, len(row.Cells), "case %d: row %d should have two cells", n, i) + for j, cell := range row.Cells { + assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j) + } + } + } +} |