summaryrefslogtreecommitdiffstats
path: root/services
diff options
context:
space:
mode:
authorKN4CK3R <KN4CK3R@users.noreply.github.com>2021-03-29 22:44:28 +0200
committerGitHub <noreply@github.com>2021-03-29 22:44:28 +0200
commit0c6137617fbf41ee6cb315f96a2acc2dd91203e8 (patch)
tree27c8d1304334f1783232166927093419079ecd2a /services
parentd3b8127ad372bbce8d891d8893ffe6e834590751 (diff)
downloadgitea-0c6137617fbf41ee6cb315f96a2acc2dd91203e8.tar.gz
gitea-0c6137617fbf41ee6cb315f96a2acc2dd91203e8.zip
Add Tabular Diff for CSV files (#14661)
Implements request #14320. The rendering of CSV files matches the diff style. * Moved CSV logic into base package. * Added method to create a tabular diff. * Added CSV compare context. * Added CSV diff template. * Use new table style in CSV markup. * Added file size limit for CSV rendering. * Display CSV parser errors in diff. * Lazy read single file. * Lazy read rows for full diff. * Added unit tests for various CSV changes.
Diffstat (limited to 'services')
-rw-r--r--services/gitdiff/csv.go379
-rw-r--r--services/gitdiff/csv_test.go119
2 files changed, 498 insertions, 0 deletions
diff --git a/services/gitdiff/csv.go b/services/gitdiff/csv.go
new file mode 100644
index 0000000000..f4310d8772
--- /dev/null
+++ b/services/gitdiff/csv.go
@@ -0,0 +1,379 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gitdiff
+
+import (
+ "encoding/csv"
+ "errors"
+ "io"
+
+ "code.gitea.io/gitea/modules/util"
+)
+
+// Values controlling how the columns of two CSV files are mapped onto each other.
+const (
+	// unmappedColumn marks a column that has no counterpart in the other file.
+	unmappedColumn = -1
+	// maxRowsToInspect is the number of rows buffered per file and the upper
+	// bound of rows compared when columns are mapped by content.
+	maxRowsToInspect int = 10
+	// minRatioToMatch is the share of equal cells that has to be exceeded
+	// before two columns are mapped by content.
+	minRatioToMatch float32 = 0.8
+)
+
+// TableDiffCellType represents the type of a TableDiffCell.
+type TableDiffCellType uint8
+
+// TableDiffCellType possible values. Numbering starts at 1 (iota + 1), so the
+// zero value of TableDiffCellType is none of them.
+const (
+ // TableDiffCellEqual marks a compared cell whose base and head content are equal.
+ TableDiffCellEqual TableDiffCellType = iota + 1
+ // TableDiffCellChanged marks a compared cell whose base and head content differ.
+ TableDiffCellChanged
+ // TableDiffCellAdd marks a cell that is only present in the head file.
+ TableDiffCellAdd
+ // TableDiffCellDel marks a cell that is only present in the base file.
+ TableDiffCellDel
+)
+
+// TableDiffCell represents a cell of a TableDiffRow.
+type TableDiffCell struct {
+ // LeftCell holds the base content of a compared cell. The diff builders in
+ // this file also store the content of added and deleted cells here.
+ LeftCell string
+ // RightCell holds the head content. It is only set for cells that were
+ // compared between both files.
+ RightCell string
+ // Type tells how the cell differs between base and head.
+ Type TableDiffCellType
+}
+
+// TableDiffRow represents a row of a TableDiffSection.
+type TableDiffRow struct {
+ // RowIdx is the row number. createCsvDiffSingle counts rows starting at 1;
+ // createCsvDiff stores the head line number, which is 0 for rows that only
+ // exist in the base file.
+ RowIdx int
+ // Cells holds the cells of the row in display order.
+ Cells []*TableDiffCell
+}
+
+// TableDiffSection represents a section of a DiffFile.
+type TableDiffSection struct {
+ // Rows holds the table rows created for the lines of the section.
+ Rows []*TableDiffRow
+}
+
+// csvReader wraps a csv.Reader which buffers the first rows.
+type csvReader struct {
+ // reader is the wrapped reader all rows are read from.
+ reader *csv.Reader
+ // buffer holds the first rows of the file; createCsvReader leaves entries
+ // nil when the file ends before the buffer is full.
+ buffer [][]string
+ // line counts the rows consumed so far. createCsvReader sets it to the
+ // buffer size and GetRow increments it for every further row it reads.
+ line int
+ // eof is set by readNextRow once the wrapped reader reported io.EOF.
+ eof bool
+}
+
+// createCsvReader wraps reader in a csvReader and reads up to bufferRowCount
+// rows into its buffer. The buffer always has bufferRowCount entries; entries
+// past the end of the file stay nil. A read error other than io.EOF aborts the
+// creation and is returned.
+func createCsvReader(reader *csv.Reader, bufferRowCount int) (*csvReader, error) {
+	buffered := &csvReader{
+		reader: reader,
+		buffer: make([][]string, bufferRowCount),
+	}
+	for i := range buffered.buffer {
+		if buffered.eof {
+			break
+		}
+		row, err := buffered.readNextRow()
+		if err != nil {
+			return nil, err
+		}
+		buffered.buffer[i] = row
+	}
+	// The next row GetRow reads from the wrapped reader follows the buffered ones.
+	buffered.line = bufferRowCount
+	return buffered, nil
+}
+
+// GetRow returns the row with the given zero-based index. Buffered rows are
+// served from the buffer; for any other index the wrapped reader is advanced
+// until the requested row has been read. Once the end of the file is reached
+// nil is returned without an error.
+func (csv *csvReader) GetRow(row int) ([]string, error) {
+	if row < len(csv.buffer) {
+		return csv.buffer[row], nil
+	}
+	for !csv.eof {
+		fields, err := csv.readNextRow()
+		if err != nil {
+			return nil, err
+		}
+		if csv.eof {
+			break
+		}
+		csv.line++
+		// line counts rows, row is an index, hence the offset of one.
+		if csv.line-1 == row {
+			return fields, nil
+		}
+	}
+	return nil, nil
+}
+
+// readNextRow reads the next row from the wrapped reader. io.EOF is not
+// reported as an error; it sets the eof flag instead, and every later call
+// returns nil without touching the reader again.
+func (csv *csvReader) readNextRow() ([]string, error) {
+	if csv.eof {
+		return nil, nil
+	}
+	row, err := csv.reader.Read()
+	switch err {
+	case nil:
+		return row, nil
+	case io.EOF:
+		csv.eof = true
+		return row, nil
+	default:
+		return nil, err
+	}
+}
+
+// CreateCsvDiff creates a tabular diff based on two CSV readers.
+//
+// If both readers are set, the rows referenced by the sections of diffFile are
+// compared. If only baseReader is set every cell is reported as deleted, and if
+// only headReader is set every cell is reported as added. If neither reader is
+// set there is nothing to compare and no sections are returned.
+func CreateCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
+	switch {
+	case baseReader != nil && headReader != nil:
+		return createCsvDiff(diffFile, baseReader, headReader)
+	case baseReader != nil:
+		return createCsvDiffSingle(baseReader, TableDiffCellDel)
+	case headReader != nil:
+		return createCsvDiffSingle(headReader, TableDiffCellAdd)
+	default:
+		// Reading from a nil *csv.Reader panics, so bail out early.
+		return nil, nil
+	}
+}
+
+// createCsvDiffSingle creates a tabular diff from a single CSV reader. Every
+// cell of every row gets the given celltype and its content is stored in
+// LeftCell. Rows are numbered starting at 1. The result always consists of
+// exactly one section, even if the reader yields no rows.
+func createCsvDiffSingle(reader *csv.Reader, celltype TableDiffCellType) ([]*TableDiffSection, error) {
+	var rows []*TableDiffRow
+	for {
+		record, err := reader.Read()
+		if err == io.EOF {
+			break
+		}
+		if err != nil {
+			return nil, err
+		}
+		cells := make([]*TableDiffCell, 0, len(record))
+		for _, value := range record {
+			cells = append(cells, &TableDiffCell{LeftCell: value, Type: celltype})
+		}
+		rows = append(rows, &TableDiffRow{RowIdx: len(rows) + 1, Cells: cells})
+	}
+
+	return []*TableDiffSection{{Rows: rows}}, nil
+}
+
+// createCsvDiff creates a tabular diff of two CSV files for the lines
+// referenced by the sections of diffFile.
+//
+// The columns of both files are mapped onto each other first. Every row of the
+// result then has one cell per column of the combined layout: all base columns
+// in base order, with the columns that only exist in the head file inserted in
+// front of the mapped column that follows them in the head file (or at the end).
+// The header row (line 1 of both files) is put in front of the first section if
+// the section does not start with it. Sections without rows are dropped.
+func createCsvDiff(diffFile *DiffFile, baseReader *csv.Reader, headReader *csv.Reader) ([]*TableDiffSection, error) {
+	a, err := createCsvReader(baseReader, maxRowsToInspect)
+	if err != nil {
+		return nil, err
+	}
+
+	b, err := createCsvReader(headReader, maxRowsToInspect)
+	if err != nil {
+		return nil, err
+	}
+
+	a2b, b2a := getColumnMapping(a, b)
+
+	// columnPair names the source columns of one output column; a side that
+	// does not have the column is set to unmappedColumn.
+	type columnPair struct{ a, b int }
+
+	// Build the combined column layout once. Writing added head columns to
+	// their head index (as was done before) overwrote cells of base columns
+	// and left nil cells at the end of the row.
+	layout := make([]columnPair, 0, len(a2b)+countUnmappedColumns(b2a))
+	nextB := 0
+	// appendAdded emits the not yet emitted head-only columns in front of end.
+	appendAdded := func(end int) {
+		for ; nextB < end && nextB < len(b2a); nextB++ {
+			if b2a[nextB] == unmappedColumn {
+				layout = append(layout, columnPair{a: unmappedColumn, b: nextB})
+			}
+		}
+	}
+	for i, j := range a2b {
+		if j != unmappedColumn {
+			appendAdded(j)
+			if nextB <= j {
+				nextB = j + 1
+			}
+		}
+		layout = append(layout, columnPair{a: i, b: j})
+	}
+	appendAdded(len(b2a))
+
+	// createDiffRow builds the row for the 1-based base line aline and head
+	// line bline; 0 means the line does not exist on that side. It returns nil
+	// if there is no such row in the files.
+	createDiffRow := func(aline int, bline int) (*TableDiffRow, error) {
+		if aline <= 0 && bline <= 0 {
+			// Nothing to look up; GetRow must not be called with a negative index.
+			return nil, nil
+		}
+
+		var (
+			arow, brow []string
+			err        error
+		)
+		if aline > 0 {
+			if arow, err = a.GetRow(aline - 1); err != nil {
+				return nil, err
+			}
+		}
+		if bline > 0 {
+			if brow, err = b.GetRow(bline - 1); err != nil {
+				return nil, err
+			}
+		}
+
+		// The errors of getCell are ignored on purpose: a row that is shorter
+		// than the layout gets empty cells for the missing columns.
+		cells := make([]*TableDiffCell, len(layout))
+		switch {
+		case bline <= 0:
+			// The row only exists in the base file.
+			if arow == nil {
+				return nil, nil
+			}
+			for k, col := range layout {
+				cell := &TableDiffCell{Type: TableDiffCellDel}
+				if col.a != unmappedColumn {
+					cell.LeftCell, _ = getCell(arow, col.a)
+				}
+				cells[k] = cell
+			}
+		case aline <= 0:
+			// The row only exists in the head file.
+			if brow == nil {
+				return nil, nil
+			}
+			for k, col := range layout {
+				cell := &TableDiffCell{Type: TableDiffCellAdd}
+				if col.b != unmappedColumn {
+					cell.LeftCell, _ = getCell(brow, col.b)
+				}
+				cells[k] = cell
+			}
+		default:
+			if len(arow) == 0 && len(brow) == 0 {
+				return nil, nil
+			}
+			for k, col := range layout {
+				switch {
+				case col.b == unmappedColumn:
+					acell, _ := getCell(arow, col.a)
+					cells[k] = &TableDiffCell{LeftCell: acell, Type: TableDiffCellDel}
+				case col.a == unmappedColumn:
+					bcell, _ := getCell(brow, col.b)
+					cells[k] = &TableDiffCell{LeftCell: bcell, Type: TableDiffCellAdd}
+				default:
+					acell, _ := getCell(arow, col.a)
+					bcell, _ := getCell(brow, col.b)
+
+					celltype := TableDiffCellChanged
+					if acell == bcell {
+						celltype = TableDiffCellEqual
+					}
+
+					cells[k] = &TableDiffCell{LeftCell: acell, RightCell: bcell, Type: celltype}
+				}
+			}
+		}
+
+		return &TableDiffRow{RowIdx: bline, Cells: cells}, nil
+	}
+
+	var sections []*TableDiffSection
+
+	for i, section := range diffFile.Sections {
+		var rows []*TableDiffRow
+		lines := tryMergeLines(section.Lines)
+		for j, line := range lines {
+			// Always show the header row, even if the diff starts further down.
+			if i == 0 && j == 0 && (line[0] != 1 || line[1] != 1) {
+				diffRow, err := createDiffRow(1, 1)
+				if err != nil {
+					return nil, err
+				}
+				if diffRow != nil {
+					rows = append(rows, diffRow)
+				}
+			}
+			diffRow, err := createDiffRow(line[0], line[1])
+			if err != nil {
+				return nil, err
+			}
+			if diffRow != nil {
+				rows = append(rows, diffRow)
+			}
+		}
+
+		if len(rows) > 0 {
+			sections = append(sections, &TableDiffSection{Rows: rows})
+		}
+	}
+
+	return sections, nil
+}
+
+// getColumnMapping maps the columns of a and b onto each other. The first
+// result holds, for every column of a, the index of the matching column of b;
+// the second result holds the reverse direction. Columns without a match are
+// set to unmappedColumn.
+//
+// Columns are matched by equal header cells (row 0) first, scanning b from left
+// to right behind the previous match. Columns that are still unmapped
+// afterwards are handed to tryMapColumnsByContent, once per direction.
+func getColumnMapping(a *csvReader, b *csvReader) ([]int, []int) {
+	// Errors are ignored as in the rest of the mapping: a missing header row
+	// simply yields an empty mapping for that side.
+	arow, _ := a.GetRow(0)
+	brow, _ := b.GetRow(0)
+
+	a2b := make([]int, len(arow))
+	for i := range a2b {
+		a2b[i] = unmappedColumn
+	}
+	b2a := make([]int, len(brow))
+	for j := range b2a {
+		b2a[j] = unmappedColumn
+	}
+
+	next := 0
+	for i, header := range arow {
+		for j := next; j < len(brow); j++ {
+			if brow[j] == header {
+				a2b[i], b2a[j] = j, i
+				next = j + 1
+				break
+			}
+		}
+	}
+
+	tryMapColumnsByContent(a, a2b, b, b2a)
+	tryMapColumnsByContent(b, b2a, a, a2b)
+
+	return a2b, b2a
+}
+
+// tryMapColumnsByContent tries to map the still unmapped columns of a by the
+// content of the first lines. An unmapped column of a is mapped to the first
+// unmapped column of b for which more than minRatioToMatch of the inspected
+// rows hold equal cells. Row 0 (the header) is skipped and at most
+// maxRowsToInspect buffered rows are inspected. Both mappings are updated in
+// place.
+func tryMapColumnsByContent(a *csvReader, a2b []int, b *csvReader, b2a []int) {
+	rows := util.Min(maxRowsToInspect, util.Max(0, util.Min(len(a.buffer), len(b.buffer))-1))
+	if rows == 0 {
+		// Without content rows there is nothing to compare (and nothing to divide by).
+		return
+	}
+
+	for i := range a2b {
+		// Every candidate index is checked against len(b2a); deriving it from
+		// the previous mapping indexed b2a with -1 after an unmapped column.
+		for j := 0; a2b[i] == unmappedColumn && j < len(b2a); j++ {
+			if b2a[j] != unmappedColumn {
+				continue
+			}
+			same := 0
+			for r := 1; r <= rows; r++ {
+				acell, ea := getCell(a.buffer[r], i)
+				bcell, eb := getCell(b.buffer[r], j)
+				if ea == nil && eb == nil && acell == bcell {
+					same++
+				}
+			}
+			if float32(same)/float32(rows) > minRatioToMatch {
+				a2b[i] = j
+				b2a[j] = i
+			}
+		}
+	}
+}
+
+// getCell returns the cell of row at the given column. If the row has no such
+// column (the index is negative or not smaller than the row length) an empty
+// string and an error are returned.
+func getCell(row []string, column int) (string, error) {
+	if column >= 0 && column < len(row) {
+		return row[column], nil
+	}
+	return "", errors.New("Undefined column")
+}
+
+// countUnmappedColumns returns how many entries of mapping are set to unmappedColumn.
+func countUnmappedColumns(mapping []int) int {
+	unmapped := 0
+	for _, target := range mapping {
+		if target == unmappedColumn {
+			unmapped++
+		}
+	}
+	return unmapped
+}
+
+// tryMergeLines turns the lines of a git diff section into pairs of
+// {left line number, right line number}. Section header lines are skipped.
+// A line without left number that follows a run of lines without right number
+// is merged into the first line of that run which has no right number yet, so
+// that removed and added lines end up as one pair. The result is assumed to be
+// ordered.
+func tryMergeLines(lines []*DiffLine) [][2]int {
+	result := make([][2]int, 0, len(lines))
+
+	for _, line := range lines {
+		if line.Type == DiffLineSection {
+			continue
+		}
+		pair := [2]int{line.LeftIdx, line.RightIdx}
+
+		if n := len(result); pair[0] == 0 && n > 0 && result[n-1][1] == 0 {
+			// Walk back to the start of the trailing run of unpaired lines.
+			first := n - 1
+			for first > 0 && result[first-1][1] == 0 {
+				first--
+			}
+			result[first][1] = pair[1]
+			continue
+		}
+		result = append(result, pair)
+	}
+
+	return result
+}
diff --git a/services/gitdiff/csv_test.go b/services/gitdiff/csv_test.go
new file mode 100644
index 0000000000..17edea582c
--- /dev/null
+++ b/services/gitdiff/csv_test.go
@@ -0,0 +1,119 @@
+// Copyright 2021 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package gitdiff
+
+import (
+ "encoding/csv"
+ "strings"
+ "testing"
+
+ csv_module "code.gitea.io/gitea/modules/csv"
+ "code.gitea.io/gitea/modules/setting"
+ "github.com/stretchr/testify/assert"
+)
+
+// TestCSVDiff parses a git diff per case, builds the tabular diff from the base
+// and head CSV content and compares the cell types of the single resulting
+// section with the expected ones. An empty base or head is passed as a nil reader.
+func TestCSVDiff(t *testing.T) {
+	var cases = []struct {
+		diff  string
+		base  string
+		head  string
+		cells [][2]TableDiffCellType
+	}{
+		// case 0
+		{
+			diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -0,0 +1,2 @@
++col1,col2
++a,a`,
+			base:  "",
+			head:  "col1,col2\na,a",
+			cells: [][2]TableDiffCellType{{TableDiffCellAdd, TableDiffCellAdd}, {TableDiffCellAdd, TableDiffCellAdd}},
+		},
+		// case 1
+		{
+			diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,2 +1,3 @@
+ col1,col2
+-a,a
++a,a
++b,b`,
+			base:  "col1,col2\na,a",
+			head:  "col1,col2\na,a\nb,b",
+			cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellAdd, TableDiffCellAdd}},
+		},
+		// case 2
+		{
+			diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,3 +1,2 @@
+ col1,col2
+-a,a
+ b,b`,
+			base:  "col1,col2\na,a\nb,b",
+			head:  "col1,col2\nb,b",
+			cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellDel, TableDiffCellDel}, {TableDiffCellEqual, TableDiffCellEqual}},
+		},
+		// case 3
+		{
+			diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,2 +1,2 @@
+ col1,col2
+-b,b
++b,c`,
+			base:  "col1,col2\nb,b",
+			head:  "col1,col2\nb,c",
+			cells: [][2]TableDiffCellType{{TableDiffCellEqual, TableDiffCellEqual}, {TableDiffCellEqual, TableDiffCellChanged}},
+		},
+		// case 4
+		{
+			diff: `diff --git a/unittest.csv b/unittest.csv
+--- a/unittest.csv
++++ b/unittest.csv
+@@ -1,2 +0,0 @@
+-col1,col2
+-b,c`,
+			base:  "col1,col2\nb,c",
+			head:  "",
+			cells: [][2]TableDiffCellType{{TableDiffCellDel, TableDiffCellDel}, {TableDiffCellDel, TableDiffCellDel}},
+		},
+	}
+
+	for n, c := range cases {
+		diff, err := ParsePatch(setting.Git.MaxGitDiffLines, setting.Git.MaxGitDiffLineCharacters, setting.Git.MaxGitDiffFiles, strings.NewReader(c.diff))
+		if err != nil {
+			// Without a parsed diff there is nothing to test; going on would dereference a nil result.
+			t.Fatalf("ParsePatch failed: %s", err)
+		}
+		if len(diff.Files) == 0 {
+			t.Fatalf("case %d: ParsePatch returned no files", n)
+		}
+
+		var baseReader *csv.Reader
+		if len(c.base) > 0 {
+			baseReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.base))
+		}
+		var headReader *csv.Reader
+		if len(c.head) > 0 {
+			headReader = csv_module.CreateReaderAndGuessDelimiter([]byte(c.head))
+		}
+
+		result, err := CreateCsvDiff(diff.Files[0], baseReader, headReader)
+		assert.NoError(t, err)
+		// The assertions below index into result and c.cells, so skip the
+		// case instead of panicking when a length does not match.
+		if !assert.Equal(t, 1, len(result), "case %d: should be one section", n) {
+			continue
+		}
+
+		section := result[0]
+		if !assert.Equal(t, len(c.cells), len(section.Rows), "case %d: should be %d rows", n, len(c.cells)) {
+			continue
+		}
+
+		for i, row := range section.Rows {
+			if !assert.Equal(t, 2, len(row.Cells), "case %d: row %d should have two cells", n, i) {
+				continue
+			}
+			for j, cell := range row.Cells {
+				assert.Equal(t, c.cells[i][j], cell.Type, "case %d: row %d cell %d should be equal", n, i, j)
+			}
+		}
+	}
+}