From 0c6137617fbf41ee6cb315f96a2acc2dd91203e8 Mon Sep 17 00:00:00 2001 From: KN4CK3R Date: Mon, 29 Mar 2021 22:44:28 +0200 Subject: Add Tabular Diff for CSV files (#14661) Implements request #14320 The rendering of CSV files does match the diff style. * Moved CSV logic into base package. * Added method to create a tabular diff. * Added CSV compare context. * Added CSV diff template. * Use new table style in CSV markup. * Added file size limit for CSV rendering. * Display CSV parser errors in diff. * Lazy read single file. * Lazy read rows for full diff. * Added unit tests for various CSV changes. --- modules/markup/csv/csv.go | 103 ++++++++++++++++------------------------- modules/markup/csv/csv_test.go | 12 ++--- 2 files changed, 44 insertions(+), 71 deletions(-) (limited to 'modules/markup/csv') diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1e3acc9b47..430e759eb5 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -6,24 +6,20 @@ package markup import ( "bytes" - "encoding/csv" "html" "io" - "regexp" - "strings" + "strconv" + "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" - "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/setting" ) -var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) - func init() { markup.RegisterParser(Parser{}) - } -// Parser implements markup.Parser for orgmode +// Parser implements markup.Parser for csv files type Parser struct { } @@ -38,11 +34,35 @@ func (Parser) Extensions() []string { } // Render implements markup.Parser -func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - rd := csv.NewReader(bytes.NewReader(rawBytes)) - rd.Comma = p.bestDelimiter(rawBytes) +func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { var tmpBlock bytes.Buffer - tmpBlock.WriteString(``) + + if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) { + tmpBlock.WriteString("
")
+		tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
+		tmpBlock.WriteString("
") + return tmpBlock.Bytes() + } + + rd := csv.CreateReaderAndGuessDelimiter(rawBytes) + + writeField := func(element, class, field string) { + tmpBlock.WriteString("<") + tmpBlock.WriteString(element) + if len(class) > 0 { + tmpBlock.WriteString(" class=\"") + tmpBlock.WriteString(class) + tmpBlock.WriteString("\"") + } + tmpBlock.WriteString(">") + tmpBlock.WriteString(html.EscapeString(field)) + tmpBlock.WriteString("") + } + + tmpBlock.WriteString(`
`) + row := 1 for { fields, err := rd.Read() if err == io.EOF { @@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin continue } tmpBlock.WriteString("") + element := "td" + if row == 1 { + element = "th" + } + writeField(element, "line-num", strconv.Itoa(row)) for _, field := range fields { - tmpBlock.WriteString("") + writeField(element, "", field) } tmpBlock.WriteString("") + + row++ } tmpBlock.WriteString("
") - tmpBlock.WriteString(html.EscapeString(field)) - tmpBlock.WriteString("
") return tmpBlock.Bytes() } - -// bestDelimiter scores the input CSV data against delimiters, and returns the best match. -// Reads at most 10k bytes & 10 lines. -func (p Parser) bestDelimiter(data []byte) rune { - maxLines := 10 - maxBytes := util.Min(len(data), 1e4) - text := string(data[:maxBytes]) - text = quoteRegexp.ReplaceAllLiteralString(text, "") - lines := strings.SplitN(text, "\n", maxLines+1) - lines = lines[:util.Min(maxLines, len(lines))] - - delimiters := []rune{',', ';', '\t', '|'} - bestDelim := delimiters[0] - bestScore := 0.0 - for _, delim := range delimiters { - score := p.scoreDelimiter(lines, delim) - if score > bestScore { - bestScore = score - bestDelim = delim - } - } - - return bestDelim -} - -// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV -func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { - countTotal := 0 - countLineMax := 0 - linesNotEqual := 0 - - for _, line := range lines { - if len(line) == 0 { - continue - } - - countLine := strings.Count(line, string(delim)) - countTotal += countLine - if countLine != countLineMax { - if countLineMax != 0 { - linesNotEqual++ - } - countLineMax = util.Max(countLine, countLineMax) - } - } - - return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) -} diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 4d4e0871e9..5438ebdf5c 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -13,14 +13,10 @@ import ( func TestRenderCSV(t *testing.T) { var parser Parser var kases = map[string]string{ - "a": "
a
", - "1,2": "
12
", - "1;2": "
12
", - "1\t2": "
12
", - "1|2": "
12
", - "1,2,3;4,5,6;7,8,9\na;b;c": "
1,2,34,5,67,8,9
abc
", - "\"1,2,3,4\";\"a\nb\"\nc;d": "
1,2,3,4a\nb
cd
", - "
": "
<br/>
", + "a": "
1a
", + "1,2": "
112
", + "1;2\n3;4": "
112
234
", + "
": "
1<br/>
", } for k, v := range kases { -- cgit v1.2.3