about | summary | refs | log | tree | commit | diff | stats
path: root/modules/markup
diff options
context:
space:
mode:
author: KN4CK3R <KN4CK3R@users.noreply.github.com> 2021-03-29 22:44:28 +0200
committer: GitHub <noreply@github.com> 2021-03-29 22:44:28 +0200
commit: 0c6137617fbf41ee6cb315f96a2acc2dd91203e8 (patch)
tree: 27c8d1304334f1783232166927093419079ecd2a /modules/markup
parent: d3b8127ad372bbce8d891d8893ffe6e834590751 (diff)
download: gitea-0c6137617fbf41ee6cb315f96a2acc2dd91203e8.tar.gz
download: gitea-0c6137617fbf41ee6cb315f96a2acc2dd91203e8.zip
Add Tabular Diff for CSV files (#14661)
Implements request #14320. The rendering of CSV files now matches the diff style.

* Moved CSV logic into base package.
* Added method to create a tabular diff.
* Added CSV compare context.
* Added CSV diff template.
* Use new table style in CSV markup.
* Added file size limit for CSV rendering.
* Display CSV parser errors in diff.
* Lazy read single file.
* Lazy read rows for full diff.
* Added unit tests for various CSV changes.
Diffstat (limited to 'modules/markup')
-rw-r--r-- modules/markup/csv/csv.go 103
-rw-r--r-- modules/markup/csv/csv_test.go 12
-rw-r--r-- modules/markup/sanitizer.go 4
3 files changed, 48 insertions, 71 deletions
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go
index 1e3acc9b47..430e759eb5 100644
--- a/modules/markup/csv/csv.go
+++ b/modules/markup/csv/csv.go
@@ -6,24 +6,20 @@ package markup
import (
"bytes"
- "encoding/csv"
"html"
"io"
- "regexp"
- "strings"
+ "strconv"
+ "code.gitea.io/gitea/modules/csv"
"code.gitea.io/gitea/modules/markup"
- "code.gitea.io/gitea/modules/util"
+ "code.gitea.io/gitea/modules/setting"
)
-var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`)
-
func init() {
markup.RegisterParser(Parser{})
-
}
-// Parser implements markup.Parser for orgmode
+// Parser implements markup.Parser for csv files
type Parser struct {
}
@@ -38,11 +34,35 @@ func (Parser) Extensions() []string {
}
// Render implements markup.Parser
-func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
- rd := csv.NewReader(bytes.NewReader(rawBytes))
- rd.Comma = p.bestDelimiter(rawBytes)
+func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte {
var tmpBlock bytes.Buffer
- tmpBlock.WriteString(`<table class="table">`)
+
+ if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) {
+ tmpBlock.WriteString("<pre>")
+ tmpBlock.WriteString(html.EscapeString(string(rawBytes)))
+ tmpBlock.WriteString("</pre>")
+ return tmpBlock.Bytes()
+ }
+
+ rd := csv.CreateReaderAndGuessDelimiter(rawBytes)
+
+ writeField := func(element, class, field string) {
+ tmpBlock.WriteString("<")
+ tmpBlock.WriteString(element)
+ if len(class) > 0 {
+ tmpBlock.WriteString(" class=\"")
+ tmpBlock.WriteString(class)
+ tmpBlock.WriteString("\"")
+ }
+ tmpBlock.WriteString(">")
+ tmpBlock.WriteString(html.EscapeString(field))
+ tmpBlock.WriteString("</")
+ tmpBlock.WriteString(element)
+ tmpBlock.WriteString(">")
+ }
+
+ tmpBlock.WriteString(`<table class="data-table">`)
+ row := 1
for {
fields, err := rd.Read()
if err == io.EOF {
@@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin
continue
}
tmpBlock.WriteString("<tr>")
+ element := "td"
+ if row == 1 {
+ element = "th"
+ }
+ writeField(element, "line-num", strconv.Itoa(row))
for _, field := range fields {
- tmpBlock.WriteString("<td>")
- tmpBlock.WriteString(html.EscapeString(field))
- tmpBlock.WriteString("</td>")
+ writeField(element, "", field)
}
tmpBlock.WriteString("</tr>")
+
+ row++
}
tmpBlock.WriteString("</table>")
return tmpBlock.Bytes()
}
-
-// bestDelimiter scores the input CSV data against delimiters, and returns the best match.
-// Reads at most 10k bytes & 10 lines.
-func (p Parser) bestDelimiter(data []byte) rune {
- maxLines := 10
- maxBytes := util.Min(len(data), 1e4)
- text := string(data[:maxBytes])
- text = quoteRegexp.ReplaceAllLiteralString(text, "")
- lines := strings.SplitN(text, "\n", maxLines+1)
- lines = lines[:util.Min(maxLines, len(lines))]
-
- delimiters := []rune{',', ';', '\t', '|'}
- bestDelim := delimiters[0]
- bestScore := 0.0
- for _, delim := range delimiters {
- score := p.scoreDelimiter(lines, delim)
- if score > bestScore {
- bestScore = score
- bestDelim = delim
- }
- }
-
- return bestDelim
-}
-
-// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV
-func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) {
- countTotal := 0
- countLineMax := 0
- linesNotEqual := 0
-
- for _, line := range lines {
- if len(line) == 0 {
- continue
- }
-
- countLine := strings.Count(line, string(delim))
- countTotal += countLine
- if countLine != countLineMax {
- if countLineMax != 0 {
- linesNotEqual++
- }
- countLineMax = util.Max(countLine, countLineMax)
- }
- }
-
- return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines)))
-}
diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go
index 4d4e0871e9..5438ebdf5c 100644
--- a/modules/markup/csv/csv_test.go
+++ b/modules/markup/csv/csv_test.go
@@ -13,14 +13,10 @@ import (
func TestRenderCSV(t *testing.T) {
var parser Parser
var kases = map[string]string{
- "a": "<table class=\"table\"><tr><td>a</td></tr></table>",
- "1,2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
- "1;2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
- "1\t2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
- "1|2": "<table class=\"table\"><tr><td>1</td><td>2</td></tr></table>",
- "1,2,3;4,5,6;7,8,9\na;b;c": "<table class=\"table\"><tr><td>1,2,3</td><td>4,5,6</td><td>7,8,9</td></tr><tr><td>a</td><td>b</td><td>c</td></tr></table>",
- "\"1,2,3,4\";\"a\nb\"\nc;d": "<table class=\"table\"><tr><td>1,2,3,4</td><td>a\nb</td></tr><tr><td>c</td><td>d</td></tr></table>",
- "<br/>": "<table class=\"table\"><tr><td>&lt;br/&gt;</td></tr></table>",
+ "a": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>a</th></tr></table>",
+ "1,2": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>1</th><th>2</th></tr></table>",
+ "1;2\n3;4": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>1</th><th>2</th></tr><tr><td class=\"line-num\">2</td><td>3</td><td>4</td></tr></table>",
+ "<br/>": "<table class=\"data-table\"><tr><th class=\"line-num\">1</th><th>&lt;br/&gt;</th></tr></table>",
}
for k, v := range kases {
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index 9214a75fb3..19feaa3cce 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -69,6 +69,10 @@ func ReplaceSanitizer() {
// Allow icons, emojis, and chroma syntax on span
sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$`)).OnElements("span")
+ // Allow data tables
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table")
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td")
+
// Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
"accesskey", "action", "align", "alt",