diff options
Diffstat (limited to 'modules/markup/csv/csv.go')
-rw-r--r-- | modules/markup/csv/csv.go | 103 |
1 files changed, 40 insertions, 63 deletions
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1e3acc9b47..430e759eb5 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -6,24 +6,20 @@ package markup import ( "bytes" - "encoding/csv" "html" "io" - "regexp" - "strings" + "strconv" + "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" - "code.gitea.io/gitea/modules/util" + "code.gitea.io/gitea/modules/setting" ) -var quoteRegexp = regexp.MustCompile(`["'][\s\S]+?["']`) - func init() { markup.RegisterParser(Parser{}) - } -// Parser implements markup.Parser for orgmode +// Parser implements markup.Parser for csv files type Parser struct { } @@ -38,11 +34,35 @@ func (Parser) Extensions() []string { } // Render implements markup.Parser -func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { - rd := csv.NewReader(bytes.NewReader(rawBytes)) - rd.Comma = p.bestDelimiter(rawBytes) +func (Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]string, isWiki bool) []byte { var tmpBlock bytes.Buffer - tmpBlock.WriteString(`<table class="table">`) + + if setting.UI.CSV.MaxFileSize != 0 && setting.UI.CSV.MaxFileSize < int64(len(rawBytes)) { + tmpBlock.WriteString("<pre>") + tmpBlock.WriteString(html.EscapeString(string(rawBytes))) + tmpBlock.WriteString("</pre>") + return tmpBlock.Bytes() + } + + rd := csv.CreateReaderAndGuessDelimiter(rawBytes) + + writeField := func(element, class, field string) { + tmpBlock.WriteString("<") + tmpBlock.WriteString(element) + if len(class) > 0 { + tmpBlock.WriteString(" class=\"") + tmpBlock.WriteString(class) + tmpBlock.WriteString("\"") + } + tmpBlock.WriteString(">") + tmpBlock.WriteString(html.EscapeString(field)) + tmpBlock.WriteString("</") + tmpBlock.WriteString(element) + tmpBlock.WriteString(">") + } + + tmpBlock.WriteString(`<table class="data-table">`) + row := 1 for { fields, err := rd.Read() if err == io.EOF { @@ -52,62 +72,19 @@ func (p Parser) Render(rawBytes []byte, urlPrefix string, metas map[string]strin continue } tmpBlock.WriteString("<tr>") + element := "td" + if row == 1 { + element = "th" + } + writeField(element, "line-num", strconv.Itoa(row)) for _, field := range fields { - tmpBlock.WriteString("<td>") - tmpBlock.WriteString(html.EscapeString(field)) - tmpBlock.WriteString("</td>") + writeField(element, "", field) } tmpBlock.WriteString("</tr>") + + row++ } tmpBlock.WriteString("</table>") return tmpBlock.Bytes() } - -// bestDelimiter scores the input CSV data against delimiters, and returns the best match. -// Reads at most 10k bytes & 10 lines. -func (p Parser) bestDelimiter(data []byte) rune { - maxLines := 10 - maxBytes := util.Min(len(data), 1e4) - text := string(data[:maxBytes]) - text = quoteRegexp.ReplaceAllLiteralString(text, "") - lines := strings.SplitN(text, "\n", maxLines+1) - lines = lines[:util.Min(maxLines, len(lines))] - - delimiters := []rune{',', ';', '\t', '|'} - bestDelim := delimiters[0] - bestScore := 0.0 - for _, delim := range delimiters { - score := p.scoreDelimiter(lines, delim) - if score > bestScore { - bestScore = score - bestDelim = delim - } - } - - return bestDelim -} - -// scoreDelimiter uses a count & regularity metric to evaluate a delimiter against lines of CSV -func (Parser) scoreDelimiter(lines []string, delim rune) (score float64) { - countTotal := 0 - countLineMax := 0 - linesNotEqual := 0 - - for _, line := range lines { - if len(line) == 0 { - continue - } - - countLine := strings.Count(line, string(delim)) - countTotal += countLine - if countLine != countLineMax { - if countLineMax != 0 { - linesNotEqual++ - } - countLineMax = util.Max(countLine, countLineMax) - } - } - - return float64(countTotal) * (1 - float64(linesNotEqual)/float64(len(lines))) -} |