diff options
author | Henrique Pimentel <66185935+HenriquerPimentel@users.noreply.github.com> | 2024-06-06 09:06:59 +0100 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-06-06 16:06:59 +0800 |
commit | f7125ab61aaa02fd4c7ab0062a2dc9a57726e2ec (patch) | |
tree | 99a05104c9092e23f5d73ac4af702b2b80e862ae /modules/markup | |
parent | 24dace8f76a8166d48203ed41fd1c3d66ace715c (diff) | |
download | gitea-f7125ab61aaa02fd4c7ab0062a2dc9a57726e2ec.tar.gz gitea-f7125ab61aaa02fd4c7ab0062a2dc9a57726e2ec.zip |
Add `MAX_ROWS` option for CSV rendering (#30268)
This change adds a new config variable, MAX_ROWS, which sets the maximum
number of rows to render for CSV files (0 for no limit), and rewrites the
Render function for CSV files in the markup module.
The render function now reads the file only once, using MAX_FILE_SIZE+1
as the reader limit and MAX_ROWS as the row limit. When the file is larger
than MAX_FILE_SIZE or has more rows than MAX_ROWS, it renders only up to
the limit and displays a user-friendly warning, in the user's language,
that the rendered data is incomplete.
---
Previously, when a CSV file was larger than the limit, the render
function could no longer render it as a table and fell back to emitting
the escaped raw content inside a <pre> block. The file was also read
multiple times: once to determine its size, and again to render or
pre-render it.
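The warning (fallback English text, shown when no translation is available): "The file is too large to be shown. View Raw", where "View Raw" links to the raw file.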
Diffstat (limited to 'modules/markup')
-rw-r--r-- | modules/markup/csv/csv.go | 94 | ||||
-rw-r--r-- | modules/markup/csv/csv_test.go | 10 |
2 files changed, 35 insertions, 69 deletions
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 1dd26eb8ac..3d952b0de4 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -5,8 +5,6 @@ package markup import ( "bufio" - "bytes" - "fmt" "html" "io" "regexp" @@ -15,6 +13,8 @@ import ( "code.gitea.io/gitea/modules/csv" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" + "code.gitea.io/gitea/modules/translation" + "code.gitea.io/gitea/modules/util" ) func init() { @@ -81,86 +81,38 @@ func writeField(w io.Writer, element, class, field string) error { func (r Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { tmpBlock := bufio.NewWriter(output) maxSize := setting.UI.CSV.MaxFileSize + maxRows := setting.UI.CSV.MaxRows - if maxSize == 0 { - return r.tableRender(ctx, input, tmpBlock) + if maxSize != 0 { + input = io.LimitReader(input, maxSize+1) } - rawBytes, err := io.ReadAll(io.LimitReader(input, maxSize+1)) - if err != nil { - return err - } - - if int64(len(rawBytes)) <= maxSize { - return r.tableRender(ctx, bytes.NewReader(rawBytes), tmpBlock) - } - return r.fallbackRender(io.MultiReader(bytes.NewReader(rawBytes), input), tmpBlock) -} - -func (Renderer) fallbackRender(input io.Reader, tmpBlock *bufio.Writer) error { - _, err := tmpBlock.WriteString("<pre>") - if err != nil { - return err - } - - scan := bufio.NewScanner(input) - scan.Split(bufio.ScanRunes) - for scan.Scan() { - switch scan.Text() { - case `&`: - _, err = tmpBlock.WriteString("&") - case `'`: - _, err = tmpBlock.WriteString("'") // "'" is shorter than "'" and apos was not in HTML until HTML5. - case `<`: - _, err = tmpBlock.WriteString("<") - case `>`: - _, err = tmpBlock.WriteString(">") - case `"`: - _, err = tmpBlock.WriteString(""") // """ is shorter than """. 
- default: - _, err = tmpBlock.Write(scan.Bytes()) - } - if err != nil { - return err - } - } - if err = scan.Err(); err != nil { - return fmt.Errorf("fallbackRender scan: %w", err) - } - - _, err = tmpBlock.WriteString("</pre>") - if err != nil { - return err - } - return tmpBlock.Flush() -} - -func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock *bufio.Writer) error { rd, err := csv.CreateReaderAndDetermineDelimiter(ctx, input) if err != nil { return err } - if _, err := tmpBlock.WriteString(`<table class="data-table">`); err != nil { return err } - row := 1 + + row := 0 for { fields, err := rd.Read() - if err == io.EOF { + if err == io.EOF || (row >= maxRows && maxRows != 0) { break } if err != nil { continue } + if _, err := tmpBlock.WriteString("<tr>"); err != nil { return err } element := "td" - if row == 1 { + if row == 0 { element = "th" } - if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row)); err != nil { + if err := writeField(tmpBlock, element, "line-num", strconv.Itoa(row+1)); err != nil { return err } for _, field := range fields { @@ -174,8 +126,32 @@ func (Renderer) tableRender(ctx *markup.RenderContext, input io.Reader, tmpBlock row++ } + if _, err = tmpBlock.WriteString("</table>"); err != nil { return err } + + // Check if maxRows or maxSize is reached, and if true, warn. + if (row >= maxRows && maxRows != 0) || (rd.InputOffset() >= maxSize && maxSize != 0) { + warn := `<table class="data-table"><tr><td>` + rawLink := ` <a href="` + ctx.Links.RawLink() + `/` + util.PathEscapeSegments(ctx.RelativePath) + `">` + + // Try to get the user translation + if locale, ok := ctx.Ctx.Value(translation.ContextKey).(translation.Locale); ok { + warn += locale.TrString("repo.file_too_large") + rawLink += locale.TrString("repo.file_view_raw") + } else { + warn += "The file is too large to be shown." 
+ rawLink += "View Raw" + } + + warn += rawLink + `</a></td></tr></table>` + + // Write the HTML string to the output + if _, err := tmpBlock.WriteString(warn); err != nil { + return err + } + } + return tmpBlock.Flush() } diff --git a/modules/markup/csv/csv_test.go b/modules/markup/csv/csv_test.go index 3d12be477c..8c07184b21 100644 --- a/modules/markup/csv/csv_test.go +++ b/modules/markup/csv/csv_test.go @@ -4,8 +4,6 @@ package markup import ( - "bufio" - "bytes" "strings" "testing" @@ -31,12 +29,4 @@ func TestRenderCSV(t *testing.T) { assert.NoError(t, err) assert.EqualValues(t, v, buf.String()) } - - t.Run("fallbackRender", func(t *testing.T) { - var buf bytes.Buffer - err := render.fallbackRender(strings.NewReader("1,<a>\n2,<b>"), bufio.NewWriter(&buf)) - assert.NoError(t, err) - want := "<pre>1,<a>\n2,<b></pre>" - assert.Equal(t, want, buf.String()) - }) } |