source.dussan.org Git - gitea.git/commitdiff
Fix several render issues (#14986)
authorzeripath <art27@cantab.net>
Mon, 15 Mar 2021 23:20:05 +0000 (23:20 +0000)
committerGitHub <noreply@github.com>
Mon, 15 Mar 2021 23:20:05 +0000 (00:20 +0100)
* Fix an issue with panics related to attributes
* Wrap goldmark render in a recovery function
* Reduce memory use in render emoji
* Use a pipe for rendering goldmark - still needs more work and a limiter

Signed-off-by: Andrew Thornton <art27@cantab.net>
Co-authored-by: Lauris BH <lauris@nix.lv>
modules/emoji/emoji.go
modules/emoji/emoji_test.go
modules/markup/html.go
modules/markup/markdown/goldmark.go
modules/markup/markdown/markdown.go
modules/markup/markdown/markdown_test.go

index 169ee0a182d4d7f7ec691137facf8814b8dd1ac7..01fb764ce368ea1492c7c3cf24e9f1c65363f4fd 100644 (file)
@@ -30,6 +30,9 @@ var (
        // aliasMap provides a map of the alias to its emoji data.
        aliasMap map[string]int
 
+       // emptyReplacer is the string replacer that maps each emoji to itself (used to locate emoji in a string).
+       emptyReplacer *strings.Replacer
+
        // codeReplacer is the string replacer for emoji codes.
        codeReplacer *strings.Replacer
 
@@ -49,6 +52,7 @@ func loadMap() {
 
                // process emoji codes and aliases
                codePairs := make([]string, 0)
+               emptyPairs := make([]string, 0)
                aliasPairs := make([]string, 0)
 
                // sort from largest to small so we match combined emoji first
@@ -64,6 +68,7 @@ func loadMap() {
                        // setup codes
                        codeMap[e.Emoji] = i
                        codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
+                       emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
 
                        // setup aliases
                        for _, a := range e.Aliases {
@@ -77,6 +82,7 @@ func loadMap() {
                }
 
                // create replacers
+               emptyReplacer = strings.NewReplacer(emptyPairs...)
                codeReplacer = strings.NewReplacer(codePairs...)
                aliasReplacer = strings.NewReplacer(aliasPairs...)
        })
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
        return aliasReplacer.Replace(s)
 }
 
-// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
-func FindEmojiSubmatchIndex(s string) []int {
-       loadMap()
-       found := make(map[int]int)
-       keys := make([]int, 0)
+type rememberSecondWriteWriter struct {
+       pos        int
+       idx        int
+       end        int
+       writecount int
+}
 
-       //see if there are any emoji in string before looking for position of specific ones
-       //no performance difference when there is a match but 10x faster when there are not
-       if s == ReplaceCodes(s) {
-               return nil
+func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
+       n.writecount++
+       if n.writecount == 2 {
+               n.idx = n.pos
+               n.end = n.pos + len(p)
        }
+       n.pos += len(p)
+       return len(p), nil
+}
 
-       // get index of first emoji occurrence while also checking for longest combination
-       for j := range GemojiData {
-               i := strings.Index(s, GemojiData[j].Emoji)
-               if i != -1 {
-                       if _, ok := found[i]; !ok {
-                               if len(keys) == 0 || i < keys[0] {
-                                       found[i] = j
-                                       keys = []int{i}
-                               }
-                               if i == 0 {
-                                       break
-                               }
-                       }
-               }
+func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
+       n.writecount++
+       if n.writecount == 2 {
+               n.idx = n.pos
+               n.end = n.pos + len(s)
        }
+       n.pos += len(s)
+       return len(s), nil
+}
 
-       if len(keys) > 0 {
-               index := keys[0]
-               return []int{index, index + len(GemojiData[found[index]].Emoji)}
+// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
+func FindEmojiSubmatchIndex(s string) []int {
+       loadMap()
+       secondWriteWriter := rememberSecondWriteWriter{}
+
+       // A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
+       // we can be lazy here.
+       //
+       // The implementation of strings.Replacer.WriteString is such that the first index of the emoji
+       // submatch is simply the second thing that is written to WriteString in the writer.
+       //
+       // Therefore we can simply take the index of the second write as our first emoji
+       //
+       // FIXME: just copy the trie implementation from strings.NewReplacer
+       _, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
+
+       // if we wrote less than twice then we never "replaced"
+       if secondWriteWriter.writecount < 2 {
+               return nil
        }
 
-       return nil
+       return []int{secondWriteWriter.idx, secondWriteWriter.end}
 }
index 3eca3a8d8a65f080596ad0d56b9d9343276100fc..def252896fd4053161fb62f13a99700bec12bdb4 100644 (file)
@@ -8,6 +8,8 @@ package emoji
 import (
        "reflect"
        "testing"
+
+       "github.com/stretchr/testify/assert"
 )
 
 func TestDumpInfo(t *testing.T) {
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
                }
        }
 }
+
+func TestFindEmojiSubmatchIndex(t *testing.T) {
+       type testcase struct {
+               teststring string
+               expected   []int
+       }
+
+       testcases := []testcase{
+               {
+                       "\U0001f44d",
+                       []int{0, len("\U0001f44d")},
+               },
+               {
+                       "\U0001f44d +1 \U0001f44d \U0001f37a",
+                       []int{0, 4},
+               },
+               {
+                       " \U0001f44d",
+                       []int{1, 1 + len("\U0001f44d")},
+               },
+               {
+                       string([]byte{'\u0001'}) + "\U0001f44d",
+                       []int{1, 1 + len("\U0001f44d")},
+               },
+       }
+
+       for _, kase := range testcases {
+               actual := FindEmojiSubmatchIndex(kase.teststring)
+               assert.Equal(t, kase.expected, actual)
+       }
+}
index b254fd083f6b3d48e3f2eb440124ce22a7bed9e9..96692752a137e4646002ab9ddc64ca00b8f95211 100644 (file)
@@ -313,41 +313,27 @@ func RenderEmoji(
        return ctx.postProcess(rawHTML)
 }
 
+var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
+var nulCleaner = strings.NewReplacer("\000", "")
+
 func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
        if ctx.procs == nil {
                ctx.procs = defaultProcessors
        }
 
        // give a generous extra 50 bytes
-       res := make([]byte, 0, len(rawHTML)+50)
-
+       res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
        // prepend "<html><body>"
-       res = append(res, "<html><body>"...)
+       _, _ = res.WriteString("<html><body>")
 
        // Strip out nuls - they're always invalid
-       start := bytes.IndexByte(rawHTML, '\000')
-       if start >= 0 {
-               res = append(res, rawHTML[:start]...)
-               start++
-               for start < len(rawHTML) {
-                       end := bytes.IndexByte(rawHTML[start:], '\000')
-                       if end < 0 {
-                               res = append(res, rawHTML[start:]...)
-                               break
-                       } else if end > 0 {
-                               res = append(res, rawHTML[start:start+end]...)
-                       }
-                       start += end + 1
-               }
-       } else {
-               res = append(res, rawHTML...)
-       }
+       _, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))
 
        // close the tags
-       res = append(res, "</body></html>"...)
+       _, _ = res.WriteString("</body></html>")
 
        // parse the HTML
-       nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
+       nodes, err := html.ParseFragment(res, nil)
        if err != nil {
                return nil, &postProcessError{"invalid HTML", err}
        }
@@ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
        // Create buffer in which the data will be placed again. We know that the
        // length will be at least that of res; to spare a few alloc+copy, we
        // reuse res, resetting its length to 0.
-       buf := bytes.NewBuffer(res[:0])
+       res.Reset()
        // Render everything to buf.
        for _, node := range nodes {
-               err = html.Render(buf, node)
+               err = html.Render(res, node)
                if err != nil {
                        return nil, &postProcessError{"error rendering processed HTML", err}
                }
        }
 
        // Everything done successfully, return parsed data.
-       return buf.Bytes(), nil
+       return res.Bytes(), nil
 }
 
 func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
index 148067f1b07d62ac9fc179a3aaa3b25064ff0a20..0b6ea222d95a73fd9b3fa994b5562acd5bfe8b57 100644 (file)
@@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
                                        header.ID = util.BytesToReadOnlyString(id.([]byte))
                                }
                                toc = append(toc, header)
+                       } else {
+                               for _, attr := range v.Attributes() {
+                                       if _, ok := attr.Value.([]byte); !ok {
+                                               v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
+                                       }
+                               }
                        }
                case *ast.Image:
                        // Images need two things:
index 999ae52bb5299768a45ef0204fa029128004740e..93235d77e5afe830783f959a0b880395436b2b8b 100644 (file)
@@ -6,7 +6,8 @@
 package markdown
 
 import (
-       "bytes"
+       "fmt"
+       "io"
        "strings"
        "sync"
 
@@ -18,7 +19,7 @@ import (
 
        chromahtml "github.com/alecthomas/chroma/formatters/html"
        "github.com/yuin/goldmark"
-       "github.com/yuin/goldmark-highlighting"
+       highlighting "github.com/yuin/goldmark-highlighting"
        meta "github.com/yuin/goldmark-meta"
        "github.com/yuin/goldmark/extension"
        "github.com/yuin/goldmark/parser"
@@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
 var isWikiKey = parser.NewContextKey()
 var renderMetasKey = parser.NewContextKey()
 
+type closesWithError interface {
+       io.WriteCloser
+       CloseWithError(err error) error
+}
+
+type limitWriter struct {
+       w     closesWithError
+       sum   int64
+       limit int64
+}
+
+// Write implements the standard Write interface:
+func (l *limitWriter) Write(data []byte) (int, error) {
+       leftToWrite := l.limit - l.sum
+       if leftToWrite < int64(len(data)) {
+               n, err := l.w.Write(data[:leftToWrite])
+               l.sum += int64(n)
+               if err != nil {
+                       return n, err
+               }
+               _ = l.w.Close()
+               return n, fmt.Errorf("Rendered content too large - truncating render")
+       }
+       n, err := l.w.Write(data)
+       l.sum += int64(n)
+       return n, err
+}
+
+// Close closes the writer
+func (l *limitWriter) Close() error {
+       return l.w.Close()
+}
+
+// CloseWithError closes the writer
+func (l *limitWriter) CloseWithError(err error) error {
+       return l.w.CloseWithError(err)
+}
+
 // NewGiteaParseContext creates a parser.Context with the gitea context set
 func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
        pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
@@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
        return pc
 }
 
-// render renders Markdown to HTML without handling special links.
-func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
+// actualRender renders Markdown to HTML without handling special links.
+func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
        once.Do(func() {
                converter = goldmark.New(
                        goldmark.WithExtensions(extension.Table,
@@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
 
        })
 
-       pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
-       var buf bytes.Buffer
-       if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
-               log.Error("Unable to render: %v", err)
+       rd, wr := io.Pipe()
+       defer func() {
+               _ = rd.Close()
+               _ = wr.Close()
+       }()
+
+       lw := &limitWriter{
+               w:     wr,
+               limit: setting.UI.MaxDisplayFileSize * 3,
        }
-       return markup.SanitizeReader(&buf).Bytes()
+
+       // FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
+       go func() {
+               defer func() {
+                       err := recover()
+                       if err == nil {
+                               return
+                       }
+
+                       log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
+                       if log.IsDebug() {
+                               log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+                       }
+                       _ = lw.CloseWithError(fmt.Errorf("%v", err))
+               }()
+
+               pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
+               if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
+                       log.Error("Unable to render: %v", err)
+                       _ = lw.CloseWithError(err)
+                       return
+               }
+               _ = lw.Close()
+       }()
+       return markup.SanitizeReader(rd).Bytes()
+}
+
+func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
+       defer func() {
+               err := recover()
+               if err == nil {
+                       return
+               }
+
+               log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
+               if log.IsDebug() {
+                       log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+               }
+               ret = markup.SanitizeBytes(body)
+       }()
+       return actualRender(body, urlPrefix, metas, wikiMarkdown)
 }
 
 var (
index 3196beea1f945d64f6a64a6e813bc3a249ac4168..5b3ef21fb66b9c0724c86141d0ec4983e4d1db83 100644 (file)
@@ -309,6 +309,25 @@ func TestRender_RenderParagraphs(t *testing.T) {
        test(t, "A\n\n\nB\nC\n", 2)
 }
 
+func TestMarkdownRenderRaw(t *testing.T) {
+       testcases := [][]byte{
+               { // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
+                       0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
+                       0x5b,
+               },
+               { // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
+                       0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
+               },
+               { // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
+                       0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
+               },
+       }
+
+       for _, testcase := range testcases {
+               _ = RenderRaw(testcase, "", false)
+       }
+}
+
 func TestRenderSiblingImages_Issue12925(t *testing.T) {
        testcase := `![image1](/image1)
 ![image2](/image2)
@@ -318,4 +337,5 @@ func TestRenderSiblingImages_Issue12925(t *testing.T) {
 `
        res := string(RenderRaw([]byte(testcase), "", false))
        assert.Equal(t, expected, res)
+
 }