summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorzeripath <art27@cantab.net>2021-03-15 23:20:05 +0000
committerGitHub <noreply@github.com>2021-03-16 00:20:05 +0100
commited31ddc29a1cae7af193fb0793d129b07da91ce2 (patch)
tree2a1ce0fd4085d4ded6c913f0e7763a7239fc8ffd /modules
parent044cd4d016196e8c7091eee90b7e6f230bba142f (diff)
downloadgitea-ed31ddc29a1cae7af193fb0793d129b07da91ce2.tar.gz
gitea-ed31ddc29a1cae7af193fb0793d129b07da91ce2.zip
Fix several render issues (#14986)
* Fix an issue with panics related to attributes * Wrap goldmark render in a recovery function * Reduce memory use in render emoji * Use a pipe for rendering goldmark - still needs more work and a limiter Signed-off-by: Andrew Thornton <art27@cantab.net> Co-authored-by: Lauris BH <lauris@nix.lv>
Diffstat (limited to 'modules')
-rw-r--r--modules/emoji/emoji.go75
-rw-r--r--modules/emoji/emoji_test.go33
-rw-r--r--modules/markup/html.go36
-rw-r--r--modules/markup/markdown/goldmark.go6
-rw-r--r--modules/markup/markdown/markdown.go102
-rw-r--r--modules/markup/markdown/markdown_test.go20
6 files changed, 211 insertions, 61 deletions
diff --git a/modules/emoji/emoji.go b/modules/emoji/emoji.go
index 169ee0a182..01fb764ce3 100644
--- a/modules/emoji/emoji.go
+++ b/modules/emoji/emoji.go
@@ -30,6 +30,9 @@ var (
// aliasMap provides a map of the alias to its emoji data.
aliasMap map[string]int
+ // emptyReplacer is the string replacer for emoji codes.
+ emptyReplacer *strings.Replacer
+
// codeReplacer is the string replacer for emoji codes.
codeReplacer *strings.Replacer
@@ -49,6 +52,7 @@ func loadMap() {
// process emoji codes and aliases
codePairs := make([]string, 0)
+ emptyPairs := make([]string, 0)
aliasPairs := make([]string, 0)
// sort from largest to small so we match combined emoji first
@@ -64,6 +68,7 @@ func loadMap() {
// setup codes
codeMap[e.Emoji] = i
codePairs = append(codePairs, e.Emoji, ":"+e.Aliases[0]+":")
+ emptyPairs = append(emptyPairs, e.Emoji, e.Emoji)
// setup aliases
for _, a := range e.Aliases {
@@ -77,6 +82,7 @@ func loadMap() {
}
// create replacers
+ emptyReplacer = strings.NewReplacer(emptyPairs...)
codeReplacer = strings.NewReplacer(codePairs...)
aliasReplacer = strings.NewReplacer(aliasPairs...)
})
@@ -127,38 +133,53 @@ func ReplaceAliases(s string) string {
return aliasReplacer.Replace(s)
}
-// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
-func FindEmojiSubmatchIndex(s string) []int {
- loadMap()
- found := make(map[int]int)
- keys := make([]int, 0)
+type rememberSecondWriteWriter struct {
+ pos int
+ idx int
+ end int
+ writecount int
+}
- //see if there are any emoji in string before looking for position of specific ones
- //no performance difference when there is a match but 10x faster when there are not
- if s == ReplaceCodes(s) {
- return nil
+func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
+ n.writecount++
+ if n.writecount == 2 {
+ n.idx = n.pos
+ n.end = n.pos + len(p)
}
+ n.pos += len(p)
+ return len(p), nil
+}
- // get index of first emoji occurrence while also checking for longest combination
- for j := range GemojiData {
- i := strings.Index(s, GemojiData[j].Emoji)
- if i != -1 {
- if _, ok := found[i]; !ok {
- if len(keys) == 0 || i < keys[0] {
- found[i] = j
- keys = []int{i}
- }
- if i == 0 {
- break
- }
- }
- }
+func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
+ n.writecount++
+ if n.writecount == 2 {
+ n.idx = n.pos
+ n.end = n.pos + len(s)
}
+ n.pos += len(s)
+ return len(s), nil
+}
- if len(keys) > 0 {
- index := keys[0]
- return []int{index, index + len(GemojiData[found[index]].Emoji)}
+// FindEmojiSubmatchIndex returns index pair of longest emoji in a string
+func FindEmojiSubmatchIndex(s string) []int {
+ loadMap()
+ secondWriteWriter := rememberSecondWriteWriter{}
+
+ // A faster and clean implementation would copy the trie tree formation in strings.NewReplacer but
+ // we can be lazy here.
+ //
+ // The implementation of strings.Replacer.WriteString is such that the first index of the emoji
+ // submatch is simply the second thing that is written to WriteString in the writer.
+ //
+ // Therefore we can simply take the index of the second write as our first emoji
+ //
+ // FIXME: just copy the trie implementation from strings.NewReplacer
+ _, _ = emptyReplacer.WriteString(&secondWriteWriter, s)
+
+ // if we wrote less than twice then we never "replaced"
+ if secondWriteWriter.writecount < 2 {
+ return nil
}
- return nil
+ return []int{secondWriteWriter.idx, secondWriteWriter.end}
}
diff --git a/modules/emoji/emoji_test.go b/modules/emoji/emoji_test.go
index 3eca3a8d8a..def252896f 100644
--- a/modules/emoji/emoji_test.go
+++ b/modules/emoji/emoji_test.go
@@ -8,6 +8,8 @@ package emoji
import (
"reflect"
"testing"
+
+ "github.com/stretchr/testify/assert"
)
func TestDumpInfo(t *testing.T) {
@@ -65,3 +67,34 @@ func TestReplacers(t *testing.T) {
}
}
}
+
+func TestFindEmojiSubmatchIndex(t *testing.T) {
+ type testcase struct {
+ teststring string
+ expected []int
+ }
+
+ testcases := []testcase{
+ {
+ "\U0001f44d",
+ []int{0, len("\U0001f44d")},
+ },
+ {
+ "\U0001f44d +1 \U0001f44d \U0001f37a",
+ []int{0, 4},
+ },
+ {
+ " \U0001f44d",
+ []int{1, 1 + len("\U0001f44d")},
+ },
+ {
+ string([]byte{'\u0001'}) + "\U0001f44d",
+ []int{1, 1 + len("\U0001f44d")},
+ },
+ }
+
+ for _, kase := range testcases {
+ actual := FindEmojiSubmatchIndex(kase.teststring)
+ assert.Equal(t, kase.expected, actual)
+ }
+}
diff --git a/modules/markup/html.go b/modules/markup/html.go
index b254fd083f..96692752a1 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -313,41 +313,27 @@ func RenderEmoji(
return ctx.postProcess(rawHTML)
}
+var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL][ />]))`)
+var nulCleaner = strings.NewReplacer("\000", "")
+
func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
if ctx.procs == nil {
ctx.procs = defaultProcessors
}
// give a generous extra 50 bytes
- res := make([]byte, 0, len(rawHTML)+50)
-
+ res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
// prepend "<html><body>"
- res = append(res, "<html><body>"...)
+ _, _ = res.WriteString("<html><body>")
// Strip out nuls - they're always invalid
- start := bytes.IndexByte(rawHTML, '\000')
- if start >= 0 {
- res = append(res, rawHTML[:start]...)
- start++
- for start < len(rawHTML) {
- end := bytes.IndexByte(rawHTML[start:], '\000')
- if end < 0 {
- res = append(res, rawHTML[start:]...)
- break
- } else if end > 0 {
- res = append(res, rawHTML[start:start+end]...)
- }
- start += end + 1
- }
- } else {
- res = append(res, rawHTML...)
- }
+ _, _ = nulCleaner.WriteString(res, string(tagCleaner.ReplaceAll(rawHTML, []byte("&lt;$1"))))
// close the tags
- res = append(res, "</body></html>"...)
+ _, _ = res.WriteString("</body></html>")
// parse the HTML
- nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
+ nodes, err := html.ParseFragment(res, nil)
if err != nil {
return nil, &postProcessError{"invalid HTML", err}
}
@@ -384,17 +370,17 @@ func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
// Create buffer in which the data will be placed again. We know that the
// length will be at least that of res; to spare a few alloc+copy, we
// reuse res, resetting its length to 0.
- buf := bytes.NewBuffer(res[:0])
+ res.Reset()
// Render everything to buf.
for _, node := range nodes {
- err = html.Render(buf, node)
+ err = html.Render(res, node)
if err != nil {
return nil, &postProcessError{"error rendering processed HTML", err}
}
}
// Everything done successfully, return parsed data.
- return buf.Bytes(), nil
+ return res.Bytes(), nil
}
func (ctx *postProcessCtx) visitNode(node *html.Node, visitText bool) {
diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go
index 148067f1b0..0b6ea222d9 100644
--- a/modules/markup/markdown/goldmark.go
+++ b/modules/markup/markdown/goldmark.go
@@ -77,6 +77,12 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
header.ID = util.BytesToReadOnlyString(id.([]byte))
}
toc = append(toc, header)
+ } else {
+ for _, attr := range v.Attributes() {
+ if _, ok := attr.Value.([]byte); !ok {
+ v.SetAttribute(attr.Name, []byte(fmt.Sprintf("%v", attr.Value)))
+ }
+ }
}
case *ast.Image:
// Images need two things:
diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go
index 999ae52bb5..93235d77e5 100644
--- a/modules/markup/markdown/markdown.go
+++ b/modules/markup/markdown/markdown.go
@@ -6,7 +6,8 @@
package markdown
import (
- "bytes"
+ "fmt"
+ "io"
"strings"
"sync"
@@ -18,7 +19,7 @@ import (
chromahtml "github.com/alecthomas/chroma/formatters/html"
"github.com/yuin/goldmark"
- "github.com/yuin/goldmark-highlighting"
+ highlighting "github.com/yuin/goldmark-highlighting"
meta "github.com/yuin/goldmark-meta"
"github.com/yuin/goldmark/extension"
"github.com/yuin/goldmark/parser"
@@ -34,6 +35,44 @@ var urlPrefixKey = parser.NewContextKey()
var isWikiKey = parser.NewContextKey()
var renderMetasKey = parser.NewContextKey()
+type closesWithError interface {
+ io.WriteCloser
+ CloseWithError(err error) error
+}
+
+type limitWriter struct {
+ w closesWithError
+ sum int64
+ limit int64
+}
+
+// Write implements the standard Write interface:
+func (l *limitWriter) Write(data []byte) (int, error) {
+ leftToWrite := l.limit - l.sum
+ if leftToWrite < int64(len(data)) {
+ n, err := l.w.Write(data[:leftToWrite])
+ l.sum += int64(n)
+ if err != nil {
+ return n, err
+ }
+ _ = l.w.Close()
+ return n, fmt.Errorf("Rendered content too large - truncating render")
+ }
+ n, err := l.w.Write(data)
+ l.sum += int64(n)
+ return n, err
+}
+
+// Close closes the writer
+func (l *limitWriter) Close() error {
+ return l.w.Close()
+}
+
+// CloseWithError closes the writer
+func (l *limitWriter) CloseWithError(err error) error {
+ return l.w.CloseWithError(err)
+}
+
// NewGiteaParseContext creates a parser.Context with the gitea context set
func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool) parser.Context {
pc := parser.NewContext(parser.WithIDs(newPrefixedIDs()))
@@ -43,8 +82,8 @@ func NewGiteaParseContext(urlPrefix string, metas map[string]string, isWiki bool
return pc
}
-// render renders Markdown to HTML without handling special links.
-func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
+// actualRender renders Markdown to HTML without handling special links.
+func actualRender(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) []byte {
once.Do(func() {
converter = goldmark.New(
goldmark.WithExtensions(extension.Table,
@@ -119,12 +158,57 @@ func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown
})
- pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
- var buf bytes.Buffer
- if err := converter.Convert(giteautil.NormalizeEOL(body), &buf, parser.WithContext(pc)); err != nil {
- log.Error("Unable to render: %v", err)
+ rd, wr := io.Pipe()
+ defer func() {
+ _ = rd.Close()
+ _ = wr.Close()
+ }()
+
+ lw := &limitWriter{
+ w: wr,
+ limit: setting.UI.MaxDisplayFileSize * 3,
}
- return markup.SanitizeReader(&buf).Bytes()
+
+ // FIXME: should we include a timeout that closes the pipe to abort the parser and sanitizer if it takes too long?
+ go func() {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
+ if log.IsDebug() {
+ log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+ }
+ _ = lw.CloseWithError(fmt.Errorf("%v", err))
+ }()
+
+ pc := NewGiteaParseContext(urlPrefix, metas, wikiMarkdown)
+ if err := converter.Convert(giteautil.NormalizeEOL(body), lw, parser.WithContext(pc)); err != nil {
+ log.Error("Unable to render: %v", err)
+ _ = lw.CloseWithError(err)
+ return
+ }
+ _ = lw.Close()
+ }()
+ return markup.SanitizeReader(rd).Bytes()
+}
+
+func render(body []byte, urlPrefix string, metas map[string]string, wikiMarkdown bool) (ret []byte) {
+ defer func() {
+ err := recover()
+ if err == nil {
+ return
+ }
+
+ log.Warn("Unable to render markdown due to panic in goldmark - will return sanitized raw bytes")
+ if log.IsDebug() {
+ log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
+ }
+ ret = markup.SanitizeBytes(body)
+ }()
+ return actualRender(body, urlPrefix, metas, wikiMarkdown)
}
var (
diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go
index 3196beea1f..5b3ef21fb6 100644
--- a/modules/markup/markdown/markdown_test.go
+++ b/modules/markup/markdown/markdown_test.go
@@ -309,6 +309,25 @@ func TestRender_RenderParagraphs(t *testing.T) {
test(t, "A\n\n\nB\nC\n", 2)
}
+func TestMarkdownRenderRaw(t *testing.T) {
+ testcases := [][]byte{
+ { // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6267570554535936
+ 0x2a, 0x20, 0x2d, 0x0a, 0x09, 0x20, 0x60, 0x5b, 0x0a, 0x09, 0x20, 0x60,
+ 0x5b,
+ },
+ { // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6278827345051648
+ 0x2d, 0x20, 0x2d, 0x0d, 0x09, 0x60, 0x0d, 0x09, 0x60,
+ },
+ { // clusterfuzz_testcase_minimized_fuzz_markdown_render_raw_6016973788020736[] = {
+ 0x7b, 0x63, 0x6c, 0x61, 0x73, 0x73, 0x3d, 0x35, 0x7d, 0x0a, 0x3d,
+ },
+ }
+
+ for _, testcase := range testcases {
+ _ = RenderRaw(testcase, "", false)
+ }
+}
+
func TestRenderSiblingImages_Issue12925(t *testing.T) {
testcase := `![image1](/image1)
![image2](/image2)
@@ -318,4 +337,5 @@ func TestRenderSiblingImages_Issue12925(t *testing.T) {
`
res := string(RenderRaw([]byte(testcase), "", false))
assert.Equal(t, expected, res)
+
}