diff options
author | wxiaoguang <wxiaoguang@gmail.com> | 2024-11-18 13:25:42 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2024-11-18 13:25:42 +0800 |
commit | 8a20fba8eb1ac01a0de9355eff84af69d4636d96 (patch) | |
tree | bf37fa52f688a31b6cf8d4879438020b108235cc | |
parent | 4f879a00df029e09b40f64bf8de0572704766115 (diff) | |
download | gitea-8a20fba8eb1ac01a0de9355eff84af69d4636d96.tar.gz gitea-8a20fba8eb1ac01a0de9355eff84af69d4636d96.zip |
Refactor markup render system (#32533)
Remove unmaintainable sanitizer rules. No need to add special "class"
regexp rules anymore, use RenderInternal.SafeAttr instead, more details
(and examples) are in the tests
42 files changed, 567 insertions, 507 deletions
diff --git a/modules/html/html.go b/modules/html/html.go deleted file mode 100644 index b1ebd584c6..0000000000 --- a/modules/html/html.go +++ /dev/null @@ -1,25 +0,0 @@ -// Copyright 2022 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package html - -// ParseSizeAndClass get size and class from string with default values -// If present, "others" expects the new size first and then the classes to use -func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) { - size := defaultSize - if len(others) >= 1 { - if v, ok := others[0].(int); ok && v != 0 { - size = v - } - } - class := defaultClass - if len(others) >= 2 { - if v, ok := others[1].(string); ok && v != "" { - if class != "" { - class += " " - } - class += v - } - } - return size, class -} diff --git a/modules/htmlutil/html.go b/modules/htmlutil/html.go new file mode 100644 index 0000000000..9b5f5a92d8 --- /dev/null +++ b/modules/htmlutil/html.go @@ -0,0 +1,48 @@ +// Copyright 2022 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package htmlutil + +import ( + "fmt" + "html/template" + "slices" +) + +// ParseSizeAndClass get size and class from string with default values +// If present, "others" expects the new size first and then the classes to use +func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) { + size := defaultSize + if len(others) >= 1 { + if v, ok := others[0].(int); ok && v != 0 { + size = v + } + } + class := defaultClass + if len(others) >= 2 { + if v, ok := others[1].(string); ok && v != "" { + if class != "" { + class += " " + } + class += v + } + } + return size, class +} + +func HTMLFormat(s string, rawArgs ...any) template.HTML { + args := slices.Clone(rawArgs) + for i, v := range args { + switch v := v.(type) { + case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML: + // for most basic types (including template.HTML which is safe), just do nothing and use it + case string: + args[i] = template.HTMLEscapeString(v) + case fmt.Stringer: + args[i] = template.HTMLEscapeString(v.String()) + default: + args[i] = template.HTMLEscapeString(fmt.Sprint(v)) + } + } + return template.HTML(fmt.Sprintf(s, args...)) +} diff --git a/modules/htmlutil/html_test.go b/modules/htmlutil/html_test.go new file mode 100644 index 0000000000..5ff05d75b3 --- /dev/null +++ b/modules/htmlutil/html_test.go @@ -0,0 +1,15 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package htmlutil + +import ( + "html/template" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestHTMLFormat(t *testing.T) { + assert.Equal(t, template.HTML("<a>< < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1)) +} diff --git a/modules/markup/asciicast/asciicast.go b/modules/markup/asciicast/asciicast.go index 0678062340..e92b78a4bc 100644 --- a/modules/markup/asciicast/asciicast.go +++ b/modules/markup/asciicast/asciicast.go @@ -7,7 +7,6 @@ import ( "fmt" "io" "net/url" - "regexp" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" @@ -38,10 +37,7 @@ const ( // SanitizerRules implements markup.Renderer func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { - return []setting.MarkupSanitizerRule{ - {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(playerClassName)}, - {Element: "div", AllowAttr: playerSrcAttr}, - } + return []setting.MarkupSanitizerRule{{Element: "div", AllowAttr: playerSrcAttr}} } // Render implements markup.Renderer @@ -53,12 +49,5 @@ func (Renderer) Render(ctx *markup.RenderContext, _ io.Reader, output io.Writer) ctx.Metas["BranchNameSubURL"], url.PathEscape(ctx.RelativePath), ) - - _, err := io.WriteString(output, fmt.Sprintf( - `<div class="%s" %s="%s"></div>`, - playerClassName, - playerSrcAttr, - rawURL, - )) - return err + return ctx.RenderInternal.FormatWithSafeAttrs(output, `<div class="%s" %s="%s"></div>`, playerClassName, playerSrcAttr, rawURL) } diff --git a/modules/markup/common/html.go b/modules/markup/common/html.go deleted file mode 100644 index 5658839c6f..0000000000 --- a/modules/markup/common/html.go +++ /dev/null @@ -1,16 +0,0 @@ -// Copyright 2019 The Gitea Authors. All rights reserved. -// SPDX-License-Identifier: MIT - -package common - -import ( - "mvdan.cc/xurls/v2" -) - -// NOTE: All below regex matching do not perform any extra validation. -// Thus a link is produced even if the linked entity does not exist. -// While fast, this is also incorrect and lead to false positives. -// TODO: fix invalid linking issue - -// LinkRegex is a regexp matching a valid link -var LinkRegex, _ = xurls.StrictMatchingScheme("https?://") diff --git a/modules/markup/common/linkify.go b/modules/markup/common/linkify.go index f84680205e..be6ab22b55 100644 --- a/modules/markup/common/linkify.go +++ b/modules/markup/common/linkify.go @@ -9,15 +9,27 @@ package common import ( "bytes" "regexp" + "sync" "github.com/yuin/goldmark" "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/text" "github.com/yuin/goldmark/util" + "mvdan.cc/xurls/v2" ) -var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) +type GlobalVarsType struct { + wwwURLRegxp *regexp.Regexp + LinkRegex *regexp.Regexp // fast matching a URL link, no any extra validation. +} + +var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType { + v := &GlobalVarsType{} + v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`) + v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://") + return v +}) type linkifyParser struct{} @@ -60,10 +72,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont var protocol []byte typ := ast.AutoLinkURL if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) { - m = LinkRegex.FindSubmatchIndex(line) + m = GlobalVars().LinkRegex.FindSubmatchIndex(line) } if m == nil && bytes.HasPrefix(line, domainWWW) { - m = wwwURLRegxp.FindSubmatchIndex(line) + m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line) protocol = []byte("http") } if m != nil { diff --git a/modules/markup/console/console.go b/modules/markup/console/console.go index d991527b80..06f3acfa68 100644 --- a/modules/markup/console/console.go +++ b/modules/markup/console/console.go @@ -6,8 +6,7 @@ package console import ( "bytes" "io" - "path/filepath" - "regexp" + "path" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" @@ -36,7 +35,7 @@ func (Renderer) Extensions() []string { // SanitizerRules implements markup.Renderer func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{ - {Element: "span", AllowAttr: "class", Regexp: regexp.MustCompile(`^term-((fg[ix]?|bg)\d+|container)$`)}, + {Element: "span", AllowAttr: "class", Regexp: `^term-((fg[ix]?|bg)\d+|container)$`}, } } @@ -46,7 +45,7 @@ func (Renderer) CanRender(filename string, input io.Reader) bool { if err != nil { return false } - if enry.GetLanguage(filepath.Base(filename), buf) != enry.OtherLanguage { + if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage { return false } return bytes.ContainsRune(buf, '\x1b') diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 3d952b0de4..a3e6bbaac6 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -7,7 +7,6 @@ import ( "bufio" "html" "io" - "regexp" "strconv" "code.gitea.io/gitea/modules/csv" @@ -37,9 +36,9 @@ func (Renderer) Extensions() []string { // SanitizerRules implements markup.Renderer func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { return []setting.MarkupSanitizerRule{ - {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, - {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, - {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + {Element: "table", AllowAttr: "class", Regexp: `^data-table$`}, + {Element: "th", AllowAttr: "class", Regexp: `^line-num$`}, + {Element: "td", AllowAttr: "class", Regexp: `^line-num$`}, } } @@ -51,13 +50,13 @@ func writeField(w io.Writer, element, class, field string) error { return err } if len(class) > 0 { - if _, err := io.WriteString(w, " class=\""); err != nil { + if _, err := io.WriteString(w, ` class="`); err != nil { return err } if _, err := io.WriteString(w, class); err != nil { return err } - if _, err := io.WriteString(w, "\""); err != nil { + if _, err := io.WriteString(w, `"`); err != nil { return err } } diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 122517ed11..d28dc9fa5d 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -102,7 +102,7 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io. _, err = io.Copy(f, input) if err != nil { - f.Close() + _ = f.Close() return fmt.Errorf("%s write data to temp file when rendering %s failed: %w", p.Name(), p.Command, err) } @@ -113,10 +113,9 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io. args = append(args, f.Name()) } - if ctx == nil || ctx.Ctx == nil { - if ctx == nil { - log.Warn("RenderContext not provided defaulting to empty ctx") - ctx = &markup.RenderContext{} + if ctx.Ctx == nil { + if !setting.IsProd || setting.IsInTesting { + panic("RenderContext did not provide context") } log.Warn("RenderContext did not provide context, defaulting to Shutdown context") ctx.Ctx = graceful.GetManager().ShutdownContext() diff --git a/modules/markup/html.go b/modules/markup/html.go index 16ccd4b406..e8799c401c 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -25,9 +25,6 @@ const ( IssueNameStyleRegexp = "regexp" ) -// CSS class for action keywords (e.g. "closes: #1") -const keywordClass = "issue-keyword" - type globalVarsType struct { hashCurrentPattern *regexp.Regexp shortLinkPattern *regexp.Regexp @@ -39,6 +36,7 @@ type globalVarsType struct { emojiShortCodeRegex *regexp.Regexp issueFullPattern *regexp.Regexp filesChangedFullPattern *regexp.Regexp + codePreviewPattern *regexp.Regexp tagCleaner *regexp.Regexp nulCleaner *strings.Replacer @@ -88,6 +86,9 @@ var globalVars = sync.OnceValue[*globalVarsType](func() *globalVarsType { // example: https://domain/org/repo/pulls/27/files#hash v.filesChangedFullPattern = regexp.MustCompile(`https?://(?:\S+/)[\w_.-]+/[\w_.-]+/pulls/((?:\w{1,10}-)?[1-9][0-9]*)/files([\?|#](\S+)?)?\b`) + // codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" + v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) + v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`) v.nulCleaner = strings.NewReplacer("\000", "") return v @@ -129,7 +130,7 @@ func CustomLinkURLSchemes(schemes []string) { } withAuth = append(withAuth, s) } - common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) + common.GlobalVars().LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|")) } type postProcessError struct { @@ -164,11 +165,7 @@ var defaultProcessors = []processor{ // emails with HTML links, parsing shortlinks in the format of [[Link]], like // MediaWiki, linking issues in the format #ID, and mentions in the format // @user, and others. -func PostProcess( - ctx *RenderContext, - input io.Reader, - output io.Writer, -) error { +func PostProcess(ctx *RenderContext, input io.Reader, output io.Writer) error { return postProcess(ctx, defaultProcessors, input, output) } @@ -189,10 +186,7 @@ var commitMessageProcessors = []processor{ // RenderCommitMessage will use the same logic as PostProcess, but will disable // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is // set, which changes every text node into a link to the passed default link. -func RenderCommitMessage( - ctx *RenderContext, - content string, -) (string, error) { +func RenderCommitMessage(ctx *RenderContext, content string) (string, error) { procs := commitMessageProcessors return renderProcessString(ctx, procs, content) } @@ -219,10 +213,7 @@ var emojiProcessors = []processor{ // RenderCommitMessage, but will disable the shortLinkProcessor and // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set, // which changes every text node into a link to the passed default link. -func RenderCommitMessageSubject( - ctx *RenderContext, - defaultLink, content string, -) (string, error) { +func RenderCommitMessageSubject(ctx *RenderContext, defaultLink, content string) (string, error) { procs := slices.Clone(commitMessageSubjectProcessors) procs = append(procs, func(ctx *RenderContext, node *html.Node) { ch := &html.Node{Parent: node, Type: html.TextNode, Data: node.Data} @@ -236,10 +227,7 @@ func RenderCommitMessageSubject( } // RenderIssueTitle to process title on individual issue/pull page -func RenderIssueTitle( - ctx *RenderContext, - title string, -) (string, error) { +func RenderIssueTitle(ctx *RenderContext, title string) (string, error) { // do not render other issue/commit links in an issue's title - which in most cases is already a link. return renderProcessString(ctx, []processor{ emojiShortCodeProcessor, @@ -257,10 +245,7 @@ func renderProcessString(ctx *RenderContext, procs []processor, content string) // RenderDescriptionHTML will use similar logic as PostProcess, but will // use a single special linkProcessor. -func RenderDescriptionHTML( - ctx *RenderContext, - content string, -) (string, error) { +func RenderDescriptionHTML(ctx *RenderContext, content string) (string, error) { return renderProcessString(ctx, []processor{ descriptionLinkProcessor, emojiShortCodeProcessor, @@ -270,10 +255,7 @@ func RenderDescriptionHTML( // RenderEmoji for when we want to just process emoji and shortcodes // in various places it isn't already run through the normal markdown processor -func RenderEmoji( - ctx *RenderContext, - content string, -) (string, error) { +func RenderEmoji(ctx *RenderContext, content string) (string, error) { return renderProcessString(ctx, emojiProcessors, content) } @@ -333,6 +315,17 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output return nil } +func isEmojiNode(node *html.Node) bool { + if node.Type == html.ElementNode && node.Data == atom.Span.String() { + for _, attr := range node.Attr { + if (attr.Key == "class" || attr.Key == "data-attr-class") && strings.Contains(attr.Val, "emoji") { + return true + } + } + } + return false +} + func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node { // Add user-content- to IDs and "#" links if they don't already have them for idx, attr := range node.Attr { @@ -346,47 +339,27 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix { node.Attr[idx].Val = "#user-content-" + val } - - if attr.Key == "class" && attr.Val == "emoji" { - procs = nil - } } switch node.Type { case html.TextNode: - processTextNodes(ctx, procs, node) + for _, proc := range procs { + proc(ctx, node) // it might add siblings + } + case html.ElementNode: - if node.Data == "code" || node.Data == "pre" { - // ignore code and pre nodes + if isEmojiNode(node) { + // TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span" + // if we don't stop it, it will go into the TextNode again and create an infinite recursion return node.NextSibling + } else if node.Data == "code" || node.Data == "pre" { + return node.NextSibling // ignore code and pre nodes } else if node.Data == "img" { return visitNodeImg(ctx, node) } else if node.Data == "video" { return visitNodeVideo(ctx, node) } else if node.Data == "a" { - // Restrict text in links to emojis - procs = emojiProcessors - } else if node.Data == "i" { - for _, attr := range node.Attr { - if attr.Key != "class" { - continue - } - classes := strings.Split(attr.Val, " ") - for i, class := range classes { - if class == "icon" { - classes[0], classes[i] = classes[i], classes[0] - attr.Val = strings.Join(classes, " ") - - // Remove all children of icons - child := node.FirstChild - for child != nil { - node.RemoveChild(child) - child = node.FirstChild - } - break - } - } - } + procs = emojiProcessors // Restrict text in links to emojis } for n := node.FirstChild; n != nil; { n = visitNode(ctx, procs, n) @@ -396,22 +369,17 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod return node.NextSibling } -// processTextNodes runs the passed node through various processors, in order to handle -// all kinds of special links handled by the post-processing. -func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) { - for _, p := range procs { - p(ctx, node) - } -} - // createKeyword() renders a highlighted version of an action keyword -func createKeyword(content string) *html.Node { +func createKeyword(ctx *RenderContext, content string) *html.Node { + // CSS class for action keywords (e.g. "closes: #1") + const keywordClass = "issue-keyword" + span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass}) + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", keywordClass)) text := &html.Node{ Type: html.TextNode, @@ -422,7 +390,7 @@ func createKeyword(content string) *html.Node { return span } -func createLink(href, content, class string) *html.Node { +func createLink(ctx *RenderContext, href, content, class string) *html.Node { a := &html.Node{ Type: html.ElementNode, Data: atom.A.String(), @@ -432,7 +400,7 @@ func createLink(href, content, class string) *html.Node { a.Attr = append(a.Attr, html.Attribute{Key: "data-markdown-generated-content"}) } if class != "" { - a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class}) + a.Attr = append(a.Attr, ctx.RenderInternal.NodeSafeAttr("class", class)) } text := &html.Node{ diff --git a/modules/markup/html_codepreview.go b/modules/markup/html_codepreview.go index 5ab9290b3e..5c88481d76 100644 --- a/modules/markup/html_codepreview.go +++ b/modules/markup/html_codepreview.go @@ -6,7 +6,6 @@ package markup import ( "html/template" "net/url" - "regexp" "strconv" "strings" @@ -16,9 +15,6 @@ import ( "golang.org/x/net/html" ) -// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" -var codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) - type RenderCodePreviewOptions struct { FullURL string OwnerName string @@ -30,7 +26,7 @@ type RenderCodePreviewOptions struct { } func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosStop int, htm template.HTML, err error) { - m := codePreviewPattern.FindStringSubmatchIndex(node.Data) + m := globalVars().codePreviewPattern.FindStringSubmatchIndex(node.Data) if m == nil { return 0, 0, "", nil } @@ -66,8 +62,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { node = node.NextSibling continue } - urlPosStart, urlPosEnd, h, err := renderCodeBlock(ctx, node) - if err != nil || h == "" { + urlPosStart, urlPosEnd, renderedCodeBlock, err := renderCodeBlock(ctx, node) + if err != nil || renderedCodeBlock == "" { if err != nil { log.Error("Unable to render code preview: %v", err) } @@ -84,7 +80,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) { // then it is resolved as: "<p>{TextBefore}</p><div NewNode/><p>{TextAfter}</p>", // so unless it could correctly replace the parent "p/li" node, it is very difficult to eliminate the "TextBefore" empty node. node.Data = textBefore - node.Parent.InsertBefore(&html.Node{Type: html.RawNode, Data: string(h)}, next) + renderedCodeNode := &html.Node{Type: html.RawNode, Data: string(ctx.RenderInternal.ProtectSafeAttrs(renderedCodeBlock))} + node.Parent.InsertBefore(renderedCodeNode, next) if textAfter != "" { node.Parent.InsertBefore(&html.Node{Type: html.TextNode, Data: textAfter}, next) } diff --git a/modules/markup/html_email.go b/modules/markup/html_email.go index 32d0285eb4..cbfae8b829 100644 --- a/modules/markup/html_email.go +++ b/modules/markup/html_email.go @@ -15,7 +15,7 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) { } mail := node.Data[m[2]:m[3]] - replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto")) + replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/)) node = node.NextSibling.NextSibling } } diff --git a/modules/markup/html_emoji.go b/modules/markup/html_emoji.go index 6eacb2067f..c638065425 100644 --- a/modules/markup/html_emoji.go +++ b/modules/markup/html_emoji.go @@ -13,15 +13,13 @@ import ( "golang.org/x/net/html/atom" ) -func createEmoji(content, class, name string) *html.Node { +func createEmoji(ctx *RenderContext, content, name string) *html.Node { span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - if class != "" { - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class}) - } + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji")) if name != "" { span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name}) } @@ -35,13 +33,13 @@ func createEmoji(content, class, name string) *html.Node { return span } -func createCustomEmoji(alias string) *html.Node { +func createCustomEmoji(ctx *RenderContext, alias string) *html.Node { span := &html.Node{ Type: html.ElementNode, Data: atom.Span.String(), Attr: []html.Attribute{}, } - span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"}) + span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji")) span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias}) img := &html.Node{ @@ -77,7 +75,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { if converted == nil { // check if this is a custom reaction if _, exist := setting.UI.CustomEmojisMap[alias]; exist { - replaceContent(node, m[0], m[1], createCustomEmoji(alias)) + replaceContent(node, m[0], m[1], createCustomEmoji(ctx, alias)) node = node.NextSibling.NextSibling start = 0 continue @@ -85,7 +83,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) { continue } - replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description)) + replaceContent(node, m[0], m[1], createEmoji(ctx, converted.Emoji, converted.Description)) node = node.NextSibling.NextSibling start = 0 } @@ -107,7 +105,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) { start = m[1] val := emoji.FromCode(codepoint) if val != nil { - replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description)) + replaceContent(node, m[0], m[1], createEmoji(ctx, codepoint, val.Description)) node = node.NextSibling.NextSibling start = 0 } diff --git a/modules/markup/html_issue.go b/modules/markup/html_issue.go index 2acf154ad2..7341af7eb6 100644 --- a/modules/markup/html_issue.go +++ b/modules/markup/html_issue.go @@ -57,10 +57,10 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) { matchRepo := linkParts[len(linkParts)-3] if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] { - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue")) } else { text = matchOrg + "/" + matchRepo + text - replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue")) + replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue")) } node = node.NextSibling.NextSibling } @@ -129,16 +129,16 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err) } - link = createLink(res, reftext, "ref-issue ref-external-issue") + link = createLink(ctx, res, reftext, "ref-issue ref-external-issue") } else { // Path determines the type of link that will be rendered. It's unknown at this point whether // the linked item is actually a PR or an issue. Luckily it's of no real consequence because // Gitea will redirect on click as appropriate. issuePath := util.Iif(ref.IsPull, "pulls", "issues") if ref.Owner == "" { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") + link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue") } else { - link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") + link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue") } } @@ -151,7 +151,7 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) { // Decorate action keywords if actionable var keyword *html.Node if references.IsXrefActionable(ref, hasExtTrackFormat) { - keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) + keyword = createKeyword(ctx, node.Data[ref.ActionLocation.Start:ref.ActionLocation.End]) } else { keyword = &html.Node{ Type: html.TextNode, @@ -177,7 +177,7 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) { } reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha) - link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") + link := createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit") replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link) node = node.NextSibling.NextSibling diff --git a/modules/markup/html_link.go b/modules/markup/html_link.go index b7562d0aa6..32aa7dc614 100644 --- a/modules/markup/html_link.go +++ b/modules/markup/html_link.go @@ -189,13 +189,13 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) { func linkProcessor(ctx *RenderContext, node *html.Node) { next := node.NextSibling for node != nil && node != next { - m := common.LinkRegex.FindStringIndex(node.Data) + m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data) if m == nil { return } uri := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createLink(uri, uri, "link")) + replaceContent(node, m[0], m[1], createLink(ctx, uri, uri, "" /*link*/)) node = node.NextSibling.NextSibling } } @@ -204,7 +204,7 @@ func linkProcessor(ctx *RenderContext, node *html.Node) { func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) { next := node.NextSibling for node != nil && node != next { - m := common.LinkRegex.FindStringIndex(node.Data) + m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data) if m == nil { return } diff --git a/modules/markup/html_mention.go b/modules/markup/html_mention.go index 3f0692e05f..f7e2ad50f1 100644 --- a/modules/markup/html_mention.go +++ b/modules/markup/html_mention.go @@ -33,7 +33,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) { if ok && strings.Contains(mention, "/") { mentionOrgAndTeam := strings.Split(mention, "/") if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention")) + replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "" /*mention*/)) node = node.NextSibling.NextSibling start = 0 continue @@ -44,7 +44,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) { mentionedUsername := mention[1:] if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) { - replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention")) + replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "" /*mention*/)) node = node.NextSibling.NextSibling start = 0 } else { diff --git a/modules/markup/internal/finalprocessor.go b/modules/markup/internal/finalprocessor.go new file mode 100644 index 0000000000..14d46a161f --- /dev/null +++ b/modules/markup/internal/finalprocessor.go @@ -0,0 +1,30 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "bytes" + "io" +) + +type finalProcessor struct { + renderInternal *RenderInternal + + output io.Writer + buf bytes.Buffer +} + +func (p *finalProcessor) Write(data []byte) (int, error) { + p.buf.Write(data) + return len(data), nil +} + +func (p *finalProcessor) Close() error { + // TODO: reading the whole markdown isn't a problem at the moment, + // because "postProcess" already does so. In the future we could optimize the code to process data on the fly. + buf := p.buf.Bytes() + buf = bytes.ReplaceAll(buf, []byte(` data-attr-class="`+p.renderInternal.secureIDPrefix), []byte(` class="`)) + _, err := p.output.Write(buf) + return err +} diff --git a/modules/markup/internal/internal_test.go b/modules/markup/internal/internal_test.go new file mode 100644 index 0000000000..98ff3bc079 --- /dev/null +++ b/modules/markup/internal/internal_test.go @@ -0,0 +1,61 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "bytes" + "html/template" + "io" + "testing" + + "github.com/stretchr/testify/assert" +) + +func TestRenderInternal(t *testing.T) { + cases := []struct { + input, protected, recovered string + }{ + { + input: `<div class="test">class="content"</div>`, + protected: `<div data-attr-class="sec:test">class="content"</div>`, + recovered: `<div class="test">class="content"</div>`, + }, + { + input: "<div\nclass=\"test\" data-xxx></div>", + protected: `<div data-attr-class="sec:test" data-xxx></div>`, + recovered: `<div class="test" data-xxx></div>`, + }, + } + for _, c := range cases { + var r RenderInternal + out := &bytes.Buffer{} + in := r.init("sec", out) + protected := r.ProtectSafeAttrs(template.HTML(c.input)) + assert.EqualValues(t, c.protected, protected) + _, _ = io.WriteString(in, string(protected)) + _ = in.Close() + assert.EqualValues(t, c.recovered, out.String()) + } + + var r1, r2 RenderInternal + protected := r1.ProtectSafeAttrs(`<div class="test"></div>`) + assert.EqualValues(t, `<div class="test"></div>`, protected, "non-initialized RenderInternal should not protect any attributes") + _ = r1.init("sec", nil) + protected = r1.ProtectSafeAttrs(`<div class="test"></div>`) + assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, protected) + assert.EqualValues(t, "data-attr-class", r1.SafeAttr("class")) + assert.EqualValues(t, "sec:val", r1.SafeValue("val")) + recovered, ok := r1.RecoverProtectedValue("sec:val") + assert.True(t, ok) + assert.EqualValues(t, "val", recovered) + recovered, ok = r1.RecoverProtectedValue("other:val") + assert.False(t, ok) + assert.Empty(t, recovered) + + out2 := &bytes.Buffer{} + in2 := r2.init("sec-other", out2) + _, _ = io.WriteString(in2, string(protected)) + _ = in2.Close() + assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, out2.String(), "different secureID should not recover the value") +} diff --git a/modules/markup/internal/renderinternal.go b/modules/markup/internal/renderinternal.go new file mode 100644 index 0000000000..4775fecfc7 --- /dev/null +++ b/modules/markup/internal/renderinternal.go @@ -0,0 +1,82 @@ +// Copyright 2024 The Gitea Authors. All rights reserved. +// SPDX-License-Identifier: MIT + +package internal + +import ( + "crypto/rand" + "encoding/base64" + "html/template" + "io" + "regexp" + "strings" + "sync" + + "code.gitea.io/gitea/modules/htmlutil" + + "golang.org/x/net/html" +) + +var reAttrClass = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp { + // TODO: it isn't a problem at the moment because our HTML contents are always well constructed + return regexp.MustCompile(`(<[^>]+)\s+class="([^"]+)"([^>]*>)`) +}) + +// RenderInternal also works without initialization +// If no initialization (no secureID), it will not protect any attributes and return the original name&value +type RenderInternal struct { + secureID string + secureIDPrefix string +} + +func (r *RenderInternal) Init(output io.Writer) io.WriteCloser { + buf := make([]byte, 12) + _, err := rand.Read(buf) + if err != nil { + panic("unable to generate secure id") + } + return r.init(base64.URLEncoding.EncodeToString(buf), output) +} + +func (r *RenderInternal) init(secID string, output io.Writer) io.WriteCloser { + r.secureID = secID + r.secureIDPrefix = r.secureID + ":" + return &finalProcessor{renderInternal: r, output: output} +} + +func (r *RenderInternal) RecoverProtectedValue(v string) (string, bool) { + if !strings.HasPrefix(v, r.secureIDPrefix) { + return "", false + } + return v[len(r.secureIDPrefix):], true +} + +func (r *RenderInternal) SafeAttr(name string) string { + if r.secureID == "" { + return name + } + return "data-attr-" + name +} + +func (r *RenderInternal) SafeValue(val string) string { + if r.secureID == "" { + return val + } + return r.secureID + ":" + val +} + +func (r *RenderInternal) NodeSafeAttr(attr, val string) html.Attribute { + return html.Attribute{Key: r.SafeAttr(attr), Val: r.SafeValue(val)} +} + +func (r *RenderInternal) ProtectSafeAttrs(content template.HTML) template.HTML { + if r.secureID == "" { + return content + } + return template.HTML(reAttrClass().ReplaceAllString(string(content), `$1 data-attr-class="`+r.secureIDPrefix+`$2"$3`)) +} + +func (r *RenderInternal) FormatWithSafeAttrs(w io.Writer, fmt string, a ...any) error { + _, err := w.Write([]byte(r.ProtectSafeAttrs(htmlutil.HTMLFormat(fmt, a...)))) + return err +} diff --git a/modules/markup/markdown/ast.go b/modules/markup/markdown/ast.go index 624c35d945..ca165b1ba0 100644 --- a/modules/markup/markdown/ast.go +++ b/modules/markup/markdown/ast.go @@ -34,13 +34,6 @@ func NewDetails() *Details { } } -// IsDetails returns true if the given node implements the Details interface, -// otherwise false. -func IsDetails(node ast.Node) bool { - _, ok := node.(*Details) - return ok -} - // Summary is a block that contains the summary of details block type Summary struct { ast.BaseBlock @@ -66,13 +59,6 @@ func NewSummary() *Summary { } } -// IsSummary returns true if the given node implements the Summary interface, -// otherwise false. -func IsSummary(node ast.Node) bool { - _, ok := node.(*Summary) - return ok -} - // TaskCheckBoxListItem is a block that represents a list item of a markdown block with a checkbox type TaskCheckBoxListItem struct { *ast.ListItem @@ -103,14 +89,7 @@ func NewTaskCheckBoxListItem(listItem *ast.ListItem) *TaskCheckBoxListItem { } } -// IsTaskCheckBoxListItem returns true if the given node implements the TaskCheckBoxListItem interface, -// otherwise false. -func IsTaskCheckBoxListItem(node ast.Node) bool { - _, ok := node.(*TaskCheckBoxListItem) - return ok -} - -// Icon is an inline for a fomantic icon +// Icon is an inline for a Fomantic UI icon type Icon struct { ast.BaseInline Name []byte @@ -139,13 +118,6 @@ func NewIcon(name string) *Icon { } } -// IsIcon returns true if the given node implements the Icon interface, -// otherwise false. -func IsIcon(node ast.Node) bool { - _, ok := node.(*Icon) - return ok -} - // ColorPreview is an inline for a color preview type ColorPreview struct { ast.BaseInline diff --git a/modules/markup/markdown/goldmark.go b/modules/markup/markdown/goldmark.go index c837b21e78..47dcfa8b5a 100644 --- a/modules/markup/markdown/goldmark.go +++ b/modules/markup/markdown/goldmark.go @@ -7,9 +7,11 @@ import ( "fmt" "regexp" "strings" + "sync" "code.gitea.io/gitea/modules/container" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/markup/internal" "code.gitea.io/gitea/modules/setting" "github.com/yuin/goldmark/ast" @@ -23,11 +25,13 @@ import ( // ASTTransformer is a default transformer of the goldmark tree. type ASTTransformer struct { + renderInternal *internal.RenderInternal attentionTypes container.Set[string] } -func NewASTTransformer() *ASTTransformer { +func NewASTTransformer(renderInternal *internal.RenderInternal) *ASTTransformer { return &ASTTransformer{ + renderInternal: renderInternal, attentionTypes: container.SetOf("note", "tip", "important", "warning", "caution"), } } @@ -109,12 +113,16 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa } } -// NewHTMLRenderer creates a HTMLRenderer to render -// in the gitea form. -func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { +// it is copied from old code, which is quite doubtful whether it is correct +var reValidIconName = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp { + return regexp.MustCompile(`^[-\w]+$`) // old: regexp.MustCompile("^[a-z ]+$") +}) + +// NewHTMLRenderer creates a HTMLRenderer to render in the gitea form. +func NewHTMLRenderer(renderInternal *internal.RenderInternal, opts ...html.Option) renderer.NodeRenderer { r := &HTMLRenderer{ - Config: html.NewConfig(), - reValidName: regexp.MustCompile("^[a-z ]+$"), + renderInternal: renderInternal, + Config: html.NewConfig(), } for _, opt := range opts { opt.SetHTMLOption(&r.Config) @@ -126,7 +134,7 @@ func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer { // renders gitea specific features. type HTMLRenderer struct { html.Config - reValidName *regexp.Regexp + renderInternal *internal.RenderInternal } // RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs. @@ -214,12 +222,13 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node return ast.WalkContinue, nil } - if !r.reValidName.MatchString(name) { + if !reValidIconName().MatchString(name) { // skip this return ast.WalkContinue, nil } - _, err := w.WriteString(fmt.Sprintf(`<i class="icon %s"></i>`, name)) + // FIXME: the "icon xxx" is from Fomantic UI, it's really questionable whether it still works correctly + err := r.renderInternal.FormatWithSafeAttrs(w, `<i class="icon %s"></i>`, name) if err != nil { return ast.WalkStop, err } diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index 6af0deb27b..a3915ad439 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -9,7 +9,6 @@ import ( "html/template" "io" "strings" - "sync" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" @@ -30,11 +29,6 @@ import ( ) var ( - specMarkdown goldmark.Markdown - specMarkdownOnce sync.Once -) - -var ( renderContextKey = parser.NewContextKey() renderConfigKey = parser.NewContextKey() ) @@ -68,85 +62,95 @@ func newParserContext(ctx *markup.RenderContext) parser.Context { return pc } +type GlodmarkRender struct { + ctx *markup.RenderContext + + goldmarkMarkdown goldmark.Markdown +} + +func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error { + return r.goldmarkMarkdown.Convert(source, writer, opts...) +} + +func (r *GlodmarkRender) Renderer() renderer.Renderer { + return r.goldmarkMarkdown.Renderer() +} + +func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) { + if entering { + language, _ := c.Language() + if language == nil { + language = []byte("text") + } + + languageStr := string(language) + + preClasses := []string{"code-block"} + if languageStr == "mermaid" || languageStr == "math" { + preClasses = append(preClasses, "is-loading") + } + + err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(preClasses, " ")) + if err != nil { + return + } + + // include language-x class as part of commonmark spec + // the "display" class is used by "js/markup/math.js" to render the code element as a block + err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(language)) + if err != nil { + return + } + } else { + _, err := w.WriteString("</code></pre>") + if err != nil { + return + } + } +} + // SpecializedMarkdown sets up the Gitea specific markdown extensions -func SpecializedMarkdown() goldmark.Markdown { - specMarkdownOnce.Do(func() { - specMarkdown = goldmark.New( - goldmark.WithExtensions( - extension.NewTable( - extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)), - extension.Strikethrough, - extension.TaskList, - extension.DefinitionList, - common.FootnoteExtension, - highlighting.NewHighlighting( - highlighting.WithFormatOptions( - chromahtml.WithClasses(true), - chromahtml.PreventSurroundingPre(true), - ), - highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) { - if entering { - language, _ := c.Language() - if language == nil { - language = []byte("text") - } - - languageStr := string(language) - - preClasses := []string{"code-block"} - if languageStr == "mermaid" || languageStr == "math" { - preClasses = append(preClasses, "is-loading") - } - - _, err := w.WriteString(`<pre class="` + strings.Join(preClasses, " ") + `">`) - if err != nil { - return - } - - // include language-x class as part of commonmark spec - // the "display" class is used by "js/markup/math.js" to render the code element as a block - _, err = w.WriteString(`<code class="chroma language-` + string(language) + ` display">`) - if err != nil { - return - } - } else { - _, err := w.WriteString("</code></pre>") - if err != nil { - return - } - } - }), - ), - math.NewExtension( - math.Enabled(setting.Markdown.EnableMath), - ), - meta.Meta, - ), - goldmark.WithParserOptions( - parser.WithAttribute(), - parser.WithAutoHeadingID(), - parser.WithASTTransformers( - util.Prioritized(NewASTTransformer(), 10000), +func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender { + // TODO: it could use a pool to cache the renderers to reuse them with different contexts + // at the moment it is fast enough (see the benchmarks) + r := &GlodmarkRender{ctx: ctx} + r.goldmarkMarkdown = goldmark.New( + goldmark.WithExtensions( + extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)), + extension.Strikethrough, + extension.TaskList, + extension.DefinitionList, + common.FootnoteExtension, + highlighting.NewHighlighting( + highlighting.WithFormatOptions( + chromahtml.WithClasses(true), + chromahtml.PreventSurroundingPre(true), ), + highlighting.WithWrapperRenderer(r.highlightingRenderer), ), - goldmark.WithRendererOptions( - html.WithUnsafe(), - ), - ) - - // Override the original Tasklist renderer! - specMarkdown.Renderer().AddOptions( - renderer.WithNodeRenderers( - util.Prioritized(NewHTMLRenderer(), 10), - ), - ) - }) - return specMarkdown + math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)), + meta.Meta, + ), + goldmark.WithParserOptions( + parser.WithAttribute(), + parser.WithAutoHeadingID(), + parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)), + ), + goldmark.WithRendererOptions(html.WithUnsafe()), + ) + + // Override the original Tasklist renderer! + r.goldmarkMarkdown.Renderer().AddOptions( + renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)), + ) + + return r } -// actualRender renders Markdown to HTML without handling special links. -func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { - converter := SpecializedMarkdown() +// render calls goldmark render to convert Markdown to HTML +// NOTE: The output of this method MUST get sanitized separately!!! +func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { + converter := SpecializedMarkdown(ctx) lw := &limitWriter{ w: output, limit: setting.UI.MaxDisplayFileSize * 3, @@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) } log.Warn("Unable to render markdown due to panic in goldmark: %v", err) - if log.IsDebug() { - log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2)) + if (!setting.IsProd && !setting.IsInTesting) || log.IsDebug() { + log.Error("Panic in markdown: %v\n%s", err, log.Stack(2)) } }() @@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) return nil } -// Note: The output of this method must get sanitized. -func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { - defer func() { - err := recover() - if err == nil { - return - } - - log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes") - if log.IsDebug() { - log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2)) - } - _, err = io.Copy(output, input) - if err != nil { - log.Error("io.Copy failed: %v", err) - } - }() - return actualRender(ctx, input, output) -} - // MarkupName describes markup's name var MarkupName = "markdown" diff --git a/modules/markup/markdown/markdown_test.go b/modules/markup/markdown/markdown_test.go index 780df8727f..e4889a75e5 100644 --- a/modules/markup/markdown/markdown_test.go +++ b/modules/markup/markdown/markdown_test.go @@ -1051,3 +1051,17 @@ func TestAttention(t *testing.T) { // legacy GitHub style test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n</blockquote>") } + +func BenchmarkSpecializedMarkdown(b *testing.B) { + // 240856 4719 ns/op + for i := 0; i < b.N; i++ { + markdown.SpecializedMarkdown(&markup.RenderContext{}) + } +} + +func BenchmarkMarkdownRender(b *testing.B) { + // 23202 50840 ns/op + for i := 0; i < b.N; i++ { + _, _ = markdown.RenderString(&markup.RenderContext{Ctx: context.Background()}, "https://example.com\n- a\n- b\n") + } +} diff --git a/modules/markup/markdown/math/block_renderer.go b/modules/markup/markdown/math/block_renderer.go index 84817ef1e4..0d2a966102 100644 --- a/modules/markup/markdown/math/block_renderer.go +++ b/modules/markup/markdown/math/block_renderer.go @@ -4,17 +4,21 @@ package math import ( + "code.gitea.io/gitea/modules/markup/internal" + gast "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/util" ) // BlockRenderer represents a renderer for math Blocks -type BlockRenderer struct{} +type BlockRenderer struct { + renderInternal *internal.RenderInternal +} // NewBlockRenderer creates a new renderer for math Blocks -func NewBlockRenderer() renderer.NodeRenderer { - return &BlockRenderer{} +func NewBlockRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer { + return &BlockRenderer{renderInternal: renderInternal} } // RegisterFuncs registers the renderer for math Blocks @@ -33,7 +37,7 @@ func (r *BlockRenderer) writeLines(w util.BufWriter, source []byte, n gast.Node) func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) { n := node.(*Block) if entering { - _, _ = w.WriteString(`<pre class="code-block is-loading"><code class="chroma language-math display">`) + _ = r.renderInternal.FormatWithSafeAttrs(w, `<pre class="code-block is-loading"><code class="chroma language-math display">`) r.writeLines(w, source, n) } else { _, _ = w.WriteString(`</code></pre>` + "\n") diff --git a/modules/markup/markdown/math/inline_renderer.go b/modules/markup/markdown/math/inline_renderer.go index 96848099cc..0cff4f1e74 100644 --- a/modules/markup/markdown/math/inline_renderer.go +++ b/modules/markup/markdown/math/inline_renderer.go @@ -6,17 +6,21 @@ package math import ( "bytes" + "code.gitea.io/gitea/modules/markup/internal" + "github.com/yuin/goldmark/ast" "github.com/yuin/goldmark/renderer" "github.com/yuin/goldmark/util" ) // InlineRenderer is an inline renderer -type InlineRenderer struct{} +type InlineRenderer struct { + renderInternal *internal.RenderInternal +} // NewInlineRenderer returns a new renderer for inline math -func NewInlineRenderer() renderer.NodeRenderer { - return &InlineRenderer{} +func NewInlineRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer { + return &InlineRenderer{renderInternal: renderInternal} } func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) { @@ -25,7 +29,7 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod if _, ok := n.(*InlineBlock); ok { extraClass = "display " } - _, _ = w.WriteString(`<code class="language-math ` + extraClass + `is-loading">`) + _ = r.renderInternal.FormatWithSafeAttrs(w, `<code class="language-math %sis-loading">`, extraClass) for c := n.FirstChild(); c != nil; c = c.NextSibling() { segment := c.(*ast.Text).Segment value := util.EscapeHTML(segment.Value(source)) diff --git a/modules/markup/markdown/math/math.go b/modules/markup/markdown/math/math.go index 3d9f376bc6..7e8defcd4a 100644 --- a/modules/markup/markdown/math/math.go +++ b/modules/markup/markdown/math/math.go @@ -4,6 +4,8 @@ package math import ( + "code.gitea.io/gitea/modules/markup/internal" + "github.com/yuin/goldmark" "github.com/yuin/goldmark/parser" "github.com/yuin/goldmark/renderer" @@ -12,6 +14,7 @@ import ( // Extension is a math extension type Extension struct { + renderInternal *internal.RenderInternal enabled bool parseDollarInline bool parseDollarBlock bool @@ -39,38 +42,10 @@ func Enabled(enable ...bool) Option { }) } -// WithInlineDollarParser enables or disables the parsing of $...$ -func WithInlineDollarParser(enable ...bool) Option { - value := true - if len(enable) > 0 { - value = enable[0] - } - return extensionFunc(func(e *Extension) { - e.parseDollarInline = value - }) -} - -// WithBlockDollarParser enables or disables the parsing of $$...$$ -func WithBlockDollarParser(enable ...bool) Option { - value := true - if len(enable) > 0 { - value = enable[0] - } - return extensionFunc(func(e *Extension) { - e.parseDollarBlock = value - }) -} - -// Math represents a math extension with default rendered delimiters -var Math = &Extension{ - enabled: true, - parseDollarBlock: true, - parseDollarInline: true, -} - // NewExtension creates a new math extension with the provided options -func NewExtension(opts ...Option) *Extension { +func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension { r := &Extension{ + renderInternal: renderInternal, enabled: true, parseDollarBlock: true, parseDollarInline: true, @@ -102,7 +77,7 @@ func (e *Extension) Extend(m goldmark.Markdown) { m.Parser().AddOptions(parser.WithInlineParsers(inlines...)) m.Renderer().AddOptions(renderer.WithNodeRenderers( - util.Prioritized(NewBlockRenderer(), 501), - util.Prioritized(NewInlineRenderer(), 502), + util.Prioritized(NewBlockRenderer(e.renderInternal), 501), + util.Prioritized(NewInlineRenderer(e.renderInternal), 502), )) } diff --git a/modules/markup/markdown/meta_test.go b/modules/markup/markdown/meta_test.go index 6949966328..278c33f1d2 100644 --- a/modules/markup/markdown/meta_test.go +++ b/modules/markup/markdown/meta_test.go @@ -11,10 +11,8 @@ import ( "github.com/stretchr/testify/assert" ) -/* -IssueTemplate is a legacy to keep the unit tests working. -Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template. -*/ +// IssueTemplate is a legacy to keep the unit tests working. +// Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template. type IssueTemplate struct { Name string `json:"name" yaml:"name"` Title string `json:"title" yaml:"title"` diff --git a/modules/markup/markdown/transform_blockquote.go b/modules/markup/markdown/transform_blockquote.go index 92dc500e69..2651d44a69 100644 --- a/modules/markup/markdown/transform_blockquote.go +++ b/modules/markup/markdown/transform_blockquote.go @@ -32,7 +32,8 @@ func (r *HTMLRenderer) renderAttention(w util.BufWriter, source []byte, node ast default: // including "note" octiconName = "info" } - _, _ = w.WriteString(string(svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType))) + svgHTML := svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType) + _, _ = w.WriteString(string(r.renderInternal.ProtectSafeAttrs(svgHTML))) } return ast.WalkContinue, nil } @@ -128,13 +129,13 @@ func (g *ASTTransformer) transformBlockquote(v *ast.Blockquote, reader text.Read } // color the blockquote - v.SetAttributeString("class", []byte("attention-header attention-"+attentionType)) + v.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-header attention-"+attentionType))) // create an emphasis to make it bold attentionParagraph := ast.NewParagraph() g.applyElementDir(attentionParagraph) emphasis := ast.NewEmphasis(2) - emphasis.SetAttributeString("class", []byte("attention-"+attentionType)) + emphasis.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-"+attentionType))) attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType))) diff --git a/modules/markup/markdown/transform_codespan.go b/modules/markup/markdown/transform_codespan.go index ff7d24eec9..bccc43aad2 100644 --- a/modules/markup/markdown/transform_codespan.go +++ b/modules/markup/markdown/transform_codespan.go @@ -5,7 +5,6 @@ package markdown import ( "bytes" - "fmt" "strings" "code.gitea.io/gitea/modules/markup" @@ -40,7 +39,7 @@ func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Nod r.Writer.RawWrite(w, value) } case *ColorPreview: - _, _ = w.WriteString(fmt.Sprintf(`<span class="color-preview" style="background-color: %v"></span>`, string(v.Color))) + _ = r.renderInternal.FormatWithSafeAttrs(w, `<span class="color-preview" style="background-color: %s"></span>`, string(v.Color)) } } return ast.WalkSkipChildren, nil diff --git a/modules/markup/markdown/transform_list.go b/modules/markup/markdown/transform_list.go index b982fd4a83..c89ad2f2cf 100644 --- a/modules/markup/markdown/transform_list.go +++ b/modules/markup/markdown/transform_list.go @@ -72,7 +72,7 @@ func (g *ASTTransformer) transformList(_ *markup.RenderContext, v *ast.List, rc } newChild := NewTaskCheckBoxListItem(listItem) newChild.IsChecked = taskCheckBox.IsChecked - newChild.SetAttributeString("class", []byte("task-list-item")) + newChild.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("task-list-item"))) segments := newChild.FirstChild().Lines() if segments.Len() > 0 { segment := segments.At(0) diff --git a/modules/markup/render.go b/modules/markup/render.go index 1977dc73f5..f05cb62626 100644 --- a/modules/markup/render.go +++ b/modules/markup/render.go @@ -9,14 +9,15 @@ import ( "io" "net/url" "strings" - "sync" "code.gitea.io/gitea/modules/git" "code.gitea.io/gitea/modules/gitrepo" + "code.gitea.io/gitea/modules/markup/internal" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" "github.com/yuin/goldmark/ast" + "golang.org/x/sync/errgroup" ) type RenderMetaMode string @@ -65,6 +66,8 @@ type RenderContext struct { SidebarTocNode ast.Node RenderMetaAs RenderMetaMode InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page + + RenderInternal internal.RenderInternal } // Cancel runs any cleanup functions that have been registered for this Ctx @@ -156,59 +159,53 @@ sandbox="allow-scripts" return err } -func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { - var wg sync.WaitGroup - var err error +func pipes() (io.ReadCloser, io.WriteCloser, func()) { pr, pw := io.Pipe() - defer func() { + return pr, pw, func() { _ = pr.Close() _ = pw.Close() - }() - - var pr2 io.ReadCloser - var pw2 io.WriteCloser - - var sanitizerDisabled bool - if r, ok := renderer.(ExternalRenderer); ok { - sanitizerDisabled = r.SanitizerDisabled() } +} - if !sanitizerDisabled { - pr2, pw2 = io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() - - wg.Add(1) - go func() { - err = SanitizeReader(pr2, renderer.Name(), output) - _ = pr2.Close() - wg.Done() - }() - } else { - pw2 = util.NopCloser{Writer: output} +func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error { + finalProcessor := ctx.RenderInternal.Init(output) + defer finalProcessor.Close() + + // input -> (pw1=pr1) -> renderer -> (pw2=pr2) -> SanitizeReader -> finalProcessor -> output + // no sanitizer: input -> (pw1=pr1) -> renderer -> pw2(finalProcessor) -> output + pr1, pw1, close1 := pipes() + defer close1() + + eg, _ := errgroup.WithContext(ctx.Ctx) + var pw2 io.WriteCloser = util.NopCloser{Writer: finalProcessor} + + if r, ok := renderer.(ExternalRenderer); !ok || !r.SanitizerDisabled() { + var pr2 io.ReadCloser + var close2 func() + pr2, pw2, close2 = pipes() + defer close2() + eg.Go(func() error { + defer pr2.Close() + return SanitizeReader(pr2, renderer.Name(), finalProcessor) + }) } - wg.Add(1) - go func() { + eg.Go(func() (err error) { if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() { - err = PostProcess(ctx, pr, pw2) + err = PostProcess(ctx, pr1, pw2) } else { - _, err = io.Copy(pw2, pr) + _, err = io.Copy(pw2, pr1) } - _ = pr.Close() - _ = pw2.Close() - wg.Done() - }() + _, _ = pr1.Close(), pw2.Close() + return err + }) - if err1 := renderer.Render(ctx, input, pw); err1 != nil { - return err1 + if err := renderer.Render(ctx, input, pw1); err != nil { + return err } - _ = pw.Close() + _ = pw1.Close() - wg.Wait() - return err + return eg.Wait() } // Init initializes the render global variables diff --git a/modules/markup/sanitizer_custom.go b/modules/markup/sanitizer_custom.go index 7978973166..7f96556fd7 100644 --- a/modules/markup/sanitizer_custom.go +++ b/modules/markup/sanitizer_custom.go @@ -4,6 +4,9 @@ package markup import ( + "regexp" + "strings" + "code.gitea.io/gitea/modules/setting" "github.com/microcosm-cc/bluemonday" @@ -15,8 +18,11 @@ func (st *Sanitizer) addSanitizerRules(policy *bluemonday.Policy, rules []settin policy.AllowDataURIImages() } if rule.Element != "" { - if rule.Regexp != nil { - policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) + if rule.Regexp != "" { + if !strings.HasPrefix(rule.Regexp, "^") || !strings.HasSuffix(rule.Regexp, "$") { + panic("Markup sanitizer rule regexp must start with ^ and end with $ to be strict") + } + policy.AllowAttrs(rule.AllowAttr).Matching(regexp.MustCompile(rule.Regexp)).OnElements(rule.Element) } else { policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) } diff --git a/modules/markup/sanitizer_default.go b/modules/markup/sanitizer_default.go index 476ae5e26f..0fa54efd45 100644 --- a/modules/markup/sanitizer_default.go +++ b/modules/markup/sanitizer_default.go @@ -16,36 +16,11 @@ import ( func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { policy := bluemonday.UGCPolicy() - // For JS code copy and Mermaid loading state - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre") - - // For code preview - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally() - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td") - policy.AllowAttrs("data-line-number").OnElements("span") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div") - - // For code preview (unicode escape) - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span") - policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span") - - // For color preview - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span") - - // For attention - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong") - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg") - policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg") - policy.AllowAttrs("fill-rule", "d").OnElements("path") + // NOTICE: DO NOT add special "class" regexp rules here anymore, use RenderInternal.SafeAttr instead - // For Chroma markdown plugin - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code") + // General safe SVG attributes + policy.AllowAttrs("viewBox", "width", "height", "aria-hidden", "data-attr-class").OnElements("svg") + policy.AllowAttrs("fill-rule", "d").OnElements("path") // Checkboxes policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") @@ -66,28 +41,15 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme) } - // Allow classes for anchors - policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a") - - // Allow classes for task lists - policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") - // Allow classes for org mode list item status. policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li") - // Allow icons - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") - - // Allow classes for emojis - policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") - - // Allow icons, emojis, chroma syntax and keyword markup on span - policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") - // Allow 'color' and 'background-color' properties for the style attribute on text elements. policy.AllowStyles("color", "background-color").OnElements("span", "p") - // Allow generally safe attributes + policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") + + // Allow generally safe attributes (reference: https://github.com/jch/html-pipeline) generalSafeAttrs := []string{ "abbr", "accept", "accept-charset", "accesskey", "action", "align", "alt", @@ -106,10 +68,9 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { "selected", "shape", "size", "span", "start", "summary", "tabindex", "target", "title", "type", "usemap", "valign", "value", - "vspace", "width", "itemprop", - "data-markdown-generated-content", + "vspace", "width", "itemprop", "itemscope", "itemtype", + "data-markdown-generated-content", "data-attr-class", } - generalSafeElements := []string{ "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt", "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label", @@ -117,14 +78,8 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy { "details", "caption", "figure", "figcaption", "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr", } - - policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) - - policy.AllowAttrs("src", "autoplay", "controls").OnElements("video") - - policy.AllowAttrs("itemscope", "itemtype").OnElements("div") - // FIXME: Need to handle longdesc in img but there is no easy way to do it + policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) // Custom keyword markup defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules) diff --git a/modules/markup/sanitizer_default_test.go b/modules/markup/sanitizer_default_test.go index 20370509c1..c5c43695ea 100644 --- a/modules/markup/sanitizer_default_test.go +++ b/modules/markup/sanitizer_default_test.go @@ -19,7 +19,6 @@ func TestSanitizer(t *testing.T) { // Code highlighting class `<code class="random string"></code>`, `<code></code>`, `<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`, - `<code class="language-go"></code>`, `<code class="language-go"></code>`, // Input checkbox `<input type="hidden">`, ``, @@ -38,10 +37,8 @@ func TestSanitizer(t *testing.T) { // <kbd> tags `<kbd>Ctrl + C</kbd>`, `<kbd>Ctrl + C</kbd>`, `<i class="dropdown icon">NAUGHTY</i>`, `<i>NAUGHTY</i>`, - `<i class="icon dropdown"></i>`, `<i class="icon dropdown"></i>`, `<input type="checkbox" disabled=""/>unchecked`, `<input type="checkbox" disabled=""/>unchecked`, `<span class="emoji dropdown">NAUGHTY</span>`, `<span>NAUGHTY</span>`, - `<span class="emoji">contents</span>`, `<span class="emoji">contents</span>`, // Color property `<span style="color: red">Hello World</span>`, `<span style="color: red">Hello World</span>`, diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 6c2246342b..dfce8afa77 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -54,7 +54,7 @@ type MarkupRenderer struct { type MarkupSanitizerRule struct { Element string AllowAttr string - Regexp *regexp.Regexp + Regexp string AllowDataURIImages bool } @@ -117,15 +117,24 @@ func createMarkupSanitizerRule(name string, sec ConfigSection) (MarkupSanitizerR regexpStr := sec.Key("REGEXP").Value() if regexpStr != "" { - // Validate when parsing the config that this is a valid regular - // expression. Then we can use regexp.MustCompile(...) later. - compiled, err := regexp.Compile(regexpStr) + hasPrefix := strings.HasPrefix(regexpStr, "^") + hasSuffix := strings.HasSuffix(regexpStr, "$") + if !hasPrefix || !hasSuffix { + log.Error("In markup.%s: REGEXP must start with ^ and end with $ to be strict", name) + // to avoid breaking existing user configurations and satisfy the strict requirement in addSanitizerRules + if !hasPrefix { + regexpStr = "^.*" + regexpStr + } + if !hasSuffix { + regexpStr += ".*$" + } + } + _, err := regexp.Compile(regexpStr) if err != nil { log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err) return rule, false } - - rule.Regexp = compiled + rule.Regexp = regexpStr } ok = true diff --git a/modules/svg/svg.go b/modules/svg/svg.go index 8132978cac..fded9d0873 100644 --- a/modules/svg/svg.go +++ b/modules/svg/svg.go @@ -9,7 +9,7 @@ import ( "path" "strings" - gitea_html "code.gitea.io/gitea/modules/html" + gitea_html "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/public" ) diff --git a/modules/templates/helper.go b/modules/templates/helper.go index 3ef11772dc..d5b32358da 100644 --- a/modules/templates/helper.go +++ b/modules/templates/helper.go @@ -10,12 +10,12 @@ import ( "html/template" "net/url" "reflect" - "slices" "strings" "time" user_model "code.gitea.io/gitea/models/user" "code.gitea.io/gitea/modules/base" + "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/svg" @@ -39,7 +39,7 @@ func NewFuncMap() template.FuncMap { "Iif": iif, "Eval": evalTokens, "SafeHTML": safeHTML, - "HTMLFormat": HTMLFormat, + "HTMLFormat": htmlutil.HTMLFormat, "HTMLEscape": htmlEscape, "QueryEscape": queryEscape, "JSEscape": jsEscapeSafe, @@ -184,23 +184,6 @@ func NewFuncMap() template.FuncMap { } } -func HTMLFormat(s string, rawArgs ...any) template.HTML { - args := slices.Clone(rawArgs) - for i, v := range args { - switch v := v.(type) { - case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML: - // for most basic types (including template.HTML which is safe), just do nothing and use it - case string: - args[i] = template.HTMLEscapeString(v) - case fmt.Stringer: - args[i] = template.HTMLEscapeString(v.String()) - default: - args[i] = template.HTMLEscapeString(fmt.Sprint(v)) - } - } - return template.HTML(fmt.Sprintf(s, args...)) -} - // safeHTML render raw as HTML func safeHTML(s any) template.HTML { switch v := s.(type) { diff --git a/modules/templates/helper_test.go b/modules/templates/helper_test.go index b9fabb7016..3e17e86c66 100644 --- a/modules/templates/helper_test.go +++ b/modules/templates/helper_test.go @@ -61,10 +61,6 @@ func TestJSEscapeSafe(t *testing.T) { assert.EqualValues(t, `\u0026\u003C\u003E\'\"`, jsEscapeSafe(`&<>'"`)) } -func TestHTMLFormat(t *testing.T) { - assert.Equal(t, template.HTML("<a>< < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1)) -} - func TestSanitizeHTML(t *testing.T) { assert.Equal(t, template.HTML(`<a href="/" rel="nofollow">link</a> xss <div>inline</div>`), SanitizeHTML(`<a href="/">link</a> <a href="javascript:">xss</a> <div style="dangerous">inline</div>`)) } diff --git a/modules/templates/util_avatar.go b/modules/templates/util_avatar.go index afc1091516..f7dd408ee2 100644 --- a/modules/templates/util_avatar.go +++ b/modules/templates/util_avatar.go @@ -14,7 +14,7 @@ import ( "code.gitea.io/gitea/models/organization" repo_model "code.gitea.io/gitea/models/repo" user_model "code.gitea.io/gitea/models/user" - gitea_html "code.gitea.io/gitea/modules/html" + gitea_html "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/setting" ) diff --git a/modules/templates/util_render.go b/modules/templates/util_render.go index 8e443446bd..5776eefced 100644 --- a/modules/templates/util_render.go +++ b/modules/templates/util_render.go @@ -16,6 +16,7 @@ import ( issues_model "code.gitea.io/gitea/models/issues" "code.gitea.io/gitea/modules/emoji" + "code.gitea.io/gitea/modules/htmlutil" "code.gitea.io/gitea/modules/log" "code.gitea.io/gitea/modules/markup" "code.gitea.io/gitea/modules/markup/markdown" @@ -140,7 +141,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML { if labelScope == "" { // Regular label - return HTMLFormat(`<div class="ui label %s" style="color: %s !important; background-color: %s !important;" data-tooltip-content title="%s">%s</div>`, + return htmlutil.HTMLFormat(`<div class="ui label %s" style="color: %s !important; background-color: %s !important;" data-tooltip-content title="%s">%s</div>`, extraCSSClasses, textColor, label.Color, descriptionText, ut.RenderEmoji(label.Name)) } @@ -174,7 +175,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML { itemColor := "#" + hex.EncodeToString(itemBytes) scopeColor := "#" + hex.EncodeToString(scopeBytes) - return HTMLFormat(`<span class="ui label %s scope-parent" data-tooltip-content title="%s">`+ + return htmlutil.HTMLFormat(`<span class="ui label %s scope-parent" data-tooltip-content title="%s">`+ `<div class="ui label scope-left" style="color: %s !important; background-color: %s !important">%s</div>`+ `<div class="ui label scope-right" style="color: %s !important; background-color: %s !important">%s</div>`+ `</span>`, diff --git a/modules/templates/util_render_test.go b/modules/templates/util_render_test.go index 529507e7ea..cf6d839cbf 100644 --- a/modules/templates/util_render_test.go +++ b/modules/templates/util_render_test.go @@ -113,34 +113,34 @@ func TestRenderCommitBody(t *testing.T) { } expected := `/just/a/path.bin -<a href="https://example.com/file.bin" class="link">https://example.com/file.bin</a> +<a href="https://example.com/file.bin">https://example.com/file.bin</a> [local link](file.bin) -[remote link](<a href="https://example.com" class="link">https://example.com</a>) +[remote link](<a href="https://example.com">https://example.com</a>) [[local link|file.bin]] -[[remote link|<a href="https://example.com" class="link">https://example.com</a>]] +[[remote link|<a href="https://example.com">https://example.com</a>]] ![local image](image.jpg) -![remote image](<a href="https://example.com/image.jpg" class="link">https://example.com/image.jpg</a>) +![remote image](<a href="https://example.com/image.jpg">https://example.com/image.jpg</a>) [[local image|image.jpg]] -[[remote link|<a href="https://example.com/image.jpg" class="link">https://example.com/image.jpg</a>]] +[[remote link|<a href="https://example.com/image.jpg">https://example.com/image.jpg</a>]] <a href="https://example.com/user/repo/compare/88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb#hash" class="compare"><code class="nohighlight">88fc37a3c0...12fc37a3c0 (hash)</code></a> com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb pare <a href="https://example.com/user/repo/commit/88fc37a3c0a4dda553bdcfc80c178a58247f42fb" class="commit"><code class="nohighlight">88fc37a3c0</code></a> com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb mit <span class="emoji" aria-label="thumbs up">👍</span> -<a href="mailto:mail@domain.com" class="mailto">mail@domain.com</a> -<a href="/mention-user" class="mention">@mention-user</a> test +<a href="mailto:mail@domain.com">mail@domain.com</a> +<a href="/mention-user">@mention-user</a> test <a href="/user13/repo11/issues/123" class="ref-issue">#123</a> space` assert.EqualValues(t, expected, string(newTestRenderUtils().RenderCommitBody(testInput(), testMetas))) } func TestRenderCommitMessage(t *testing.T) { - expected := `space <a href="/mention-user" data-markdown-generated-content="" class="mention">@mention-user</a> ` + expected := `space <a href="/mention-user" data-markdown-generated-content="">@mention-user</a> ` assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessage(testInput(), testMetas)) } func TestRenderCommitMessageLinkSubject(t *testing.T) { - expected := `<a href="https://example.com/link" class="muted">space </a><a href="/mention-user" data-markdown-generated-content="" class="mention">@mention-user</a>` + expected := `<a href="https://example.com/link" class="muted">space </a><a href="/mention-user" data-markdown-generated-content="">@mention-user</a>` assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessageLinkSubject(testInput(), "https://example.com/link", testMetas)) } diff --git a/routers/web/repo/wiki.go b/routers/web/repo/wiki.go index 13f6b69493..2732a67e71 100644 --- a/routers/web/repo/wiki.go +++ b/routers/web/repo/wiki.go @@ -326,7 +326,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) { if rctx.SidebarTocNode != nil { sb := &strings.Builder{} - err = markdown.SpecializedMarkdown().Renderer().Render(sb, nil, rctx.SidebarTocNode) + err = markdown.SpecializedMarkdown(rctx).Renderer().Render(sb, nil, rctx.SidebarTocNode) if err != nil { log.Error("Failed to render wiki sidebar TOC: %v", err) } else { |