]> source.dussan.org Git - gitea.git/commitdiff
Refactor markup render system (#32533)
author wxiaoguang <wxiaoguang@gmail.com>
Mon, 18 Nov 2024 05:25:42 +0000 (13:25 +0800)
committer GitHub <noreply@github.com>
Mon, 18 Nov 2024 05:25:42 +0000 (13:25 +0800)
Remove unmaintainable sanitizer rules. There is no need to add special
"class" regexp rules anymore; use RenderInternal.SafeAttr instead. More
details (and examples) are in the tests.

42 files changed:
modules/html/html.go [deleted file]
modules/htmlutil/html.go [new file with mode: 0644]
modules/htmlutil/html_test.go [new file with mode: 0644]
modules/markup/asciicast/asciicast.go
modules/markup/common/html.go [deleted file]
modules/markup/common/linkify.go
modules/markup/console/console.go
modules/markup/csv/csv.go
modules/markup/external/external.go
modules/markup/html.go
modules/markup/html_codepreview.go
modules/markup/html_email.go
modules/markup/html_emoji.go
modules/markup/html_issue.go
modules/markup/html_link.go
modules/markup/html_mention.go
modules/markup/internal/finalprocessor.go [new file with mode: 0644]
modules/markup/internal/internal_test.go [new file with mode: 0644]
modules/markup/internal/renderinternal.go [new file with mode: 0644]
modules/markup/markdown/ast.go
modules/markup/markdown/goldmark.go
modules/markup/markdown/markdown.go
modules/markup/markdown/markdown_test.go
modules/markup/markdown/math/block_renderer.go
modules/markup/markdown/math/inline_renderer.go
modules/markup/markdown/math/math.go
modules/markup/markdown/meta_test.go
modules/markup/markdown/transform_blockquote.go
modules/markup/markdown/transform_codespan.go
modules/markup/markdown/transform_list.go
modules/markup/render.go
modules/markup/sanitizer_custom.go
modules/markup/sanitizer_default.go
modules/markup/sanitizer_default_test.go
modules/setting/markup.go
modules/svg/svg.go
modules/templates/helper.go
modules/templates/helper_test.go
modules/templates/util_avatar.go
modules/templates/util_render.go
modules/templates/util_render_test.go
routers/web/repo/wiki.go

diff --git a/modules/html/html.go b/modules/html/html.go
deleted file mode 100644 (file)
index b1ebd58..0000000
+++ /dev/null
@@ -1,25 +0,0 @@
-// Copyright 2022 The Gitea Authors. All rights reserved.
-// SPDX-License-Identifier: MIT
-
-package html
-
-// ParseSizeAndClass get size and class from string with default values
-// If present, "others" expects the new size first and then the classes to use
-func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) {
-       size := defaultSize
-       if len(others) >= 1 {
-               if v, ok := others[0].(int); ok && v != 0 {
-                       size = v
-               }
-       }
-       class := defaultClass
-       if len(others) >= 2 {
-               if v, ok := others[1].(string); ok && v != "" {
-                       if class != "" {
-                               class += " "
-                       }
-                       class += v
-               }
-       }
-       return size, class
-}
diff --git a/modules/htmlutil/html.go b/modules/htmlutil/html.go
new file mode 100644 (file)
index 0000000..9b5f5a9
--- /dev/null
@@ -0,0 +1,48 @@
+// Copyright 2022 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package htmlutil
+
+import (
+       "fmt"
+       "html/template"
+       "slices"
+)
+
+// ParseSizeAndClass gets the size and class from the optional "others" arguments, falling back to the default values.
+// If present, "others" expects the new size first and then the classes to append to the default class.
+func ParseSizeAndClass(defaultSize int, defaultClass string, others ...any) (int, string) {
+       size := defaultSize
+       if len(others) >= 1 {
+               if v, ok := others[0].(int); ok && v != 0 {
+                       size = v
+               }
+       }
+       class := defaultClass
+       if len(others) >= 2 {
+               if v, ok := others[1].(string); ok && v != "" {
+                       if class != "" {
+                               class += " "
+                       }
+                       class += v
+               }
+       }
+       return size, class
+}
+
+func HTMLFormat(s string, rawArgs ...any) template.HTML {
+       args := slices.Clone(rawArgs)
+       for i, v := range args {
+               switch v := v.(type) {
+               case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML:
+                       // for most basic types (including template.HTML which is safe), just do nothing and use it
+               case string:
+                       args[i] = template.HTMLEscapeString(v)
+               case fmt.Stringer:
+                       args[i] = template.HTMLEscapeString(v.String())
+               default:
+                       args[i] = template.HTMLEscapeString(fmt.Sprint(v))
+               }
+       }
+       return template.HTML(fmt.Sprintf(s, args...))
+}
diff --git a/modules/htmlutil/html_test.go b/modules/htmlutil/html_test.go
new file mode 100644 (file)
index 0000000..5ff05d7
--- /dev/null
@@ -0,0 +1,15 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package htmlutil
+
+import (
+       "html/template"
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestHTMLFormat(t *testing.T) {
+       assert.Equal(t, template.HTML("<a>&lt; < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1))
+}
index 06780623403a4fac1f2212fea1474c920e461602..e92b78a4bcad64cc4150ab3678d2b91a541addc7 100644 (file)
@@ -7,7 +7,6 @@ import (
        "fmt"
        "io"
        "net/url"
-       "regexp"
 
        "code.gitea.io/gitea/modules/markup"
        "code.gitea.io/gitea/modules/setting"
@@ -38,10 +37,7 @@ const (
 
 // SanitizerRules implements markup.Renderer
 func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
-       return []setting.MarkupSanitizerRule{
-               {Element: "div", AllowAttr: "class", Regexp: regexp.MustCompile(playerClassName)},
-               {Element: "div", AllowAttr: playerSrcAttr},
-       }
+       return []setting.MarkupSanitizerRule{{Element: "div", AllowAttr: playerSrcAttr}}
 }
 
 // Render implements markup.Renderer
@@ -53,12 +49,5 @@ func (Renderer) Render(ctx *markup.RenderContext, _ io.Reader, output io.Writer)
                ctx.Metas["BranchNameSubURL"],
                url.PathEscape(ctx.RelativePath),
        )
-
-       _, err := io.WriteString(output, fmt.Sprintf(
-               `<div class="%s" %s="%s"></div>`,
-               playerClassName,
-               playerSrcAttr,
-               rawURL,
-       ))
-       return err
+       return ctx.RenderInternal.FormatWithSafeAttrs(output, `<div class="%s" %s="%s"></div>`, playerClassName, playerSrcAttr, rawURL)
 }
diff --git a/modules/markup/common/html.go b/modules/markup/common/html.go
deleted file mode 100644 (file)
index 5658839..0000000
+++ /dev/null
@@ -1,16 +0,0 @@
-// Copyright 2019 The Gitea Authors. All rights reserved.
-// SPDX-License-Identifier: MIT
-
-package common
-
-import (
-       "mvdan.cc/xurls/v2"
-)
-
-// NOTE: All below regex matching do not perform any extra validation.
-// Thus a link is produced even if the linked entity does not exist.
-// While fast, this is also incorrect and lead to false positives.
-// TODO: fix invalid linking issue
-
-// LinkRegex is a regexp matching a valid link
-var LinkRegex, _ = xurls.StrictMatchingScheme("https?://")
index f84680205e88860eb3db3f9c5eb4307daa06ed6e..be6ab22b55f4979a137826156d75b8f6f250a7a2 100644 (file)
@@ -9,15 +9,27 @@ package common
 import (
        "bytes"
        "regexp"
+       "sync"
 
        "github.com/yuin/goldmark"
        "github.com/yuin/goldmark/ast"
        "github.com/yuin/goldmark/parser"
        "github.com/yuin/goldmark/text"
        "github.com/yuin/goldmark/util"
+       "mvdan.cc/xurls/v2"
 )
 
-var wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
+type GlobalVarsType struct {
+       wwwURLRegxp *regexp.Regexp
+       LinkRegex   *regexp.Regexp // fast matching of a URL link, without any extra validation.
+}
+
+var GlobalVars = sync.OnceValue[*GlobalVarsType](func() *GlobalVarsType {
+       v := &GlobalVarsType{}
+       v.wwwURLRegxp = regexp.MustCompile(`^www\.[-a-zA-Z0-9@:%._\+~#=]{2,256}\.[a-z]{2,6}((?:/|[#?])[-a-zA-Z0-9@:%_\+.~#!?&//=\(\);,'">\^{}\[\]` + "`" + `]*)?`)
+       v.LinkRegex, _ = xurls.StrictMatchingScheme("https?://")
+       return v
+})
 
 type linkifyParser struct{}
 
@@ -60,10 +72,10 @@ func (s *linkifyParser) Parse(parent ast.Node, block text.Reader, pc parser.Cont
        var protocol []byte
        typ := ast.AutoLinkURL
        if bytes.HasPrefix(line, protoHTTP) || bytes.HasPrefix(line, protoHTTPS) || bytes.HasPrefix(line, protoFTP) {
-               m = LinkRegex.FindSubmatchIndex(line)
+               m = GlobalVars().LinkRegex.FindSubmatchIndex(line)
        }
        if m == nil && bytes.HasPrefix(line, domainWWW) {
-               m = wwwURLRegxp.FindSubmatchIndex(line)
+               m = GlobalVars().wwwURLRegxp.FindSubmatchIndex(line)
                protocol = []byte("http")
        }
        if m != nil {
index d991527b80f59ea6faf1c1e55540acbf634ecd80..06f3acfa689486572cc39a51dd79e55395b3a316 100644 (file)
@@ -6,8 +6,7 @@ package console
 import (
        "bytes"
        "io"
-       "path/filepath"
-       "regexp"
+       "path"
 
        "code.gitea.io/gitea/modules/markup"
        "code.gitea.io/gitea/modules/setting"
@@ -36,7 +35,7 @@ func (Renderer) Extensions() []string {
 // SanitizerRules implements markup.Renderer
 func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
        return []setting.MarkupSanitizerRule{
-               {Element: "span", AllowAttr: "class", Regexp: regexp.MustCompile(`^term-((fg[ix]?|bg)\d+|container)$`)},
+               {Element: "span", AllowAttr: "class", Regexp: `^term-((fg[ix]?|bg)\d+|container)$`},
        }
 }
 
@@ -46,7 +45,7 @@ func (Renderer) CanRender(filename string, input io.Reader) bool {
        if err != nil {
                return false
        }
-       if enry.GetLanguage(filepath.Base(filename), buf) != enry.OtherLanguage {
+       if enry.GetLanguage(path.Base(filename), buf) != enry.OtherLanguage {
                return false
        }
        return bytes.ContainsRune(buf, '\x1b')
index 3d952b0de4fe38e261201b1137af4cb16cfc96ef..a3e6bbaac683096fe4fba4b1552b3824c0159ca6 100644 (file)
@@ -7,7 +7,6 @@ import (
        "bufio"
        "html"
        "io"
-       "regexp"
        "strconv"
 
        "code.gitea.io/gitea/modules/csv"
@@ -37,9 +36,9 @@ func (Renderer) Extensions() []string {
 // SanitizerRules implements markup.Renderer
 func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
        return []setting.MarkupSanitizerRule{
-               {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
-               {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
-               {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
+               {Element: "table", AllowAttr: "class", Regexp: `^data-table$`},
+               {Element: "th", AllowAttr: "class", Regexp: `^line-num$`},
+               {Element: "td", AllowAttr: "class", Regexp: `^line-num$`},
        }
 }
 
@@ -51,13 +50,13 @@ func writeField(w io.Writer, element, class, field string) error {
                return err
        }
        if len(class) > 0 {
-               if _, err := io.WriteString(w, " class=\""); err != nil {
+               if _, err := io.WriteString(w, ` class="`); err != nil {
                        return err
                }
                if _, err := io.WriteString(w, class); err != nil {
                        return err
                }
-               if _, err := io.WriteString(w, "\""); err != nil {
+               if _, err := io.WriteString(w, `"`); err != nil {
                        return err
                }
        }
index 122517ed11c0a250842e3540d39dd1aa29f6e616..d28dc9fa5d19a99734b8055d801f5a199f69286b 100644 (file)
@@ -102,7 +102,7 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.
 
                _, err = io.Copy(f, input)
                if err != nil {
-                       f.Close()
+                       _ = f.Close()
                        return fmt.Errorf("%s write data to temp file when rendering %s failed: %w", p.Name(), p.Command, err)
                }
 
@@ -113,10 +113,9 @@ func (p *Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.
                args = append(args, f.Name())
        }
 
-       if ctx == nil || ctx.Ctx == nil {
-               if ctx == nil {
-                       log.Warn("RenderContext not provided defaulting to empty ctx")
-                       ctx = &markup.RenderContext{}
+       if ctx.Ctx == nil {
+               if !setting.IsProd || setting.IsInTesting {
+                       panic("RenderContext did not provide context")
                }
                log.Warn("RenderContext did not provide context, defaulting to Shutdown context")
                ctx.Ctx = graceful.GetManager().ShutdownContext()
index 16ccd4b40672f7a7873a9f484d29ba8b0bd86e33..e8799c401c5376952d610c1288785284e871851e 100644 (file)
@@ -25,9 +25,6 @@ const (
        IssueNameStyleRegexp       = "regexp"
 )
 
-// CSS class for action keywords (e.g. "closes: #1")
-const keywordClass = "issue-keyword"
-
 type globalVarsType struct {
        hashCurrentPattern      *regexp.Regexp
        shortLinkPattern        *regexp.Regexp
@@ -39,6 +36,7 @@ type globalVarsType struct {
        emojiShortCodeRegex     *regexp.Regexp
        issueFullPattern        *regexp.Regexp
        filesChangedFullPattern *regexp.Regexp
+       codePreviewPattern      *regexp.Regexp
 
        tagCleaner *regexp.Regexp
        nulCleaner *strings.Replacer
@@ -88,6 +86,9 @@ var globalVars = sync.OnceValue[*globalVarsType](func() *globalVarsType {
        // example: https://domain/org/repo/pulls/27/files#hash
        v.filesChangedFullPattern = regexp.MustCompile(`https?://(?:\S+/)[\w_.-]+/[\w_.-]+/pulls/((?:\w{1,10}-)?[1-9][0-9]*)/files([\?|#](\S+)?)?\b`)
 
+       // codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20"
+       v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`)
+
        v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
        v.nulCleaner = strings.NewReplacer("\000", "")
        return v
@@ -129,7 +130,7 @@ func CustomLinkURLSchemes(schemes []string) {
                }
                withAuth = append(withAuth, s)
        }
-       common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|"))
+       common.GlobalVars().LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|"))
 }
 
 type postProcessError struct {
@@ -164,11 +165,7 @@ var defaultProcessors = []processor{
 // emails with HTML links, parsing shortlinks in the format of [[Link]], like
 // MediaWiki, linking issues in the format #ID, and mentions in the format
 // @user, and others.
-func PostProcess(
-       ctx *RenderContext,
-       input io.Reader,
-       output io.Writer,
-) error {
+func PostProcess(ctx *RenderContext, input io.Reader, output io.Writer) error {
        return postProcess(ctx, defaultProcessors, input, output)
 }
 
@@ -189,10 +186,7 @@ var commitMessageProcessors = []processor{
 // RenderCommitMessage will use the same logic as PostProcess, but will disable
 // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
 // set, which changes every text node into a link to the passed default link.
-func RenderCommitMessage(
-       ctx *RenderContext,
-       content string,
-) (string, error) {
+func RenderCommitMessage(ctx *RenderContext, content string) (string, error) {
        procs := commitMessageProcessors
        return renderProcessString(ctx, procs, content)
 }
@@ -219,10 +213,7 @@ var emojiProcessors = []processor{
 // RenderCommitMessage, but will disable the shortLinkProcessor and
 // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set,
 // which changes every text node into a link to the passed default link.
-func RenderCommitMessageSubject(
-       ctx *RenderContext,
-       defaultLink, content string,
-) (string, error) {
+func RenderCommitMessageSubject(ctx *RenderContext, defaultLink, content string) (string, error) {
        procs := slices.Clone(commitMessageSubjectProcessors)
        procs = append(procs, func(ctx *RenderContext, node *html.Node) {
                ch := &html.Node{Parent: node, Type: html.TextNode, Data: node.Data}
@@ -236,10 +227,7 @@ func RenderCommitMessageSubject(
 }
 
 // RenderIssueTitle to process title on individual issue/pull page
-func RenderIssueTitle(
-       ctx *RenderContext,
-       title string,
-) (string, error) {
+func RenderIssueTitle(ctx *RenderContext, title string) (string, error) {
        // do not render other issue/commit links in an issue's title - which in most cases is already a link.
        return renderProcessString(ctx, []processor{
                emojiShortCodeProcessor,
@@ -257,10 +245,7 @@ func renderProcessString(ctx *RenderContext, procs []processor, content string)
 
 // RenderDescriptionHTML will use similar logic as PostProcess, but will
 // use a single special linkProcessor.
-func RenderDescriptionHTML(
-       ctx *RenderContext,
-       content string,
-) (string, error) {
+func RenderDescriptionHTML(ctx *RenderContext, content string) (string, error) {
        return renderProcessString(ctx, []processor{
                descriptionLinkProcessor,
                emojiShortCodeProcessor,
@@ -270,10 +255,7 @@ func RenderDescriptionHTML(
 
 // RenderEmoji for when we want to just process emoji and shortcodes
 // in various places it isn't already run through the normal markdown processor
-func RenderEmoji(
-       ctx *RenderContext,
-       content string,
-) (string, error) {
+func RenderEmoji(ctx *RenderContext, content string) (string, error) {
        return renderProcessString(ctx, emojiProcessors, content)
 }
 
@@ -333,6 +315,17 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output
        return nil
 }
 
+func isEmojiNode(node *html.Node) bool {
+       if node.Type == html.ElementNode && node.Data == atom.Span.String() {
+               for _, attr := range node.Attr {
+                       if (attr.Key == "class" || attr.Key == "data-attr-class") && strings.Contains(attr.Val, "emoji") {
+                               return true
+                       }
+               }
+       }
+       return false
+}
+
 func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node {
        // Add user-content- to IDs and "#" links if they don't already have them
        for idx, attr := range node.Attr {
@@ -346,47 +339,27 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod
                if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix {
                        node.Attr[idx].Val = "#user-content-" + val
                }
-
-               if attr.Key == "class" && attr.Val == "emoji" {
-                       procs = nil
-               }
        }
 
        switch node.Type {
        case html.TextNode:
-               processTextNodes(ctx, procs, node)
+               for _, proc := range procs {
+                       proc(ctx, node) // it might add siblings
+               }
+
        case html.ElementNode:
-               if node.Data == "code" || node.Data == "pre" {
-                       // ignore code and pre nodes
+               if isEmojiNode(node) {
+                       // TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
+                       // if we don't stop it, it will go into the TextNode again and create an infinite recursion
                        return node.NextSibling
+               } else if node.Data == "code" || node.Data == "pre" {
+                       return node.NextSibling // ignore code and pre nodes
                } else if node.Data == "img" {
                        return visitNodeImg(ctx, node)
                } else if node.Data == "video" {
                        return visitNodeVideo(ctx, node)
                } else if node.Data == "a" {
-                       // Restrict text in links to emojis
-                       procs = emojiProcessors
-               } else if node.Data == "i" {
-                       for _, attr := range node.Attr {
-                               if attr.Key != "class" {
-                                       continue
-                               }
-                               classes := strings.Split(attr.Val, " ")
-                               for i, class := range classes {
-                                       if class == "icon" {
-                                               classes[0], classes[i] = classes[i], classes[0]
-                                               attr.Val = strings.Join(classes, " ")
-
-                                               // Remove all children of icons
-                                               child := node.FirstChild
-                                               for child != nil {
-                                                       node.RemoveChild(child)
-                                                       child = node.FirstChild
-                                               }
-                                               break
-                                       }
-                               }
-                       }
+                       procs = emojiProcessors // Restrict text in links to emojis
                }
                for n := node.FirstChild; n != nil; {
                        n = visitNode(ctx, procs, n)
@@ -396,22 +369,17 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod
        return node.NextSibling
 }
 
-// processTextNodes runs the passed node through various processors, in order to handle
-// all kinds of special links handled by the post-processing.
-func processTextNodes(ctx *RenderContext, procs []processor, node *html.Node) {
-       for _, p := range procs {
-               p(ctx, node)
-       }
-}
-
 // createKeyword() renders a highlighted version of an action keyword
-func createKeyword(content string) *html.Node {
+func createKeyword(ctx *RenderContext, content string) *html.Node {
+       // CSS class for action keywords (e.g. "closes: #1")
+       const keywordClass = "issue-keyword"
+
        span := &html.Node{
                Type: html.ElementNode,
                Data: atom.Span.String(),
                Attr: []html.Attribute{},
        }
-       span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass})
+       span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", keywordClass))
 
        text := &html.Node{
                Type: html.TextNode,
@@ -422,7 +390,7 @@ func createKeyword(content string) *html.Node {
        return span
 }
 
-func createLink(href, content, class string) *html.Node {
+func createLink(ctx *RenderContext, href, content, class string) *html.Node {
        a := &html.Node{
                Type: html.ElementNode,
                Data: atom.A.String(),
@@ -432,7 +400,7 @@ func createLink(href, content, class string) *html.Node {
                a.Attr = append(a.Attr, html.Attribute{Key: "data-markdown-generated-content"})
        }
        if class != "" {
-               a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
+               a.Attr = append(a.Attr, ctx.RenderInternal.NodeSafeAttr("class", class))
        }
 
        text := &html.Node{
index 5ab9290b3e42f5187a43b66a5ae5fbbec4cfae65..5c88481d769efcc05bdf6fea60e1f994bc3de2a7 100644 (file)
@@ -6,7 +6,6 @@ package markup
 import (
        "html/template"
        "net/url"
-       "regexp"
        "strconv"
        "strings"
 
@@ -16,9 +15,6 @@ import (
        "golang.org/x/net/html"
 )
 
-// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20"
-var codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`)
-
 type RenderCodePreviewOptions struct {
        FullURL   string
        OwnerName string
@@ -30,7 +26,7 @@ type RenderCodePreviewOptions struct {
 }
 
 func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosStop int, htm template.HTML, err error) {
-       m := codePreviewPattern.FindStringSubmatchIndex(node.Data)
+       m := globalVars().codePreviewPattern.FindStringSubmatchIndex(node.Data)
        if m == nil {
                return 0, 0, "", nil
        }
@@ -66,8 +62,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
                        node = node.NextSibling
                        continue
                }
-               urlPosStart, urlPosEnd, h, err := renderCodeBlock(ctx, node)
-               if err != nil || h == "" {
+               urlPosStart, urlPosEnd, renderedCodeBlock, err := renderCodeBlock(ctx, node)
+               if err != nil || renderedCodeBlock == "" {
                        if err != nil {
                                log.Error("Unable to render code preview: %v", err)
                        }
@@ -84,7 +80,8 @@ func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
                //    then it is resolved as: "<p>{TextBefore}</p><div NewNode/><p>{TextAfter}</p>",
                //    so unless it could correctly replace the parent "p/li" node, it is very difficult to eliminate the "TextBefore" empty node.
                node.Data = textBefore
-               node.Parent.InsertBefore(&html.Node{Type: html.RawNode, Data: string(h)}, next)
+               renderedCodeNode := &html.Node{Type: html.RawNode, Data: string(ctx.RenderInternal.ProtectSafeAttrs(renderedCodeBlock))}
+               node.Parent.InsertBefore(renderedCodeNode, next)
                if textAfter != "" {
                        node.Parent.InsertBefore(&html.Node{Type: html.TextNode, Data: textAfter}, next)
                }
index 32d0285eb4c464f3c8e5ca83e730ef849f303d86..cbfae8b82940e40bd4ad1297e29ad02df7419b32 100644 (file)
@@ -15,7 +15,7 @@ func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
                }
 
                mail := node.Data[m[2]:m[3]]
-               replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
+               replaceContent(node, m[2], m[3], createLink(ctx, "mailto:"+mail, mail, "" /*mailto*/))
                node = node.NextSibling.NextSibling
        }
 }
index 6eacb2067f02801cf62d95da618320d60cf88472..c63806542524c72466a0d0f2b62ed8ec267ee702 100644 (file)
@@ -13,15 +13,13 @@ import (
        "golang.org/x/net/html/atom"
 )
 
-func createEmoji(content, class, name string) *html.Node {
+func createEmoji(ctx *RenderContext, content, name string) *html.Node {
        span := &html.Node{
                Type: html.ElementNode,
                Data: atom.Span.String(),
                Attr: []html.Attribute{},
        }
-       if class != "" {
-               span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class})
-       }
+       span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji"))
        if name != "" {
                span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name})
        }
@@ -35,13 +33,13 @@ func createEmoji(content, class, name string) *html.Node {
        return span
 }
 
-func createCustomEmoji(alias string) *html.Node {
+func createCustomEmoji(ctx *RenderContext, alias string) *html.Node {
        span := &html.Node{
                Type: html.ElementNode,
                Data: atom.Span.String(),
                Attr: []html.Attribute{},
        }
-       span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"})
+       span.Attr = append(span.Attr, ctx.RenderInternal.NodeSafeAttr("class", "emoji"))
        span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias})
 
        img := &html.Node{
@@ -77,7 +75,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
                if converted == nil {
                        // check if this is a custom reaction
                        if _, exist := setting.UI.CustomEmojisMap[alias]; exist {
-                               replaceContent(node, m[0], m[1], createCustomEmoji(alias))
+                               replaceContent(node, m[0], m[1], createCustomEmoji(ctx, alias))
                                node = node.NextSibling.NextSibling
                                start = 0
                                continue
@@ -85,7 +83,7 @@ func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
                        continue
                }
 
-               replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
+               replaceContent(node, m[0], m[1], createEmoji(ctx, converted.Emoji, converted.Description))
                node = node.NextSibling.NextSibling
                start = 0
        }
@@ -107,7 +105,7 @@ func emojiProcessor(ctx *RenderContext, node *html.Node) {
                start = m[1]
                val := emoji.FromCode(codepoint)
                if val != nil {
-                       replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
+                       replaceContent(node, m[0], m[1], createEmoji(ctx, codepoint, val.Description))
                        node = node.NextSibling.NextSibling
                        start = 0
                }
index 2acf154ad2ad7607539ae81ecd6a899fea19a728..7341af7eb697b908d9b2cf2ba9ed81d072fe7ce8 100644 (file)
@@ -57,10 +57,10 @@ func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
                matchRepo := linkParts[len(linkParts)-3]
 
                if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
-                       replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue"))
+                       replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue"))
                } else {
                        text = matchOrg + "/" + matchRepo + text
-                       replaceContent(node, m[0], m[1], createLink(link, text, "ref-issue"))
+                       replaceContent(node, m[0], m[1], createLink(ctx, link, text, "ref-issue"))
                }
                node = node.NextSibling.NextSibling
        }
@@ -129,16 +129,16 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
                                log.Error("unable to expand template vars for ref %s, err: %v", ref.Issue, err)
                        }
 
-                       link = createLink(res, reftext, "ref-issue ref-external-issue")
+                       link = createLink(ctx, res, reftext, "ref-issue ref-external-issue")
                } else {
                        // Path determines the type of link that will be rendered. It's unknown at this point whether
                        // the linked item is actually a PR or an issue. Luckily it's of no real consequence because
                        // Gitea will redirect on click as appropriate.
                        issuePath := util.Iif(ref.IsPull, "pulls", "issues")
                        if ref.Owner == "" {
-                               link = createLink(util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue")
+                               link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ctx.Metas["user"], ctx.Metas["repo"], issuePath, ref.Issue), reftext, "ref-issue")
                        } else {
-                               link = createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue")
+                               link = createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, issuePath, ref.Issue), reftext, "ref-issue")
                        }
                }
 
@@ -151,7 +151,7 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
                // Decorate action keywords if actionable
                var keyword *html.Node
                if references.IsXrefActionable(ref, hasExtTrackFormat) {
-                       keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
+                       keyword = createKeyword(ctx, node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
                } else {
                        keyword = &html.Node{
                                Type: html.TextNode,
@@ -177,7 +177,7 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
                }
 
                reftext := ref.Owner + "/" + ref.Name + "@" + base.ShortSha(ref.CommitSha)
-               link := createLink(util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit")
+               link := createLink(ctx, util.URLJoin(ctx.Links.Prefix(), ref.Owner, ref.Name, "commit", ref.CommitSha), reftext, "commit")
 
                replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
                node = node.NextSibling.NextSibling
index b7562d0aa6d26a4cb5d840c9d15bd64b633ea86d..32aa7dc614ca78831bf3f93fcc513c3cec5da1d7 100644 (file)
@@ -189,13 +189,13 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
 func linkProcessor(ctx *RenderContext, node *html.Node) {
        next := node.NextSibling
        for node != nil && node != next {
-               m := common.LinkRegex.FindStringIndex(node.Data)
+               m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data)
                if m == nil {
                        return
                }
 
                uri := node.Data[m[0]:m[1]]
-               replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
+               replaceContent(node, m[0], m[1], createLink(ctx, uri, uri, "" /*link*/))
                node = node.NextSibling.NextSibling
        }
 }
@@ -204,7 +204,7 @@ func linkProcessor(ctx *RenderContext, node *html.Node) {
 func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
        next := node.NextSibling
        for node != nil && node != next {
-               m := common.LinkRegex.FindStringIndex(node.Data)
+               m := common.GlobalVars().LinkRegex.FindStringIndex(node.Data)
                if m == nil {
                        return
                }
index 3f0692e05f55e38423943745912eb2186a2cd151..f7e2ad50f139fe0082a8817d6e75e573baa8937a 100644 (file)
@@ -33,7 +33,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
                if ok && strings.Contains(mention, "/") {
                        mentionOrgAndTeam := strings.Split(mention, "/")
                        if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
-                               replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
+                               replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "" /*mention*/))
                                node = node.NextSibling.NextSibling
                                start = 0
                                continue
@@ -44,7 +44,7 @@ func mentionProcessor(ctx *RenderContext, node *html.Node) {
                mentionedUsername := mention[1:]
 
                if DefaultProcessorHelper.IsUsernameMentionable != nil && DefaultProcessorHelper.IsUsernameMentionable(ctx.Ctx, mentionedUsername) {
-                       replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "mention"))
+                       replaceContent(node, loc.Start, loc.End, createLink(ctx, util.URLJoin(ctx.Links.Prefix(), mentionedUsername), mention, "" /*mention*/))
                        node = node.NextSibling.NextSibling
                        start = 0
                } else {
diff --git a/modules/markup/internal/finalprocessor.go b/modules/markup/internal/finalprocessor.go
new file mode 100644 (file)
index 0000000..14d46a1
--- /dev/null
@@ -0,0 +1,30 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+       "bytes"
+       "io"
+)
+
+type finalProcessor struct {
+       renderInternal *RenderInternal // supplies the secureIDPrefix that Close searches for
+
+       output io.Writer    // receives the processed content when Close is called
+       buf    bytes.Buffer // accumulates all data passed to Write
+}
+
+func (p *finalProcessor) Write(data []byte) (int, error) {
+       p.buf.Write(data) // only buffer here; nothing reaches p.output until Close
+       return len(data), nil
+}
+
+func (p *finalProcessor) Close() error {
+       // TODO: buffering the whole content isn't a problem at the moment,
+       // because "postProcess" already does so. In the future we could optimize the code to process data on the fly.
+       buf := p.buf.Bytes()
+       buf = bytes.ReplaceAll(buf, []byte(` data-attr-class="`+p.renderInternal.secureIDPrefix), []byte(` class="`)) // turn protected class attributes carrying this prefix back into plain class attributes
+       _, err := p.output.Write(buf)
+       return err
+}
diff --git a/modules/markup/internal/internal_test.go b/modules/markup/internal/internal_test.go
new file mode 100644 (file)
index 0000000..98ff3bc
--- /dev/null
@@ -0,0 +1,61 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+       "bytes"
+       "html/template"
+       "io"
+       "testing"
+
+       "github.com/stretchr/testify/assert"
+)
+
+func TestRenderInternal(t *testing.T) {
+       cases := []struct {
+               input, protected, recovered string
+       }{
+               { // class-like text outside of a tag must stay untouched
+                       input:     `<div class="test">class="content"</div>`,
+                       protected: `<div data-attr-class="sec:test">class="content"</div>`,
+                       recovered: `<div class="test">class="content"</div>`,
+               },
+               { // the whitespace before "class" may be a newline; it becomes a single space
+                       input:     "<div\nclass=\"test\" data-xxx></div>",
+                       protected: `<div data-attr-class="sec:test" data-xxx></div>`,
+                       recovered: `<div class="test" data-xxx></div>`,
+               },
+       }
+       for _, c := range cases { // round trip: protect, write through the final processor, compare the recovered output
+               var r RenderInternal
+               out := &bytes.Buffer{}
+               in := r.init("sec", out)
+               protected := r.ProtectSafeAttrs(template.HTML(c.input))
+               assert.EqualValues(t, c.protected, protected)
+               _, _ = io.WriteString(in, string(protected))
+               _ = in.Close()
+               assert.EqualValues(t, c.recovered, out.String())
+       }
+
+       var r1, r2 RenderInternal
+       protected := r1.ProtectSafeAttrs(`<div class="test"></div>`)
+       assert.EqualValues(t, `<div class="test"></div>`, protected, "non-initialized RenderInternal should not protect any attributes")
+       _ = r1.init("sec", nil) // from here on r1 protects attributes with the "sec" ID
+       protected = r1.ProtectSafeAttrs(`<div class="test"></div>`)
+       assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, protected)
+       assert.EqualValues(t, "data-attr-class", r1.SafeAttr("class"))
+       assert.EqualValues(t, "sec:val", r1.SafeValue("val"))
+       recovered, ok := r1.RecoverProtectedValue("sec:val")
+       assert.True(t, ok)
+       assert.EqualValues(t, "val", recovered)
+       recovered, ok = r1.RecoverProtectedValue("other:val") // a value with a foreign prefix is rejected
+       assert.False(t, ok)
+       assert.Empty(t, recovered)
+
+       out2 := &bytes.Buffer{}
+       in2 := r2.init("sec-other", out2)
+       _, _ = io.WriteString(in2, string(protected)) // content protected by r1 ("sec") goes through r2's processor
+       _ = in2.Close()
+       assert.EqualValues(t, `<div data-attr-class="sec:test"></div>`, out2.String(), "different secureID should not recover the value")
+}
diff --git a/modules/markup/internal/renderinternal.go b/modules/markup/internal/renderinternal.go
new file mode 100644 (file)
index 0000000..4775fec
--- /dev/null
@@ -0,0 +1,82 @@
+// Copyright 2024 The Gitea Authors. All rights reserved.
+// SPDX-License-Identifier: MIT
+
+package internal
+
+import (
+       "crypto/rand"
+       "encoding/base64"
+       "html/template"
+       "io"
+       "regexp"
+       "strings"
+       "sync"
+
+       "code.gitea.io/gitea/modules/htmlutil"
+
+       "golang.org/x/net/html"
+)
+
+var reAttrClass = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp {
+       // TODO: matching HTML tags with a regexp is fragile, but it isn't a problem at the moment because our HTML contents are always well constructed
+       return regexp.MustCompile(`(<[^>]+)\s+class="([^"]+)"([^>]*>)`)
+})
+
+// RenderInternal marks attributes with a per-render secure ID; it also works without initialization.
+// Without initialization (empty secureID), it does not protect any attributes and returns the original name and value.
+type RenderInternal struct {
+       secureID       string // set by init; empty means "not initialized"
+       secureIDPrefix string // secureID + ":", the prefix carried by protected values
+}
+
+func (r *RenderInternal) Init(output io.Writer) io.WriteCloser {
+       buf := make([]byte, 12) // 12 random bytes encode to a 16-character base64 ID without padding
+       _, err := rand.Read(buf)
+       if err != nil {
+               panic("unable to generate secure id")
+       }
+       return r.init(base64.URLEncoding.EncodeToString(buf), output) // the returned writer only emits to output when it is closed
+}
+
+func (r *RenderInternal) init(secID string, output io.Writer) io.WriteCloser { // takes the ID as a parameter, so tests can pass a fixed one
+       r.secureID = secID
+       r.secureIDPrefix = r.secureID + ":"
+       return &finalProcessor{renderInternal: r, output: output}
+}
+
+func (r *RenderInternal) RecoverProtectedValue(v string) (string, bool) {
+       if !strings.HasPrefix(v, r.secureIDPrefix) {
+               return "", false // the value does not carry this instance's prefix
+       }
+       return v[len(r.secureIDPrefix):], true // strip the prefix and return the original value
+}
+
+func (r *RenderInternal) SafeAttr(name string) string {
+       if r.secureID == "" {
+               return name // not initialized: keep the original attribute name
+       }
+       return "data-attr-" + name // e.g. "class" becomes "data-attr-class"
+}
+
+func (r *RenderInternal) SafeValue(val string) string {
+       if r.secureID == "" {
+               return val // not initialized: keep the original value
+       }
+       return r.secureID + ":" + val // same prefix as secureIDPrefix, which RecoverProtectedValue strips
+}
+
+func (r *RenderInternal) NodeSafeAttr(attr, val string) html.Attribute {
+       return html.Attribute{Key: r.SafeAttr(attr), Val: r.SafeValue(val)} // builds an html.Attribute from SafeAttr(attr) and SafeValue(val)
+}
+
+func (r *RenderInternal) ProtectSafeAttrs(content template.HTML) template.HTML {
+       if r.secureID == "" {
+               return content // not initialized: leave the content untouched
+       }
+       return template.HTML(reAttrClass().ReplaceAllString(string(content), `$1 data-attr-class="`+r.secureIDPrefix+`$2"$3`)) // rewrite class="x" inside a tag into data-attr-class="<secureID>:x"
+}
+
+func (r *RenderInternal) FormatWithSafeAttrs(w io.Writer, fmt string, a ...any) error {
+       _, err := w.Write([]byte(r.ProtectSafeAttrs(htmlutil.HTMLFormat(fmt, a...)))) // format with htmlutil.HTMLFormat, pass through ProtectSafeAttrs, then write to w
+       return err
+}
index 624c35d94537149255e688f2cb30c4e53c34646d..ca165b1ba045fdc455fd132d69016c2ceb1d454c 100644 (file)
@@ -34,13 +34,6 @@ func NewDetails() *Details {
        }
 }
 
-// IsDetails returns true if the given node implements the Details interface,
-// otherwise false.
-func IsDetails(node ast.Node) bool {
-       _, ok := node.(*Details)
-       return ok
-}
-
 // Summary is a block that contains the summary of details block
 type Summary struct {
        ast.BaseBlock
@@ -66,13 +59,6 @@ func NewSummary() *Summary {
        }
 }
 
-// IsSummary returns true if the given node implements the Summary interface,
-// otherwise false.
-func IsSummary(node ast.Node) bool {
-       _, ok := node.(*Summary)
-       return ok
-}
-
 // TaskCheckBoxListItem is a block that represents a list item of a markdown block with a checkbox
 type TaskCheckBoxListItem struct {
        *ast.ListItem
@@ -103,14 +89,7 @@ func NewTaskCheckBoxListItem(listItem *ast.ListItem) *TaskCheckBoxListItem {
        }
 }
 
-// IsTaskCheckBoxListItem returns true if the given node implements the TaskCheckBoxListItem interface,
-// otherwise false.
-func IsTaskCheckBoxListItem(node ast.Node) bool {
-       _, ok := node.(*TaskCheckBoxListItem)
-       return ok
-}
-
-// Icon is an inline for a fomantic icon
+// Icon is an inline for a Fomantic UI icon
 type Icon struct {
        ast.BaseInline
        Name []byte
@@ -139,13 +118,6 @@ func NewIcon(name string) *Icon {
        }
 }
 
-// IsIcon returns true if the given node implements the Icon interface,
-// otherwise false.
-func IsIcon(node ast.Node) bool {
-       _, ok := node.(*Icon)
-       return ok
-}
-
 // ColorPreview is an inline for a color preview
 type ColorPreview struct {
        ast.BaseInline
index c837b21e7804ce37766000b12d48777c938ec84f..47dcfa8b5afc1bcb6ee9c9e8bc17dfd9301ce8b4 100644 (file)
@@ -7,9 +7,11 @@ import (
        "fmt"
        "regexp"
        "strings"
+       "sync"
 
        "code.gitea.io/gitea/modules/container"
        "code.gitea.io/gitea/modules/markup"
+       "code.gitea.io/gitea/modules/markup/internal"
        "code.gitea.io/gitea/modules/setting"
 
        "github.com/yuin/goldmark/ast"
@@ -23,11 +25,13 @@ import (
 
 // ASTTransformer is a default transformer of the goldmark tree.
 type ASTTransformer struct {
+       renderInternal *internal.RenderInternal
        attentionTypes container.Set[string]
 }
 
-func NewASTTransformer() *ASTTransformer {
+func NewASTTransformer(renderInternal *internal.RenderInternal) *ASTTransformer {
        return &ASTTransformer{
+               renderInternal: renderInternal,
                attentionTypes: container.SetOf("note", "tip", "important", "warning", "caution"),
        }
 }
@@ -109,12 +113,16 @@ func (g *ASTTransformer) Transform(node *ast.Document, reader text.Reader, pc pa
        }
 }
 
-// NewHTMLRenderer creates a HTMLRenderer to render
-// in the gitea form.
-func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
+// reValidIconName is adapted from the old code (old pattern noted below); it is doubtful whether the pattern is correct
+var reValidIconName = sync.OnceValue[*regexp.Regexp](func() *regexp.Regexp {
+       return regexp.MustCompile(`^[-\w]+$`) // old: regexp.MustCompile("^[a-z ]+$")
+})
+
+// NewHTMLRenderer creates a HTMLRenderer to render in the gitea form.
+func NewHTMLRenderer(renderInternal *internal.RenderInternal, opts ...html.Option) renderer.NodeRenderer {
        r := &HTMLRenderer{
-               Config:      html.NewConfig(),
-               reValidName: regexp.MustCompile("^[a-z ]+$"),
+               renderInternal: renderInternal,
+               Config:         html.NewConfig(),
        }
        for _, opt := range opts {
                opt.SetHTMLOption(&r.Config)
@@ -126,7 +134,7 @@ func NewHTMLRenderer(opts ...html.Option) renderer.NodeRenderer {
 // renders gitea specific features.
 type HTMLRenderer struct {
        html.Config
-       reValidName *regexp.Regexp
+       renderInternal *internal.RenderInternal
 }
 
 // RegisterFuncs implements renderer.NodeRenderer.RegisterFuncs.
@@ -214,12 +222,13 @@ func (r *HTMLRenderer) renderIcon(w util.BufWriter, source []byte, node ast.Node
                return ast.WalkContinue, nil
        }
 
-       if !r.reValidName.MatchString(name) {
+       if !reValidIconName().MatchString(name) {
                // skip this
                return ast.WalkContinue, nil
        }
 
-       _, err := w.WriteString(fmt.Sprintf(`<i class="icon %s"></i>`, name))
+       // FIXME: the "icon xxx" is from Fomantic UI, it's really questionable whether it still works correctly
+       err := r.renderInternal.FormatWithSafeAttrs(w, `<i class="icon %s"></i>`, name)
        if err != nil {
                return ast.WalkStop, err
        }
index 6af0deb27bb2bd00865e13f3392db6ec882a281f..a3915ad4398d3258276a754586256de6a913f682 100644 (file)
@@ -9,7 +9,6 @@ import (
        "html/template"
        "io"
        "strings"
-       "sync"
 
        "code.gitea.io/gitea/modules/log"
        "code.gitea.io/gitea/modules/markup"
@@ -29,11 +28,6 @@ import (
        "github.com/yuin/goldmark/util"
 )
 
-var (
-       specMarkdown     goldmark.Markdown
-       specMarkdownOnce sync.Once
-)
-
 var (
        renderContextKey = parser.NewContextKey()
        renderConfigKey  = parser.NewContextKey()
@@ -68,85 +62,95 @@ func newParserContext(ctx *markup.RenderContext) parser.Context {
        return pc
 }
 
+type GlodmarkRender struct { // NOTE(review): "Glodmark" looks like a typo of "Goldmark"; renaming would change the exported name
+       ctx *markup.RenderContext // its RenderInternal is used by highlightingRenderer
+
+       goldmarkMarkdown goldmark.Markdown // the goldmark instance that Convert and Renderer delegate to
+}
+
+func (r *GlodmarkRender) Convert(source []byte, writer io.Writer, opts ...parser.ParseOption) error {
+       return r.goldmarkMarkdown.Convert(source, writer, opts...) // plain delegation to the wrapped goldmark instance
+}
+
+func (r *GlodmarkRender) Renderer() renderer.Renderer {
+       return r.goldmarkMarkdown.Renderer() // plain delegation to the wrapped goldmark instance
+}
+
+func (r *GlodmarkRender) highlightingRenderer(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
+       if entering {
+               language, _ := c.Language()
+               if language == nil {
+                       language = []byte("text") // fall back to "text" when the code block has no language
+               }
+
+               languageStr := string(language)
+
+               preClasses := []string{"code-block"}
+               if languageStr == "mermaid" || languageStr == "math" {
+                       preClasses = append(preClasses, "is-loading")
+               }
+
+               err := r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<pre class="%s">`, strings.Join(preClasses, " "))
+               if err != nil {
+                       return // this callback has no error return, so the error is dropped
+               }
+
+               // include language-x class as part of commonmark spec
+               // the "display" class is used by "js/markup/math.js" to render the code element as a block
+               err = r.ctx.RenderInternal.FormatWithSafeAttrs(w, `<code class="chroma language-%s display">`, string(language))
+               if err != nil {
+                       return
+               }
+       } else {
+               _, err := w.WriteString("</code></pre>") // closing tags carry no attributes, so they are written directly
+               if err != nil {
+                       return
+               }
+       }
+}
+
 // SpecializedMarkdown sets up the Gitea specific markdown extensions
-func SpecializedMarkdown() goldmark.Markdown {
-       specMarkdownOnce.Do(func() {
-               specMarkdown = goldmark.New(
-                       goldmark.WithExtensions(
-                               extension.NewTable(
-                                       extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
-                               extension.Strikethrough,
-                               extension.TaskList,
-                               extension.DefinitionList,
-                               common.FootnoteExtension,
-                               highlighting.NewHighlighting(
-                                       highlighting.WithFormatOptions(
-                                               chromahtml.WithClasses(true),
-                                               chromahtml.PreventSurroundingPre(true),
-                                       ),
-                                       highlighting.WithWrapperRenderer(func(w util.BufWriter, c highlighting.CodeBlockContext, entering bool) {
-                                               if entering {
-                                                       language, _ := c.Language()
-                                                       if language == nil {
-                                                               language = []byte("text")
-                                                       }
-
-                                                       languageStr := string(language)
-
-                                                       preClasses := []string{"code-block"}
-                                                       if languageStr == "mermaid" || languageStr == "math" {
-                                                               preClasses = append(preClasses, "is-loading")
-                                                       }
-
-                                                       _, err := w.WriteString(`<pre class="` + strings.Join(preClasses, " ") + `">`)
-                                                       if err != nil {
-                                                               return
-                                                       }
-
-                                                       // include language-x class as part of commonmark spec
-                                                       // the "display" class is used by "js/markup/math.js" to render the code element as a block
-                                                       _, err = w.WriteString(`<code class="chroma language-` + string(language) + ` display">`)
-                                                       if err != nil {
-                                                               return
-                                                       }
-                                               } else {
-                                                       _, err := w.WriteString("</code></pre>")
-                                                       if err != nil {
-                                                               return
-                                                       }
-                                               }
-                                       }),
-                               ),
-                               math.NewExtension(
-                                       math.Enabled(setting.Markdown.EnableMath),
-                               ),
-                               meta.Meta,
-                       ),
-                       goldmark.WithParserOptions(
-                               parser.WithAttribute(),
-                               parser.WithAutoHeadingID(),
-                               parser.WithASTTransformers(
-                                       util.Prioritized(NewASTTransformer(), 10000),
+func SpecializedMarkdown(ctx *markup.RenderContext) *GlodmarkRender {
+       // TODO: it could use a pool to cache the renderers to reuse them with different contexts
+       // at the moment it is fast enough (see the benchmarks)
+       r := &GlodmarkRender{ctx: ctx}
+       r.goldmarkMarkdown = goldmark.New(
+               goldmark.WithExtensions(
+                       extension.NewTable(extension.WithTableCellAlignMethod(extension.TableCellAlignAttribute)),
+                       extension.Strikethrough,
+                       extension.TaskList,
+                       extension.DefinitionList,
+                       common.FootnoteExtension,
+                       highlighting.NewHighlighting(
+                               highlighting.WithFormatOptions(
+                                       chromahtml.WithClasses(true),
+                                       chromahtml.PreventSurroundingPre(true),
                                ),
+                               highlighting.WithWrapperRenderer(r.highlightingRenderer),
                        ),
-                       goldmark.WithRendererOptions(
-                               html.WithUnsafe(),
-                       ),
-               )
-
-               // Override the original Tasklist renderer!
-               specMarkdown.Renderer().AddOptions(
-                       renderer.WithNodeRenderers(
-                               util.Prioritized(NewHTMLRenderer(), 10),
-                       ),
-               )
-       })
-       return specMarkdown
+                       math.NewExtension(&ctx.RenderInternal, math.Enabled(setting.Markdown.EnableMath)),
+                       meta.Meta,
+               ),
+               goldmark.WithParserOptions(
+                       parser.WithAttribute(),
+                       parser.WithAutoHeadingID(),
+                       parser.WithASTTransformers(util.Prioritized(NewASTTransformer(&ctx.RenderInternal), 10000)),
+               ),
+               goldmark.WithRendererOptions(html.WithUnsafe()),
+       )
+
+       // Override the original Tasklist renderer!
+       r.goldmarkMarkdown.Renderer().AddOptions(
+               renderer.WithNodeRenderers(util.Prioritized(NewHTMLRenderer(&ctx.RenderInternal), 10)),
+       )
+
+       return r
 }
 
-// actualRender renders Markdown to HTML without handling special links.
-func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
-       converter := SpecializedMarkdown()
+// render calls goldmark render to convert Markdown to HTML
+// NOTE: The output of this method MUST get sanitized separately!!!
+func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
+       converter := SpecializedMarkdown(ctx)
        lw := &limitWriter{
                w:     output,
                limit: setting.UI.MaxDisplayFileSize * 3,
@@ -160,8 +164,8 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
                }
 
                log.Warn("Unable to render markdown due to panic in goldmark: %v", err)
-               if log.IsDebug() {
-                       log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
+               if (!setting.IsProd && !setting.IsInTesting) || log.IsDebug() {
+                       log.Error("Panic in markdown: %v\n%s", err, log.Stack(2))
                }
        }()
 
@@ -200,26 +204,6 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
        return nil
 }
 
-// Note: The output of this method must get sanitized.
-func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
-       defer func() {
-               err := recover()
-               if err == nil {
-                       return
-               }
-
-               log.Warn("Unable to render markdown due to panic in goldmark - will return raw bytes")
-               if log.IsDebug() {
-                       log.Debug("Panic in markdown: %v\n%s", err, log.Stack(2))
-               }
-               _, err = io.Copy(output, input)
-               if err != nil {
-                       log.Error("io.Copy failed: %v", err)
-               }
-       }()
-       return actualRender(ctx, input, output)
-}
-
 // MarkupName describes markup's name
 var MarkupName = "markdown"
 
index 780df8727f0c879259a287f8a1e1830e652c3f0d..e4889a75e59fde1f4ddc4859ca49734fca5fdda3 100644 (file)
@@ -1051,3 +1051,17 @@ func TestAttention(t *testing.T) {
        // legacy GitHub style
        test(`> **warning**`, renderAttention("warning", "octicon-alert")+"\n</blockquote>")
 }
+
+func BenchmarkSpecializedMarkdown(b *testing.B) {
+       // recorded sample result: 240856 iterations, 4719 ns/op
+       for i := 0; i < b.N; i++ {
+               markdown.SpecializedMarkdown(&markup.RenderContext{})
+       }
+}
+
+func BenchmarkMarkdownRender(b *testing.B) {
+       // recorded sample result: 23202 iterations, 50840 ns/op
+       for i := 0; i < b.N; i++ {
+               _, _ = markdown.RenderString(&markup.RenderContext{Ctx: context.Background()}, "https://example.com\n- a\n- b\n")
+       }
+}
index 84817ef1e4a51b083dea45a75f1ab1685691b19a..0d2a966102e952c3f1465a79cee57595044248bb 100644 (file)
@@ -4,17 +4,21 @@
 package math
 
 import (
+       "code.gitea.io/gitea/modules/markup/internal"
+
        gast "github.com/yuin/goldmark/ast"
        "github.com/yuin/goldmark/renderer"
        "github.com/yuin/goldmark/util"
 )
 
 // BlockRenderer represents a renderer for math Blocks
-type BlockRenderer struct{}
+type BlockRenderer struct {
+       renderInternal *internal.RenderInternal
+}
 
 // NewBlockRenderer creates a new renderer for math Blocks
-func NewBlockRenderer() renderer.NodeRenderer {
-       return &BlockRenderer{}
+func NewBlockRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
+       return &BlockRenderer{renderInternal: renderInternal}
 }
 
 // RegisterFuncs registers the renderer for math Blocks
@@ -33,7 +37,7 @@ func (r *BlockRenderer) writeLines(w util.BufWriter, source []byte, n gast.Node)
 func (r *BlockRenderer) renderBlock(w util.BufWriter, source []byte, node gast.Node, entering bool) (gast.WalkStatus, error) {
        n := node.(*Block)
        if entering {
-               _, _ = w.WriteString(`<pre class="code-block is-loading"><code class="chroma language-math display">`)
+               _ = r.renderInternal.FormatWithSafeAttrs(w, `<pre class="code-block is-loading"><code class="chroma language-math display">`)
                r.writeLines(w, source, n)
        } else {
                _, _ = w.WriteString(`</code></pre>` + "\n")
index 96848099cce230e16c03812c46c7da4cad1cda29..0cff4f1e74e11bcff1e76c53c6ec8693d5523f8e 100644 (file)
@@ -6,17 +6,21 @@ package math
 import (
        "bytes"
 
+       "code.gitea.io/gitea/modules/markup/internal"
+
        "github.com/yuin/goldmark/ast"
        "github.com/yuin/goldmark/renderer"
        "github.com/yuin/goldmark/util"
 )
 
 // InlineRenderer is an inline renderer
-type InlineRenderer struct{}
+type InlineRenderer struct {
+       renderInternal *internal.RenderInternal
+}
 
 // NewInlineRenderer returns a new renderer for inline math
-func NewInlineRenderer() renderer.NodeRenderer {
-       return &InlineRenderer{}
+func NewInlineRenderer(renderInternal *internal.RenderInternal) renderer.NodeRenderer {
+       return &InlineRenderer{renderInternal: renderInternal}
 }
 
 func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Node, entering bool) (ast.WalkStatus, error) {
@@ -25,7 +29,7 @@ func (r *InlineRenderer) renderInline(w util.BufWriter, source []byte, n ast.Nod
                if _, ok := n.(*InlineBlock); ok {
                        extraClass = "display "
                }
-               _, _ = w.WriteString(`<code class="language-math ` + extraClass + `is-loading">`)
+               _ = r.renderInternal.FormatWithSafeAttrs(w, `<code class="language-math %sis-loading">`, extraClass)
                for c := n.FirstChild(); c != nil; c = c.NextSibling() {
                        segment := c.(*ast.Text).Segment
                        value := util.EscapeHTML(segment.Value(source))
index 3d9f376bc60e47a0c69362ac0af670c5557b225b..7e8defcd4a1e79f31ddcf044864d3cb13e04b4ab 100644 (file)
@@ -4,6 +4,8 @@
 package math
 
 import (
+       "code.gitea.io/gitea/modules/markup/internal"
+
        "github.com/yuin/goldmark"
        "github.com/yuin/goldmark/parser"
        "github.com/yuin/goldmark/renderer"
@@ -12,6 +14,7 @@ import (
 
 // Extension is a math extension
 type Extension struct {
+       renderInternal    *internal.RenderInternal
        enabled           bool
        parseDollarInline bool
        parseDollarBlock  bool
@@ -39,38 +42,10 @@ func Enabled(enable ...bool) Option {
        })
 }
 
-// WithInlineDollarParser enables or disables the parsing of $...$
-func WithInlineDollarParser(enable ...bool) Option {
-       value := true
-       if len(enable) > 0 {
-               value = enable[0]
-       }
-       return extensionFunc(func(e *Extension) {
-               e.parseDollarInline = value
-       })
-}
-
-// WithBlockDollarParser enables or disables the parsing of $$...$$
-func WithBlockDollarParser(enable ...bool) Option {
-       value := true
-       if len(enable) > 0 {
-               value = enable[0]
-       }
-       return extensionFunc(func(e *Extension) {
-               e.parseDollarBlock = value
-       })
-}
-
-// Math represents a math extension with default rendered delimiters
-var Math = &Extension{
-       enabled:           true,
-       parseDollarBlock:  true,
-       parseDollarInline: true,
-}
-
 // NewExtension creates a new math extension with the provided options
-func NewExtension(opts ...Option) *Extension {
+func NewExtension(renderInternal *internal.RenderInternal, opts ...Option) *Extension {
        r := &Extension{
+               renderInternal:    renderInternal,
                enabled:           true,
                parseDollarBlock:  true,
                parseDollarInline: true,
@@ -102,7 +77,7 @@ func (e *Extension) Extend(m goldmark.Markdown) {
        m.Parser().AddOptions(parser.WithInlineParsers(inlines...))
 
        m.Renderer().AddOptions(renderer.WithNodeRenderers(
-               util.Prioritized(NewBlockRenderer(), 501),
-               util.Prioritized(NewInlineRenderer(), 502),
+               util.Prioritized(NewBlockRenderer(e.renderInternal), 501),
+               util.Prioritized(NewInlineRenderer(e.renderInternal), 502),
        ))
 }
index 6949966328c4bd14b006cdf7bab3986e292b7611..278c33f1d2f07662312419b9109d629c967ad60b 100644 (file)
@@ -11,10 +11,8 @@ import (
        "github.com/stretchr/testify/assert"
 )
 
-/*
-IssueTemplate is a legacy to keep the unit tests working.
-Copied from structs.IssueTemplate, the original type has been changed a lot to support yaml template.
-*/
+// IssueTemplate is a legacy type kept only so that the unit tests keep working.
+// It was copied from structs.IssueTemplate; the original type has since changed a lot to support YAML templates.
 type IssueTemplate struct {
        Name   string   `json:"name" yaml:"name"`
        Title  string   `json:"title" yaml:"title"`
index 92dc500e6962597b2ecfa7deedb125d87c01e4dc..2651d44a69ff99fce4722e79f2d3ed09f61cdacb 100644 (file)
@@ -32,7 +32,8 @@ func (r *HTMLRenderer) renderAttention(w util.BufWriter, source []byte, node ast
                default: // including "note"
                        octiconName = "info"
                }
-               _, _ = w.WriteString(string(svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)))
+               svgHTML := svg.RenderHTML("octicon-"+octiconName, 16, "attention-icon attention-"+n.AttentionType)
+               _, _ = w.WriteString(string(r.renderInternal.ProtectSafeAttrs(svgHTML)))
        }
        return ast.WalkContinue, nil
 }
@@ -128,13 +129,13 @@ func (g *ASTTransformer) transformBlockquote(v *ast.Blockquote, reader text.Read
        }
 
        // color the blockquote
-       v.SetAttributeString("class", []byte("attention-header attention-"+attentionType))
+       v.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-header attention-"+attentionType)))
 
        // create an emphasis to make it bold
        attentionParagraph := ast.NewParagraph()
        g.applyElementDir(attentionParagraph)
        emphasis := ast.NewEmphasis(2)
-       emphasis.SetAttributeString("class", []byte("attention-"+attentionType))
+       emphasis.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("attention-"+attentionType)))
 
        attentionAstString := ast.NewString([]byte(cases.Title(language.English).String(attentionType)))
 
index ff7d24eec97c9e2ad0fd04688c63f9050ea5a533..bccc43aad251085c12191c698cafcab8800996d3 100644 (file)
@@ -5,7 +5,6 @@ package markdown
 
 import (
        "bytes"
-       "fmt"
        "strings"
 
        "code.gitea.io/gitea/modules/markup"
@@ -40,7 +39,7 @@ func (r *HTMLRenderer) renderCodeSpan(w util.BufWriter, source []byte, n ast.Nod
                                        r.Writer.RawWrite(w, value)
                                }
                        case *ColorPreview:
-                               _, _ = w.WriteString(fmt.Sprintf(`<span class="color-preview" style="background-color: %v"></span>`, string(v.Color)))
+                               _ = r.renderInternal.FormatWithSafeAttrs(w, `<span class="color-preview" style="background-color: %s"></span>`, string(v.Color))
                        }
                }
                return ast.WalkSkipChildren, nil
index b982fd4a8306afe948e0f4425992faf439a7a3b2..c89ad2f2cf34b1558aba09603913d2bcaecbc635 100644 (file)
@@ -72,7 +72,7 @@ func (g *ASTTransformer) transformList(_ *markup.RenderContext, v *ast.List, rc
                        }
                        newChild := NewTaskCheckBoxListItem(listItem)
                        newChild.IsChecked = taskCheckBox.IsChecked
-                       newChild.SetAttributeString("class", []byte("task-list-item"))
+                       newChild.SetAttributeString(g.renderInternal.SafeAttr("class"), []byte(g.renderInternal.SafeValue("task-list-item")))
                        segments := newChild.FirstChild().Lines()
                        if segments.Len() > 0 {
                                segment := segments.At(0)
index 1977dc73f55ef415be4b931801d400fa5bb92b14..f05cb6262645198a4a7fb321a3ab08bf964ba0d0 100644 (file)
@@ -9,14 +9,15 @@ import (
        "io"
        "net/url"
        "strings"
-       "sync"
 
        "code.gitea.io/gitea/modules/git"
        "code.gitea.io/gitea/modules/gitrepo"
+       "code.gitea.io/gitea/modules/markup/internal"
        "code.gitea.io/gitea/modules/setting"
        "code.gitea.io/gitea/modules/util"
 
        "github.com/yuin/goldmark/ast"
+       "golang.org/x/sync/errgroup"
 )
 
 type RenderMetaMode string
@@ -65,6 +66,8 @@ type RenderContext struct {
        SidebarTocNode   ast.Node
        RenderMetaAs     RenderMetaMode
        InStandalonePage bool // used by external render. the router "/org/repo/render/..." will output the rendered content in a standalone page
+
+       RenderInternal internal.RenderInternal
 }
 
 // Cancel runs any cleanup functions that have been registered for this Ctx
@@ -156,59 +159,53 @@ sandbox="allow-scripts"
        return err
 }
 
-func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error {
-       var wg sync.WaitGroup
-       var err error
+func pipes() (io.ReadCloser, io.WriteCloser, func()) {
        pr, pw := io.Pipe()
-       defer func() {
+       return pr, pw, func() {
                _ = pr.Close()
                _ = pw.Close()
-       }()
-
-       var pr2 io.ReadCloser
-       var pw2 io.WriteCloser
-
-       var sanitizerDisabled bool
-       if r, ok := renderer.(ExternalRenderer); ok {
-               sanitizerDisabled = r.SanitizerDisabled()
        }
+}
 
-       if !sanitizerDisabled {
-               pr2, pw2 = io.Pipe()
-               defer func() {
-                       _ = pr2.Close()
-                       _ = pw2.Close()
-               }()
-
-               wg.Add(1)
-               go func() {
-                       err = SanitizeReader(pr2, renderer.Name(), output)
-                       _ = pr2.Close()
-                       wg.Done()
-               }()
-       } else {
-               pw2 = util.NopCloser{Writer: output}
+func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Writer) error {
+       finalProcessor := ctx.RenderInternal.Init(output)
+       defer finalProcessor.Close()
+
+       // input -> renderer -> (pw1=pr1) -> PostProcess or plain copy -> (pw2=pr2) -> SanitizeReader -> finalProcessor -> output
+       // no sanitizer: input -> renderer -> (pw1=pr1) -> PostProcess or plain copy -> pw2(finalProcessor) -> output
+       pr1, pw1, close1 := pipes()
+       defer close1()
+
+       eg, _ := errgroup.WithContext(ctx.Ctx)
+       var pw2 io.WriteCloser = util.NopCloser{Writer: finalProcessor}
+
+       if r, ok := renderer.(ExternalRenderer); !ok || !r.SanitizerDisabled() {
+               var pr2 io.ReadCloser
+               var close2 func()
+               pr2, pw2, close2 = pipes()
+               defer close2()
+               eg.Go(func() error {
+                       defer pr2.Close()
+                       return SanitizeReader(pr2, renderer.Name(), finalProcessor)
+               })
        }
 
-       wg.Add(1)
-       go func() {
+       eg.Go(func() (err error) {
                if r, ok := renderer.(PostProcessRenderer); ok && r.NeedPostProcess() {
-                       err = PostProcess(ctx, pr, pw2)
+                       err = PostProcess(ctx, pr1, pw2)
                } else {
-                       _, err = io.Copy(pw2, pr)
+                       _, err = io.Copy(pw2, pr1)
                }
-               _ = pr.Close()
-               _ = pw2.Close()
-               wg.Done()
-       }()
+               _, _ = pr1.Close(), pw2.Close()
+               return err
+       })
 
-       if err1 := renderer.Render(ctx, input, pw); err1 != nil {
-               return err1
+       if err := renderer.Render(ctx, input, pw1); err != nil {
+               return err
        }
-       _ = pw.Close()
+       _ = pw1.Close()
 
-       wg.Wait()
-       return err
+       return eg.Wait()
 }
 
 // Init initializes the render global variables
index 7978973166f6f86feae108ef0424c4dfbd6d8c90..7f96556fd79546e04267aae939ecc8bc5c8ce76e 100644 (file)
@@ -4,6 +4,9 @@
 package markup
 
 import (
+       "regexp"
+       "strings"
+
        "code.gitea.io/gitea/modules/setting"
 
        "github.com/microcosm-cc/bluemonday"
@@ -15,8 +18,11 @@ func (st *Sanitizer) addSanitizerRules(policy *bluemonday.Policy, rules []settin
                        policy.AllowDataURIImages()
                }
                if rule.Element != "" {
-                       if rule.Regexp != nil {
-                               policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
+                       if rule.Regexp != "" {
+                               if !strings.HasPrefix(rule.Regexp, "^") || !strings.HasSuffix(rule.Regexp, "$") {
+                                       panic("Markup sanitizer rule regexp must start with ^ and end with $ to be strict")
+                               }
+                               policy.AllowAttrs(rule.AllowAttr).Matching(regexp.MustCompile(rule.Regexp)).OnElements(rule.Element)
                        } else {
                                policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
                        }
index 476ae5e26f0c1673d58e48a3c744de25ca5e7edd..0fa54efd45ef932218fcbb5dcf55eb287b4fa163 100644 (file)
@@ -16,36 +16,11 @@ import (
 func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
        policy := bluemonday.UGCPolicy()
 
-       // For JS code copy and Mermaid loading state
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-block( is-loading)?$`)).OnElements("pre")
-
-       // For code preview
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-preview-[-\w]+( file-content)?$`)).Globally()
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-num$`)).OnElements("td")
-       policy.AllowAttrs("data-line-number").OnElements("span")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-code chroma$`)).OnElements("td")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^code-inner$`)).OnElements("div")
-
-       // For code preview (unicode escape)
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^file-view( unicode-escaped)?$`)).OnElements("table")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^lines-escape$`)).OnElements("td")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^toggle-escape-button btn interact-bg$`)).OnElements("a") // don't use button, button might submit a form
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(ambiguous-code-point|escaped-code-point|broken-code-point)$`)).OnElements("span")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^char$`)).OnElements("span")
-       policy.AllowAttrs("data-tooltip-content", "data-escaped").OnElements("span")
-
-       // For color preview
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^color-preview$`)).OnElements("span")
-
-       // For attention
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-header attention-\w+$`)).OnElements("blockquote")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-\w+$`)).OnElements("strong")
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^attention-icon attention-\w+ svg octicon-[\w-]+$`)).OnElements("svg")
-       policy.AllowAttrs("viewBox", "width", "height", "aria-hidden").OnElements("svg")
-       policy.AllowAttrs("fill-rule", "d").OnElements("path")
+       // NOTICE: DO NOT add special "class" regexp rules here anymore, use RenderInternal.SafeAttr instead
 
-       // For Chroma markdown plugin
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+( display)?( is-loading)?$`)).OnElements("code")
+       // General safe SVG attributes
+       policy.AllowAttrs("viewBox", "width", "height", "aria-hidden", "data-attr-class").OnElements("svg")
+       policy.AllowAttrs("fill-rule", "d").OnElements("path")
 
        // Checkboxes
        policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
@@ -66,28 +41,15 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
                policy.AllowURLSchemeWithCustomPolicy("data", disallowScheme)
        }
 
-       // Allow classes for anchors
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue( ref-external-issue)?`)).OnElements("a")
-
-       // Allow classes for task lists
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
-
        // Allow classes for org mode list item status.
        policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(unchecked|checked|indeterminate)$`)).OnElements("li")
 
-       // Allow icons
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
-
-       // Allow classes for emojis
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
-
-       // Allow icons, emojis, chroma syntax and keyword markup on span
-       policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji)|(language-math display)|(language-math inline))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
-
        // Allow 'color' and 'background-color' properties for the style attribute on text elements.
        policy.AllowStyles("color", "background-color").OnElements("span", "p")
 
-       // Allow generally safe attributes
+       policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
+
+       // Allow generally safe attributes (reference: https://github.com/jch/html-pipeline)
        generalSafeAttrs := []string{
                "abbr", "accept", "accept-charset",
                "accesskey", "action", "align", "alt",
@@ -106,10 +68,9 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
                "selected", "shape", "size", "span",
                "start", "summary", "tabindex", "target",
                "title", "type", "usemap", "valign", "value",
-               "vspace", "width", "itemprop",
-               "data-markdown-generated-content",
+               "vspace", "width", "itemprop", "itemscope", "itemtype",
+               "data-markdown-generated-content", "data-attr-class",
        }
-
        generalSafeElements := []string{
                "h1", "h2", "h3", "h4", "h5", "h6", "h7", "h8", "br", "b", "i", "strong", "em", "a", "pre", "code", "img", "tt",
                "div", "ins", "del", "sup", "sub", "p", "ol", "ul", "table", "thead", "tbody", "tfoot", "blockquote", "label",
@@ -117,14 +78,8 @@ func (st *Sanitizer) createDefaultPolicy() *bluemonday.Policy {
                "details", "caption", "figure", "figcaption",
                "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "video", "wbr",
        }
-
-       policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
-
-       policy.AllowAttrs("src", "autoplay", "controls").OnElements("video")
-
-       policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
-
        // FIXME: Need to handle longdesc in img but there is no easy way to do it
+       policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
 
        // Custom keyword markup
        defaultSanitizer.addSanitizerRules(policy, setting.ExternalSanitizerRules)
index 20370509c134fc4801c272f8907132e92d1f4926..c5c43695ea08a796e4235d694b46a20b17d9621e 100644 (file)
@@ -19,7 +19,6 @@ func TestSanitizer(t *testing.T) {
                // Code highlighting class
                `<code class="random string"></code>`, `<code></code>`,
                `<code class="language-random ui tab active menu attached animating sidebar following bar center"></code>`, `<code></code>`,
-               `<code class="language-go"></code>`, `<code class="language-go"></code>`,
 
                // Input checkbox
                `<input type="hidden">`, ``,
@@ -38,10 +37,8 @@ func TestSanitizer(t *testing.T) {
                // <kbd> tags
                `<kbd>Ctrl + C</kbd>`, `<kbd>Ctrl + C</kbd>`,
                `<i class="dropdown icon">NAUGHTY</i>`, `<i>NAUGHTY</i>`,
-               `<i class="icon dropdown"></i>`, `<i class="icon dropdown"></i>`,
                `<input type="checkbox" disabled=""/>unchecked`, `<input type="checkbox" disabled=""/>unchecked`,
                `<span class="emoji dropdown">NAUGHTY</span>`, `<span>NAUGHTY</span>`,
-               `<span class="emoji">contents</span>`, `<span class="emoji">contents</span>`,
 
                // Color property
                `<span style="color: red">Hello World</span>`, `<span style="color: red">Hello World</span>`,
index 6c2246342be8eae0f1bb2470f03cac1f413b7c57..dfce8afa77f820d2e169acfdef1d24992b43d848 100644 (file)
@@ -54,7 +54,7 @@ type MarkupRenderer struct {
 type MarkupSanitizerRule struct {
        Element            string
        AllowAttr          string
-       Regexp             *regexp.Regexp
+       Regexp             string
        AllowDataURIImages bool
 }
 
@@ -117,15 +117,24 @@ func createMarkupSanitizerRule(name string, sec ConfigSection) (MarkupSanitizerR
 
                regexpStr := sec.Key("REGEXP").Value()
                if regexpStr != "" {
-                       // Validate when parsing the config that this is a valid regular
-                       // expression. Then we can use regexp.MustCompile(...) later.
-                       compiled, err := regexp.Compile(regexpStr)
+                       hasPrefix := strings.HasPrefix(regexpStr, "^")
+                       hasSuffix := strings.HasSuffix(regexpStr, "$")
+                       if !hasPrefix || !hasSuffix {
+                               log.Error("In markup.%s: REGEXP must start with ^ and end with $ to be strict", name)
+                               // to avoid breaking existing user configurations and satisfy the strict requirement in addSanitizerRules
+                               if !hasPrefix {
+                                       regexpStr = "^.*" + regexpStr
+                               }
+                               if !hasSuffix {
+                                       regexpStr += ".*$"
+                               }
+                       }
+                       _, err := regexp.Compile(regexpStr)
                        if err != nil {
                                log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err)
                                return rule, false
                        }
-
-                       rule.Regexp = compiled
+                       rule.Regexp = regexpStr
                }
 
                ok = true
index 8132978caca996ae0d58788d55e35e146d141249..fded9d0873744b8ffd8dac8a7492533af72292c9 100644 (file)
@@ -9,7 +9,7 @@ import (
        "path"
        "strings"
 
-       gitea_html "code.gitea.io/gitea/modules/html"
+       gitea_html "code.gitea.io/gitea/modules/htmlutil"
        "code.gitea.io/gitea/modules/log"
        "code.gitea.io/gitea/modules/public"
 )
index 3ef11772dc7d7c542de442b834199c2f02a1f028..d5b32358da752ff33c56928f511604453fb38a84 100644 (file)
@@ -10,12 +10,12 @@ import (
        "html/template"
        "net/url"
        "reflect"
-       "slices"
        "strings"
        "time"
 
        user_model "code.gitea.io/gitea/models/user"
        "code.gitea.io/gitea/modules/base"
+       "code.gitea.io/gitea/modules/htmlutil"
        "code.gitea.io/gitea/modules/markup"
        "code.gitea.io/gitea/modules/setting"
        "code.gitea.io/gitea/modules/svg"
@@ -39,7 +39,7 @@ func NewFuncMap() template.FuncMap {
                "Iif":          iif,
                "Eval":         evalTokens,
                "SafeHTML":     safeHTML,
-               "HTMLFormat":   HTMLFormat,
+               "HTMLFormat":   htmlutil.HTMLFormat,
                "HTMLEscape":   htmlEscape,
                "QueryEscape":  queryEscape,
                "JSEscape":     jsEscapeSafe,
@@ -184,23 +184,6 @@ func NewFuncMap() template.FuncMap {
        }
 }
 
-func HTMLFormat(s string, rawArgs ...any) template.HTML {
-       args := slices.Clone(rawArgs)
-       for i, v := range args {
-               switch v := v.(type) {
-               case nil, bool, int, int8, int16, int32, int64, uint, uint8, uint16, uint32, uint64, float32, float64, template.HTML:
-                       // for most basic types (including template.HTML which is safe), just do nothing and use it
-               case string:
-                       args[i] = template.HTMLEscapeString(v)
-               case fmt.Stringer:
-                       args[i] = template.HTMLEscapeString(v.String())
-               default:
-                       args[i] = template.HTMLEscapeString(fmt.Sprint(v))
-               }
-       }
-       return template.HTML(fmt.Sprintf(s, args...))
-}
-
 // safeHTML render raw as HTML
 func safeHTML(s any) template.HTML {
        switch v := s.(type) {
index b9fabb7016460c7c1bc9a5363318f8da4ab81689..3e17e86c66256484b2018bd2639f8ed7b5dc572f 100644 (file)
@@ -61,10 +61,6 @@ func TestJSEscapeSafe(t *testing.T) {
        assert.EqualValues(t, `\u0026\u003C\u003E\'\"`, jsEscapeSafe(`&<>'"`))
 }
 
-func TestHTMLFormat(t *testing.T) {
-       assert.Equal(t, template.HTML("<a>&lt; < 1</a>"), HTMLFormat("<a>%s %s %d</a>", "<", template.HTML("<"), 1))
-}
-
 func TestSanitizeHTML(t *testing.T) {
        assert.Equal(t, template.HTML(`<a href="/" rel="nofollow">link</a> xss <div>inline</div>`), SanitizeHTML(`<a href="/">link</a> <a href="javascript:">xss</a> <div style="dangerous">inline</div>`))
 }
index afc10915163bcfa989c70d883b169cadf05369ce..f7dd408ee2133fc65a11331f7dae0ee55cf4c188 100644 (file)
@@ -14,7 +14,7 @@ import (
        "code.gitea.io/gitea/models/organization"
        repo_model "code.gitea.io/gitea/models/repo"
        user_model "code.gitea.io/gitea/models/user"
-       gitea_html "code.gitea.io/gitea/modules/html"
+       gitea_html "code.gitea.io/gitea/modules/htmlutil"
        "code.gitea.io/gitea/modules/setting"
 )
 
index 8e443446bd69cc43a51eb62f8f42f756c854f6a0..5776eefced96197616548db3d826f618c066fc6b 100644 (file)
@@ -16,6 +16,7 @@ import (
 
        issues_model "code.gitea.io/gitea/models/issues"
        "code.gitea.io/gitea/modules/emoji"
+       "code.gitea.io/gitea/modules/htmlutil"
        "code.gitea.io/gitea/modules/log"
        "code.gitea.io/gitea/modules/markup"
        "code.gitea.io/gitea/modules/markup/markdown"
@@ -140,7 +141,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML {
 
        if labelScope == "" {
                // Regular label
-               return HTMLFormat(`<div class="ui label %s" style="color: %s !important; background-color: %s !important;" data-tooltip-content title="%s">%s</div>`,
+               return htmlutil.HTMLFormat(`<div class="ui label %s" style="color: %s !important; background-color: %s !important;" data-tooltip-content title="%s">%s</div>`,
                        extraCSSClasses, textColor, label.Color, descriptionText, ut.RenderEmoji(label.Name))
        }
 
@@ -174,7 +175,7 @@ func (ut *RenderUtils) RenderLabel(label *issues_model.Label) template.HTML {
        itemColor := "#" + hex.EncodeToString(itemBytes)
        scopeColor := "#" + hex.EncodeToString(scopeBytes)
 
-       return HTMLFormat(`<span class="ui label %s scope-parent" data-tooltip-content title="%s">`+
+       return htmlutil.HTMLFormat(`<span class="ui label %s scope-parent" data-tooltip-content title="%s">`+
                `<div class="ui label scope-left" style="color: %s !important; background-color: %s !important">%s</div>`+
                `<div class="ui label scope-right" style="color: %s !important; background-color: %s !important">%s</div>`+
                `</span>`,
index 529507e7eab828ac4da176191305c0fd146db55d..cf6d839cbf68642520054fb4c1bb16fd36322a9d 100644 (file)
@@ -113,34 +113,34 @@ func TestRenderCommitBody(t *testing.T) {
        }
 
        expected := `/just/a/path.bin
-<a href="https://example.com/file.bin" class="link">https://example.com/file.bin</a>
+<a href="https://example.com/file.bin">https://example.com/file.bin</a>
 [local link](file.bin)
-[remote link](<a href="https://example.com" class="link">https://example.com</a>)
+[remote link](<a href="https://example.com">https://example.com</a>)
 [[local link|file.bin]]
-[[remote link|<a href="https://example.com" class="link">https://example.com</a>]]
+[[remote link|<a href="https://example.com">https://example.com</a>]]
 ![local image](image.jpg)
-![remote image](<a href="https://example.com/image.jpg" class="link">https://example.com/image.jpg</a>)
+![remote image](<a href="https://example.com/image.jpg">https://example.com/image.jpg</a>)
 [[local image|image.jpg]]
-[[remote link|<a href="https://example.com/image.jpg" class="link">https://example.com/image.jpg</a>]]
+[[remote link|<a href="https://example.com/image.jpg">https://example.com/image.jpg</a>]]
 <a href="https://example.com/user/repo/compare/88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb#hash" class="compare"><code class="nohighlight">88fc37a3c0...12fc37a3c0 (hash)</code></a>
 com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb...12fc37a3c0a4dda553bdcfc80c178a58247f42fb pare
 <a href="https://example.com/user/repo/commit/88fc37a3c0a4dda553bdcfc80c178a58247f42fb" class="commit"><code class="nohighlight">88fc37a3c0</code></a>
 com 88fc37a3c0a4dda553bdcfc80c178a58247f42fb mit
 <span class="emoji" aria-label="thumbs up">👍</span>
-<a href="mailto:mail@domain.com" class="mailto">mail@domain.com</a>
-<a href="/mention-user" class="mention">@mention-user</a> test
+<a href="mailto:mail@domain.com">mail@domain.com</a>
+<a href="/mention-user">@mention-user</a> test
 <a href="/user13/repo11/issues/123" class="ref-issue">#123</a>
   space`
        assert.EqualValues(t, expected, string(newTestRenderUtils().RenderCommitBody(testInput(), testMetas)))
 }
 
 func TestRenderCommitMessage(t *testing.T) {
-       expected := `space <a href="/mention-user" data-markdown-generated-content="" class="mention">@mention-user</a>  `
+       expected := `space <a href="/mention-user" data-markdown-generated-content="">@mention-user</a>  `
        assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessage(testInput(), testMetas))
 }
 
 func TestRenderCommitMessageLinkSubject(t *testing.T) {
-       expected := `<a href="https://example.com/link" class="muted">space </a><a href="/mention-user" data-markdown-generated-content="" class="mention">@mention-user</a>`
+       expected := `<a href="https://example.com/link" class="muted">space </a><a href="/mention-user" data-markdown-generated-content="">@mention-user</a>`
        assert.EqualValues(t, expected, newTestRenderUtils().RenderCommitMessageLinkSubject(testInput(), "https://example.com/link", testMetas))
 }
 
index 13f6b69493e096a1ca2d92a3933312bff692a106..2732a67e714fdc3c30e3fbf5af3a1308b522175f 100644 (file)
@@ -326,7 +326,7 @@ func renderViewPage(ctx *context.Context) (*git.Repository, *git.TreeEntry) {
 
        if rctx.SidebarTocNode != nil {
                sb := &strings.Builder{}
-               err = markdown.SpecializedMarkdown().Renderer().Render(sb, nil, rctx.SidebarTocNode)
+               err = markdown.SpecializedMarkdown(rctx).Renderer().Render(sb, nil, rctx.SidebarTocNode)
                if err != nil {
                        log.Error("Failed to render wiki sidebar TOC: %v", err)
                } else {