diff options
Diffstat (limited to 'modules/markup/html.go')
-rw-r--r-- | modules/markup/html.go | 19 |
1 files changed, 8 insertions, 11 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go index 0e074cbcfa..51afd4be00 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -8,6 +8,7 @@ import ( "fmt" "io" "regexp" + "slices" "strings" "sync" @@ -71,7 +72,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType { // it is still accepted by the CommonMark specification, as well as the HTML5 spec: // http://spec.commonmark.org/0.28/#email-address // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) - v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))") + // At the moment, we use stricter rule for rendering purpose: only allow the "name" part starting after the word boundary + v.emailRegex = regexp.MustCompile(`\b([-\w.!#$%&'*+/=?^{|}~]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)\b`) // emojiShortCodeRegex find emoji by alias like :smile: v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`) @@ -85,8 +87,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType { // codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20" v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`) - // cleans: "<foo/bar", "<any words/", ("<html", "<head", "<script", "<style") - v.tagCleaner = regexp.MustCompile(`(?i)<(/?\w+/\w+|/[\w ]+/|/?(html|head|script|style\b))`) + // cleans: "<foo/bar", "<any words/", ("<html", "<head", "<script", "<style", "<?", "<%") + v.tagCleaner = regexp.MustCompile(`(?i)<(/?\w+/\w+|/[\w ]+/|/?(html|head|script|style|%|\?)\b)`) v.nulCleaner = strings.NewReplacer("\000", "") return v }) @@ -108,13 +110,7 @@ func CustomLinkURLSchemes(schemes []string) { if !validScheme.MatchString(s) { continue } - without := false - for _, sna := range xurls.SchemesNoAuthority { - if s == sna { - without = true - break - } - } + without := slices.Contains(xurls.SchemesNoAuthority, s) if without { s += ":" } else { @@ -252,7 +248,7 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output node, err := html.Parse(io.MultiReader( // prepend "<html><body>" strings.NewReader("<html><body>"), - // Strip out nuls - they're always invalid + // strip out NULLs (they're always invalid), and escape known tags bytes.NewReader(globalVars().tagCleaner.ReplaceAll([]byte(globalVars().nulCleaner.Replace(string(rawHTML))), []byte("<$1"))), // close the tags strings.NewReader("</body></html>"), @@ -319,6 +315,7 @@ func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Nod } processNodeAttrID(node) + processFootnoteNode(ctx, node) // FIXME: the footnote processing should be done in the "footnote.go" renderer directly if isEmojiNode(node) { // TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span" |