aboutsummaryrefslogtreecommitdiffstats
path: root/modules/markup/html.go
diff options
context:
space:
mode:
Diffstat (limited to 'modules/markup/html.go')
-rw-r--r--modules/markup/html.go91
1 files changed, 37 insertions, 54 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index 3aaf669c63..51afd4be00 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -8,6 +8,7 @@ import (
"fmt"
"io"
"regexp"
+ "slices"
"strings"
"sync"
@@ -32,7 +33,6 @@ type globalVarsType struct {
comparePattern *regexp.Regexp
fullURLPattern *regexp.Regexp
emailRegex *regexp.Regexp
- blackfridayExtRegex *regexp.Regexp
emojiShortCodeRegex *regexp.Regexp
issueFullPattern *regexp.Regexp
filesChangedFullPattern *regexp.Regexp
@@ -72,10 +72,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
- v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
-
- // blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
- v.blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
+ // At the moment, we use stricter rule for rendering purpose: only allow the "name" part starting after the word boundary
+ v.emailRegex = regexp.MustCompile(`\b([-\w.!#$%&'*+/=?^{|}~]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)\b`)
// emojiShortCodeRegex find emoji by alias like :smile:
v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
@@ -89,22 +87,18 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20"
v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`)
- v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
+ // cleans: "<foo/bar", "<any words/", ("<html", "<head", "<script", "<style", "<?", "<%")
+ v.tagCleaner = regexp.MustCompile(`(?i)<(/?\w+/\w+|/[\w ]+/|/?(html|head|script|style|%|\?)\b)`)
v.nulCleaner = strings.NewReplacer("\000", "")
return v
})
-// IsFullURLBytes reports whether link fits valid format.
-func IsFullURLBytes(link []byte) bool {
- return globalVars().fullURLPattern.Match(link)
-}
-
func IsFullURLString(link string) bool {
return globalVars().fullURLPattern.MatchString(link)
}
func IsNonEmptyRelativePath(link string) bool {
- return link != "" && !IsFullURLString(link) && link[0] != '/' && link[0] != '?' && link[0] != '#'
+ return link != "" && !IsFullURLString(link) && link[0] != '?' && link[0] != '#'
}
// CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
@@ -116,13 +110,7 @@ func CustomLinkURLSchemes(schemes []string) {
if !validScheme.MatchString(s) {
continue
}
- without := false
- for _, sna := range xurls.SchemesNoAuthority {
- if s == sna {
- without = true
- break
- }
- }
+ without := slices.Contains(xurls.SchemesNoAuthority, s)
if without {
s += ":"
} else {
@@ -260,7 +248,7 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output
node, err := html.Parse(io.MultiReader(
// prepend "<html><body>"
strings.NewReader("<html><body>"),
- // Strip out nuls - they're always invalid
+ // strip out NULLs (they're always invalid), and escape known tags
bytes.NewReader(globalVars().tagCleaner.ReplaceAll([]byte(globalVars().nulCleaner.Replace(string(rawHTML))), []byte("&lt;$1"))),
// close the tags
strings.NewReader("</body></html>"),
@@ -316,44 +304,39 @@ func isEmojiNode(node *html.Node) bool {
}
func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node {
- // Add user-content- to IDs and "#" links if they don't already have them
- for idx, attr := range node.Attr {
- val := strings.TrimPrefix(attr.Val, "#")
- notHasPrefix := !(strings.HasPrefix(val, "user-content-") || globalVars().blackfridayExtRegex.MatchString(val))
-
- if attr.Key == "id" && notHasPrefix {
- node.Attr[idx].Val = "user-content-" + attr.Val
- }
-
- if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix {
- node.Attr[idx].Val = "#user-content-" + val
- }
- }
-
- switch node.Type {
- case html.TextNode:
+ if node.Type == html.TextNode {
for _, proc := range procs {
proc(ctx, node) // it might add siblings
}
+ return node.NextSibling
+ }
+ if node.Type != html.ElementNode {
+ return node.NextSibling
+ }
- case html.ElementNode:
- if isEmojiNode(node) {
- // TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
- // if we don't stop it, it will go into the TextNode again and create an infinite recursion
- return node.NextSibling
- } else if node.Data == "code" || node.Data == "pre" {
- return node.NextSibling // ignore code and pre nodes
- } else if node.Data == "img" {
- return visitNodeImg(ctx, node)
- } else if node.Data == "video" {
- return visitNodeVideo(ctx, node)
- } else if node.Data == "a" {
- procs = emojiProcessors // Restrict text in links to emojis
- }
- for n := node.FirstChild; n != nil; {
- n = visitNode(ctx, procs, n)
- }
- default:
+ processNodeAttrID(node)
+ processFootnoteNode(ctx, node) // FIXME: the footnote processing should be done in the "footnote.go" renderer directly
+
+ if isEmojiNode(node) {
+ // TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
+ // if we don't stop it, it will go into the TextNode again and create an infinite recursion
+ return node.NextSibling
+ } else if node.Data == "code" || node.Data == "pre" {
+ return node.NextSibling // ignore code and pre nodes
+ } else if node.Data == "img" {
+ return visitNodeImg(ctx, node)
+ } else if node.Data == "video" {
+ return visitNodeVideo(ctx, node)
+ }
+
+ if node.Data == "a" {
+ processNodeA(ctx, node)
+ // only use emoji processors for the content in the "A" tag,
+ // because the content there is not processable, for example: the content is a commit id or a full URL.
+ procs = emojiProcessors
+ }
+ for n := node.FirstChild; n != nil; {
+ n = visitNode(ctx, procs, n)
}
return node.NextSibling
}