1 files changed, 37 insertions, 54 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index 3aaf669c63..51afd4be00 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -8,6 +8,7 @@ import (
 	"fmt"
 	"io"
 	"regexp"
+	"slices"
 	"strings"
 	"sync"
 
@@ -32,7 +33,6 @@ type globalVarsType struct {
 	comparePattern          *regexp.Regexp
 	fullURLPattern          *regexp.Regexp
 	emailRegex              *regexp.Regexp
-	blackfridayExtRegex     *regexp.Regexp
 	emojiShortCodeRegex     *regexp.Regexp
 	issueFullPattern        *regexp.Regexp
 	filesChangedFullPattern *regexp.Regexp
@@ -72,10 +72,8 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
 	// it is still accepted by the CommonMark specification, as well as the HTML5 spec:
 	//   http://spec.commonmark.org/0.28/#email-address
 	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
-	v.emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|;|,|\\?|!|\\.(\\s|$))")
-
-	// blackfridayExtRegex is for blackfriday extensions create IDs like fn:user-content-footnote
-	v.blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
+	// At the moment, we use a stricter rule for rendering purposes: the "name" part is only allowed to start after a word boundary
+	v.emailRegex = regexp.MustCompile(`\b([-\w.!#$%&'*+/=?^{|}~]*@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)\b`)
 
 	// emojiShortCodeRegex find emoji by alias like :smile:
 	v.emojiShortCodeRegex = regexp.MustCompile(`:[-+\w]+:`)
@@ -89,22 +87,18 @@ var globalVars = sync.OnceValue(func() *globalVarsType {
 	// codePreviewPattern matches "http://domain/.../{owner}/{repo}/src/commit/{commit}/{filepath}#L10-L20"
 	v.codePreviewPattern = regexp.MustCompile(`https?://\S+/([^\s/]+)/([^\s/]+)/src/commit/([0-9a-f]{7,64})(/\S+)#(L\d+(-L\d+)?)`)
 
-	v.tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
+	// escapes the leading "<" of: "<foo/bar", "</any words/", and (with an optional "/") "<html", "<head", "<script", "<style", "<?", "<%"
+	v.tagCleaner = regexp.MustCompile(`(?i)<(/?\w+/\w+|/[\w ]+/|/?(html|head|script|style|%|\?)\b)`)
 	v.nulCleaner = strings.NewReplacer("\000", "")
 	return v
 })
 
-// IsFullURLBytes reports whether link fits valid format.
-func IsFullURLBytes(link []byte) bool {
-	return globalVars().fullURLPattern.Match(link)
-}
-
 func IsFullURLString(link string) bool {
 	return globalVars().fullURLPattern.MatchString(link)
 }
 
 func IsNonEmptyRelativePath(link string) bool {
-	return link != "" && !IsFullURLString(link) && link[0] != '/' && link[0] != '?' && link[0] != '#'
+	return link != "" && !IsFullURLString(link) && link[0] != '?' && link[0] != '#'
 }
 
 // CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
@@ -116,13 +110,7 @@ func CustomLinkURLSchemes(schemes []string) {
 		if !validScheme.MatchString(s) {
 			continue
 		}
-		without := false
-		for _, sna := range xurls.SchemesNoAuthority {
-			if s == sna {
-				without = true
-				break
-			}
-		}
+		without := slices.Contains(xurls.SchemesNoAuthority, s)
 		if without {
 			s += ":"
 		} else {
@@ -260,7 +248,7 @@ func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output
 	node, err := html.Parse(io.MultiReader(
 		// prepend "<html><body>"
 		strings.NewReader("<html><body>"),
-		// Strip out nuls - they're always invalid
+		// strip out NUL bytes (they're always invalid), and escape the "<" of tags matched by tagCleaner
 		bytes.NewReader(globalVars().tagCleaner.ReplaceAll([]byte(globalVars().nulCleaner.Replace(string(rawHTML))), []byte("&lt;$1"))),
 		// close the tags
 		strings.NewReader("</body></html>"),
@@ -316,44 +304,39 @@ func isEmojiNode(node *html.Node) bool {
 }
 
 func visitNode(ctx *RenderContext, procs []processor, node *html.Node) *html.Node {
-	// Add user-content- to IDs and "#" links if they don't already have them
-	for idx, attr := range node.Attr {
-		val := strings.TrimPrefix(attr.Val, "#")
-		notHasPrefix := !(strings.HasPrefix(val, "user-content-") || globalVars().blackfridayExtRegex.MatchString(val))
-
-		if attr.Key == "id" && notHasPrefix {
-			node.Attr[idx].Val = "user-content-" + attr.Val
-		}
-
-		if attr.Key == "href" && strings.HasPrefix(attr.Val, "#") && notHasPrefix {
-			node.Attr[idx].Val = "#user-content-" + val
-		}
-	}
-
-	switch node.Type {
-	case html.TextNode:
+	if node.Type == html.TextNode {
 		for _, proc := range procs {
 			proc(ctx, node) // it might add siblings
 		}
+		return node.NextSibling
+	}
+	if node.Type != html.ElementNode {
+		return node.NextSibling
+	}
 
-	case html.ElementNode:
-		if isEmojiNode(node) {
-			// TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
-			// if we don't stop it, it will go into the TextNode again and create an infinite recursion
-			return node.NextSibling
-		} else if node.Data == "code" || node.Data == "pre" {
-			return node.NextSibling // ignore code and pre nodes
-		} else if node.Data == "img" {
-			return visitNodeImg(ctx, node)
-		} else if node.Data == "video" {
-			return visitNodeVideo(ctx, node)
-		} else if node.Data == "a" {
-			procs = emojiProcessors // Restrict text in links to emojis
-		}
-		for n := node.FirstChild; n != nil; {
-			n = visitNode(ctx, procs, n)
-		}
-	default:
+	processNodeAttrID(node)
+	processFootnoteNode(ctx, node) // FIXME: the footnote processing should be done in the "footnote.go" renderer directly
+
+	if isEmojiNode(node) {
+		// TextNode emoji will be converted to `<span class="emoji">`, then the next iteration will visit the "span"
+		// if we don't stop it, it will go into the TextNode again and create an infinite recursion
+		return node.NextSibling
+	} else if node.Data == "code" || node.Data == "pre" {
+		return node.NextSibling // ignore code and pre nodes
+	} else if node.Data == "img" {
+		return visitNodeImg(ctx, node)
+	} else if node.Data == "video" {
+		return visitNodeVideo(ctx, node)
+	}
+
+	if node.Data == "a" {
+		processNodeA(ctx, node)
+		// only use the emoji processors for the content of the "a" tag,
+		// because that content should not be processed any further, for example: it may be a commit id or a full URL.
+		procs = emojiProcessors
+	}
+	for n := node.FirstChild; n != nil; {
+		n = visitNode(ctx, procs, n)
 	}
 	return node.NextSibling
 }