Refactor markup package (#32399)

To make the markup package easier to maintain: (1) split some Go files into smaller files; (2) use a shared util.NopCloser and remove duplicate code; (3) remove unused functions.
author: wxiaoguang <wxiaoguang@gmail.com> 2024-11-04 18:59:50 +0800
committer: GitHub <noreply@github.com> 2024-11-04 10:59:50 +0000
commit: 61be51e56baf037aa7902e7cd066b895a10da244 (patch)
tree: 1cbf5fed4146977538975ea3d7ce4e302294cba0 /modules/markup/html_link.go
parent: af28ce59b8695a8412632c50cf96fdd420215719 (diff)
download: gitea-61be51e56baf037aa7902e7cd066b895a10da244.tar.gz
gitea-61be51e56baf037aa7902e7cd066b895a10da244.zip
1 files changed, 227 insertions, 0 deletions
diff --git a/modules/markup/html_link.go b/modules/markup/html_link.go
index b086135348..9350634568 100644
--- a/modules/markup/html_link.go
+++ b/modules/markup/html_link.go
@@ -4,7 +4,16 @@
 package markup
 
 import (
+	"net/url"
+	"path"
+	"path/filepath"
+	"strings"
+
+	"code.gitea.io/gitea/modules/markup/common"
 	"code.gitea.io/gitea/modules/util"
+
+	"golang.org/x/net/html"
+	"golang.org/x/net/html/atom"
 )
 
 func ResolveLink(ctx *RenderContext, link, userContentAnchorPrefix string) (result string, resolved bool) {
@@ -27,3 +36,221 @@ func ResolveLink(ctx *RenderContext, link, userContentAnchorPrefix string) (resu
 	}
 	return link, resolved
 }
+
+// shortLinkProcessor rewrites every short link matched by shortLinkPattern in
+// the text node (e.g. the [[text|link]] syntax discussed below) into an <a>
+// element. When the link target ends in a known image extension, the <a>
+// wraps an <img> instead of plain text.
+//
+// The matched content is split on "|". Pieces without "=" are the mandatory
+// name/link arguments; pieces with "=" are optional key=value properties, of
+// which "title" and "alt" are read below.
+func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
+	// Remember the sibling that follows the original node so the walk stops
+	// there and does not run into content this call was not asked to process.
+	next := node.NextSibling
+	for node != nil && node != next {
+		m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
+		if m == nil {
+			return
+		}
+
+		// First capture group: the link body; second capture group: trailing
+		// text that is appended to the displayed name.
+		content := node.Data[m[2]:m[3]]
+		tail := node.Data[m[4]:m[5]]
+		props := make(map[string]string)
+
+		// MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
+		// It makes page handling terrible, but we prefer GitHub syntax
+		// And fall back to MediaWiki only when it is obvious from the look
+		// Of text and link contents
+		sl := strings.Split(content, "|")
+		for _, v := range sl {
+			if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
+				// There is no equal in this argument; this is a mandatory arg
+				if props["name"] == "" {
+					if IsFullURLString(v) {
+						// If we clearly see it is a link, we save it so
+
+						// But first we need to ensure, that if both mandatory args provided
+						// look like links, we stick to GitHub syntax
+						if props["link"] != "" {
+							props["name"] = props["link"]
+						}
+
+						props["link"] = strings.TrimSpace(v)
+					} else {
+						props["name"] = v
+					}
+				} else {
+					props["link"] = strings.TrimSpace(v)
+				}
+			} else {
+				// There is an equal; optional argument.
+				// equalPos from the if-initializer is still in scope in this
+				// branch, so the string is not scanned a second time.
+				key, val := v[:equalPos], html.UnescapeString(v[equalPos+1:])
+
+				// When parsing HTML, x/net/html will change all quotes which are
+				// not used for syntax into UTF-8 quotes. So checking val[0] won't
+				// be enough, since that only checks a single byte.
+				if len(val) > 1 {
+					if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
+						(strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
+						// Typographic quotes on both ends: strip the multi-byte rune.
+						const lenQuote = len("‘")
+						val = val[lenQuote : len(val)-lenQuote]
+					} else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
+						(strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
+						// Plain ASCII quotes on both ends: one byte each.
+						val = val[1 : len(val)-1]
+					} else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
+						// Mixed case: ASCII opening quote, typographic closing quote.
+						const lenQuote = len("‘")
+						val = val[1 : len(val)-lenQuote]
+					}
+				}
+				props[key] = val
+			}
+		}
+
+		// The link target falls back to the name; the displayed name prefers
+		// the title, then the name, then the link itself.
+		var name, link string
+		if props["link"] != "" {
+			link = props["link"]
+		} else if props["name"] != "" {
+			link = props["name"]
+		}
+		if props["title"] != "" {
+			name = props["title"]
+		} else if props["name"] != "" {
+			name = props["name"]
+		} else {
+			name = link
+		}
+
+		name += tail
+		image := false
+		ext := filepath.Ext(link)
+		switch ext {
+		// fast path: empty string, ignore
+		case "":
+			// leave image as false
+		case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
+			image = true
+		}
+
+		// Build <a><child/></a>; the child becomes either an <img> or a text
+		// node further down.
+		childNode := &html.Node{}
+		linkNode := &html.Node{
+			FirstChild: childNode,
+			LastChild:  childNode,
+			Type:       html.ElementNode,
+			Data:       "a",
+			DataAtom:   atom.A,
+		}
+		childNode.Parent = linkNode
+		absoluteLink := IsFullURLString(link)
+		if !absoluteLink {
+			if image {
+				link = strings.ReplaceAll(link, " ", "+")
+			} else {
+				link = strings.ReplaceAll(link, " ", "-") // FIXME: it should support dashes in the link, eg: "the-dash-support.-"
+			}
+			if !strings.Contains(link, "/") {
+				link = url.PathEscape(link) // FIXME: it doesn't seem right and it might cause double-escaping
+			}
+		}
+		if image {
+			if !absoluteLink {
+				link = util.URLJoin(ctx.Links.ResolveMediaLink(ctx.IsWiki), link)
+			}
+			title := props["title"]
+			if title == "" {
+				title = props["alt"]
+			}
+			if title == "" {
+				title = path.Base(name)
+			}
+			alt := props["alt"]
+			if alt == "" {
+				alt = name
+			}
+
+			// make the childNode an image - if we can, we also place the alt
+			childNode.Type = html.ElementNode
+			childNode.Data = "img"
+			childNode.DataAtom = atom.Img
+			childNode.Attr = []html.Attribute{
+				{Key: "src", Val: link},
+				{Key: "title", Val: title},
+				{Key: "alt", Val: alt},
+			}
+			if alt == "" {
+				// No usable alt text: drop the trailing alt attribute.
+				childNode.Attr = childNode.Attr[:2]
+			}
+		} else {
+			link, _ = ResolveLink(ctx, link, "")
+			childNode.Type = html.TextNode
+			childNode.Data = name
+		}
+		linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
+		replaceContent(node, m[0], m[1], linkNode)
+		// NOTE(review): this assumes replaceContent leaves the inserted link as
+		// node.NextSibling, followed by a node holding the remaining text;
+		// confirm against replaceContent.
+		node = node.NextSibling.NextSibling
+	}
+}
+
+// linkProcessor turns each HTTP or HTTPS URL that markdown left as plain text
+// (as matched by common.LinkRegex) into a link created via createLink.
+func linkProcessor(ctx *RenderContext, node *html.Node) {
+	// The walk ends when it reaches the sibling that followed the original node.
+	stop := node.NextSibling
+	for node != nil && node != stop {
+		loc := common.LinkRegex.FindStringIndex(node.Data)
+		if loc == nil {
+			return
+		}
+
+		start, end := loc[0], loc[1]
+		matched := node.Data[start:end]
+		// The matched URL is used both as the target and as the visible text.
+		anchor := createLink(matched, matched, "link")
+		replaceContent(node, start, end, anchor)
+		node = node.NextSibling.NextSibling
+	}
+}
+
+// genDefaultLinkProcessor returns a processor that converts the given node in
+// place into an <a> element pointing at defaultLink. The node's previous Data
+// is kept as the single text child of the new anchor.
+func genDefaultLinkProcessor(defaultLink string) processor {
+	return func(ctx *RenderContext, node *html.Node) {
+		// Capture the old text before node.Data is overwritten below.
+		text := &html.Node{Type: html.TextNode, Data: node.Data, Parent: node}
+
+		node.FirstChild = text
+		node.LastChild = text
+		node.Type, node.Data, node.DataAtom = html.ElementNode, "a", atom.A
+		node.Attr = []html.Attribute{
+			{Key: "href", Val: defaultLink},
+			{Key: "class", Val: "default-link muted"},
+		}
+	}
+}
+
+// descriptionLinkProcessor is the DescriptionHTML counterpart of the plain
+// link processing: each URL matched by common.LinkRegex is replaced by a link
+// built with createDescriptionLink.
+func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
+	// The walk ends when it reaches the sibling that followed the original node.
+	stop := node.NextSibling
+	for node != nil && node != stop {
+		loc := common.LinkRegex.FindStringIndex(node.Data)
+		if loc == nil {
+			return
+		}
+
+		start, end := loc[0], loc[1]
+		matched := node.Data[start:end]
+		// The matched URL is used both as the target and as the visible text.
+		anchor := createDescriptionLink(matched, matched)
+		replaceContent(node, start, end, anchor)
+		node = node.NextSibling.NextSibling
+	}
+}
+
+// createDescriptionLink builds an <a> element whose href is href and whose
+// only child is a text node containing content. The anchor also carries
+// target="_blank" and rel="noopener noreferrer".
+func createDescriptionLink(href, content string) *html.Node {
+	anchor := &html.Node{
+		Type:     html.ElementNode,
+		Data:     "a",
+		DataAtom: atom.A,
+		Attr: []html.Attribute{
+			{Key: "href", Val: href},
+			{Key: "target", Val: "_blank"},
+			{Key: "rel", Val: "noopener noreferrer"},
+		},
+	}
+
+	// Attach the label as the anchor's sole child.
+	label := &html.Node{Type: html.TextNode, Data: content, Parent: anchor}
+	anchor.FirstChild, anchor.LastChild = label, label
+	return anchor
+}
author	wxiaoguang <wxiaoguang@gmail.com>	2024-11-04 18:59:50 +0800
committer	GitHub <noreply@github.com>	2024-11-04 10:59:50 +0000
commit	61be51e56baf037aa7902e7cd066b895a10da244 (patch)
tree	1cbf5fed4146977538975ea3d7ce4e302294cba0 /modules/markup/html_link.go
parent	af28ce59b8695a8412632c50cf96fdd420215719 (diff)
download	gitea-61be51e56baf037aa7902e7cd066b895a10da244.tar.gz gitea-61be51e56baf037aa7902e7cd066b895a10da244.zip