Run processors on whole of text (#16155)
author zeripath <art27@cantab.net>
Thu, 17 Jun 2021 10:35:05 +0000 (11:35 +0100)
committer GitHub <noreply@github.com>
Thu, 17 Jun 2021 10:35:05 +0000 (11:35 +0100)
There is an inefficiency in the design of our processors which means that the Emoji
and other processors run in O(n²) time.

This PR forces the processors to process the entirety of a text node before passing
back up. The fundamental inefficiency remains, but it should be significantly
ameliorated.
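
In practice each processor now loops over the text node it was handed, advancing past
each match (and past the nodes created by replaceContent) instead of replacing the
first match and returning to the renderer. A minimal sketch of that loop shape in Go,
using a stand-in regexp and callback rather than any real Gitea processor:

package main

import (
	"fmt"
	"regexp"

	"golang.org/x/net/html"
)

// pattern stands in for a processor's regex (emoji shortcode, mention, SHA, ...).
var pattern = regexp.MustCompile(`:[a-z]+:`)

// processAll shows the post-change shape of a processor: keep matching inside
// the current text node until nothing is left, instead of handling one match
// and returning to the caller.
func processAll(node *html.Node, replace func(n *html.Node, start, end int)) {
	start := 0
	next := node.NextSibling
	for node != nil && node != next && start < len(node.Data) {
		m := pattern.FindStringIndex(node.Data[start:])
		if m == nil {
			return
		}
		m[0] += start
		m[1] += start
		replace(node, m[0], m[1])
		// The real processors call replaceContent here, which splits the
		// text node; they then continue on node.NextSibling.NextSibling
		// with start reset to 0. This sketch only advances the offset.
		start = m[1]
	}
}

func main() {
	n := &html.Node{Type: html.TextNode, Data: "hi :smile: and :tada:"}
	processAll(n, func(n *html.Node, s, e int) {
		fmt.Printf("match %q at [%d,%d)\n", n.Data[s:e], s, e)
	})
}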

Signed-off-by: Andrew Thornton <art27@cantab.net>
modules/emoji/emoji.go
modules/markup/html.go
modules/markup/html_test.go

index 01fb764ce368ea1492c7c3cf24e9f1c65363f4fd..85df2d697307e050e7e28c42d76a12b96077cd83 100644 (file)
@@ -6,6 +6,7 @@
 package emoji
 
 import (
+       "io"
        "sort"
        "strings"
        "sync"
@@ -145,6 +146,8 @@ func (n *rememberSecondWriteWriter) Write(p []byte) (int, error) {
        if n.writecount == 2 {
                n.idx = n.pos
                n.end = n.pos + len(p)
+               n.pos += len(p)
+               return len(p), io.EOF
        }
        n.pos += len(p)
        return len(p), nil
@@ -155,6 +158,8 @@ func (n *rememberSecondWriteWriter) WriteString(s string) (int, error) {
        if n.writecount == 2 {
                n.idx = n.pos
                n.end = n.pos + len(s)
+               n.pos += len(s)
+               return len(s), io.EOF
        }
        n.pos += len(s)
        return len(s), nil
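
The emoji.go hunks are what make the early stop possible: rememberSecondWriteWriter only
needs its second write (in emoji.go that is the first replacement emitted by a
strings.Replacer) to learn where the first emoji sits, so returning io.EOF there aborts
the rest of the replacement pass instead of writing out the whole string. A rough,
self-contained illustration of the short-circuit, using a stand-in writer and replacer
table rather than Gitea's actual ones:

package main

import (
	"fmt"
	"io"
	"strings"
)

// stopOnSecondWrite mimics the idea behind rememberSecondWriteWriter: the second
// write it receives is the first replacement made by a strings.Replacer, so it
// records where that write landed and returns io.EOF to stop the replacer early.
type stopOnSecondWrite struct {
	pos, idx, end, writes int
}

func (w *stopOnSecondWrite) Write(p []byte) (int, error) {
	w.writes++
	if w.writes == 2 {
		w.idx = w.pos
		w.end = w.pos + len(p)
		w.pos += len(p)
		return len(p), io.EOF
	}
	w.pos += len(p)
	return len(p), nil
}

func main() {
	// Hypothetical emoji-to-alias table; Gitea builds its own.
	r := strings.NewReplacer("😀", ":grinning:", "🎉", ":tada:")
	w := &stopOnSecondWrite{}
	// The replacer writes "Hello " (write 1), then ":grinning:" (write 2) and
	// stops because of the io.EOF, never touching " and 🎉 too".
	_, err := r.WriteString(w, "Hello 😀 and 🎉 too")
	fmt.Println(w.idx, w.end, err) // 6 16 EOF
}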
index e5e622068d1b892e1b21d9e5575127989ee039cc..2a83b8716e0fa1729f029a929df215db872ed3e6 100644 (file)
@@ -89,6 +89,7 @@ func isLinkStr(link string) bool {
        return validLinksPattern.MatchString(link)
 }
 
+// FIXME: This function is not concurrent safe
 func getIssueFullPattern() *regexp.Regexp {
        if issueFullPattern == nil {
                issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
@@ -566,26 +567,38 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
 }
 
 func mentionProcessor(ctx *RenderContext, node *html.Node) {
-       // We replace only the first mention; other mentions will be addressed later
-       found, loc := references.FindFirstMentionBytes([]byte(node.Data))
-       if !found {
-               return
-       }
-       mention := node.Data[loc.Start:loc.End]
-       var teams string
-       teams, ok := ctx.Metas["teams"]
-       // FIXME: util.URLJoin may not be necessary here:
-       // - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
-       // is an AppSubURL link we can probably fallback to concatenation.
-       // team mention should follow @orgName/teamName style
-       if ok && strings.Contains(mention, "/") {
-               mentionOrgAndTeam := strings.Split(mention, "/")
-               if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
-                       replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
+       start := 0
+       next := node.NextSibling
+       for node != nil && node != next && start < len(node.Data) {
+               // We replace only the first mention; other mentions will be addressed later
+               found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
+               if !found {
+                       return
                }
-               return
+               loc.Start += start
+               loc.End += start
+               mention := node.Data[loc.Start:loc.End]
+               var teams string
+               teams, ok := ctx.Metas["teams"]
+               // FIXME: util.URLJoin may not be necessary here:
+               // - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
+               // is an AppSubURL link we can probably fallback to concatenation.
+               // team mention should follow @orgName/teamName style
+               if ok && strings.Contains(mention, "/") {
+                       mentionOrgAndTeam := strings.Split(mention, "/")
+                       if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
+                               replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
+                               node = node.NextSibling.NextSibling
+                               start = 0
+                               continue
+                       }
+                       start = loc.End
+                       continue
+               }
+               replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
+               node = node.NextSibling.NextSibling
+               start = 0
        }
-       replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
 }
 
 func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
@@ -593,188 +606,196 @@ func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
 }
 
 func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
-       m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
-       if m == nil {
-               return
-       }
+       next := node.NextSibling
+       for node != nil && node != next {
+               m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
+               if m == nil {
+                       return
+               }
 
-       content := node.Data[m[2]:m[3]]
-       tail := node.Data[m[4]:m[5]]
-       props := make(map[string]string)
-
-       // MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
-       // It makes page handling terrible, but we prefer GitHub syntax
-       // And fall back to MediaWiki only when it is obvious from the look
-       // Of text and link contents
-       sl := strings.Split(content, "|")
-       for _, v := range sl {
-               if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
-                       // There is no equal in this argument; this is a mandatory arg
-                       if props["name"] == "" {
-                               if isLinkStr(v) {
-                                       // If we clearly see it is a link, we save it so
-
-                                       // But first we need to ensure, that if both mandatory args provided
-                                       // look like links, we stick to GitHub syntax
-                                       if props["link"] != "" {
-                                               props["name"] = props["link"]
-                                       }
+               content := node.Data[m[2]:m[3]]
+               tail := node.Data[m[4]:m[5]]
+               props := make(map[string]string)
+
+               // MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
+               // It makes page handling terrible, but we prefer GitHub syntax
+               // And fall back to MediaWiki only when it is obvious from the look
+               // Of text and link contents
+               sl := strings.Split(content, "|")
+               for _, v := range sl {
+                       if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
+                               // There is no equal in this argument; this is a mandatory arg
+                               if props["name"] == "" {
+                                       if isLinkStr(v) {
+                                               // If we clearly see it is a link, we save it so
+
+                                               // But first we need to ensure, that if both mandatory args provided
+                                               // look like links, we stick to GitHub syntax
+                                               if props["link"] != "" {
+                                                       props["name"] = props["link"]
+                                               }
 
-                                       props["link"] = strings.TrimSpace(v)
+                                               props["link"] = strings.TrimSpace(v)
+                                       } else {
+                                               props["name"] = v
+                                       }
                                } else {
-                                       props["name"] = v
+                                       props["link"] = strings.TrimSpace(v)
                                }
                        } else {
-                               props["link"] = strings.TrimSpace(v)
-                       }
-               } else {
-                       // There is an equal; optional argument.
-
-                       sep := strings.IndexByte(v, '=')
-                       key, val := v[:sep], html.UnescapeString(v[sep+1:])
-
-                       // When parsing HTML, x/net/html will change all quotes which are
-                       // not used for syntax into UTF-8 quotes. So checking val[0] won't
-                       // be enough, since that only checks a single byte.
-                       if len(val) > 1 {
-                               if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
-                                       (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
-                                       const lenQuote = len("‘")
-                                       val = val[lenQuote : len(val)-lenQuote]
-                               } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
-                                       (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
-                                       val = val[1 : len(val)-1]
-                               } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
-                                       const lenQuote = len("‘")
-                                       val = val[1 : len(val)-lenQuote]
+                               // There is an equal; optional argument.
+
+                               sep := strings.IndexByte(v, '=')
+                               key, val := v[:sep], html.UnescapeString(v[sep+1:])
+
+                               // When parsing HTML, x/net/html will change all quotes which are
+                               // not used for syntax into UTF-8 quotes. So checking val[0] won't
+                               // be enough, since that only checks a single byte.
+                               if len(val) > 1 {
+                                       if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
+                                               (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
+                                               const lenQuote = len("‘")
+                                               val = val[lenQuote : len(val)-lenQuote]
+                                       } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
+                                               (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
+                                               val = val[1 : len(val)-1]
+                                       } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
+                                               const lenQuote = len("‘")
+                                               val = val[1 : len(val)-lenQuote]
+                                       }
                                }
+                               props[key] = val
                        }
-                       props[key] = val
                }
-       }
-
-       var name, link string
-       if props["link"] != "" {
-               link = props["link"]
-       } else if props["name"] != "" {
-               link = props["name"]
-       }
-       if props["title"] != "" {
-               name = props["title"]
-       } else if props["name"] != "" {
-               name = props["name"]
-       } else {
-               name = link
-       }
 
-       name += tail
-       image := false
-       switch ext := filepath.Ext(link); ext {
-       // fast path: empty string, ignore
-       case "":
-               break
-       case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
-               image = true
-       }
-
-       childNode := &html.Node{}
-       linkNode := &html.Node{
-               FirstChild: childNode,
-               LastChild:  childNode,
-               Type:       html.ElementNode,
-               Data:       "a",
-               DataAtom:   atom.A,
-       }
-       childNode.Parent = linkNode
-       absoluteLink := isLinkStr(link)
-       if !absoluteLink {
-               if image {
-                       link = strings.ReplaceAll(link, " ", "+")
+               var name, link string
+               if props["link"] != "" {
+                       link = props["link"]
+               } else if props["name"] != "" {
+                       link = props["name"]
+               }
+               if props["title"] != "" {
+                       name = props["title"]
+               } else if props["name"] != "" {
+                       name = props["name"]
                } else {
-                       link = strings.ReplaceAll(link, " ", "-")
+                       name = link
                }
-               if !strings.Contains(link, "/") {
-                       link = url.PathEscape(link)
+
+               name += tail
+               image := false
+               switch ext := filepath.Ext(link); ext {
+               // fast path: empty string, ignore
+               case "":
+                       // leave image as false
+               case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
+                       image = true
                }
-       }
-       urlPrefix := ctx.URLPrefix
-       if image {
+
+               childNode := &html.Node{}
+               linkNode := &html.Node{
+                       FirstChild: childNode,
+                       LastChild:  childNode,
+                       Type:       html.ElementNode,
+                       Data:       "a",
+                       DataAtom:   atom.A,
+               }
+               childNode.Parent = linkNode
+               absoluteLink := isLinkStr(link)
                if !absoluteLink {
-                       if IsSameDomain(urlPrefix) {
-                               urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
+                       if image {
+                               link = strings.ReplaceAll(link, " ", "+")
+                       } else {
+                               link = strings.ReplaceAll(link, " ", "-")
                        }
-                       if ctx.IsWiki {
-                               link = util.URLJoin("wiki", "raw", link)
+                       if !strings.Contains(link, "/") {
+                               link = url.PathEscape(link)
                        }
-                       link = util.URLJoin(urlPrefix, link)
-               }
-               title := props["title"]
-               if title == "" {
-                       title = props["alt"]
-               }
-               if title == "" {
-                       title = path.Base(name)
-               }
-               alt := props["alt"]
-               if alt == "" {
-                       alt = name
                }
+               urlPrefix := ctx.URLPrefix
+               if image {
+                       if !absoluteLink {
+                               if IsSameDomain(urlPrefix) {
+                                       urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
+                               }
+                               if ctx.IsWiki {
+                                       link = util.URLJoin("wiki", "raw", link)
+                               }
+                               link = util.URLJoin(urlPrefix, link)
+                       }
+                       title := props["title"]
+                       if title == "" {
+                               title = props["alt"]
+                       }
+                       if title == "" {
+                               title = path.Base(name)
+                       }
+                       alt := props["alt"]
+                       if alt == "" {
+                               alt = name
+                       }
 
-               // make the childNode an image - if we can, we also place the alt
-               childNode.Type = html.ElementNode
-               childNode.Data = "img"
-               childNode.DataAtom = atom.Img
-               childNode.Attr = []html.Attribute{
-                       {Key: "src", Val: link},
-                       {Key: "title", Val: title},
-                       {Key: "alt", Val: alt},
-               }
-               if alt == "" {
-                       childNode.Attr = childNode.Attr[:2]
-               }
-       } else {
-               if !absoluteLink {
-                       if ctx.IsWiki {
-                               link = util.URLJoin("wiki", link)
+                       // make the childNode an image - if we can, we also place the alt
+                       childNode.Type = html.ElementNode
+                       childNode.Data = "img"
+                       childNode.DataAtom = atom.Img
+                       childNode.Attr = []html.Attribute{
+                               {Key: "src", Val: link},
+                               {Key: "title", Val: title},
+                               {Key: "alt", Val: alt},
+                       }
+                       if alt == "" {
+                               childNode.Attr = childNode.Attr[:2]
                        }
-                       link = util.URLJoin(urlPrefix, link)
+               } else {
+                       if !absoluteLink {
+                               if ctx.IsWiki {
+                                       link = util.URLJoin("wiki", link)
+                               }
+                               link = util.URLJoin(urlPrefix, link)
+                       }
+                       childNode.Type = html.TextNode
+                       childNode.Data = name
                }
-               childNode.Type = html.TextNode
-               childNode.Data = name
-       }
-       if noLink {
-               linkNode = childNode
-       } else {
-               linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
+               if noLink {
+                       linkNode = childNode
+               } else {
+                       linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
+               }
+               replaceContent(node, m[0], m[1], linkNode)
+               node = node.NextSibling.NextSibling
        }
-       replaceContent(node, m[0], m[1], linkNode)
 }
 
 func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
        if ctx.Metas == nil {
                return
        }
-       m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
-       if m == nil {
-               return
-       }
-       link := node.Data[m[0]:m[1]]
-       id := "#" + node.Data[m[2]:m[3]]
-
-       // extract repo and org name from matched link like
-       // http://localhost:3000/gituser/myrepo/issues/1
-       linkParts := strings.Split(path.Clean(link), "/")
-       matchOrg := linkParts[len(linkParts)-4]
-       matchRepo := linkParts[len(linkParts)-3]
-
-       if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
-               // TODO if m[4]:m[5] is not nil, then link is to a comment,
-               // and we should indicate that in the text somehow
-               replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
-
-       } else {
-               orgRepoID := matchOrg + "/" + matchRepo + id
-               replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
+
+       next := node.NextSibling
+       for node != nil && node != next {
+               m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
+               if m == nil {
+                       return
+               }
+               link := node.Data[m[0]:m[1]]
+               id := "#" + node.Data[m[2]:m[3]]
+
+               // extract repo and org name from matched link like
+               // http://localhost:3000/gituser/myrepo/issues/1
+               linkParts := strings.Split(path.Clean(link), "/")
+               matchOrg := linkParts[len(linkParts)-4]
+               matchRepo := linkParts[len(linkParts)-3]
+
+               if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
+                       // TODO if m[4]:m[5] is not nil, then link is to a comment,
+                       // and we should indicate that in the text somehow
+                       replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
+               } else {
+                       orgRepoID := matchOrg + "/" + matchRepo + id
+                       replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
+               }
+               node = node.NextSibling.NextSibling
        }
 }
 
@@ -782,70 +803,74 @@ func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
        if ctx.Metas == nil {
                return
        }
-
        var (
                found bool
                ref   *references.RenderizableReference
        )
 
-       _, exttrack := ctx.Metas["format"]
-       alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
-
-       // Repos with external issue trackers might still need to reference local PRs
-       // We need to concern with the first one that shows up in the text, whichever it is
-       found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum)
-       if exttrack && alphanum {
-               if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 {
-                       if !found || ref2.RefLocation.Start < ref.RefLocation.Start {
-                               found = true
-                               ref = ref2
+       next := node.NextSibling
+       for node != nil && node != next {
+               _, exttrack := ctx.Metas["format"]
+               alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
+
+               // Repos with external issue trackers might still need to reference local PRs
+               // We need to concern with the first one that shows up in the text, whichever it is
+               found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum)
+               if exttrack && alphanum {
+                       if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 {
+                               if !found || ref2.RefLocation.Start < ref.RefLocation.Start {
+                                       found = true
+                                       ref = ref2
+                               }
                        }
                }
-       }
-       if !found {
-               return
-       }
-
-       var link *html.Node
-       reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
-       if exttrack && !ref.IsPull {
-               ctx.Metas["index"] = ref.Issue
-               link = createLink(com.Expand(ctx.Metas["format"], ctx.Metas), reftext, "ref-issue")
-       } else {
-               // Path determines the type of link that will be rendered. It's unknown at this point whether
-               // the linked item is actually a PR or an issue. Luckily it's of no real consequence because
-               // Gitea will redirect on click as appropriate.
-               path := "issues"
-               if ref.IsPull {
-                       path = "pulls"
+               if !found {
+                       return
                }
-               if ref.Owner == "" {
-                       link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
+
+               var link *html.Node
+               reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
+               if exttrack && !ref.IsPull {
+                       ctx.Metas["index"] = ref.Issue
+                       link = createLink(com.Expand(ctx.Metas["format"], ctx.Metas), reftext, "ref-issue")
                } else {
-                       link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
+                       // Path determines the type of link that will be rendered. It's unknown at this point whether
+                       // the linked item is actually a PR or an issue. Luckily it's of no real consequence because
+                       // Gitea will redirect on click as appropriate.
+                       path := "issues"
+                       if ref.IsPull {
+                               path = "pulls"
+                       }
+                       if ref.Owner == "" {
+                               link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
+                       } else {
+                               link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
+                       }
                }
-       }
 
-       if ref.Action == references.XRefActionNone {
-               replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
-               return
-       }
+               if ref.Action == references.XRefActionNone {
+                       replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
+                       node = node.NextSibling.NextSibling
+                       continue
+               }
 
-       // Decorate action keywords if actionable
-       var keyword *html.Node
-       if references.IsXrefActionable(ref, exttrack, alphanum) {
-               keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
-       } else {
-               keyword = &html.Node{
+               // Decorate action keywords if actionable
+               var keyword *html.Node
+               if references.IsXrefActionable(ref, exttrack, alphanum) {
+                       keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
+               } else {
+                       keyword = &html.Node{
+                               Type: html.TextNode,
+                               Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
+                       }
+               }
+               spaces := &html.Node{
                        Type: html.TextNode,
-                       Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
+                       Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
                }
+               replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
+               node = node.NextSibling.NextSibling.NextSibling.NextSibling
        }
-       spaces := &html.Node{
-               Type: html.TextNode,
-               Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
-       }
-       replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
 }
 
 // fullSha1PatternProcessor renders SHA containing URLs
@@ -853,86 +878,112 @@ func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
        if ctx.Metas == nil {
                return
        }
-       m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
-       if m == nil {
-               return
-       }
 
-       urlFull := node.Data[m[0]:m[1]]
-       text := base.ShortSha(node.Data[m[2]:m[3]])
+       next := node.NextSibling
+       for node != nil && node != next {
+               m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
+               if m == nil {
+                       return
+               }
 
-       // 3rd capture group matches a optional path
-       subpath := ""
-       if m[5] > 0 {
-               subpath = node.Data[m[4]:m[5]]
-       }
+               urlFull := node.Data[m[0]:m[1]]
+               text := base.ShortSha(node.Data[m[2]:m[3]])
 
-       // 4th capture group matches a optional url hash
-       hash := ""
-       if m[7] > 0 {
-               hash = node.Data[m[6]:m[7]][1:]
-       }
+               // 3rd capture group matches a optional path
+               subpath := ""
+               if m[5] > 0 {
+                       subpath = node.Data[m[4]:m[5]]
+               }
 
-       start := m[0]
-       end := m[1]
+               // 4th capture group matches a optional url hash
+               hash := ""
+               if m[7] > 0 {
+                       hash = node.Data[m[6]:m[7]][1:]
+               }
 
-       // If url ends in '.', it's very likely that it is not part of the
-       // actual url but used to finish a sentence.
-       if strings.HasSuffix(urlFull, ".") {
-               end--
-               urlFull = urlFull[:len(urlFull)-1]
-               if hash != "" {
-                       hash = hash[:len(hash)-1]
-               } else if subpath != "" {
-                       subpath = subpath[:len(subpath)-1]
+               start := m[0]
+               end := m[1]
+
+               // If url ends in '.', it's very likely that it is not part of the
+               // actual url but used to finish a sentence.
+               if strings.HasSuffix(urlFull, ".") {
+                       end--
+                       urlFull = urlFull[:len(urlFull)-1]
+                       if hash != "" {
+                               hash = hash[:len(hash)-1]
+                       } else if subpath != "" {
+                               subpath = subpath[:len(subpath)-1]
+                       }
                }
-       }
 
-       if subpath != "" {
-               text += subpath
-       }
+               if subpath != "" {
+                       text += subpath
+               }
 
-       if hash != "" {
-               text += " (" + hash + ")"
-       }
+               if hash != "" {
+                       text += " (" + hash + ")"
+               }
 
-       replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
+               replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
+               node = node.NextSibling.NextSibling
+       }
 }
 
 // emojiShortCodeProcessor for rendering text like :smile: into emoji
 func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
-       m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data)
-       if m == nil {
-               return
-       }
-
-       alias := node.Data[m[0]:m[1]]
-       alias = strings.ReplaceAll(alias, ":", "")
-       converted := emoji.FromAlias(alias)
-       if converted == nil {
-               // check if this is a custom reaction
-               s := strings.Join(setting.UI.Reactions, " ") + "gitea"
-               if strings.Contains(s, alias) {
-                       replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
+       start := 0
+       next := node.NextSibling
+       for node != nil && node != next && start < len(node.Data) {
+               m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
+               if m == nil {
                        return
                }
-               return
-       }
+               m[0] += start
+               m[1] += start
+
+               start = m[1]
+
+               alias := node.Data[m[0]:m[1]]
+               alias = strings.ReplaceAll(alias, ":", "")
+               converted := emoji.FromAlias(alias)
+               if converted == nil {
+                       // check if this is a custom reaction
+                       s := strings.Join(setting.UI.Reactions, " ") + "gitea"
+                       if strings.Contains(s, alias) {
+                               replaceContent(node, m[0], m[1], createCustomEmoji(alias, "emoji"))
+                               node = node.NextSibling.NextSibling
+                               start = 0
+                               continue
+                       }
+                       continue
+               }
 
-       replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
+               replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
+               node = node.NextSibling.NextSibling
+               start = 0
+       }
 }
 
 // emoji processor to match emoji and add emoji class
 func emojiProcessor(ctx *RenderContext, node *html.Node) {
-       m := emoji.FindEmojiSubmatchIndex(node.Data)
-       if m == nil {
-               return
-       }
-
-       codepoint := node.Data[m[0]:m[1]]
-       val := emoji.FromCode(codepoint)
-       if val != nil {
-               replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
+       start := 0
+       next := node.NextSibling
+       for node != nil && node != next && start < len(node.Data) {
+               m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
+               if m == nil {
+                       return
+               }
+               m[0] += start
+               m[1] += start
+
+               codepoint := node.Data[m[0]:m[1]]
+               start = m[1]
+               val := emoji.FromCode(codepoint)
+               if val != nil {
+                       replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
+                       node = node.NextSibling.NextSibling
+                       start = 0
+               }
        }
 }
 
@@ -942,49 +993,70 @@ func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
        if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
                return
        }
-       m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
-       if m == nil {
-               return
-       }
-       hash := node.Data[m[2]:m[3]]
-       // The regex does not lie, it matches the hash pattern.
-       // However, a regex cannot know if a hash actually exists or not.
-       // We could assume that a SHA1 hash should probably contain alphas AND numerics
-       // but that is not always the case.
-       // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
-       // as used by git and github for linking and thus we have to do similar.
-       // Because of this, we check to make sure that a matched hash is actually
-       // a commit in the repository before making it a link.
-       if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil {
-               if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
-                       log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
+
+       start := 0
+       next := node.NextSibling
+       for node != nil && node != next && start < len(node.Data) {
+               m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
+               if m == nil {
+                       return
+               }
+               m[2] += start
+               m[3] += start
+
+               hash := node.Data[m[2]:m[3]]
+               // The regex does not lie, it matches the hash pattern.
+               // However, a regex cannot know if a hash actually exists or not.
+               // We could assume that a SHA1 hash should probably contain alphas AND numerics
+               // but that is not always the case.
+               // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
+               // as used by git and github for linking and thus we have to do similar.
+               // Because of this, we check to make sure that a matched hash is actually
+               // a commit in the repository before making it a link.
+               if _, err := git.NewCommand("rev-parse", "--verify", hash).RunInDirBytes(ctx.Metas["repoPath"]); err != nil {
+                       if !strings.Contains(err.Error(), "fatal: Needed a single revision") {
+                               log.Debug("sha1CurrentPatternProcessor git rev-parse: %v", err)
+                       }
+                       start = m[3]
+                       continue
                }
-               return
-       }
 
-       replaceContent(node, m[2], m[3],
-               createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
+               replaceContent(node, m[2], m[3],
+                       createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
+               start = 0
+               node = node.NextSibling.NextSibling
+       }
 }
 
 // emailAddressProcessor replaces raw email addresses with a mailto: link.
 func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
-       m := emailRegex.FindStringSubmatchIndex(node.Data)
-       if m == nil {
-               return
+       next := node.NextSibling
+       for node != nil && node != next {
+               m := emailRegex.FindStringSubmatchIndex(node.Data)
+               if m == nil {
+                       return
+               }
+
+               mail := node.Data[m[2]:m[3]]
+               replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
+               node = node.NextSibling.NextSibling
        }
-       mail := node.Data[m[2]:m[3]]
-       replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
 }
 
 // linkProcessor creates links for any HTTP or HTTPS URL not captured by
 // markdown.
 func linkProcessor(ctx *RenderContext, node *html.Node) {
-       m := common.LinkRegex.FindStringIndex(node.Data)
-       if m == nil {
-               return
+       next := node.NextSibling
+       for node != nil && node != next {
+               m := common.LinkRegex.FindStringIndex(node.Data)
+               if m == nil {
+                       return
+               }
+
+               uri := node.Data[m[0]:m[1]]
+               replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
+               node = node.NextSibling.NextSibling
        }
-       uri := node.Data[m[0]:m[1]]
-       replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
 }
 
 func genDefaultLinkProcessor(defaultLink string) processor {
@@ -1008,12 +1080,17 @@ func genDefaultLinkProcessor(defaultLink string) processor {
 
 // descriptionLinkProcessor creates links for DescriptionHTML
 func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
-       m := common.LinkRegex.FindStringIndex(node.Data)
-       if m == nil {
-               return
+       next := node.NextSibling
+       for node != nil && node != next {
+               m := common.LinkRegex.FindStringIndex(node.Data)
+               if m == nil {
+                       return
+               }
+
+               uri := node.Data[m[0]:m[1]]
+               replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
+               node = node.NextSibling.NextSibling
        }
-       uri := node.Data[m[0]:m[1]]
-       replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
 }
 
 func createDescriptionLink(href, content string) *html.Node {
index 4c3c2399f521ab714b070456afe8fea2bbc1942d..4cdd5798c8a9f72eb1355cb4703ecd322c2aa145 100644 (file)
@@ -464,3 +464,19 @@ func TestIssue16020(t *testing.T) {
        assert.NoError(t, err)
        assert.Equal(t, data, res.String())
 }
+
+func BenchmarkEmojiPostprocess(b *testing.B) {
+       data := "🥰 "
+       for len(data) < 1<<16 {
+               data += data
+       }
+       b.ResetTimer()
+       for i := 0; i < b.N; i++ {
+               var res strings.Builder
+               err := PostProcess(&RenderContext{
+                       URLPrefix: "https://example.com",
+                       Metas:     localMetas,
+               }, strings.NewReader(data), &res)
+               assert.NoError(b, err)
+       }
+}
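
To compare the change against the previous behaviour, the new benchmark can be run in the
usual way, e.g. go test -run '^$' -bench BenchmarkEmojiPostprocess ./modules/markup/
(repeat with -count and benchstat to reduce noise).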