source.dussan.org Git - gitea.git/commitdiff
Fix markdown URL parsing for commit ID (#30812) (#30855)
author Giteabot <teabot@gitea.io>
Sat, 4 May 2024 04:04:05 +0000 (12:04 +0800)
committer GitHub <noreply@github.com>
Sat, 4 May 2024 04:04:05 +0000 (12:04 +0800)
Backport #30812 by wxiaoguang

Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
modules/markup/html.go
modules/markup/html_codepreview.go
modules/markup/html_internal_test.go
modules/markup/html_test.go

index 5ae0cc8755c0420dab16437dcc9170d6f23db6ed..2958dc964671f37c168e41bb5657250f7d7e27b5 100644 (file)
@@ -10,6 +10,7 @@ import (
        "path"
        "path/filepath"
        "regexp"
+       "slices"
        "strings"
        "sync"
 
@@ -54,7 +55,7 @@ var (
        shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
 
        // anyHashPattern splits url containing SHA into parts
-       anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
+       anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)
 
        // comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
        comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
@@ -591,7 +592,8 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
 
 func mentionProcessor(ctx *RenderContext, node *html.Node) {
        start := 0
-       for node != nil {
+       nodeStop := node.NextSibling
+       for node != nodeStop {
                found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:]))
                if !found {
                        node = node.NextSibling
@@ -962,57 +964,68 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
        }
 }
 
-// fullHashPatternProcessor renders SHA containing URLs
-func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
-       if ctx.Metas == nil {
-               return
+type anyHashPatternResult struct {
+       PosStart  int
+       PosEnd    int
+       FullURL   string
+       CommitID  string
+       SubPath   string
+       QueryHash string
+}
+
+func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
+       m := anyHashPattern.FindStringSubmatchIndex(s)
+       if m == nil {
+               return ret, false
        }
 
-       next := node.NextSibling
-       for node != nil && node != next {
-               m := anyHashPattern.FindStringSubmatchIndex(node.Data)
-               if m == nil {
-                       return
+       ret.PosStart, ret.PosEnd = m[0], m[1]
+       ret.FullURL = s[ret.PosStart:ret.PosEnd]
+       if strings.HasSuffix(ret.FullURL, ".") {
+               // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence.
+               ret.PosEnd--
+               ret.FullURL = ret.FullURL[:len(ret.FullURL)-1]
+               for i := 0; i < len(m); i++ {
+                       m[i] = min(m[i], ret.PosEnd)
                }
+       }
 
-               urlFull := node.Data[m[0]:m[1]]
-               text := base.ShortSha(node.Data[m[2]:m[3]])
+       ret.CommitID = s[m[2]:m[3]]
+       if m[5] > 0 {
+               ret.SubPath = s[m[4]:m[5]]
+       }
 
-               // 3rd capture group matches a optional path
-               subpath := ""
-               if m[5] > 0 {
-                       subpath = node.Data[m[4]:m[5]]
-               }
+       lastStart, lastEnd := m[len(m)-2], m[len(m)-1]
+       if lastEnd > 0 {
+               ret.QueryHash = s[lastStart:lastEnd][1:]
+       }
+       return ret, true
+}
 
-               // 4th capture group matches a optional url hash
-               hash := ""
-               if m[7] > 0 {
-                       hash = node.Data[m[6]:m[7]][1:]
+// fullHashPatternProcessor renders SHA containing URLs
+func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
+       if ctx.Metas == nil {
+               return
+       }
+       nodeStop := node.NextSibling
+       for node != nodeStop {
+               if node.Type != html.TextNode {
+                       node = node.NextSibling
+                       continue
                }
-
-               start := m[0]
-               end := m[1]
-
-               // If url ends in '.', it's very likely that it is not part of the
-               // actual url but used to finish a sentence.
-               if strings.HasSuffix(urlFull, ".") {
-                       end--
-                       urlFull = urlFull[:len(urlFull)-1]
-                       if hash != "" {
-                               hash = hash[:len(hash)-1]
-                       } else if subpath != "" {
-                               subpath = subpath[:len(subpath)-1]
-                       }
+               ret, ok := anyHashPatternExtract(node.Data)
+               if !ok {
+                       node = node.NextSibling
+                       continue
                }
-
-               if subpath != "" {
-                       text += subpath
+               text := base.ShortSha(ret.CommitID)
+               if ret.SubPath != "" {
+                       text += ret.SubPath
                }
-
-               if hash != "" {
-                       text += " (" + hash + ")"
+               if ret.QueryHash != "" {
+                       text += " (" + ret.QueryHash + ")"
                }
-               replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
+               replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit"))
                node = node.NextSibling.NextSibling
        }
 }
@@ -1021,19 +1034,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
        if ctx.Metas == nil {
                return
        }
-
-       next := node.NextSibling
-       for node != nil && node != next {
-               m := comparePattern.FindStringSubmatchIndex(node.Data)
-               if m == nil {
-                       return
+       nodeStop := node.NextSibling
+       for node != nodeStop {
+               if node.Type != html.TextNode {
+                       node = node.NextSibling
+                       continue
                }
-
-               // Ensure that every group (m[0]...m[7]) has a match
-               for i := 0; i < 8; i++ {
-                       if m[i] == -1 {
-                               return
-                       }
+               m := comparePattern.FindStringSubmatchIndex(node.Data)
+               if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match
+                       node = node.NextSibling
+                       continue
                }
 
                urlFull := node.Data[m[0]:m[1]]
index d9da24ea344958c883e8e7b876c8cd7f9e554c60..5ef2217e3d765b0bca7347ac43dbd113abc86b56 100644 (file)
@@ -60,7 +60,8 @@ func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosSt
 }
 
 func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
-       for node != nil {
+       nodeStop := node.NextSibling
+       for node != nodeStop {
                if node.Type != html.TextNode {
                        node = node.NextSibling
                        continue
index e313be7040c7af4e2a92305d8f876943ee0b89b0..3ff0597851bba60030d63a8729c395bb992ec698 100644 (file)
@@ -399,36 +399,61 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
 }
 
 func TestRegExp_anySHA1Pattern(t *testing.T) {
-       testCases := map[string][]string{
+       testCases := map[string]anyHashPatternResult{
                "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
-                       "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
-                       "/test/unit/event.js",
-                       "#L2703",
+                       CommitID:  "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
+                       SubPath:   "/test/unit/event.js",
+                       QueryHash: "L2703",
                },
                "https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
-                       "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
-                       "/test/unit/event.js",
-                       "",
+                       CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
+                       SubPath:  "/test/unit/event.js",
                },
                "https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
-                       "0705be475092aede1eddae01319ec931fb9c65fc",
-                       "",
-                       "",
+                       CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
                },
                "https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
-                       "0705be475092aede1eddae01319ec931fb9c65fc",
-                       "/src",
-                       "",
+                       CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
+                       SubPath:  "/src",
                },
                "https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
-                       "d8a994ef243349f321568f9e36d5c3f444b99cae",
-                       "",
-                       "#diff-2",
+                       CommitID:  "d8a994ef243349f321568f9e36d5c3f444b99cae",
+                       QueryHash: "diff-2",
+               },
+               "non-url": {},
+               "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
+                       CommitID:  "1234567812345678123456781234567812345678123456781234567812345678",
+                       QueryHash: "L1-L2",
+               },
+               "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": {
+                       CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+               },
+               "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": {
+                       CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+                       SubPath:  "/sub",
+               },
+               "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": {
+                       CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+               },
+               "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": {
+                       CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+               },
+               "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": {
+                       CommitID:  "1234567812345678123456781234567812345678123456781234567812345678",
+                       QueryHash: "hash",
                },
        }
 
        for k, v := range testCases {
-               assert.Equal(t, anyHashPattern.FindStringSubmatch(k)[1:], v)
+               ret, ok := anyHashPatternExtract(k)
+               if v.CommitID == "" {
+                       assert.False(t, ok)
+               } else {
+                       assert.EqualValues(t, strings.TrimSuffix(k, "."), ret.FullURL)
+                       assert.EqualValues(t, v.CommitID, ret.CommitID)
+                       assert.EqualValues(t, v.SubPath, ret.SubPath)
+                       assert.EqualValues(t, v.QueryHash, ret.QueryHash)
+               }
        }
 }
 
index 916e74fb6228d1c2dd9e51cb71ef2d23295c51f5..a2ae18d777129d2caf64a977aabb5fe2b9ebacbe 100644 (file)
@@ -124,6 +124,11 @@ func TestRender_CrossReferences(t *testing.T) {
        test(
                util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345"),
                `<p><a href="`+util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogitea/some-repo-name#12345</a></p>`)
+
+       inputURL := "https://host/a/b/commit/0123456789012345678901234567890123456789/foo.txt?a=b#L2-L3"
+       test(
+               inputURL,
+               `<p><a href="`+inputURL+`" rel="nofollow"><code>0123456789/foo.txt (L2-L3)</code></a></p>`)
 }
 
 func TestMisc_IsSameDomain(t *testing.T) {
@@ -695,7 +700,7 @@ func TestIssue18471(t *testing.T) {
        }, strings.NewReader(data), &res)
 
        assert.NoError(t, err)
-       assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
+       assert.Equal(t, `<a href="http://domain/org/repo/compare/783b039...da951ce" class="compare"><code class="nohighlight">783b039...da951ce</code></a>`, res.String())
 }
 
 func TestIsFullURL(t *testing.T) {