summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorGiteabot <teabot@gitea.io>2024-05-04 12:04:05 +0800
committerGitHub <noreply@github.com>2024-05-04 12:04:05 +0800
commit471b411873fd14f2fb161322022863680e294d75 (patch)
tree355c44aa75f02d25f628d61eb761b3cac1f28c8f
parenta82e6301f77d3dce95e7945a05429e3d594bd246 (diff)
downloadgitea-471b411873fd14f2fb161322022863680e294d75.tar.gz
gitea-471b411873fd14f2fb161322022863680e294d75.zip
Fix markdown URL parsing for commit ID (#30812) (#30855)
Backport #30812 by wxiaoguang Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
-rw-r--r--modules/markup/html.go120
-rw-r--r--modules/markup/html_codepreview.go3
-rw-r--r--modules/markup/html_internal_test.go59
-rw-r--r--modules/markup/html_test.go7
4 files changed, 115 insertions, 74 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index 5ae0cc8755..2958dc9646 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -10,6 +10,7 @@ import (
"path"
"path/filepath"
"regexp"
+ "slices"
"strings"
"sync"
@@ -54,7 +55,7 @@ var (
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anyHashPattern splits url containing SHA into parts
- anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~_%.a-zA-Z0-9/]+)?(#[-+~_%.a-zA-Z0-9]+)?`)
+ anyHashPattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{40,64})(/[-+~%./\w]+)?(\?[-+~%.\w&=]+)?(#[-+~%.\w]+)?`)
// comparePattern matches "http://domain/org/repo/compare/COMMIT1...COMMIT2#hash"
comparePattern = regexp.MustCompile(`https?://(?:\S+/){4,5}([0-9a-f]{7,64})(\.\.\.?)([0-9a-f]{7,64})?(#[-+~_%.a-zA-Z0-9]+)?`)
@@ -591,7 +592,8 @@ func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
func mentionProcessor(ctx *RenderContext, node *html.Node) {
start := 0
- for node != nil {
+ nodeStop := node.NextSibling
+ for node != nodeStop {
found, loc := references.FindFirstMentionBytes(util.UnsafeStringToBytes(node.Data[start:]))
if !found {
node = node.NextSibling
@@ -962,57 +964,68 @@ func commitCrossReferencePatternProcessor(ctx *RenderContext, node *html.Node) {
}
}
-// fullHashPatternProcessor renders SHA containing URLs
-func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
- if ctx.Metas == nil {
- return
+type anyHashPatternResult struct {
+ PosStart int
+ PosEnd int
+ FullURL string
+ CommitID string
+ SubPath string
+ QueryHash string
+}
+
+func anyHashPatternExtract(s string) (ret anyHashPatternResult, ok bool) {
+ m := anyHashPattern.FindStringSubmatchIndex(s)
+ if m == nil {
+ return ret, false
}
- next := node.NextSibling
- for node != nil && node != next {
- m := anyHashPattern.FindStringSubmatchIndex(node.Data)
- if m == nil {
- return
+ ret.PosStart, ret.PosEnd = m[0], m[1]
+ ret.FullURL = s[ret.PosStart:ret.PosEnd]
+ if strings.HasSuffix(ret.FullURL, ".") {
+ // if url ends in '.', it's very likely that it is not part of the actual url but used to finish a sentence.
+ ret.PosEnd--
+ ret.FullURL = ret.FullURL[:len(ret.FullURL)-1]
+ for i := 0; i < len(m); i++ {
+ m[i] = min(m[i], ret.PosEnd)
}
+ }
- urlFull := node.Data[m[0]:m[1]]
- text := base.ShortSha(node.Data[m[2]:m[3]])
+ ret.CommitID = s[m[2]:m[3]]
+ if m[5] > 0 {
+ ret.SubPath = s[m[4]:m[5]]
+ }
- // 3rd capture group matches a optional path
- subpath := ""
- if m[5] > 0 {
- subpath = node.Data[m[4]:m[5]]
- }
+ lastStart, lastEnd := m[len(m)-2], m[len(m)-1]
+ if lastEnd > 0 {
+ ret.QueryHash = s[lastStart:lastEnd][1:]
+ }
+ return ret, true
+}
- // 4th capture group matches a optional url hash
- hash := ""
- if m[7] > 0 {
- hash = node.Data[m[6]:m[7]][1:]
+// fullHashPatternProcessor renders SHA containing URLs
+func fullHashPatternProcessor(ctx *RenderContext, node *html.Node) {
+ if ctx.Metas == nil {
+ return
+ }
+ nodeStop := node.NextSibling
+ for node != nodeStop {
+ if node.Type != html.TextNode {
+ node = node.NextSibling
+ continue
}
-
- start := m[0]
- end := m[1]
-
- // If url ends in '.', it's very likely that it is not part of the
- // actual url but used to finish a sentence.
- if strings.HasSuffix(urlFull, ".") {
- end--
- urlFull = urlFull[:len(urlFull)-1]
- if hash != "" {
- hash = hash[:len(hash)-1]
- } else if subpath != "" {
- subpath = subpath[:len(subpath)-1]
- }
+ ret, ok := anyHashPatternExtract(node.Data)
+ if !ok {
+ node = node.NextSibling
+ continue
}
-
- if subpath != "" {
- text += subpath
+ text := base.ShortSha(ret.CommitID)
+ if ret.SubPath != "" {
+ text += ret.SubPath
}
-
- if hash != "" {
- text += " (" + hash + ")"
+ if ret.QueryHash != "" {
+ text += " (" + ret.QueryHash + ")"
}
- replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
+ replaceContent(node, ret.PosStart, ret.PosEnd, createCodeLink(ret.FullURL, text, "commit"))
node = node.NextSibling.NextSibling
}
}
@@ -1021,19 +1034,16 @@ func comparePatternProcessor(ctx *RenderContext, node *html.Node) {
if ctx.Metas == nil {
return
}
-
- next := node.NextSibling
- for node != nil && node != next {
- m := comparePattern.FindStringSubmatchIndex(node.Data)
- if m == nil {
- return
+ nodeStop := node.NextSibling
+ for node != nodeStop {
+ if node.Type != html.TextNode {
+ node = node.NextSibling
+ continue
}
-
- // Ensure that every group (m[0]...m[7]) has a match
- for i := 0; i < 8; i++ {
- if m[i] == -1 {
- return
- }
+ m := comparePattern.FindStringSubmatchIndex(node.Data)
+ if m == nil || slices.Contains(m[:8], -1) { // ensure that every group (m[0]...m[7]) has a match
+ node = node.NextSibling
+ continue
}
urlFull := node.Data[m[0]:m[1]]
diff --git a/modules/markup/html_codepreview.go b/modules/markup/html_codepreview.go
index d9da24ea34..5ef2217e3d 100644
--- a/modules/markup/html_codepreview.go
+++ b/modules/markup/html_codepreview.go
@@ -60,7 +60,8 @@ func renderCodeBlock(ctx *RenderContext, node *html.Node) (urlPosStart, urlPosSt
}
func codePreviewPatternProcessor(ctx *RenderContext, node *html.Node) {
- for node != nil {
+ nodeStop := node.NextSibling
+ for node != nodeStop {
if node.Type != html.TextNode {
node = node.NextSibling
continue
diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go
index e313be7040..3ff0597851 100644
--- a/modules/markup/html_internal_test.go
+++ b/modules/markup/html_internal_test.go
@@ -399,36 +399,61 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
}
func TestRegExp_anySHA1Pattern(t *testing.T) {
- testCases := map[string][]string{
+ testCases := map[string]anyHashPatternResult{
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
- "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
- "/test/unit/event.js",
- "#L2703",
+ CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
+ SubPath: "/test/unit/event.js",
+ QueryHash: "L2703",
},
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
- "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
- "/test/unit/event.js",
- "",
+ CommitID: "a644101ed04d0beacea864ce805e0c4f86ba1cd1",
+ SubPath: "/test/unit/event.js",
},
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
- "0705be475092aede1eddae01319ec931fb9c65fc",
- "",
- "",
+ CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
},
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
- "0705be475092aede1eddae01319ec931fb9c65fc",
- "/src",
- "",
+ CommitID: "0705be475092aede1eddae01319ec931fb9c65fc",
+ SubPath: "/src",
},
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
- "d8a994ef243349f321568f9e36d5c3f444b99cae",
- "",
- "#diff-2",
+ CommitID: "d8a994ef243349f321568f9e36d5c3f444b99cae",
+ QueryHash: "diff-2",
+ },
+ "non-url": {},
+ "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b#L1-L2": {
+ CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ QueryHash: "L1-L2",
+ },
+ "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678.": {
+ CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ },
+ "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678/sub.": {
+ CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ SubPath: "/sub",
+ },
+ "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b.": {
+ CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ },
+ "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678?a=b&c=d": {
+ CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ },
+ "http://a/b/c/d/e/1234567812345678123456781234567812345678123456781234567812345678#hash.": {
+ CommitID: "1234567812345678123456781234567812345678123456781234567812345678",
+ QueryHash: "hash",
},
}
for k, v := range testCases {
- assert.Equal(t, anyHashPattern.FindStringSubmatch(k)[1:], v)
+ ret, ok := anyHashPatternExtract(k)
+ if v.CommitID == "" {
+ assert.False(t, ok)
+ } else {
+ assert.EqualValues(t, strings.TrimSuffix(k, "."), ret.FullURL)
+ assert.EqualValues(t, v.CommitID, ret.CommitID)
+ assert.EqualValues(t, v.SubPath, ret.SubPath)
+ assert.EqualValues(t, v.QueryHash, ret.QueryHash)
+ }
}
}
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index 916e74fb62..a2ae18d777 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -124,6 +124,11 @@ func TestRender_CrossReferences(t *testing.T) {
test(
util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345"),
`<p><a href="`+util.URLJoin(markup.TestAppURL, "gogitea", "some-repo-name", "issues", "12345")+`" class="ref-issue" rel="nofollow">gogitea/some-repo-name#12345</a></p>`)
+
+ inputURL := "https://host/a/b/commit/0123456789012345678901234567890123456789/foo.txt?a=b#L2-L3"
+ test(
+ inputURL,
+ `<p><a href="`+inputURL+`" rel="nofollow"><code>0123456789/foo.txt (L2-L3)</code></a></p>`)
}
func TestMisc_IsSameDomain(t *testing.T) {
@@ -695,7 +700,7 @@ func TestIssue18471(t *testing.T) {
}, strings.NewReader(data), &res)
assert.NoError(t, err)
- assert.Equal(t, "<a href=\"http://domain/org/repo/compare/783b039...da951ce\" class=\"compare\"><code class=\"nohighlight\">783b039...da951ce</code></a>", res.String())
+ assert.Equal(t, `<a href="http://domain/org/repo/compare/783b039...da951ce" class="compare"><code class="nohighlight">783b039...da951ce</code></a>`, res.String())
}
func TestIsFullURL(t *testing.T) {