summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorsilverwind <me@silverwind.io>2019-04-06 20:28:45 +0200
committerLauris BH <lauris@nix.lv>2019-04-06 21:28:45 +0300
commit2242a9f82e26ac8a4725a8ffc0aa0cfb25aed30a (patch)
treeadec26445d4dc4424bf9bade53d5e5429330e2fa /modules
parent0bdd81df9d6f13752b93dbcce19a4f703ad43fc2 (diff)
downloadgitea-2242a9f82e26ac8a4725a8ffc0aa0cfb25aed30a.tar.gz
gitea-2242a9f82e26ac8a4725a8ffc0aa0cfb25aed30a.zip
Improve SHA1 link detection (#6526)
This improves the SHA1 link detection to not pick up extraneous non-whitespace characters at the end of the URL. The '.' is a special case handled in code itself because of missing regexp lookahead support. Regex test cases: https://regex101.com/r/xUMlqh/3
Diffstat (limited to 'modules')
-rw-r--r--modules/markup/html.go47
-rw-r--r--modules/markup/html_internal_test.go10
2 files changed, 36 insertions, 21 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index e016b67d0c..7bd8e8d8f4 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -54,7 +54,7 @@ var (
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
// anySHA1Pattern splits a URL containing a SHA1 hash into its parts
- anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)
+ anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
@@ -594,31 +594,46 @@ func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
if m == nil {
return
}
- // take out what's relevant
+
urlFull := node.Data[m[0]:m[1]]
- hash := node.Data[m[2]:m[3]]
+ text := base.ShortSha(node.Data[m[2]:m[3]])
- var subtree, line string
+ // 2nd capture group matches an optional path
+ subpath := ""
+ if m[5] > 0 {
+ subpath = node.Data[m[4]:m[5]]
+ }
- // optional, we do them depending on the length.
+ // 3rd capture group matches an optional url hash
+ hash := ""
if m[7] > 0 {
- line = node.Data[m[6]:m[7]]
+ hash = node.Data[m[6]:m[7]][1:]
}
- if m[5] > 0 {
- subtree = node.Data[m[4]:m[5]]
+
+ start := m[0]
+ end := m[1]
+
+ // If url ends in '.', it's very likely that it is not part of the
+ // actual url but used to finish a sentence.
+ if strings.HasSuffix(urlFull, ".") {
+ end--
+ urlFull = urlFull[:len(urlFull)-1]
+ if hash != "" {
+ hash = hash[:len(hash)-1]
+ } else if subpath != "" {
+ subpath = subpath[:len(subpath)-1]
+ }
}
- text := base.ShortSha(hash)
- if subtree != "" {
- text += "/" + subtree
+ if subpath != "" {
+ text += subpath
}
- if line != "" {
- text += " ("
- text += line
- text += ")"
+
+ if hash != "" {
+ text += " (" + hash + ")"
}
- replaceContent(node, m[0], m[1], createLink(urlFull, text))
+ replaceContent(node, start, end, createLink(urlFull, text))
}
// sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go
index ff07bab913..b8612eb2bb 100644
--- a/modules/markup/html_internal_test.go
+++ b/modules/markup/html_internal_test.go
@@ -273,12 +273,12 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
testCases := map[string][]string{
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js#L2703": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
- "test/unit/event.js",
- "L2703",
+ "/test/unit/event.js",
+ "#L2703",
},
"https://github.com/jquery/jquery/blob/a644101ed04d0beacea864ce805e0c4f86ba1cd1/test/unit/event.js": {
"a644101ed04d0beacea864ce805e0c4f86ba1cd1",
- "test/unit/event.js",
+ "/test/unit/event.js",
"",
},
"https://github.com/jquery/jquery/commit/0705be475092aede1eddae01319ec931fb9c65fc": {
@@ -288,13 +288,13 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
},
"https://github.com/jquery/jquery/tree/0705be475092aede1eddae01319ec931fb9c65fc/src": {
"0705be475092aede1eddae01319ec931fb9c65fc",
- "src",
+ "/src",
"",
},
"https://try.gogs.io/gogs/gogs/commit/d8a994ef243349f321568f9e36d5c3f444b99cae#diff-2": {
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"",
- "diff-2",
+ "#diff-2",
},
}