summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authormrsdizzie <info@mrsdizzie.com>2019-04-07 07:18:16 -0400
committerzeripath <art27@cantab.net>2019-04-07 12:18:16 +0100
commit6293736d02992ef317c0f1ffc875cdccf0fd5837 (patch)
tree00fbf299043df33893f1c173a824c4f45d13eb30
parent5422f23ed8174661b6e658250e4007b7fdf0d603 (diff)
downloadgitea-6293736d02992ef317c0f1ffc875cdccf0fd5837.tar.gz
gitea-6293736d02992ef317c0f1ffc875cdccf0fd5837.zip
Use stricter boundaries for auto-link detection (#6522)
* Use stricter boundaries for auto-link detection: Currently autolinks use \W for boundary detection, which creates many situations of inserting links into places they don't belong (paths, URLs, UUIDs, etc.). This fixes that by replacing \W and only allowing these matches to touch an open paren or bracket (matching what seems to be GitHub behavior) in addition to whitespace and start of line. Similar for ending boundary as well. Fixes #6149 (and probably others) * Update test: Replace incorrect test with a value that is a valid username, based on: "Username should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters." * Also allow for period at the end: Matching GitHub behavior * Fix email regex to work properly with specified boundaries: Create a specific capture group for email address and then use FindStringSubmatchIndex to allow for non-matching patterns as boundaries. * Add Tests: Add tests for new behavior -- including tests for email addresses, which were absent before.
-rw-r--r--modules/markup/html.go18
-rw-r--r--modules/markup/html_internal_test.go20
-rw-r--r--modules/markup/html_test.go43
3 files changed, 70 insertions, 11 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index 7bd8e8d8f4..a3bf15fe25 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -35,20 +35,20 @@ var (
// TODO: fix invalid linking issue
// mentionPattern matches all mentions in the form of "@user"
- mentionPattern = regexp.MustCompile(`(?:\s|^|\W)(@[0-9a-zA-Z-_\.]+)`)
+ mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`)
// issueNumericPattern matches string that references to a numeric issue, e.g. #1287
- issueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)(#[0-9]+)\b`)
+ issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
// issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234
- issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\W)([A-Z]{1,10}-[1-9][0-9]*)\b`)
+ issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|\.(\s|$))`)
// crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository
// e.g. gogits/gogs#12345
- crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)\b`)
+ crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
// so that abbreviated hash links can be used as well. This matches Git and GitHub usability.
- sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-f]{7,40})\b`)
+ sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|\.(\s|$))`)
// shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
@@ -63,7 +63,7 @@ var (
// well as the HTML5 spec:
// http://spec.commonmark.org/0.28/#email-address
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
- emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
+ emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)(?:\\s|$|\\)|\\]|\\.(\\s|$))")
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
)
@@ -656,12 +656,12 @@ func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) {
// emailAddressProcessor replaces raw email addresses with a mailto: link.
func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) {
- m := emailRegex.FindStringIndex(node.Data)
+ m := emailRegex.FindStringSubmatchIndex(node.Data)
if m == nil {
return
}
- mail := node.Data[m[0]:m[1]]
- replaceContent(node, m[0], m[1], createLink("mailto:"+mail, mail))
+ mail := node.Data[m[2]:m[3]]
+ replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail))
}
// linkProcessor creates links for any HTTP or HTTPS URL not captured by
diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go
index b8612eb2bb..cc261318eb 100644
--- a/modules/markup/html_internal_test.go
+++ b/modules/markup/html_internal_test.go
@@ -71,6 +71,7 @@ func TestRender_IssueIndexPattern(t *testing.T) {
test("test#1234")
test("#1234test")
test(" test #1234test")
+ test("/home/gitea/#1234")
// should not render issue mention without leading space
test("test#54321 issue")
@@ -103,9 +104,11 @@ func TestRender_IssueIndexPattern2(t *testing.T) {
test("#1234 test", "%s test", 1234)
test("test #8 issue", "test %s issue", 8)
test("test issue #1234", "test issue %s", 1234)
+ test("fixes issue #1234.", "fixes issue %s.", 1234)
- // should render mentions in parentheses
+ // should render mentions in parentheses / brackets
test("(#54321 issue)", "(%s issue)", 54321)
+ test("[#54321 issue]", "[%s issue]", 54321)
test("test (#9801 extra) issue", "test (%s extra) issue", 9801)
test("test (#1)", "test (%s)", 1)
@@ -253,10 +256,14 @@ func TestRegExp_sha1CurrentPattern(t *testing.T) {
trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae",
"abcdefabcdefabcdefabcdefabcdefabcdefabcd",
+ "(abcdefabcdefabcdefabcdefabcdefabcdefabcd)",
+ "[abcdefabcdefabcdefabcdefabcdefabcdefabcd]",
+ "abcdefabcdefabcdefabcdefabcdefabcdefabcd.",
}
falseTestCases := []string{
"test",
"abcdefg",
+ "e59ff077-2d03-4e6b-964d-63fbaea81f",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmn",
"abcdefghijklmnopqrstuvwxyzabcdefghijklmO",
}
@@ -309,7 +316,9 @@ func TestRegExp_mentionPattern(t *testing.T) {
"@ANT_123",
"@xxx-DiN0-z-A..uru..s-xxx",
" @lol ",
- " @Te/st",
+ " @Te-st",
+ "(@gitea)",
+ "[@gitea]",
}
falseTestCases := []string{
"@ 0",
@@ -317,6 +326,8 @@ func TestRegExp_mentionPattern(t *testing.T) {
"@",
"",
"ABC",
+ "/home/gitea/@gitea",
+ "\"@gitea\"",
}
for _, testCase := range trueTestCases {
@@ -335,6 +346,9 @@ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
"A-1",
"RC-80",
"ABCDEFGHIJ-1234567890987654321234567890",
+ "ABC-123.",
+ "(ABC-123)",
+ "[ABC-123]",
}
falseTestCases := []string{
"RC-08",
@@ -347,6 +361,8 @@ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
"ABC",
"GG-",
"rm-1",
+ "/home/gitea/ABC-1234",
+ "MY-STRING-ABC-123",
}
for _, testCase := range trueTestCases {
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index 8d113b18a1..6bd9a465b5 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -36,6 +36,8 @@ func TestRender_Commits(t *testing.T) {
test(commit, `<p><a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`)
test(tree, `<p><a href="`+tree+`" rel="nofollow">b6dd6210ea/src</a></p>`)
test("commit "+sha, `<p>commit <a href="`+commit+`" rel="nofollow">b6dd6210ea</a></p>`)
+ test("/home/gitea/"+sha, "<p>/home/gitea/"+sha+"</p>")
+
}
func TestRender_CrossReferences(t *testing.T) {
@@ -53,6 +55,9 @@ func TestRender_CrossReferences(t *testing.T) {
test(
"go-gitea/gitea#12345",
`<p><a href="`+util.URLJoin(AppURL, "go-gitea", "gitea", "issues", "12345")+`" rel="nofollow">go-gitea/gitea#12345</a></p>`)
+ test(
+ "/home/gitea/go-gitea/gitea#12345",
+ `<p>/home/gitea/go-gitea/gitea#12345</p>`)
}
func TestMisc_IsSameDomain(t *testing.T) {
@@ -144,6 +149,44 @@ func TestRender_links(t *testing.T) {
`<p>www</p>`)
}
+func TestRender_email(t *testing.T) {
+ setting.AppURL = AppURL
+ setting.AppSubURL = AppSubURL
+
+ test := func(input, expected string) {
+ buffer := RenderString("a.md", input, setting.AppSubURL, nil)
+ assert.Equal(t, strings.TrimSpace(expected), strings.TrimSpace(string(buffer)))
+ }
+ // Text that should be turned into email link
+
+ test(
+ "info@gitea.com",
+ `<p><a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a></p>`)
+ test(
+ "(info@gitea.com)",
+ `<p>(<a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>)</p>`)
+ test(
+ "[info@gitea.com]",
+ `<p>[<a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>]</p>`)
+ test(
+ "info@gitea.com.",
+ `<p><a href="mailto:info@gitea.com" rel="nofollow">info@gitea.com</a>.</p>`)
+ test(
+ "send email to info@gitea.co.uk.",
+ `<p>send email to <a href="mailto:info@gitea.co.uk" rel="nofollow">info@gitea.co.uk</a>.</p>`)
+
+ // Text that should *not* be turned into email links
+ test(
+ "\"info@gitea.com\"",
+ `<p>“info@gitea.com”</p>`)
+ test(
+ "/home/gitea/mailstore/info@gitea/com",
+ `<p>/home/gitea/mailstore/info@gitea/com</p>`)
+ test(
+ "git@try.gitea.io:go-gitea/gitea.git",
+ `<p>git@try.gitea.io:go-gitea/gitea.git</p>`)
+}
+
func TestRender_ShortLinks(t *testing.T) {
setting.AppURL = AppURL
setting.AppSubURL = AppSubURL