diff options
author | mrsdizzie <info@mrsdizzie.com> | 2019-04-07 07:18:16 -0400 |
---|---|---|
committer | zeripath <art27@cantab.net> | 2019-04-07 12:18:16 +0100 |
commit | 6293736d02992ef317c0f1ffc875cdccf0fd5837 (patch) | |
tree | 00fbf299043df33893f1c173a824c4f45d13eb30 /modules/markup/html.go | |
parent | 5422f23ed8174661b6e658250e4007b7fdf0d603 (diff) | |
download | gitea-6293736d02992ef317c0f1ffc875cdccf0fd5837.tar.gz gitea-6293736d02992ef317c0f1ffc875cdccf0fd5837.zip |
Use stricter boundaries for auto-link detection (#6522)
* Use stricter boundaries for auto-link detection
Currently autolinks use \W for boundary detection which creates many
situations of inserting links into places they don't belong (paths,
URLs, UUIDs, etc...)
This fixes that by replacing \W and only allowing these matches to touch
an open paren or bracket (matching what seems to be Github behavior) in
addition to whitespace and start of line. Similar for ending boundary as
well.
Fixes #6149
(and probably others)
* Update test
Replace incorrect test with a value that is a valid username, based on:
"Username should contain only alphanumeric, dash ('-'), underscore ('_')
and dot ('.') characters."
* Also allow for period at the end
Matching Github behavior
* Fix email regex to work properly with specificed boundaries
Create a specific capture group for email address and then use
FindStringSubmatchIndex to allow for non-matching patterns as
boundaries.
* Add Tests
Add tests for new behavior -- including tests for email addresses which
were absent before.
Diffstat (limited to 'modules/markup/html.go')
-rw-r--r-- | modules/markup/html.go | 18 |
1 files changed, 9 insertions, 9 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go index 7bd8e8d8f4..a3bf15fe25 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -35,20 +35,20 @@ var ( // TODO: fix invalid linking issue // mentionPattern matches all mentions in the form of "@user" - mentionPattern = regexp.MustCompile(`(?:\s|^|\W)(@[0-9a-zA-Z-_\.]+)`) + mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`) // issueNumericPattern matches string that references to a numeric issue, e.g. #1287 - issueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)(#[0-9]+)\b`) + issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`) // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 - issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\W)([A-Z]{1,10}-[1-9][0-9]*)\b`) + issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|\.(\s|$))`) // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository // e.g. gogits/gogs#12345 - crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)\b`) + crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`) // sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length // so that abbreviated hash links can be used as well. This matches git and github useability. - sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-f]{7,40})\b`) + sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|\.(\s|$))`) // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`) @@ -63,7 +63,7 @@ var ( // well as the HTML5 spec: // http://spec.commonmark.org/0.28/#email-address // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) - emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*") + emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*)(?:\\s|$|\\)|\\]|\\.(\\s|$))") linkRegex, _ = xurls.StrictMatchingScheme("https?://") ) @@ -656,12 +656,12 @@ func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) { // emailAddressProcessor replaces raw email addresses with a mailto: link. func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) { - m := emailRegex.FindStringIndex(node.Data) + m := emailRegex.FindStringSubmatchIndex(node.Data) if m == nil { return } - mail := node.Data[m[0]:m[1]] - replaceContent(node, m[0], m[1], createLink("mailto:"+mail, mail)) + mail := node.Data[m[2]:m[3]] + replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail)) } // linkProcessor creates links for any HTTP or HTTPS URL not captured by |