diff options
author | mrsdizzie <joe.mccann@gmail.com> | 2019-03-07 15:12:01 -0500 |
---|---|---|
committer | techknowlogick <matti@mdranta.net> | 2019-03-07 15:12:01 -0500 |
commit | f2de5dc8c87b4625790fb45446c296d476b1e747 (patch) | |
tree | 145d6f8f0bdecce6df0e7bf776f474af9cd34403 /modules | |
parent | 01bd1fcd33a7af55bfbba10f391b22a631ac592c (diff) | |
download | gitea-f2de5dc8c87b4625790fb45446c296d476b1e747.tar.gz gitea-f2de5dc8c87b4625790fb45446c296d476b1e747.zip |
Replace linkRegex with xurls library (#6261)
* Replace linkRegex with xurls library
Rather than maintaining a complicated regex to match URLs for
autolinking, Gitea can use this existing Go library that takes care of
the matching with very little code change to gitea itself. After
spending a while trying to find the perfect regex for all cases, this library
still works better, as it is more flexible than a single regex ever will be.
This will also fix the following issues: #5844 #3095 #3381
This passes all our current tests and I've added new ones mentioned in
those issues as well.
* Use xurls.StrictMatchingScheme instead of xurls.Strict
This is much faster and we only care about https? links to preserve
existing behavior.
Diffstat (limited to 'modules')
-rw-r--r-- | modules/markup/html.go | 5 | ||||
-rw-r--r-- | modules/markup/html_test.go | 9 |
2 files changed, 11 insertions, 3 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go index dab6d4e8e5..036b664b00 100644 --- a/modules/markup/html.go +++ b/modules/markup/html.go @@ -17,6 +17,7 @@ import ( "code.gitea.io/gitea/modules/util" "github.com/Unknwon/com" + "github.com/mvdan/xurls" "golang.org/x/net/html" "golang.org/x/net/html/atom" ) @@ -64,9 +65,7 @@ var ( // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail) emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*") - // matches http/https links. used for autlinking those. partly modified from - // the original present in autolink.js - linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`) + linkRegex, _ = xurls.StrictMatchingScheme("https?://") ) // regexp for full links to issues/pulls diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index f430cb04be..ff68201995 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) { test( "http://142.42.1.1/", `<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`) + test( + "https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd", + `<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`) + test( + "https://en.wikipedia.org/wiki/URL_(disambiguation)", + `<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`) + test( + "https://foo_bar.example.com/", + `<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`) // Test that should *not* be turned into URL test( |