summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
-rw-r--r-- Gopkg.lock 9
-rw-r--r-- Gopkg.toml 4
-rw-r--r-- modules/markup/html.go 5
-rw-r--r-- modules/markup/html_test.go 9
-rw-r--r-- vendor/github.com/mvdan/xurls/LICENSE 27
-rw-r--r-- vendor/github.com/mvdan/xurls/schemes.go 299
-rw-r--r-- vendor/github.com/mvdan/xurls/tlds.go 1557
-rw-r--r-- vendor/github.com/mvdan/xurls/tlds_pseudo.go 24
-rw-r--r-- vendor/github.com/mvdan/xurls/xurls.go 107
9 files changed, 2038 insertions, 3 deletions
diff --git a/Gopkg.lock b/Gopkg.lock
index b1103ce083..00dea0587a 100644
--- a/Gopkg.lock
+++ b/Gopkg.lock
@@ -726,6 +726,14 @@
revision = "02ccfbfaf0cc627aa3aec8ef7ed5cfeec5b43f63"
[[projects]]
+ digest = "1:63953ffb90bbc880c612d576fcfd973a5904277d25ec9e2d8d5719bf67969662"
+ name = "github.com/mvdan/xurls"
+ packages = ["."]
+ pruneopts = "NUT"
+ revision = "e52e821cbfe8fe163ff6f8628ab5869b11fc05af"
+ version = "v2.0.0"
+
+[[projects]]
digest = "1:2be1d891535ce3d6d2a3db9087f07415e909744e9eff1a30f8f0b2519df60ae6"
name = "github.com/nfnt/resize"
packages = ["."]
@@ -1293,6 +1301,7 @@
"github.com/mcuadros/go-version",
"github.com/microcosm-cc/bluemonday",
"github.com/msteinert/pam",
+ "github.com/mvdan/xurls",
"github.com/nfnt/resize",
"github.com/pquerna/otp",
"github.com/pquerna/otp/totp",
diff --git a/Gopkg.toml b/Gopkg.toml
index 3a981f5296..f5dcb46869 100644
--- a/Gopkg.toml
+++ b/Gopkg.toml
@@ -113,3 +113,7 @@ ignored = ["google.golang.org/appengine*"]
[[constraint]]
name = "github.com/prometheus/client_golang"
version = "0.9.0"
+
+[[constraint]]
+ name = "github.com/mvdan/xurls"
+ version = "2.0.0"
diff --git a/modules/markup/html.go b/modules/markup/html.go
index dab6d4e8e5..036b664b00 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -17,6 +17,7 @@ import (
"code.gitea.io/gitea/modules/util"
"github.com/Unknwon/com"
+ "github.com/mvdan/xurls"
"golang.org/x/net/html"
"golang.org/x/net/html/atom"
)
@@ -64,9 +65,7 @@ var (
// https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")
- // matches http/https links. used for autlinking those. partly modified from
- // the original present in autolink.js
- linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+(?:\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`)
+ linkRegex, _ = xurls.StrictMatchingScheme("https?://")
)
// regexp for full links to issues/pulls
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index f430cb04be..ff68201995 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -104,6 +104,15 @@ func TestRender_links(t *testing.T) {
test(
"http://142.42.1.1/",
`<p><a href="http://142.42.1.1/" rel="nofollow">http://142.42.1.1/</a></p>`)
+ test(
+ "https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd",
+ `<p><a href="https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd" rel="nofollow">https://github.com/go-gitea/gitea/?p=aaa/bbb.html#ccc-ddd</a></p>`)
+ test(
+ "https://en.wikipedia.org/wiki/URL_(disambiguation)",
+ `<p><a href="https://en.wikipedia.org/wiki/URL_(disambiguation)" rel="nofollow">https://en.wikipedia.org/wiki/URL_(disambiguation)</a></p>`)
+ test(
+ "https://foo_bar.example.com/",
+ `<p><a href="https://foo_bar.example.com/" rel="nofollow">https://foo_bar.example.com/</a></p>`)
// Test that should *not* be turned into URL
test(
diff --git a/vendor/github.com/mvdan/xurls/LICENSE b/vendor/github.com/mvdan/xurls/LICENSE
new file mode 100644
index 0000000000..7d71d51a5e
--- /dev/null
+++ b/vendor/github.com/mvdan/xurls/LICENSE
@@ -0,0 +1,27 @@
+Copyright (c) 2015, Daniel Martí. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+ * Neither the name of the copyright holder nor the names of its
+contributors may be used to endorse or promote products derived from
+this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/mvdan/xurls/schemes.go b/vendor/github.com/mvdan/xurls/schemes.go
new file mode 100644
index 0000000000..01b7944ae3
--- /dev/null
+++ b/vendor/github.com/mvdan/xurls/schemes.go
@@ -0,0 +1,299 @@
+// Generated by schemesgen
+
+package xurls
+
+// Schemes is a sorted list of all IANA assigned schemes.
+//
+// Source:
+// https://www.iana.org/assignments/uri-schemes/uri-schemes-1.csv
+var Schemes = []string{
+ `aaa`,
+ `aaas`,
+ `about`,
+ `acap`,
+ `acct`,
+ `acr`,
+ `adiumxtra`,
+ `afp`,
+ `afs`,
+ `aim`,
+ `appdata`,
+ `apt`,
+ `attachment`,
+ `aw`,
+ `barion`,
+ `beshare`,
+ `bitcoin`,
+ `bitcoincash`,
+ `blob`,
+ `bolo`,
+ `browserext`,
+ `callto`,
+ `cap`,
+ `chrome`,
+ `chrome-extension`,
+ `cid`,
+ `coap`,
+ `coap+tcp`,
+ `coap+ws`,
+ `coaps`,
+ `coaps+tcp`,
+ `coaps+ws`,
+ `com-eventbrite-attendee`,
+ `content`,
+ `conti`,
+ `crid`,
+ `cvs`,
+ `data`,
+ `dav`,
+ `diaspora`,
+ `dict`,
+ `did`,
+ `dis`,
+ `dlna-playcontainer`,
+ `dlna-playsingle`,
+ `dns`,
+ `dntp`,
+ `dtn`,
+ `dvb`,
+ `ed2k`,
+ `elsi`,
+ `example`,
+ `facetime`,
+ `fax`,
+ `feed`,
+ `feedready`,
+ `file`,
+ `filesystem`,
+ `finger`,
+ `fish`,
+ `ftp`,
+ `geo`,
+ `gg`,
+ `git`,
+ `gizmoproject`,
+ `go`,
+ `gopher`,
+ `graph`,
+ `gtalk`,
+ `h323`,
+ `ham`,
+ `hcap`,
+ `hcp`,
+ `http`,
+ `https`,
+ `hxxp`,
+ `hxxps`,
+ `hydrazone`,
+ `iax`,
+ `icap`,
+ `icon`,
+ `im`,
+ `imap`,
+ `info`,
+ `iotdisco`,
+ `ipn`,
+ `ipp`,
+ `ipps`,
+ `irc`,
+ `irc6`,
+ `ircs`,
+ `iris`,
+ `iris.beep`,
+ `iris.lwz`,
+ `iris.xpc`,
+ `iris.xpcs`,
+ `isostore`,
+ `itms`,
+ `jabber`,
+ `jar`,
+ `jms`,
+ `keyparc`,
+ `lastfm`,
+ `ldap`,
+ `ldaps`,
+ `lvlt`,
+ `magnet`,
+ `mailserver`,
+ `mailto`,
+ `maps`,
+ `market`,
+ `message`,
+ `microsoft.windows.camera`,
+ `microsoft.windows.camera.multipicker`,
+ `microsoft.windows.camera.picker`,
+ `mid`,
+ `mms`,
+ `modem`,
+ `mongodb`,
+ `moz`,
+ `ms-access`,
+ `ms-browser-extension`,
+ `ms-drive-to`,
+ `ms-enrollment`,
+ `ms-excel`,
+ `ms-gamebarservices`,
+ `ms-gamingoverlay`,
+ `ms-getoffice`,
+ `ms-help`,
+ `ms-infopath`,
+ `ms-inputapp`,
+ `ms-lockscreencomponent-config`,
+ `ms-media-stream-id`,
+ `ms-mixedrealitycapture`,
+ `ms-officeapp`,
+ `ms-people`,
+ `ms-project`,
+ `ms-powerpoint`,
+ `ms-publisher`,
+ `ms-restoretabcompanion`,
+ `ms-screenclip`,
+ `ms-screensketch`,
+ `ms-search`,
+ `ms-search-repair`,
+ `ms-secondary-screen-controller`,
+ `ms-secondary-screen-setup`,
+ `ms-settings`,
+ `ms-settings-airplanemode`,
+ `ms-settings-bluetooth`,
+ `ms-settings-camera`,
+ `ms-settings-cellular`,
+ `ms-settings-cloudstorage`,
+ `ms-settings-connectabledevices`,
+ `ms-settings-displays-topology`,
+ `ms-settings-emailandaccounts`,
+ `ms-settings-language`,
+ `ms-settings-location`,
+ `ms-settings-lock`,
+ `ms-settings-nfctransactions`,
+ `ms-settings-notifications`,
+ `ms-settings-power`,
+ `ms-settings-privacy`,
+ `ms-settings-proximity`,
+ `ms-settings-screenrotation`,
+ `ms-settings-wifi`,
+ `ms-settings-workplace`,
+ `ms-spd`,
+ `ms-sttoverlay`,
+ `ms-transit-to`,
+ `ms-useractivityset`,
+ `ms-virtualtouchpad`,
+ `ms-visio`,
+ `ms-walk-to`,
+ `ms-whiteboard`,
+ `ms-whiteboard-cmd`,
+ `ms-word`,
+ `msnim`,
+ `msrp`,
+ `msrps`,
+ `mtqp`,
+ `mumble`,
+ `mupdate`,
+ `mvn`,
+ `news`,
+ `nfs`,
+ `ni`,
+ `nih`,
+ `nntp`,
+ `notes`,
+ `ocf`,
+ `oid`,
+ `onenote`,
+ `onenote-cmd`,
+ `opaquelocktoken`,
+ `openpgp4fpr`,
+ `pack`,
+ `palm`,
+ `paparazzi`,
+ `pkcs11`,
+ `platform`,
+ `pop`,
+ `pres`,
+ `prospero`,
+ `proxy`,
+ `pwid`,
+ `psyc`,
+ `qb`,
+ `query`,
+ `redis`,
+ `rediss`,
+ `reload`,
+ `res`,
+ `resource`,
+ `rmi`,
+ `rsync`,
+ `rtmfp`,
+ `rtmp`,
+ `rtsp`,
+ `rtsps`,
+ `rtspu`,
+ `secondlife`,
+ `service`,
+ `session`,
+ `sftp`,
+ `sgn`,
+ `shttp`,
+ `sieve`,
+ `simpleledger`,
+ `sip`,
+ `sips`,
+ `skype`,
+ `smb`,
+ `sms`,
+ `smtp`,
+ `snews`,
+ `snmp`,
+ `soap.beep`,
+ `soap.beeps`,
+ `soldat`,
+ `spiffe`,
+ `spotify`,
+ `ssh`,
+ `steam`,
+ `stun`,
+ `stuns`,
+ `submit`,
+ `svn`,
+ `tag`,
+ `teamspeak`,
+ `tel`,
+ `teliaeid`,
+ `telnet`,
+ `tftp`,
+ `things`,
+ `thismessage`,
+ `tip`,
+ `tn3270`,
+ `tool`,
+ `turn`,
+ `turns`,
+ `tv`,
+ `udp`,
+ `unreal`,
+ `urn`,
+ `ut2004`,
+ `v-event`,
+ `vemmi`,
+ `ventrilo`,
+ `videotex`,
+ `vnc`,
+ `view-source`,
+ `wais`,
+ `webcal`,
+ `wpid`,
+ `ws`,
+ `wss`,
+ `wtai`,
+ `wyciwyg`,
+ `xcon`,
+ `xcon-userid`,
+ `xfire`,
+ `xmlrpc.beep`,
+ `xmlrpc.beeps`,
+ `xmpp`,
+ `xri`,
+ `ymsgr`,
+ `z39.50`,
+ `z39.50r`,
+ `z39.50s`,
+}
diff --git a/vendor/github.com/mvdan/xurls/tlds.go b/vendor/github.com/mvdan/xurls/tlds.go
new file mode 100644
index 0000000000..084ab84d46
--- /dev/null
+++ b/vendor/github.com/mvdan/xurls/tlds.go
@@ -0,0 +1,1557 @@
+// Generated by tldsgen
+
+package xurls
+
+// TLDs is a sorted list of all public top-level domains.
+//
+// Sources:
+// * https://data.iana.org/TLD/tlds-alpha-by-domain.txt
+// * https://publicsuffix.org/list/effective_tld_names.dat
+var TLDs = []string{
+ `aaa`,
+ `aarp`,
+ `abarth`,
+ `abb`,
+ `abbott`,
+ `abbvie`,
+ `abc`,
+ `able`,
+ `abogado`,
+ `abudhabi`,
+ `ac`,
+ `academy`,
+ `accenture`,
+ `accountant`,
+ `accountants`,
+ `aco`,
+ `active`,
+ `actor`,
+ `ad`,
+ `adac`,
+ `ads`,
+ `adult`,
+ `ae`,
+ `aeg`,
+ `aero`,
+ `aetna`,
+ `af`,
+ `afamilycompany`,
+ `afl`,
+ `africa`,
+ `ag`,
+ `agakhan`,
+ `agency`,
+ `ai`,
+ `aig`,
+ `aigo`,
+ `airbus`,
+ `airforce`,
+ `airtel`,
+ `akdn`,
+ `al`,
+ `alfaromeo`,
+ `alibaba`,
+ `alipay`,
+ `allfinanz`,
+ `allstate`,
+ `ally`,
+ `alsace`,
+ `alstom`,
+ `am`,
+ `americanexpress`,
+ `americanfamily`,
+ `amex`,
+ `amfam`,
+ `amica`,
+ `amsterdam`,
+ `analytics`,
+ `android`,
+ `anquan`,
+ `anz`,
+ `ao`,
+ `aol`,
+ `apartments`,
+ `app`,
+ `apple`,
+ `aq`,
+ `aquarelle`,
+ `ar`,
+ `arab`,
+ `aramco`,
+ `archi`,
+ `army`,
+ `arpa`,
+ `art`,
+ `arte`,
+ `as`,
+ `asda`,
+ `asia`,
+ `associates`,
+ `at`,
+ `athleta`,
+ `attorney`,
+ `au`,
+ `auction`,
+ `audi`,
+ `audible`,
+ `audio`,
+ `auspost`,
+ `author`,
+ `auto`,
+ `autos`,
+ `avianca`,
+ `aw`,
+ `aws`,
+ `ax`,
+ `axa`,
+ `az`,
+ `azure`,
+ `ba`,
+ `baby`,
+ `baidu`,
+ `banamex`,
+ `bananarepublic`,
+ `band`,
+ `bank`,
+ `bar`,
+ `barcelona`,
+ `barclaycard`,
+ `barclays`,
+ `barefoot`,
+ `bargains`,
+ `baseball`,
+ `basketball`,
+ `bauhaus`,
+ `bayern`,
+ `bb`,
+ `bbc`,
+ `bbt`,
+ `bbva`,
+ `bcg`,
+ `bcn`,
+ `bd`,
+ `be`,
+ `beats`,
+ `beauty`,
+ `beer`,
+ `bentley`,
+ `berlin`,
+ `best`,
+ `bestbuy`,
+ `bet`,
+ `bf`,
+ `bg`,
+ `bh`,
+ `bharti`,
+ `bi`,
+ `bible`,
+ `bid`,
+ `bike`,
+ `bing`,
+ `bingo`,
+ `bio`,
+ `biz`,
+ `bj`,
+ `black`,
+ `blackfriday`,
+ `blanco`,
+ `blockbuster`,
+ `blog`,
+ `bloomberg`,
+ `blue`,
+ `bm`,
+ `bms`,
+ `bmw`,
+ `bn`,
+ `bnl`,
+ `bnpparibas`,
+ `bo`,
+ `boats`,
+ `boehringer`,
+ `bofa`,
+ `bom`,
+ `bond`,
+ `boo`,
+ `book`,
+ `booking`,
+ `bosch`,
+ `bostik`,
+ `boston`,
+ `bot`,
+ `boutique`,
+ `box`,
+ `br`,
+ `bradesco`,
+ `bridgestone`,
+ `broadway`,
+ `broker`,
+ `brother`,
+ `brussels`,
+ `bs`,
+ `bt`,
+ `budapest`,
+ `bugatti`,
+ `build`,
+ `builders`,
+ `business`,
+ `buy`,
+ `buzz`,
+ `bv`,
+ `bw`,
+ `by`,
+ `bz`,
+ `bzh`,
+ `ca`,
+ `cab`,
+ `cafe`,
+ `cal`,
+ `call`,
+ `calvinklein`,
+ `cam`,
+ `camera`,
+ `camp`,
+ `cancerresearch`,
+ `canon`,
+ `capetown`,
+ `capital`,
+ `capitalone`,
+ `car`,
+ `caravan`,
+ `cards`,
+ `care`,
+ `career`,
+ `careers`,
+ `cars`,
+ `cartier`,
+ `casa`,
+ `case`,
+ `caseih`,
+ `cash`,
+ `casino`,
+ `cat`,
+ `catering`,
+ `catholic`,
+ `cba`,
+ `cbn`,
+ `cbre`,
+ `cbs`,
+ `cc`,
+ `cd`,
+ `ceb`,
+ `center`,
+ `ceo`,
+ `cern`,
+ `cf`,
+ `cfa`,
+ `cfd`,
+ `cg`,
+ `ch`,
+ `chanel`,
+ `channel`,
+ `charity`,
+ `chase`,
+ `chat`,
+ `cheap`,
+ `chintai`,
+ `christmas`,
+ `chrome`,
+ `chrysler`,
+ `church`,
+ `ci`,
+ `cipriani`,
+ `circle`,
+ `cisco`,
+ `citadel`,
+ `citi`,
+ `citic`,
+ `city`,
+ `cityeats`,
+ `ck`,
+ `cl`,
+ `claims`,
+ `cleaning`,
+ `click`,
+ `clinic`,
+ `clinique`,
+ `clothing`,
+ `cloud`,
+ `club`,
+ `clubmed`,
+ `cm`,
+ `cn`,
+ `co`,
+ `coach`,
+ `codes`,
+ `coffee`,
+ `college`,
+ `cologne`,
+ `com`,
+ `comcast`,
+ `commbank`,
+ `community`,
+ `company`,
+ `compare`,
+ `computer`,
+ `comsec`,
+ `condos`,
+ `construction`,
+ `consulting`,
+ `contact`,
+ `contractors`,
+ `cooking`,
+ `cookingchannel`,
+ `cool`,
+ `coop`,
+ `corsica`,
+ `country`,
+ `coupon`,
+ `coupons`,
+ `courses`,
+ `cr`,
+ `credit`,
+ `creditcard`,
+ `creditunion`,
+ `cricket`,
+ `crown`,
+ `crs`,
+ `cruise`,
+ `cruises`,
+ `csc`,
+ `cu`,
+ `cuisinella`,
+ `cv`,
+ `cw`,
+ `cx`,
+ `cy`,
+ `cymru`,
+ `cyou`,
+ `cz`,
+ `dabur`,
+ `dad`,
+ `dance`,
+ `data`,
+ `date`,
+ `dating`,
+ `datsun`,
+ `day`,
+ `dclk`,
+ `dds`,
+ `de`,
+ `deal`,
+ `dealer`,
+ `deals`,
+ `degree`,
+ `delivery`,
+ `dell`,
+ `deloitte`,
+ `delta`,
+ `democrat`,
+ `dental`,
+ `dentist`,
+ `desi`,
+ `design`,
+ `dev`,
+ `dhl`,
+ `diamonds`,
+ `diet`,
+ `digital`,
+ `direct`,
+ `directory`,
+ `discount`,
+ `discover`,
+ `dish`,
+ `diy`,
+ `dj`,
+ `dk`,
+ `dm`,
+ `dnp`,
+ `do`,
+ `docs`,
+ `doctor`,
+ `dodge`,
+ `dog`,
+ `doha`,
+ `domains`,
+ `dot`,
+ `download`,
+ `drive`,
+ `dtv`,
+ `dubai`,
+ `duck`,
+ `dunlop`,
+ `duns`,
+ `dupont`,
+ `durban`,
+ `dvag`,
+ `dvr`,
+ `dz`,
+ `earth`,
+ `eat`,
+ `ec`,
+ `eco`,
+ `edeka`,
+ `edu`,
+ `education`,
+ `ee`,
+ `eg`,
+ `email`,
+ `emerck`,
+ `energy`,
+ `engineer`,
+ `engineering`,
+ `enterprises`,
+ `epost`,
+ `epson`,
+ `equipment`,
+ `er`,
+ `ericsson`,
+ `erni`,
+ `es`,
+ `esq`,
+ `estate`,
+ `esurance`,
+ `et`,
+ `etisalat`,
+ `eu`,
+ `eurovision`,
+ `eus`,
+ `events`,
+ `everbank`,
+ `exchange`,
+ `expert`,
+ `exposed`,
+ `express`,
+ `extraspace`,
+ `fage`,
+ `fail`,
+ `fairwinds`,
+ `faith`,
+ `family`,
+ `fan`,
+ `fans`,
+ `farm`,
+ `farmers`,
+ `fashion`,
+ `fast`,
+ `fedex`,
+ `feedback`,
+ `ferrari`,
+ `ferrero`,
+ `fi`,
+ `fiat`,
+ `fidelity`,
+ `fido`,
+ `film`,
+ `final`,
+ `finance`,
+ `financial`,
+ `fire`,
+ `firestone`,
+ `firmdale`,
+ `fish`,
+ `fishing`,
+ `fit`,
+ `fitness`,
+ `fj`,
+ `fk`,
+ `flickr`,
+ `flights`,
+ `flir`,
+ `florist`,
+ `flowers`,
+ `fly`,
+ `fm`,
+ `fo`,
+ `foo`,
+ `food`,
+ `foodnetwork`,
+ `football`,
+ `ford`,
+ `forex`,
+ `forsale`,
+ `forum`,
+ `foundation`,
+ `fox`,
+ `fr`,
+ `free`,
+ `fresenius`,
+ `frl`,
+ `frogans`,
+ `frontdoor`,
+ `frontier`,
+ `ftr`,
+ `fujitsu`,
+ `fujixerox`,
+ `fun`,
+ `fund`,
+ `furniture`,
+ `futbol`,
+ `fyi`,
+ `ga`,
+ `gal`,
+ `gallery`,
+ `gallo`,
+ `gallup`,
+ `game`,
+ `games`,
+ `gap`,
+ `garden`,
+ `gb`,
+ `gbiz`,
+ `gd`,
+ `gdn`,
+ `ge`,
+ `gea`,
+ `gent`,
+ `genting`,
+ `george`,
+ `gf`,
+ `gg`,
+ `ggee`,
+ `gh`,
+ `gi`,
+ `gift`,
+ `gifts`,
+ `gives`,
+ `giving`,
+ `gl`,
+ `glade`,
+ `glass`,
+ `gle`,
+ `global`,
+ `globo`,
+ `gm`,
+ `gmail`,
+ `gmbh`,
+ `gmo`,
+ `gmx`,
+ `gn`,
+ `godaddy`,
+ `gold`,
+ `goldpoint`,
+ `golf`,
+ `goo`,
+ `goodyear`,
+ `goog`,
+ `google`,
+ `gop`,
+ `got`,
+ `gov`,
+ `gp`,
+ `gq`,
+ `gr`,
+ `grainger`,
+ `graphics`,
+ `gratis`,
+ `green`,
+ `gripe`,
+ `grocery`,
+ `group`,
+ `gs`,
+ `gt`,
+ `gu`,
+ `guardian`,
+ `gucci`,
+ `guge`,
+ `guide`,
+ `guitars`,
+ `guru`,
+ `gw`,
+ `gy`,
+ `hair`,
+ `hamburg`,
+ `hangout`,
+ `haus`,
+ `hbo`,
+ `hdfc`,
+ `hdfcbank`,
+ `health`,
+ `healthcare`,
+ `help`,
+ `helsinki`,
+ `here`,
+ `hermes`,
+ `hgtv`,
+ `hiphop`,
+ `hisamitsu`,
+ `hitachi`,
+ `hiv`,
+ `hk`,
+ `hkt`,
+ `hm`,
+ `hn`,
+ `hockey`,
+ `holdings`,
+ `holiday`,
+ `homedepot`,
+ `homegoods`,
+ `homes`,
+ `homesense`,
+ `honda`,
+ `honeywell`,
+ `horse`,
+ `hospital`,
+ `host`,
+ `hosting`,
+ `hot`,
+ `hoteles`,
+ `hotels`,
+ `hotmail`,
+ `house`,
+ `how`,
+ `hr`,
+ `hsbc`,
+ `ht`,
+ `hu`,
+ `hughes`,
+ `hyatt`,
+ `hyundai`,
+ `ibm`,
+ `icbc`,
+ `ice`,
+ `icu`,
+ `id`,
+ `ie`,
+ `ieee`,
+ `ifm`,
+ `ikano`,
+ `il`,
+ `im`,
+ `imamat`,
+ `imdb`,
+ `immo`,
+ `immobilien`,
+ `in`,
+ `inc`,
+ `industries`,
+ `infiniti`,
+ `info`,
+ `ing`,
+ `ink`,
+ `institute`,
+ `insurance`,
+ `insure`,
+ `int`,
+ `intel`,
+ `international`,
+ `intuit`,
+ `investments`,
+ `io`,
+ `ipiranga`,
+ `iq`,
+ `ir`,
+ `irish`,
+ `is`,
+ `iselect`,
+ `ismaili`,
+ `ist`,
+ `istanbul`,
+ `it`,
+ `itau`,
+ `itv`,
+ `iveco`,
+ `jaguar`,
+ `java`,
+ `jcb`,
+ `jcp`,
+ `je`,
+ `jeep`,
+ `jetzt`,
+ `jewelry`,
+ `jio`,
+ `jll`,
+ `jm`,
+ `jmp`,
+ `jnj`,
+ `jo`,
+ `jobs`,
+ `joburg`,
+ `jot`,
+ `joy`,
+ `jp`,
+ `jpmorgan`,
+ `jprs`,
+ `juegos`,
+ `juniper`,
+ `kaufen`,
+ `kddi`,
+ `ke`,
+ `kerryhotels`,
+ `kerrylogistics`,
+ `kerryproperties`,
+ `kfh`,
+ `kg`,
+ `kh`,
+ `ki`,
+ `kia`,
+ `kim`,
+ `kinder`,
+ `kindle`,
+ `kitchen`,
+ `kiwi`,
+ `km`,
+ `kn`,
+ `koeln`,
+ `komatsu`,
+ `kosher`,
+ `kp`,
+ `kpmg`,
+ `kpn`,
+ `kr`,
+ `krd`,
+ `kred`,
+ `kuokgroup`,
+ `kw`,
+ `ky`,
+ `kyoto`,
+ `kz`,
+ `la`,
+ `lacaixa`,
+ `ladbrokes`,
+ `lamborghini`,
+ `lamer`,
+ `lancaster`,
+ `lancia`,
+ `lancome`,
+ `land`,
+ `landrover`,
+ `lanxess`,
+ `lasalle`,
+ `lat`,
+ `latino`,
+ `latrobe`,
+ `law`,
+ `lawyer`,
+ `lb`,
+ `lc`,
+ `lds`,
+ `lease`,
+ `leclerc`,
+ `lefrak`,
+ `legal`,
+ `lego`,
+ `lexus`,
+ `lgbt`,
+ `li`,
+ `liaison`,
+ `lidl`,
+ `life`,
+ `lifeinsurance`,
+ `lifestyle`,
+ `lighting`,
+ `like`,
+ `lilly`,
+ `limited`,
+ `limo`,
+ `lincoln`,
+ `linde`,
+ `link`,
+ `lipsy`,
+ `live`,
+ `living`,
+ `lixil`,
+ `lk`,
+ `llc`,
+ `loan`,
+ `loans`,
+ `locker`,
+ `locus`,
+ `loft`,
+ `lol`,
+ `london`,
+ `lotte`,
+ `lotto`,
+ `love`,
+ `lpl`,
+ `lplfinancial`,
+ `lr`,
+ `ls`,
+ `lt`,
+ `ltd`,
+ `ltda`,
+ `lu`,
+ `lundbeck`,
+ `lupin`,
+ `luxe`,
+ `luxury`,
+ `lv`,
+ `ly`,
+ `ma`,
+ `macys`,
+ `madrid`,
+ `maif`,
+ `maison`,
+ `makeup`,
+ `man`,
+ `management`,
+ `mango`,
+ `map`,
+ `market`,
+ `marketing`,
+ `markets`,
+ `marriott`,
+ `marshalls`,
+ `maserati`,
+ `mattel`,
+ `mba`,
+ `mc`,
+ `mckinsey`,
+ `md`,
+ `me`,
+ `med`,
+ `media`,
+ `meet`,
+ `melbourne`,
+ `meme`,
+ `memorial`,
+ `men`,
+ `menu`,
+ `merckmsd`,
+ `metlife`,
+ `mg`,
+ `mh`,
+ `miami`,
+ `microsoft`,
+ `mil`,
+ `mini`,
+ `mint`,
+ `mit`,
+ `mitsubishi`,
+ `mk`,
+ `ml`,
+ `mlb`,
+ `mls`,
+ `mm`,
+ `mma`,
+ `mn`,
+ `mo`,
+ `mobi`,
+ `mobile`,
+ `mobily`,
+ `moda`,
+ `moe`,
+ `moi`,
+ `mom`,
+ `monash`,
+ `money`,
+ `monster`,
+ `mopar`,
+ `mormon`,
+ `mortgage`,
+ `moscow`,
+ `moto`,
+ `motorcycles`,
+ `mov`,
+ `movie`,
+ `movistar`,
+ `mp`,
+ `mq`,
+ `mr`,
+ `ms`,
+ `msd`,
+ `mt`,
+ `mtn`,
+ `mtr`,
+ `mu`,
+ `museum`,
+ `mutual`,
+ `mv`,
+ `mw`,
+ `mx`,
+ `my`,
+ `mz`,
+ `na`,
+ `nab`,
+ `nadex`,
+ `nagoya`,
+ `name`,
+ `nationwide`,
+ `natura`,
+ `navy`,
+ `nba`,
+ `nc`,
+ `ne`,
+ `nec`,
+ `net`,
+ `netbank`,
+ `netflix`,
+ `network`,
+ `neustar`,
+ `new`,
+ `newholland`,
+ `news`,
+ `next`,
+ `nextdirect`,
+ `nexus`,
+ `nf`,
+ `nfl`,
+ `ng`,
+ `ngo`,
+ `nhk`,
+ `ni`,
+ `nico`,
+ `nike`,
+ `nikon`,
+ `ninja`,
+ `nissan`,
+ `nissay`,
+ `nl`,
+ `no`,
+ `nokia`,
+ `northwesternmutual`,
+ `norton`,
+ `now`,
+ `nowruz`,
+ `nowtv`,
+ `np`,
+ `nr`,
+ `nra`,
+ `nrw`,
+ `ntt`,
+ `nu`,
+ `nyc`,
+ `nz`,
+ `obi`,
+ `observer`,
+ `off`,
+ `office`,
+ `okinawa`,
+ `olayan`,
+ `olayangroup`,
+ `oldnavy`,
+ `ollo`,
+ `om`,
+ `omega`,
+ `one`,
+ `ong`,
+ `onion`,
+ `onl`,
+ `online`,
+ `onyourside`,
+ `ooo`,
+ `open`,
+ `oracle`,
+ `orange`,
+ `org`,
+ `organic`,
+ `origins`,
+ `osaka`,
+ `otsuka`,
+ `ott`,
+ `ovh`,
+ `pa`,
+ `page`,
+ `panasonic`,
+ `paris`,
+ `pars`,
+ `partners`,
+ `parts`,
+ `party`,
+ `passagens`,
+ `pay`,
+ `pccw`,
+ `pe`,
+ `pet`,
+ `pf`,
+ `pfizer`,
+ `pg`,
+ `ph`,
+ `pharmacy`,
+ `phd`,
+ `philips`,
+ `phone`,
+ `photo`,
+ `photography`,
+ `photos`,
+ `physio`,
+ `piaget`,
+ `pics`,
+ `pictet`,
+ `pictures`,
+ `pid`,
+ `pin`,
+ `ping`,
+ `pink`,
+ `pioneer`,
+ `pizza`,
+ `pk`,
+ `pl`,
+ `place`,
+ `play`,
+ `playstation`,
+ `plumbing`,
+ `plus`,
+ `pm`,
+ `pn`,
+ `pnc`,
+ `pohl`,
+ `poker`,
+ `politie`,
+ `porn`,
+ `post`,
+ `pr`,
+ `pramerica`,
+ `praxi`,
+ `press`,
+ `prime`,
+ `pro`,
+ `prod`,
+ `productions`,
+ `prof`,
+ `progressive`,
+ `promo`,
+ `properties`,
+ `property`,
+ `protection`,
+ `pru`,
+ `prudential`,
+ `ps`,
+ `pt`,
+ `pub`,
+ `pw`,
+ `pwc`,
+ `py`,
+ `qa`,
+ `qpon`,
+ `quebec`,
+ `quest`,
+ `qvc`,
+ `racing`,
+ `radio`,
+ `raid`,
+ `re`,
+ `read`,
+ `realestate`,
+ `realtor`,
+ `realty`,
+ `recipes`,
+ `red`,
+ `redstone`,
+ `redumbrella`,
+ `rehab`,
+ `reise`,
+ `reisen`,
+ `reit`,
+ `reliance`,
+ `ren`,
+ `rent`,
+ `rentals`,
+ `repair`,
+ `report`,
+ `republican`,
+ `rest`,
+ `restaurant`,
+ `review`,
+ `reviews`,
+ `rexroth`,
+ `rich`,
+ `richardli`,
+ `ricoh`,
+ `rightathome`,
+ `ril`,
+ `rio`,
+ `rip`,
+ `rmit`,
+ `ro`,
+ `rocher`,
+ `rocks`,
+ `rodeo`,
+ `rogers`,
+ `room`,
+ `rs`,
+ `rsvp`,
+ `ru`,
+ `rugby`,
+ `ruhr`,
+ `run`,
+ `rw`,
+ `rwe`,
+ `ryukyu`,
+ `sa`,
+ `saarland`,
+ `safe`,
+ `safety`,
+ `sakura`,
+ `sale`,
+ `salon`,
+ `samsclub`,
+ `samsung`,
+ `sandvik`,
+ `sandvikcoromant`,
+ `sanofi`,
+ `sap`,
+ `sarl`,
+ `sas`,
+ `save`,
+ `saxo`,
+ `sb`,
+ `sbi`,
+ `sbs`,
+ `sc`,
+ `sca`,
+ `scb`,
+ `schaeffler`,
+ `schmidt`,
+ `scholarships`,
+ `school`,
+ `schule`,
+ `schwarz`,
+ `science`,
+ `scjohnson`,
+ `scor`,
+ `scot`,
+ `sd`,
+ `se`,
+ `search`,
+ `seat`,
+ `secure`,
+ `security`,
+ `seek`,
+ `select`,
+ `sener`,
+ `services`,
+ `ses`,
+ `seven`,
+ `sew`,
+ `sex`,
+ `sexy`,
+ `sfr`,
+ `sg`,
+ `sh`,
+ `shangrila`,
+ `sharp`,
+ `shaw`,
+ `shell`,
+ `shia`,
+ `shiksha`,
+ `shoes`,
+ `shop`,
+ `shopping`,
+ `shouji`,
+ `show`,
+ `showtime`,
+ `shriram`,
+ `si`,
+ `silk`,
+ `sina`,
+ `singles`,
+ `site`,
+ `sj`,
+ `sk`,
+ `ski`,
+ `skin`,
+ `sky`,
+ `skype`,
+ `sl`,
+ `sling`,
+ `sm`,
+ `smart`,
+ `smile`,
+ `sn`,
+ `sncf`,
+ `so`,
+ `soccer`,
+ `social`,
+ `softbank`,
+ `software`,
+ `sohu`,
+ `solar`,
+ `solutions`,
+ `song`,
+ `sony`,
+ `soy`,
+ `space`,
+ `spiegel`,
+ `sport`,
+ `spot`,
+ `spreadbetting`,
+ `sr`,
+ `srl`,
+ `srt`,
+ `st`,
+ `stada`,
+ `staples`,
+ `star`,
+ `starhub`,
+ `statebank`,
+ `statefarm`,
+ `statoil`,
+ `stc`,
+ `stcgroup`,
+ `stockholm`,
+ `storage`,
+ `store`,
+ `stream`,
+ `studio`,
+ `study`,
+ `style`,
+ `su`,
+ `sucks`,
+ `supplies`,
+ `supply`,
+ `support`,
+ `surf`,
+ `surgery`,
+ `suzuki`,
+ `sv`,
+ `swatch`,
+ `swiftcover`,
+ `swiss`,
+ `sx`,
+ `sy`,
+ `sydney`,
+ `symantec`,
+ `systems`,
+ `sz`,
+ `tab`,
+ `taipei`,
+ `talk`,
+ `taobao`,
+ `target`,
+ `tatamotors`,
+ `tatar`,
+ `tattoo`,
+ `tax`,
+ `taxi`,
+ `tc`,
+ `tci`,
+ `td`,
+ `tdk`,
+ `team`,
+ `tech`,
+ `technology`,
+ `tel`,
+ `telefonica`,
+ `temasek`,
+ `tennis`,
+ `teva`,
+ `tf`,
+ `tg`,
+ `th`,
+ `thd`,
+ `theater`,
+ `theatre`,
+ `tiaa`,
+ `tickets`,
+ `tienda`,
+ `tiffany`,
+ `tips`,
+ `tires`,
+ `tirol`,
+ `tj`,
+ `tjmaxx`,
+ `tjx`,
+ `tk`,
+ `tkmaxx`,
+ `tl`,
+ `tm`,
+ `tmall`,
+ `tn`,
+ `to`,
+ `today`,
+ `tokyo`,
+ `tools`,
+ `top`,
+ `toray`,
+ `toshiba`,
+ `total`,
+ `tours`,
+ `town`,
+ `toyota`,
+ `toys`,
+ `tr`,
+ `trade`,
+ `trading`,
+ `training`,
+ `travel`,
+ `travelchannel`,
+ `travelers`,
+ `travelersinsurance`,
+ `trust`,
+ `trv`,
+ `tt`,
+ `tube`,
+ `tui`,
+ `tunes`,
+ `tushu`,
+ `tv`,
+ `tvs`,
+ `tw`,
+ `tz`,
+ `ua`,
+ `ubank`,
+ `ubs`,
+ `uconnect`,
+ `ug`,
+ `uk`,
+ `unicom`,
+ `university`,
+ `uno`,
+ `uol`,
+ `ups`,
+ `us`,
+ `uy`,
+ `uz`,
+ `va`,
+ `vacations`,
+ `vana`,
+ `vanguard`,
+ `vc`,
+ `ve`,
+ `vegas`,
+ `ventures`,
+ `verisign`,
+ `vermögensberater`,
+ `vermögensberatung`,
+ `versicherung`,
+ `vet`,
+ `vg`,
+ `vi`,
+ `viajes`,
+ `video`,
+ `vig`,
+ `viking`,
+ `villas`,
+ `vin`,
+ `vip`,
+ `virgin`,
+ `visa`,
+ `vision`,
+ `vistaprint`,
+ `viva`,
+ `vivo`,
+ `vlaanderen`,
+ `vn`,
+ `vodka`,
+ `volkswagen`,
+ `volvo`,
+ `vote`,
+ `voting`,
+ `voto`,
+ `voyage`,
+ `vu`,
+ `vuelos`,
+ `wales`,
+ `walmart`,
+ `walter`,
+ `wang`,
+ `wanggou`,
+ `warman`,
+ `watch`,
+ `watches`,
+ `weather`,
+ `weatherchannel`,
+ `webcam`,
+ `weber`,
+ `website`,
+ `wed`,
+ `wedding`,
+ `weibo`,
+ `weir`,
+ `wf`,
+ `whoswho`,
+ `wien`,
+ `wiki`,
+ `williamhill`,
+ `win`,
+ `windows`,
+ `wine`,
+ `winners`,
+ `wme`,
+ `wolterskluwer`,
+ `woodside`,
+ `work`,
+ `works`,
+ `world`,
+ `wow`,
+ `ws`,
+ `wtc`,
+ `wtf`,
+ `xbox`,
+ `xerox`,
+ `xfinity`,
+ `xihuan`,
+ `xin`,
+ `xxx`,
+ `xyz`,
+ `yachts`,
+ `yahoo`,
+ `yamaxun`,
+ `yandex`,
+ `ye`,
+ `yodobashi`,
+ `yoga`,
+ `yokohama`,
+ `you`,
+ `youtube`,
+ `yt`,
+ `yun`,
+ `za`,
+ `zappos`,
+ `zara`,
+ `zero`,
+ `zip`,
+ `zippo`,
+ `zm`,
+ `zone`,
+ `zuerich`,
+ `zw`,
+ `ελ`,
+ `бг`,
+ `бел`,
+ `дети`,
+ `ею`,
+ `католик`,
+ `ком`,
+ `мкд`,
+ `мон`,
+ `москва`,
+ `онлайн`,
+ `орг`,
+ `рус`,
+ `рф`,
+ `сайт`,
+ `срб`,
+ `укр`,
+ `қаз`,
+ `հայ`,
+ `קום`,
+ `ابوظبي`,
+ `اتصالات`,
+ `ارامكو`,
+ `الاردن`,
+ `الجزائر`,
+ `السعودية`,
+ `السعوديه`,
+ `السعودیة`,
+ `السعودیۃ`,
+ `العليان`,
+ `المغرب`,
+ `اليمن`,
+ `امارات`,
+ `ايران`,
+ `ایران`,
+ `بارت`,
+ `بازار`,
+ `بيتك`,
+ `بھارت`,
+ `تونس`,
+ `سودان`,
+ `سوريا`,
+ `سورية`,
+ `شبكة`,
+ `عراق`,
+ `عرب`,
+ `عمان`,
+ `فلسطين`,
+ `قطر`,
+ `كاثوليك`,
+ `كوم`,
+ `مصر`,
+ `مليسيا`,
+ `موبايلي`,
+ `موقع`,
+ `همراه`,
+ `پاكستان`,
+ `پاکستان`,
+ `ڀارت`,
+ `कॉम`,
+ `नेट`,
+ `भारत`,
+ `भारतम्`,
+ `भारोत`,
+ `संगठन`,
+ `বাংলা`,
+ `ভারত`,
+ `ভাৰত`,
+ `ਭਾਰਤ`,
+ `ભારત`,
+ `ଭାରତ`,
+ `இந்தியா`,
+ `இலங்கை`,
+ `சிங்கப்பூர்`,
+ `భారత్`,
+ `ಭಾರತ`,
+ `ഭാരതം`,
+ `ලංකා`,
+ `คอม`,
+ `ไทย`,
+ `გე`,
+ `みんな`,
+ `クラウド`,
+ `グーグル`,
+ `コム`,
+ `ストア`,
+ `セール`,
+ `ファッション`,
+ `ポイント`,
+ `世界`,
+ `中信`,
+ `中国`,
+ `中國`,
+ `中文网`,
+ `企业`,
+ `佛山`,
+ `信息`,
+ `健康`,
+ `八卦`,
+ `公司`,
+ `公益`,
+ `台湾`,
+ `台灣`,
+ `商城`,
+ `商店`,
+ `商标`,
+ `嘉里`,
+ `嘉里大酒店`,
+ `在线`,
+ `大众汽车`,
+ `大拿`,
+ `天主教`,
+ `娱乐`,
+ `家電`,
+ `工行`,
+ `广东`,
+ `微博`,
+ `慈善`,
+ `我爱你`,
+ `手机`,
+ `手表`,
+ `招聘`,
+ `政务`,
+ `政府`,
+ `新加坡`,
+ `新闻`,
+ `时尚`,
+ `書籍`,
+ `机构`,
+ `淡马锡`,
+ `游戏`,
+ `澳門`,
+ `澳门`,
+ `点看`,
+ `珠宝`,
+ `移动`,
+ `组织机构`,
+ `网址`,
+ `网店`,
+ `网站`,
+ `网络`,
+ `联通`,
+ `臺灣`,
+ `诺基亚`,
+ `谷歌`,
+ `购物`,
+ `通販`,
+ `集团`,
+ `電訊盈科`,
+ `飞利浦`,
+ `食品`,
+ `餐厅`,
+ `香格里拉`,
+ `香港`,
+ `닷넷`,
+ `닷컴`,
+ `삼성`,
+ `한국`,
+}
diff --git a/vendor/github.com/mvdan/xurls/tlds_pseudo.go b/vendor/github.com/mvdan/xurls/tlds_pseudo.go
new file mode 100644
index 0000000000..94c67d15b0
--- /dev/null
+++ b/vendor/github.com/mvdan/xurls/tlds_pseudo.go
@@ -0,0 +1,24 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+package xurls
+
+// PseudoTLDs is a sorted list of some widely used unofficial TLDs.
+//
+// Sources:
+// * https://en.wikipedia.org/wiki/Pseudo-top-level_domain
+// * https://en.wikipedia.org/wiki/Category:Pseudo-top-level_domains
+// * https://tools.ietf.org/html/draft-grothoff-iesg-special-use-p2p-names-00
+// * https://www.iana.org/assignments/special-use-domain-names/special-use-domain-names.xhtml
+var PseudoTLDs = []string{
+ `bit`, // Namecoin
+ `example`, // Example domain
+ `exit`, // Tor exit node
+ `gnu`, // GNS by public key
+ `i2p`, // I2P network
+ `invalid`, // Invalid domain
+ `local`, // Local network
+ `localhost`, // Local network
+ `test`, // Test domain
+ `zkey`, // GNS domain name
+}
diff --git a/vendor/github.com/mvdan/xurls/xurls.go b/vendor/github.com/mvdan/xurls/xurls.go
new file mode 100644
index 0000000000..d6279ae60b
--- /dev/null
+++ b/vendor/github.com/mvdan/xurls/xurls.go
@@ -0,0 +1,107 @@
+// Copyright (c) 2015, Daniel Martí <mvdan@mvdan.cc>
+// See LICENSE for licensing information
+
+// Package xurls extracts urls from plain text using regular expressions.
+package xurls
+
+import (
+ "bytes"
+ "regexp"
+)
+
+//go:generate go run generate/tldsgen/main.go
+//go:generate go run generate/schemesgen/main.go
+
+const (
+ letter = `\p{L}`
+ mark = `\p{M}`
+ number = `\p{N}`
+ iriChar = letter + mark + number
+ currency = `\p{Sc}`
+ otherSymb = `\p{So}`
+ endChar = iriChar + `/\-+_&~*%=#` + currency + otherSymb
+ otherPunc = `\p{Po}`
+ midChar = endChar + `|` + otherPunc
+ wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)`
+ wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]`
+ wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}`
+ wellAll = wellParen + `|` + wellBrack + `|` + wellBrace
+ pathCont = `([` + midChar + `]*(` + wellAll + `|[` + endChar + `])+)+`
+
+ iri = `[` + iriChar + `]([` + iriChar + `\-]*[` + iriChar + `])?`
+ domain = `(` + iri + `\.)+`
+ octet = `(25[0-5]|2[0-4][0-9]|1[0-9]{2}|[1-9][0-9]|[0-9])`
+ ipv4Addr = `\b` + octet + `\.` + octet + `\.` + octet + `\.` + octet + `\b`
+ ipv6Addr = `([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:([0-9a-fA-F]{1,4}:[0-9a-fA-F]{0,4}|:[0-9a-fA-F]{1,4})?|(:[0-9a-fA-F]{1,4}){0,2})|(:[0-9a-fA-F]{1,4}){0,3})|(:[0-9a-fA-F]{1,4}){0,4})|:(:[0-9a-fA-F]{1,4}){0,5})((:[0-9a-fA-F]{1,4}){2}|:(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])(\.(25[0-5]|(2[0-4]|1[0-9]|[1-9])?[0-9])){3})|(([0-9a-fA-F]{1,4}:){1,6}|:):[0-9a-fA-F]{1,4}|([0-9a-fA-F]{1,4}:){7}:`
+ ipAddr = `(` + ipv4Addr + `|` + ipv6Addr + `)`
+ port = `(:[0-9]*)?`
+)
+
+// AnyScheme can be passed to StrictMatchingScheme to match any possibly valid
+// scheme, and not just the known ones.
+var AnyScheme = `([a-zA-Z][a-zA-Z.\-+]*://|` + anyOf(SchemesNoAuthority...) + `:)`
+
+// SchemesNoAuthority is a sorted list of some well-known url schemes that are
+// followed by ":" instead of "://".
+var SchemesNoAuthority = []string{
+ `bitcoin`, // Bitcoin
+ `file`, // Files
+ `magnet`, // Torrent magnets
+ `mailto`, // Mail
+ `sms`, // SMS
+ `tel`, // Telephone
+ `xmpp`, // XMPP
+}
+
+func anyOf(strs ...string) string {
+ var b bytes.Buffer
+ b.WriteByte('(')
+ for i, s := range strs {
+ if i != 0 {
+ b.WriteByte('|')
+ }
+ b.WriteString(regexp.QuoteMeta(s))
+ }
+ b.WriteByte(')')
+ return b.String()
+}
+
+func strictExp() string {
+ schemes := `(` + anyOf(Schemes...) + `://|` + anyOf(SchemesNoAuthority...) + `:)`
+ return `(?i)` + schemes + `(?-i)` + pathCont
+}
+
+func relaxedExp() string {
+ site := domain + `(?i)` + anyOf(append(TLDs, PseudoTLDs...)...) + `(?-i)`
+ hostName := `(` + site + `|` + ipAddr + `)`
+ webURL := hostName + port + `(/|/` + pathCont + `?|\b|$)`
+ return strictExp() + `|` + webURL
+}
+
+// Strict produces a regexp that matches any URL with a scheme in either the
+// Schemes or SchemesNoAuthority lists.
+func Strict() *regexp.Regexp {
+ re := regexp.MustCompile(strictExp())
+ re.Longest()
+ return re
+}
+
+// Relaxed produces a regexp that matches any URL matched by Strict, plus any
+// URL with no scheme.
+func Relaxed() *regexp.Regexp {
+ re := regexp.MustCompile(relaxedExp())
+ re.Longest()
+ return re
+}
+
+// StrictMatchingScheme produces a regexp similar to Strict, but requiring that
+// the scheme match the given regular expression. See AnyScheme too.
+func StrictMatchingScheme(exp string) (*regexp.Regexp, error) {
+ strictMatching := `(?i)(` + exp + `)(?-i)` + pathCont
+ re, err := regexp.Compile(strictMatching)
+ if err != nil {
+ return nil, err
+ }
+ re.Longest()
+ return re, nil
+}