@@ -120,7 +120,7 @@ require ( | |||
gopkg.in/src-d/go-git.v4 v4.13.1 | |||
gopkg.in/stretchr/testify.v1 v1.2.2 // indirect | |||
gopkg.in/testfixtures.v2 v2.5.0 | |||
mvdan.cc/xurls/v2 v2.0.0 | |||
mvdan.cc/xurls/v2 v2.1.0 | |||
strk.kbt.io/projects/go/libravatar v0.0.0-20191008002943-06d1c002b251 | |||
xorm.io/builder v0.3.6 | |||
xorm.io/core v0.7.2 |
@@ -812,8 +812,8 @@ honnef.co/go/tools v0.0.0-20190106161140-3f1c8253044a/go.mod h1:rf3lG4BRIbNafJWh | |||
honnef.co/go/tools v0.0.0-20190418001031-e561f6794a2a/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= | |||
honnef.co/go/tools v0.0.0-20190523083050-ea95bdfd59fc/go.mod h1:rf3lG4BRIbNafJWhAfAdb/ePZxsR/4RtNHQocxwk9r4= | |||
honnef.co/go/tools v0.0.1-2019.2.2/go.mod h1:a3bituU0lyd329TUQxRnasdCoJDkEUEAqEt0JzvZhAg= | |||
mvdan.cc/xurls/v2 v2.0.0 h1:r1zSOSNS/kqtpmATyMMMvaZ4/djsesbYz5kr0+qMRWc= | |||
mvdan.cc/xurls/v2 v2.0.0/go.mod h1:2/webFPYOXN9jp/lzuj0zuAVlF+9g4KPFJANH1oJhRU= | |||
mvdan.cc/xurls/v2 v2.1.0 h1:KaMb5GLhlcSX+e+qhbRJODnUUBvlw01jt4yrjFIHAuA= | |||
mvdan.cc/xurls/v2 v2.1.0/go.mod h1:5GrSd9rOnKOpZaji1OZLYL/yeAAtGDlo/cFe+8K5n8E= | |||
rsc.io/binaryregexp v0.2.0/go.mod h1:qTv7/COck+e2FymRvadv62gMdZztPaShugOCi3I+8D8= | |||
strk.kbt.io/projects/go/libravatar v0.0.0-20191008002943-06d1c002b251 h1:mUcz5b3FJbP5Cvdq7Khzn6J9OCUQJaBwgBkCR+MOwSs= | |||
strk.kbt.io/projects/go/libravatar v0.0.0-20191008002943-06d1c002b251/go.mod h1:FJGmPh3vz9jSos1L/F91iAgnC/aejc0wIIrF2ZwJxdY= |
@@ -610,7 +610,7 @@ gopkg.in/testfixtures.v2 | |||
gopkg.in/warnings.v0 | |||
# gopkg.in/yaml.v2 v2.2.2 | |||
gopkg.in/yaml.v2 | |||
# mvdan.cc/xurls/v2 v2.0.0 | |||
# mvdan.cc/xurls/v2 v2.1.0 | |||
mvdan.cc/xurls/v2 | |||
# strk.kbt.io/projects/go/libravatar v0.0.0-20191008002943-06d1c002b251 | |||
strk.kbt.io/projects/go/libravatar |
@@ -1,17 +0,0 @@ | |||
language: go | |||
go: | |||
- 1.10.x | |||
- 1.11.x | |||
go_import_path: mvdan.cc/xurls | |||
env: | |||
- GO111MODULE=on | |||
install: true | |||
script: | |||
- go get -t -d ./... | |||
- go build ./... | |||
- go test ./... |
@@ -1,18 +1,20 @@ | |||
# xurls | |||
[![GoDoc](https://godoc.org/mvdan.cc/xurls?status.svg)](https://godoc.org/mvdan.cc/xurls) | |||
[![Travis](https://travis-ci.org/mvdan/xurls.svg?branch=master)](https://travis-ci.org/mvdan/xurls) | |||
Extract urls from text using regular expressions. Requires Go 1.10.3 or later. | |||
Extract urls from text using regular expressions. Requires Go 1.12 or later. | |||
```go | |||
import "mvdan.cc/xurls/v2" | |||
func main() { | |||
xurls.Relaxed().FindString("Do gophers live in golang.org?") | |||
// "golang.org" | |||
xurls.Strict().FindAllString("foo.com is http://foo.com/.", -1) | |||
// []string{"http://foo.com/"} | |||
rxRelaxed := xurls.Relaxed() | |||
rxRelaxed.FindString("Do gophers live in golang.org?") // "golang.org" | |||
rxRelaxed.FindString("This string does not have a URL") // "" | |||
rxStrict := xurls.Strict() | |||
rxStrict.FindAllString("must have scheme: http://foo.com/.", -1) // []string{"http://foo.com/"} | |||
rxStrict.FindAllString("no scheme, no match: foo.com", -1) // nil (no matches) | |||
} | |||
``` | |||
@@ -20,7 +22,9 @@ Note that the funcs compile regexes, so avoid calling them repeatedly. | |||
#### cmd/xurls | |||
go get -u mvdan.cc/xurls/v2/cmd/xurls | |||
To install the tool globally: | |||
go get mvdan.cc/xurls/v2/cmd/xurls | |||
```shell | |||
$ echo "Do gophers live in http://golang.org?" | xurls |
@@ -1 +1,3 @@ | |||
module mvdan.cc/xurls/v2 | |||
go 1.13 |
@@ -12,13 +12,18 @@ var Schemes = []string{ | |||
`about`, | |||
`acap`, | |||
`acct`, | |||
`acd`, | |||
`acr`, | |||
`adiumxtra`, | |||
`adt`, | |||
`afp`, | |||
`afs`, | |||
`aim`, | |||
`amss`, | |||
`android`, | |||
`appdata`, | |||
`apt`, | |||
`ark`, | |||
`attachment`, | |||
`aw`, | |||
`barion`, | |||
@@ -28,8 +33,11 @@ var Schemes = []string{ | |||
`blob`, | |||
`bolo`, | |||
`browserext`, | |||
`calculator`, | |||
`callto`, | |||
`cap`, | |||
`cast`, | |||
`casts`, | |||
`chrome`, | |||
`chrome-extension`, | |||
`cid`, | |||
@@ -44,6 +52,7 @@ var Schemes = []string{ | |||
`conti`, | |||
`crid`, | |||
`cvs`, | |||
`dab`, | |||
`data`, | |||
`dav`, | |||
`diaspora`, | |||
@@ -54,6 +63,9 @@ var Schemes = []string{ | |||
`dlna-playsingle`, | |||
`dns`, | |||
`dntp`, | |||
`dpp`, | |||
`drm`, | |||
`drop`, | |||
`dtn`, | |||
`dvb`, | |||
`ed2k`, | |||
@@ -66,8 +78,11 @@ var Schemes = []string{ | |||
`file`, | |||
`filesystem`, | |||
`finger`, | |||
`first-run-pen-experience`, | |||
`fish`, | |||
`fm`, | |||
`ftp`, | |||
`fuchsia-pkg`, | |||
`geo`, | |||
`gg`, | |||
`git`, | |||
@@ -112,6 +127,8 @@ var Schemes = []string{ | |||
`lastfm`, | |||
`ldap`, | |||
`ldaps`, | |||
`leaptofrogans`, | |||
`lorawan`, | |||
`lvlt`, | |||
`magnet`, | |||
`mailserver`, | |||
@@ -129,9 +146,11 @@ var Schemes = []string{ | |||
`moz`, | |||
`ms-access`, | |||
`ms-browser-extension`, | |||
`ms-calculator`, | |||
`ms-drive-to`, | |||
`ms-enrollment`, | |||
`ms-excel`, | |||
`ms-eyecontrolspeech`, | |||
`ms-gamebarservices`, | |||
`ms-gamingoverlay`, | |||
`ms-getoffice`, | |||
@@ -141,6 +160,7 @@ var Schemes = []string{ | |||
`ms-lockscreencomponent-config`, | |||
`ms-media-stream-id`, | |||
`ms-mixedrealitycapture`, | |||
`ms-mobileplans`, | |||
`ms-officeapp`, | |||
`ms-people`, | |||
`ms-project`, | |||
@@ -186,6 +206,7 @@ var Schemes = []string{ | |||
`msnim`, | |||
`msrp`, | |||
`msrps`, | |||
`mss`, | |||
`mtqp`, | |||
`mumble`, | |||
`mupdate`, | |||
@@ -205,6 +226,7 @@ var Schemes = []string{ | |||
`pack`, | |||
`palm`, | |||
`paparazzi`, | |||
`payto`, | |||
`pkcs11`, | |||
`platform`, | |||
`pop`, | |||
@@ -213,6 +235,7 @@ var Schemes = []string{ | |||
`proxy`, | |||
`pwid`, | |||
`psyc`, | |||
`pttp`, | |||
`qb`, | |||
`query`, | |||
`redis`, |
@@ -24,7 +24,6 @@ var TLDs = []string{ | |||
`accountant`, | |||
`accountants`, | |||
`aco`, | |||
`active`, | |||
`actor`, | |||
`ad`, | |||
`adac`, | |||
@@ -154,7 +153,6 @@ var TLDs = []string{ | |||
`bj`, | |||
`black`, | |||
`blackfriday`, | |||
`blanco`, | |||
`blockbuster`, | |||
`blog`, | |||
`bloomberg`, | |||
@@ -163,7 +161,6 @@ var TLDs = []string{ | |||
`bms`, | |||
`bmw`, | |||
`bn`, | |||
`bnl`, | |||
`bnpparibas`, | |||
`bo`, | |||
`boats`, | |||
@@ -307,6 +304,7 @@ var TLDs = []string{ | |||
`coupon`, | |||
`coupons`, | |||
`courses`, | |||
`cpa`, | |||
`cr`, | |||
`credit`, | |||
`creditcard`, | |||
@@ -370,7 +368,6 @@ var TLDs = []string{ | |||
`doctor`, | |||
`dodge`, | |||
`dog`, | |||
`doha`, | |||
`domains`, | |||
`dot`, | |||
`download`, | |||
@@ -379,7 +376,6 @@ var TLDs = []string{ | |||
`dubai`, | |||
`duck`, | |||
`dunlop`, | |||
`duns`, | |||
`dupont`, | |||
`durban`, | |||
`dvag`, | |||
@@ -400,7 +396,6 @@ var TLDs = []string{ | |||
`engineer`, | |||
`engineering`, | |||
`enterprises`, | |||
`epost`, | |||
`epson`, | |||
`equipment`, | |||
`er`, | |||
@@ -496,6 +491,7 @@ var TLDs = []string{ | |||
`games`, | |||
`gap`, | |||
`garden`, | |||
`gay`, | |||
`gb`, | |||
`gbiz`, | |||
`gd`, | |||
@@ -588,7 +584,6 @@ var TLDs = []string{ | |||
`homes`, | |||
`homesense`, | |||
`honda`, | |||
`honeywell`, | |||
`horse`, | |||
`hospital`, | |||
`host`, | |||
@@ -642,7 +637,6 @@ var TLDs = []string{ | |||
`ir`, | |||
`irish`, | |||
`is`, | |||
`iselect`, | |||
`ismaili`, | |||
`ist`, | |||
`istanbul`, | |||
@@ -752,6 +746,7 @@ var TLDs = []string{ | |||
`lixil`, | |||
`lk`, | |||
`llc`, | |||
`llp`, | |||
`loan`, | |||
`loans`, | |||
`locker`, | |||
@@ -827,7 +822,6 @@ var TLDs = []string{ | |||
`mo`, | |||
`mobi`, | |||
`mobile`, | |||
`mobily`, | |||
`moda`, | |||
`moe`, | |||
`moi`, | |||
@@ -1161,21 +1155,19 @@ var TLDs = []string{ | |||
`sony`, | |||
`soy`, | |||
`space`, | |||
`spiegel`, | |||
`sport`, | |||
`spot`, | |||
`spreadbetting`, | |||
`sr`, | |||
`srl`, | |||
`srt`, | |||
`ss`, | |||
`st`, | |||
`stada`, | |||
`staples`, | |||
`star`, | |||
`starhub`, | |||
`statebank`, | |||
`statefarm`, | |||
`statoil`, | |||
`stc`, | |||
`stcgroup`, | |||
`stockholm`, | |||
@@ -1391,7 +1383,6 @@ var TLDs = []string{ | |||
`zara`, | |||
`zero`, | |||
`zip`, | |||
`zippo`, | |||
`zm`, | |||
`zone`, | |||
`zuerich`, | |||
@@ -1449,7 +1440,7 @@ var TLDs = []string{ | |||
`كوم`, | |||
`مصر`, | |||
`مليسيا`, | |||
`موبايلي`, | |||
`موريتانيا`, | |||
`موقع`, | |||
`همراه`, | |||
`پاكستان`, |
@@ -19,9 +19,9 @@ const ( | |||
iriChar = letter + mark + number | |||
currency = `\p{Sc}` | |||
otherSymb = `\p{So}` | |||
endChar = iriChar + `/\-+_&~*%=#` + currency + otherSymb | |||
endChar = iriChar + `/\-+&~%=#` + currency + otherSymb | |||
otherPunc = `\p{Po}` | |||
midChar = endChar + `|` + otherPunc | |||
midChar = endChar + "_*" + otherPunc | |||
wellParen = `\([` + midChar + `]*(\([` + midChar + `]*\)[` + midChar + `]*)*\)` | |||
wellBrack = `\[[` + midChar + `]*(\[[` + midChar + `]*\][` + midChar + `]*)*\]` | |||
wellBrace = `\{[` + midChar + `]*(\{[` + midChar + `]*\}[` + midChar + `]*)*\}` | |||
@@ -72,9 +72,11 @@ func strictExp() string { | |||
} | |||
func relaxedExp() string { | |||
site := domain + `(?i)` + anyOf(append(TLDs, PseudoTLDs...)...) + `(?-i)` | |||
punycode := `xn--[a-z0-9-]+` | |||
knownTLDs := anyOf(append(TLDs, PseudoTLDs...)...) | |||
site := domain + `(?i)(` + punycode + `|` + knownTLDs + `)(?-i)` | |||
hostName := `(` + site + `|` + ipAddr + `)` | |||
webURL := hostName + port + `(/|/` + pathCont + `?|\b|$)` | |||
webURL := hostName + port + `(/|/` + pathCont + `?|\b|(?m)$)` | |||
return strictExp() + `|` + webURL | |||
} | |||