* Update go-enry to v2.5.2 (tags/v1.13.0-rc1)
@@ -37,7 +37,7 @@ require ( | |||
github.com/facebookgo/subset v0.0.0-20150612182917-8dac2c3c4870 // indirect | |||
github.com/gliderlabs/ssh v0.2.2 | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a // indirect | |||
github.com/go-enry/go-enry/v2 v2.3.0 | |||
github.com/go-enry/go-enry/v2 v2.5.2 | |||
github.com/go-git/go-billy/v5 v5.0.0 | |||
github.com/go-git/go-git/v5 v5.0.0 | |||
github.com/go-openapi/jsonreference v0.19.3 // indirect |
@@ -193,10 +193,10 @@ github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a h1:FQqo | |||
github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a/go.mod h1:/20jfyN9Y5QPEAprSgKAUr+glWDY39ZiUEAYOEv5dsE= | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31 h1:gclg6gY70GLy3PbkQ1AERPfmLMMagS60DKF78eWwLn8= | |||
github.com/glycerine/goconvey v0.0.0-20190410193231-58a59202ab31/go.mod h1:Ogl1Tioa0aV7gstGFO7KhffUsb9M4ydbEbbxpcEDc24= | |||
github.com/go-enry/go-enry/v2 v2.3.0 h1:o8KwgY6uSplysrIpj+Y42J/xGPp90ogVpxE2Z3s8Unk= | |||
github.com/go-enry/go-enry/v2 v2.3.0/go.mod h1:+xFJwbqWi15bvqFHb2ELUWVRKFQtwB61+sDrkvvxxGI= | |||
github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs= | |||
github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= | |||
github.com/go-enry/go-enry/v2 v2.5.2 h1:3f3PFAO6JitWkPi1GQ5/m6Xu4gNL1U5soJ8QaYqJ0YQ= | |||
github.com/go-enry/go-enry/v2 v2.5.2/go.mod h1:GVzIiAytiS5uT/QiuakK7TF1u4xDab87Y8V5EJRpsIQ= | |||
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= | |||
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= | |||
github.com/go-git/gcfg v1.5.0 h1:Q5ViNfGF8zFgyJWPqYwA7qGFoMTEiBmdlkcfRmpIMa4= | |||
github.com/go-git/gcfg v1.5.0/go.mod h1:5m20vg6GwYabIxaOonVkTdrILxQMpEShl1xiMF4ua+E= | |||
github.com/go-git/go-billy/v5 v5.0.0 h1:7NQHvd9FVid8VL4qVUMm8XifBK+2xCoZ2lSk0agRrHM= | |||
@@ -616,8 +616,6 @@ github.com/tinylib/msgp v1.1.0/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDW | |||
github.com/tinylib/msgp v1.1.2 h1:gWmO7n0Ys2RBEb7GPYB9Ujq8Mk5p2U08lRnmMcGy6BQ= | |||
github.com/tinylib/msgp v1.1.2/go.mod h1:+d+yLhGm8mzTaHzB+wgMYrodPfmZrzkirds8fDWklFE= | |||
github.com/tmc/grpc-websocket-proxy v0.0.0-20190109142713-0ad062ec5ee5/go.mod h1:ncp9v5uamzpCO7NfCPTXjqaC+bZgJeR0sMTm6dMHP7U= | |||
github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk= | |||
github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM= | |||
github.com/toqueteos/webbrowser v1.2.0 h1:tVP/gpK69Fx+qMJKsLE7TD8LuGWPnEV71wBN9rrstGQ= | |||
github.com/toqueteos/webbrowser v1.2.0/go.mod h1:XWoZq4cyp9WeUeak7w7LXRUQf1F1ATJMir8RTqb4ayM= | |||
github.com/tstranex/u2f v1.0.0 h1:HhJkSzDDlVSVIVt7pDJwCHQj67k7A5EeBgPmeD+pVsQ= | |||
@@ -876,8 +874,6 @@ gopkg.in/testfixtures.v2 v2.5.0 h1:N08B7l2GzFQenyYbzqthDnKAA+cmb17iAZhhFxr7JHw= | |||
gopkg.in/testfixtures.v2 v2.5.0/go.mod h1:vyAq+MYCgNpR29qitQdLZhdbLFf4mR/2MFJRFoQZZ2M= | |||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 h1:uRGJdciOHaEIrze2W8Q3AKkepLTh2hOroT7a+7czfdQ= | |||
gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= | |||
gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE= | |||
gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew= | |||
gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= | |||
gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= | |||
gopkg.in/yaml.v2 v2.0.0-20170812160011-eb3733d160e7/go.mod h1:JAlM8MvJe8wmxCU4Bli9HhUf9+ttbYbLASfIpnQbh74= |
@@ -10,8 +10,8 @@ import ( | |||
"github.com/go-enry/go-enry/v2" | |||
) | |||
// GetCodeLanguageWithCallback detects code language based on file name and content using callback | |||
func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, error)) string { | |||
// GetCodeLanguage detects code language based on file name and content | |||
func GetCodeLanguage(filename string, content []byte) string { | |||
if language, ok := enry.GetLanguageByExtension(filename); ok { | |||
return language | |||
} | |||
@@ -20,17 +20,9 @@ func GetCodeLanguageWithCallback(filename string, contentFunc func() ([]byte, er | |||
return language | |||
} | |||
content, err := contentFunc() | |||
if err != nil { | |||
if len(content) == 0 { | |||
return enry.OtherLanguage | |||
} | |||
return enry.GetLanguage(filepath.Base(filename), content) | |||
} | |||
// GetCodeLanguage detects code language based on file name and content | |||
func GetCodeLanguage(filename string, content []byte) string { | |||
return GetCodeLanguageWithCallback(filename, func() ([]byte, error) { | |||
return content, nil | |||
}) | |||
} |
@@ -50,11 +50,15 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]float32, e | |||
return nil | |||
} | |||
// If content can not be read just do detection by filename | |||
content, _ := readFile(f, fileSizeLimit) | |||
if enry.IsGenerated(f.Name, content) { | |||
return nil | |||
} | |||
// TODO: Use .gitattributes file for linguist overrides | |||
language := analyze.GetCodeLanguageWithCallback(f.Name, func() ([]byte, error) { | |||
return readFile(f, fileSizeLimit) | |||
}) | |||
language := analyze.GetCodeLanguage(f.Name, content) | |||
if language == enry.OtherLanguage || language == "" { | |||
return nil | |||
} |
@@ -1,26 +1,26 @@ | |||
# go-enry [![GoDoc](https://godoc.org/github.com/go-enry/go-enry?status.svg)](https://pkg.go.dev/github.com/go-enry/go-enry/v2) [![Test](https://github.com/go-enry/go-enry/workflows/Test/badge.svg)](https://github.com/go-enry/go-enry/actions?query=workflow%3ATest+branch%3Amaster) [![codecov](https://codecov.io/gh/go-enry/go-enry/branch/master/graph/badge.svg)](https://codecov.io/gh/go-enry/go-enry) | |||
Programming language detector and toolbox to ignore binary or vendored files. *enry*, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved *2x performance*. | |||
* [CLI](#cli) | |||
* [Library](#library) | |||
* [Use cases](#use-cases) | |||
* [By filename](#by-filename) | |||
* [By text](#by-text) | |||
* [By file](#by-file) | |||
* [Filtering](#filtering-vendoring-binaries-etc) | |||
* [Coloring](#language-colors-and-groups) | |||
* [Languages](#languages) | |||
* [Go](#go) | |||
* [Java bindings](#java-bindings) | |||
* [Python bindings](#python-bindings) | |||
* [Divergences from linguist](#divergences-from-linguist) | |||
* [Benchmarks](#benchmarks) | |||
* [Why Enry?](#why-enry) | |||
* [Development](#development) | |||
* [Sync with github/linguist upstream](#sync-with-githublinguist-upstream) | |||
* [Misc](#misc) | |||
* [License](#license) | |||
Programming language detector and toolbox to ignore binary or vendored files. _enry_, started as a port to _Go_ of the original [Linguist](https://github.com/github/linguist) _Ruby_ library, that has an improved _2x performance_. | |||
- [CLI](#cli) | |||
- [Library](#library) | |||
- [Use cases](#use-cases) | |||
- [By filename](#by-filename) | |||
- [By text](#by-text) | |||
- [By file](#by-file) | |||
- [Filtering](#filtering-vendoring-binaries-etc) | |||
- [Coloring](#language-colors-and-groups) | |||
- [Languages](#languages) | |||
- [Go](#go) | |||
- [Java bindings](#java-bindings) | |||
- [Python bindings](#python-bindings) | |||
- [Divergences from linguist](#divergences-from-linguist) | |||
- [Benchmarks](#benchmarks) | |||
- [Why Enry?](#why-enry) | |||
- [Development](#development) | |||
- [Sync with github/linguist upstream](#sync-with-githublinguist-upstream) | |||
- [Misc](#misc) | |||
- [License](#license) | |||
# CLI | |||
@@ -28,50 +28,62 @@ The CLI binary is hosted in a separate repository [go-enry/enry](https://github. | |||
# Library | |||
*enry* is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments. | |||
_enry_ is also a Go library for guessing a programming language that exposes API through FFI to multiple programming environments. | |||
## Use cases | |||
*enry* guesses a programming language using a sequence of matching *strategies* that are | |||
applied progressively to narrow down the possible options. Each *strategy* varies on the type | |||
_enry_ guesses a programming language using a sequence of matching _strategies_ that are | |||
applied progressively to narrow down the possible options. Each _strategy_ varies on the type | |||
of input data that it needs to make a decision: file name, extension, the first line of the file, the full content of the file, etc. | |||
Depending on available input data, enry API can be roughly divided into the next categories or use cases. | |||
### By filename | |||
Next functions require only a name of the file to make a guess: | |||
- `GetLanguageByExtension` uses only file extension (which may be ambiguous) | |||
- `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc | |||
- all [filtering helpers](#filtering) | |||
Please note that such guesses are expected not to be very accurate. | |||
- `GetLanguageByExtension` uses only file extension (which may be ambiguous) | |||
- `GetLanguageByFilename` useful for cases like `.gitignore`, `.bashrc`, etc | |||
- all [filtering helpers](#filtering) | |||
Please note that such guesses are expected not to be very accurate. | |||
### By text | |||
To make a guess only based on the content of the file or a text snippet, use | |||
- `GetLanguageByShebang` reads only the first line of text to identify the [shebang](https://en.wikipedia.org/wiki/Shebang_(Unix)). | |||
- `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text. | |||
- `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist. | |||
It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy. | |||
- `GetLanguageByShebang` reads only the first line of text to identify the [shebang](<https://en.wikipedia.org/wiki/Shebang_(Unix)>). | |||
- `GetLanguageByModeline` for cases when Vim/Emacs modeline e.g. `/* vim: set ft=cpp: */` may be present at a head or a tail of the text. | |||
- `GetLanguageByClassifier` uses a Bayesian classifier trained on all the `./samples/` from Linguist. | |||
It usually is a last-resort strategy that is used to disambiguate the guess of the previous strategies, and thus it requires a list of "candidate" guesses. One can provide a list of all known languages - keys from the `data.LanguagesLogProbabilities` as possible candidates if more intelligent hypotheses are not available, at the price of possibly suboptimal accuracy. | |||
### By file | |||
The most accurate guess would be one when both, the file name and the content are available: | |||
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics. | |||
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate. | |||
- `GetLanguagesByContent` only uses file extension and a set of regexp-based content heuristics. | |||
- `GetLanguages` uses the full set of matching strategies and is expected to be most accurate. | |||
### Filtering: vendoring, binaries, etc | |||
*enry* exposes a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis: | |||
- `IsBinary` | |||
- `IsVendor` | |||
- `IsConfiguration` | |||
- `IsDocumentation` | |||
- `IsDotFile` | |||
- `IsImage` | |||
_enry_ exposes a set of file-level helpers `Is*` to simplify filtering out the files that are less interesting for the purpose of source code analysis: | |||
- `IsBinary` | |||
- `IsVendor` | |||
- `IsConfiguration` | |||
- `IsDocumentation` | |||
- `IsDotFile` | |||
- `IsImage` | |||
- `IsTest` | |||
- `IsGenerated` | |||
### Language colors and groups | |||
*enry* exposes functions to get a language color to use, for example, when presenting statistics in graphs: | |||
- `GetColor` | |||
- `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS` | |||
_enry_ exposes functions to get a language color to use, for example, when presenting statistics in graphs: | |||
- `GetColor` | |||
- `GetLanguageGroup` can be used to group similar languages together e.g. for `Less` this function will return `CSS` | |||
## Languages | |||
@@ -136,39 +148,36 @@ Generated Python bindings using a C shared library and cffi are WIP under [src-d | |||
A library is going to be published on pypi as [enry](https://pypi.org/project/enry/) for | |||
macOS and linux platforms. Windows support is planned under [src-d/enry#150](https://github.com/src-d/enry/issues/150). | |||
Divergences from Linguist | |||
------------ | |||
## Divergences from Linguist | |||
The `enry` library is based on the data from `github/linguist` version **v7.9.0**. | |||
Parsing [linguist/samples](https://github.com/github/linguist/tree/master/samples) the following `enry` results are different from the Linguist: | |||
* [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine. | |||
- [Heuristics for ".es" extension](https://github.com/github/linguist/blob/e761f9b013e5b61161481fcb898b59721ee40e3d/lib/linguist/heuristics.yml#L103) in JavaScript could not be parsed, due to unsupported backreference in RE2 regexp engine. | |||
* [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine. | |||
- [Heuristics for ".rno" extension](https://github.com/github/linguist/blob/3a1bd3c3d3e741a8aaec4704f782e06f5cd2a00d/lib/linguist/heuristics.yml#L365) in RUNOFF could not be parsed, due to unsupported lookahead in RE2 regexp engine. | |||
* [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine. | |||
- [Heuristics for ".inc" extension](https://github.com/github/linguist/blob/f0e2d0d7f1ce600b2a5acccaef6b149c87d8b99c/lib/linguist/heuristics.yml#L222) in NASL could not be parsed, due to unsupported possessive quantifier in RE2 regexp engine. | |||
* As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193). | |||
- As of [Linguist v5.3.2](https://github.com/github/linguist/releases/tag/v5.3.2) it is using [flex-based scanner in C for tokenization](https://github.com/github/linguist/pull/3846). Enry still uses [extract_token](https://github.com/github/linguist/pull/3846/files#diff-d5179df0b71620e3fac4535cd1368d15L60) regex-based algorithm. See [#193](https://github.com/src-d/enry/issues/193). | |||
* Bayesian classifier can't distinguish "SQL" from "PLpgSQL". See [#194](https://github.com/src-d/enry/issues/194). | |||
- Bayesian classifier can't distinguish "SQL" from "PLpgSQL". See [#194](https://github.com/src-d/enry/issues/194). | |||
* Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet. | |||
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213). | |||
- Detection of [generated files](https://github.com/github/linguist/blob/bf95666fc15e49d556f2def4d0a85338423c25f3/lib/linguist/generated.rb#L53) is not supported yet. | |||
(Thus they are not excluded from CLI output). See [#213](https://github.com/src-d/enry/issues/213). | |||
* XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192). | |||
- XML detection strategy is not implemented. See [#192](https://github.com/src-d/enry/issues/192). | |||
* Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18). | |||
- Overriding languages and types through `.gitattributes` is not yet supported. See [#18](https://github.com/src-d/enry/issues/18). | |||
* `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does | |||
- `enry` CLI output does NOT exclude `.gitignore`ed files and git submodules, as Linguist does | |||
In all the cases above that have an issue number - we plan to update enry to match Linguist behavior. | |||
## Benchmarks | |||
Benchmarks | |||
------------ | |||
Enry's language detection has been compared with Linguist's on [*linguist/samples*](https://github.com/github/linguist/tree/master/samples). | |||
Enry's language detection has been compared with Linguist's on [_linguist/samples_](https://github.com/github/linguist/tree/master/samples). | |||
We got these results: | |||
@@ -182,9 +191,7 @@ Go regexp engine being slower than Ruby's on, wich is based on [oniguruma](https | |||
See [instructions](#misc) for running enry with oniguruma. | |||
Why Enry? | |||
------------ | |||
## Why Enry? | |||
In the movie [My Fair Lady](https://en.wikipedia.org/wiki/My_Fair_Lady), [Professor Henry Higgins](http://www.imdb.com/character/ch0011719/) is a linguist who at the very beginning of the movie enjoys guessing the origin of people based on their accent. | |||
@@ -199,10 +206,9 @@ To run the tests use: | |||
Setting `ENRY_TEST_REPO` to the path to an existing checkout of Linguist will avoid cloning it and speed tests up. | |||
Setting `ENRY_DEBUG=1` will provide insight in the Bayesian classifier building done by `make code-generate`. | |||
### Sync with github/linguist upstream | |||
*enry* re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures. | |||
_enry_ re-uses parts of the original [github/linguist](https://github.com/github/linguist) to generate internal data structures. | |||
In order to update to the latest release of linguist do: | |||
```bash | |||
@@ -217,10 +223,10 @@ $ make code-generate | |||
To stay in sync, enry needs to be updated when a new release of the linguist includes changes to any of the following files: | |||
* [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) | |||
* [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml) | |||
* [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml) | |||
* [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml) | |||
- [languages.yml](https://github.com/github/linguist/blob/master/lib/linguist/languages.yml) | |||
- [heuristics.yml](https://github.com/github/linguist/blob/master/lib/linguist/heuristics.yml) | |||
- [vendor.yml](https://github.com/github/linguist/blob/master/lib/linguist/vendor.yml) | |||
- [documentation.yml](https://github.com/github/linguist/blob/master/lib/linguist/documentation.yml) | |||
There is no automation for detecting the changes in the linguist project, so this process above has to be done manually from time to time. | |||
@@ -229,8 +235,6 @@ the generated files (in [data](https://github.com/go-enry/go-enry/blob/master/da | |||
Separating all the necessary "manual" code changes to a different PR that includes some background description and an update to the documentation on ["divergences from linguist"](#divergences-from-linguist) is very much appreciated as it simplifies the maintenance (review/release notes/etc). | |||
## Misc | |||
<details> | |||
@@ -238,19 +242,20 @@ Separating all the necessary "manual" code changes to a different PR that includ | |||
### Benchmark | |||
All benchmark scripts are in [*benchmarks*](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory. | |||
All benchmark scripts are in [_benchmarks_](https://github.com/go-enry/go-enry/blob/master/benchmarks) directory. | |||
#### Dependencies | |||
As benchmarks depend on Ruby and Github-Linguist gem make sure you have: | |||
- Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed | |||
- Docker | |||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed | |||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -` | |||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem` | |||
- Ruby (e.g using [`rbenv`](https://github.com/rbenv/rbenv)), [`bundler`](https://bundler.io/) installed | |||
- Docker | |||
- [native dependencies](https://github.com/github/linguist/#dependencies) installed | |||
- Build the gem `cd .linguist && bundle install && rake build_gem && cd -` | |||
- Install it `gem install --no-rdoc --no-ri --local .linguist/github-linguist-*.gem` | |||
#### Quick benchmark | |||
To run quicker benchmarks | |||
make benchmarks | |||
@@ -259,19 +264,20 @@ to get average times for the primary detection function and strategies for the w | |||
make benchmarks-samples | |||
#### Full benchmark | |||
If you want to reproduce the same benchmarks as reported above: | |||
- Make sure all [dependencies](#benchmark-dependencies) are installed | |||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram) | |||
- Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h) | |||
- Make sure all [dependencies](#benchmark-dependencies) are installed | |||
- Install [gnuplot](http://gnuplot.info) (in order to plot the histogram) | |||
- Run `ENRY_TEST_REPO="$PWD/.linguist" benchmarks/run.sh` (takes ~15h) | |||
It will run the benchmarks for enry and Linguist, parse the output, create csv files and plot the histogram. | |||
### Faster regexp engine (optional) | |||
[Oniguruma](https://github.com/kkos/oniguruma) is CRuby's regular expression engine. | |||
It is very fast and performs better than the one built into Go runtime. *enry* supports swapping | |||
It is very fast and performs better than the one built into Go runtime. _enry_ supports swapping | |||
between those two engines thanks to [rubex](https://github.com/moovweb/rubex) project. | |||
The typical overall speedup from using Oniguruma is 1.5-2x. However, it requires CGo and the external shared library. | |||
On macOS with [Homebrew](https://brew.sh/), it is: | |||
@@ -296,8 +302,6 @@ and then rebuild the project. | |||
</details> | |||
License | |||
------------ | |||
## License | |||
Apache License, Version 2.0. See [LICENSE](LICENSE) |
@@ -328,15 +328,13 @@ func getInterpreter(data []byte) (interpreter string) { | |||
return | |||
} | |||
func getFirstLine(data []byte) []byte { | |||
buf := bufio.NewScanner(bytes.NewReader(data)) | |||
buf.Scan() | |||
line := buf.Bytes() | |||
if err := buf.Err(); err != nil { | |||
return nil | |||
func getFirstLine(content []byte) []byte { | |||
nlpos := bytes.IndexByte(content, '\n') | |||
if nlpos < 0 { | |||
return content | |||
} | |||
return line | |||
return content[:nlpos] | |||
} | |||
func hasShebang(line []byte) bool { |
@@ -3,24 +3,24 @@ | |||
package data | |||
import "gopkg.in/toqueteos/substring.v1" | |||
import "github.com/go-enry/go-enry/v2/regex" | |||
var DocumentationMatchers = substring.Or( | |||
substring.Regexp(`^[Dd]ocs?/`), | |||
substring.Regexp(`(^|/)[Dd]ocumentation/`), | |||
substring.Regexp(`(^|/)[Gg]roovydoc/`), | |||
substring.Regexp(`(^|/)[Jj]avadoc/`), | |||
substring.Regexp(`^[Mm]an/`), | |||
substring.Regexp(`^[Ee]xamples/`), | |||
substring.Regexp(`^[Dd]emos?/`), | |||
substring.Regexp(`(^|/)inst/doc/`), | |||
substring.Regexp(`(^|/)CHANGE(S|LOG)?(\.|$)`), | |||
substring.Regexp(`(^|/)CONTRIBUTING(\.|$)`), | |||
substring.Regexp(`(^|/)COPYING(\.|$)`), | |||
substring.Regexp(`(^|/)INSTALL(\.|$)`), | |||
substring.Regexp(`(^|/)LICEN[CS]E(\.|$)`), | |||
substring.Regexp(`(^|/)[Ll]icen[cs]e(\.|$)`), | |||
substring.Regexp(`(^|/)README(\.|$)`), | |||
substring.Regexp(`(^|/)[Rr]eadme(\.|$)`), | |||
substring.Regexp(`^[Ss]amples?/`), | |||
) | |||
var DocumentationMatchers = []regex.EnryRegexp{ | |||
regex.MustCompile(`^[Dd]ocs?/`), | |||
regex.MustCompile(`(^|/)[Dd]ocumentation/`), | |||
regex.MustCompile(`(^|/)[Gg]roovydoc/`), | |||
regex.MustCompile(`(^|/)[Jj]avadoc/`), | |||
regex.MustCompile(`^[Mm]an/`), | |||
regex.MustCompile(`^[Ee]xamples/`), | |||
regex.MustCompile(`^[Dd]emos?/`), | |||
regex.MustCompile(`(^|/)inst/doc/`), | |||
regex.MustCompile(`(^|/)CHANGE(S|LOG)?(\.|$)`), | |||
regex.MustCompile(`(^|/)CONTRIBUTING(\.|$)`), | |||
regex.MustCompile(`(^|/)COPYING(\.|$)`), | |||
regex.MustCompile(`(^|/)INSTALL(\.|$)`), | |||
regex.MustCompile(`(^|/)LICEN[CS]E(\.|$)`), | |||
regex.MustCompile(`(^|/)[Ll]icen[cs]e(\.|$)`), | |||
regex.MustCompile(`(^|/)README(\.|$)`), | |||
regex.MustCompile(`(^|/)[Rr]eadme(\.|$)`), | |||
regex.MustCompile(`^[Ss]amples?/`), | |||
} |
@@ -0,0 +1,823 @@ | |||
package data | |||
import ( | |||
"bytes" | |||
"strings" | |||
"github.com/go-enry/go-enry/v2/regex" | |||
) | |||
// GeneratedCodeExtensions contains all extensions that belong to generated | |||
// files for sure. | |||
var GeneratedCodeExtensions = map[string]struct{}{ | |||
// XCode files | |||
".nib": {}, | |||
".xcworkspacedata": {}, | |||
".xcuserstate": {}, | |||
} | |||
// GeneratedCodeNameMatcher is a function that tells whether the file with the | |||
// given name is generated. | |||
type GeneratedCodeNameMatcher func(string) bool | |||
func nameMatches(pattern string) GeneratedCodeNameMatcher { | |||
r := regex.MustCompile(pattern) | |||
return func(name string) bool { | |||
return r.MatchString(name) | |||
} | |||
} | |||
func nameContains(pattern string) GeneratedCodeNameMatcher { | |||
return func(name string) bool { | |||
return strings.Contains(name, pattern) | |||
} | |||
} | |||
func nameEndsWith(pattern string) GeneratedCodeNameMatcher { | |||
return func(name string) bool { | |||
return strings.HasSuffix(name, pattern) | |||
} | |||
} | |||
// GeneratedCodeNameMatchers are all the matchers that check whether the code | |||
// is generated based only on the file name. | |||
var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{ | |||
// Cocoa pods | |||
nameMatches(`(^Pods|\/Pods)\/`), | |||
// Carthage build | |||
nameMatches(`(^|\/)Carthage\/Build\/`), | |||
// NET designer file | |||
nameMatches(`(?i)\.designer\.(cs|vb)$`), | |||
// Generated NET specflow feature file | |||
nameEndsWith(".feature.cs"), | |||
// Node modules | |||
nameContains("node_modules/"), | |||
// Go vendor | |||
nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`), | |||
// Go lock | |||
nameEndsWith("Gopkg.lock"), | |||
nameEndsWith("glide.lock"), | |||
// Esy lock | |||
nameMatches(`(^|\/)(\w+\.)?esy.lock$`), | |||
// NPM shrinkwrap | |||
nameEndsWith("npm-shrinkwrap.json"), | |||
// NPM package lock | |||
nameEndsWith("package-lock.json"), | |||
// Yarn plugnplay | |||
nameMatches(`(^|\/)\.pnp\.(c|m)?js$`), | |||
// Godeps | |||
nameContains("Godeps/"), | |||
// Composer lock | |||
nameEndsWith("composer.lock"), | |||
// Generated by zephir | |||
nameMatches(`.\.zep\.(?:c|h|php)$`), | |||
// Cargo lock | |||
nameEndsWith("Cargo.lock"), | |||
// Pipenv lock | |||
nameEndsWith("Pipfile.lock"), | |||
// GraphQL relay | |||
nameContains("__generated__/"), | |||
} | |||
// GeneratedCodeMatcher checks whether the file with the given data is | |||
// generated code. | |||
type GeneratedCodeMatcher func(path, ext string, content []byte) bool | |||
// GeneratedCodeMatchers is the list of all generated code matchers that | |||
// rely on checking the content of the file to make the guess. | |||
var GeneratedCodeMatchers = []GeneratedCodeMatcher{ | |||
isMinifiedFile, | |||
hasSourceMapReference, | |||
isSourceMap, | |||
isCompiledCoffeeScript, | |||
isGeneratedNetDocfile, | |||
isGeneratedJavaScriptPEGParser, | |||
isGeneratedPostScript, | |||
isGeneratedGo, | |||
isGeneratedProtobuf, | |||
isGeneratedJavaScriptProtocolBuffer, | |||
isGeneratedApacheThrift, | |||
isGeneratedJNIHeader, | |||
isVCRCassette, | |||
isCompiledCythonFile, | |||
isGeneratedModule, | |||
isGeneratedUnity3DMeta, | |||
isGeneratedRacc, | |||
isGeneratedJFlex, | |||
isGeneratedGrammarKit, | |||
isGeneratedRoxygen2, | |||
isGeneratedJison, | |||
isGeneratedGRPCCpp, | |||
isGeneratedDart, | |||
isGeneratedPerlPPPortHeader, | |||
isGeneratedGameMakerStudio, | |||
isGeneratedGimp, | |||
isGeneratedVisualStudio6, | |||
isGeneratedHaxe, | |||
isGeneratedHTML, | |||
isGeneratedJooq, | |||
} | |||
// canBeMinified reports whether ext (including its leading dot) is one of the
// extensions considered for minification checks: ".js" or ".css". The
// comparison is exact and case-sensitive.
func canBeMinified(ext string) bool {
	switch ext {
	case ".js", ".css":
		return true
	}
	return false
}
// isMinifiedFile returns whether the file may be minified. | |||
// We consider a minified file any css or js file whose average number of chars | |||
// per line is more than 110. | |||
func isMinifiedFile(path, ext string, content []byte) bool { | |||
if !canBeMinified(ext) { | |||
return false | |||
} | |||
var chars, lines uint64 | |||
forEachLine(content, func(line []byte) { | |||
chars += uint64(len(line)) | |||
lines++ | |||
}) | |||
if lines == 0 { | |||
return false | |||
} | |||
return chars/lines > 110 | |||
} | |||
var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`) | |||
// hasSourceMapReference returns whether the file contains a reference to a | |||
// source-map file. | |||
func hasSourceMapReference(_ string, ext string, content []byte) bool { | |||
if !canBeMinified(ext) { | |||
return false | |||
} | |||
for _, line := range getLines(content, -2) { | |||
if sourceMapRegex.Match(line) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
var sourceMapRegexps = []regex.EnryRegexp{ | |||
regex.MustCompile(`^{"version":\d+,`), | |||
regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`), | |||
} | |||
// isSourceMap returns whether the file itself is a source map. | |||
func isSourceMap(path, _ string, content []byte) bool { | |||
if strings.HasSuffix(path, ".js.map") || strings.HasSuffix(path, ".css.map") { | |||
return true | |||
} | |||
firstLine := getFirstLine(content) | |||
if len(firstLine) == 0 { | |||
return false | |||
} | |||
for _, r := range sourceMapRegexps { | |||
if r.Match(firstLine) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func isCompiledCoffeeScript(path, ext string, content []byte) bool { | |||
if ext != ".js" { | |||
return false | |||
} | |||
firstLine := getFirstLine(content) | |||
lastLines := getLines(content, -2) | |||
if len(lastLines) < 2 { | |||
return false | |||
} | |||
if string(firstLine) == "(function() {" && | |||
string(lastLines[1]) == "}).call(this);" && | |||
string(lastLines[0]) == "" { | |||
score := 0 | |||
forEachLine(content, func(line []byte) { | |||
if bytes.Contains(line, []byte("var ")) { | |||
// Underscored temp vars are likely to be Coffee | |||
score += 1 * countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results") | |||
// bind and extend functions are very Coffee specific | |||
score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice") | |||
} | |||
}) | |||
// Require a score of 3. This is fairly abritrary. Consider tweaking later. | |||
// See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213 | |||
return score >= 3 | |||
} | |||
return false | |||
} | |||
// isGeneratedNetDocfile reports whether an .xml file has the shape of a
// generated .NET documentation file: more than three newline-separated
// segments, "<doc>" on the second, "<assembly>" on the third and "</doc>" on
// the second-to-last one.
func isGeneratedNetDocfile(_, ext string, content []byte) bool {
	if ext != ".xml" {
		return false
	}

	rows := bytes.Split(content, []byte{'\n'})
	if len(rows) < 4 {
		return false
	}

	second, third, penultimate := rows[1], rows[2], rows[len(rows)-2]
	switch {
	case !bytes.Contains(second, []byte("<doc>")):
		return false
	case !bytes.Contains(third, []byte("<assembly>")):
		return false
	default:
		return bytes.Contains(penultimate, []byte("</doc>"))
	}
}
var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`) | |||
func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool { | |||
if ext != ".js" { | |||
return false | |||
} | |||
// PEG.js-generated parsers include a comment near the top of the file | |||
// that marks them as such. | |||
return pegJavaScriptGeneratedRegex.Match(bytes.Join(getLines(content, 5), []byte(""))) | |||
} | |||
var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`) | |||
var postScriptRegexes = []regex.EnryRegexp{ | |||
regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`), | |||
regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`), | |||
} | |||
func isGeneratedPostScript(_, ext string, content []byte) bool { | |||
if ext != ".ps" && ext != ".eps" && ext != ".pfa" { | |||
return false | |||
} | |||
// Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these | |||
// streams are always preceded the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42). | |||
if postScriptType1And42Regex.Match(content) { | |||
return true | |||
} | |||
// We analyze the "%%Creator:" comment, which contains the author/generator | |||
// of the file. If there is one, it should be in one of the first few lines. | |||
var creator []byte | |||
for _, line := range getLines(content, 10) { | |||
if bytes.HasPrefix(line, []byte("%%Creator: ")) { | |||
creator = line | |||
break | |||
} | |||
} | |||
if len(creator) == 0 { | |||
return false | |||
} | |||
// EAGLE doesn't include a version number when it generates PostScript. | |||
// However, it does prepend its name to the document's "%%Title" field. | |||
if bytes.Contains(creator, []byte("EAGLE")) { | |||
for _, line := range getLines(content, 5) { | |||
if bytes.HasPrefix(line, []byte("%%Title: EAGLE Drawing ")) { | |||
return true | |||
} | |||
} | |||
} | |||
// Most generators write their version number, while human authors' or companies' | |||
// names don't contain numbers. So look if the line contains digits. Also | |||
// look for some special cases without version numbers. | |||
for _, r := range postScriptRegexes { | |||
if r.Match(creator) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func isGeneratedGo(_, ext string, content []byte) bool { | |||
if ext != ".go" { | |||
return false | |||
} | |||
lines := getLines(content, 40) | |||
if len(lines) <= 1 { | |||
return false | |||
} | |||
for _, line := range lines { | |||
if bytes.Contains(line, []byte("Code generated by")) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
var protoExtensions = map[string]struct{}{ | |||
".py": {}, | |||
".java": {}, | |||
".h": {}, | |||
".cc": {}, | |||
".cpp": {}, | |||
".m": {}, | |||
".rb": {}, | |||
".php": {}, | |||
} | |||
func isGeneratedProtobuf(_, ext string, content []byte) bool { | |||
if _, ok := protoExtensions[ext]; !ok { | |||
return false | |||
} | |||
lines := getLines(content, 3) | |||
if len(lines) <= 1 { | |||
return false | |||
} | |||
for _, line := range lines { | |||
if bytes.Contains(line, []byte("Generated by the protocol buffer compiler. DO NOT EDIT!")) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool { | |||
if ext != ".js" { | |||
return false | |||
} | |||
lines := getLines(content, 6) | |||
if len(lines) < 6 { | |||
return false | |||
} | |||
return bytes.Contains(lines[5], []byte("GENERATED CODE -- DO NOT EDIT!")) | |||
} | |||
var apacheThriftExtensions = map[string]struct{}{ | |||
".rb": {}, | |||
".py": {}, | |||
".go": {}, | |||
".js": {}, | |||
".m": {}, | |||
".java": {}, | |||
".h": {}, | |||
".cc": {}, | |||
".cpp": {}, | |||
".php": {}, | |||
} | |||
func isGeneratedApacheThrift(_, ext string, content []byte) bool { | |||
if _, ok := apacheThriftExtensions[ext]; !ok { | |||
return false | |||
} | |||
for _, line := range getLines(content, 6) { | |||
if bytes.Contains(line, []byte("Autogenerated by Thrift Compiler")) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func isGeneratedJNIHeader(_, ext string, content []byte) bool { | |||
if ext != ".h" { | |||
return false | |||
} | |||
lines := getLines(content, 2) | |||
if len(lines) < 2 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) && | |||
bytes.Contains(lines[1], []byte("#include <jni.h>")) | |||
} | |||
func isVCRCassette(_, ext string, content []byte) bool { | |||
if ext != ".yml" { | |||
return false | |||
} | |||
lines := getLines(content, -2) | |||
if len(lines) < 2 { | |||
return false | |||
} | |||
return bytes.Contains(lines[1], []byte("recorded_with: VCR")) | |||
} | |||
func isCompiledCythonFile(_, ext string, content []byte) bool { | |||
if ext != ".c" && ext != ".cpp" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("Generated by Cython")) | |||
} | |||
func isGeneratedModule(_, ext string, content []byte) bool { | |||
if ext != ".mod" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) || | |||
bytes.Contains(lines[0], []byte("GFORTRAN module version '")) | |||
} | |||
func isGeneratedUnity3DMeta(_, ext string, content []byte) bool { | |||
if ext != ".meta" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("fileFormatVersion: ")) | |||
} | |||
func isGeneratedRacc(_, ext string, content []byte) bool { | |||
if ext != ".rb" { | |||
return false | |||
} | |||
lines := getLines(content, 3) | |||
if len(lines) < 3 { | |||
return false | |||
} | |||
return bytes.HasPrefix(lines[2], []byte("# This file is automatically generated by Racc")) | |||
} | |||
func isGeneratedJFlex(_, ext string, content []byte) bool { | |||
if ext != ".java" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex ")) | |||
} | |||
func isGeneratedGrammarKit(_, ext string, content []byte) bool { | |||
if ext != ".java" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("// This is a generated file. Not intended for manual editing.")) | |||
} | |||
func isGeneratedRoxygen2(_, ext string, content []byte) bool { | |||
if ext != ".rd" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand")) | |||
} | |||
func isGeneratedJison(_, ext string, content []byte) bool { | |||
if ext != ".js" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) || | |||
bytes.Contains(lines[0], []byte("/* generated by jison-lex ")) | |||
} | |||
func isGeneratedGRPCCpp(_, ext string, content []byte) bool { | |||
switch ext { | |||
case ".cpp", ".hpp", ".h", ".cc": | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return bytes.Contains(lines[0], []byte("// Generated by the gRPC")) | |||
default: | |||
return false | |||
} | |||
} | |||
var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`) | |||
func isGeneratedDart(_, ext string, content []byte) bool { | |||
if ext != ".dart" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
return dartRegex.Match(bytes.ToLower(lines[0])) | |||
} | |||
func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool { | |||
if !strings.HasSuffix(name, "ppport.h") { | |||
return false | |||
} | |||
lines := getLines(content, 10) | |||
if len(lines) < 10 { | |||
return false | |||
} | |||
return bytes.Contains(lines[8], []byte("Automatically created by Devel::PPPort")) | |||
} | |||
var ( | |||
gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`) | |||
gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`) | |||
) | |||
func isGeneratedGameMakerStudio(_, ext string, content []byte) bool { | |||
if ext != ".yy" && ext != ".yyp" { | |||
return false | |||
} | |||
lines := getLines(content, 3) | |||
if len(lines) < 3 { | |||
return false | |||
} | |||
return gameMakerStudioThirdLineRegex.Match(lines[2]) || | |||
gameMakerStudioFirstLineRegex.Match(lines[0]) | |||
} | |||
var gimpRegexes = []regex.EnryRegexp{ | |||
regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`), | |||
regex.MustCompile(`\/\* GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h \*\/`), | |||
} | |||
func isGeneratedGimp(_, ext string, content []byte) bool { | |||
if ext != ".c" && ext != ".h" { | |||
return false | |||
} | |||
lines := getLines(content, 1) | |||
if len(lines) < 1 { | |||
return false | |||
} | |||
for _, r := range gimpRegexes { | |||
if r.Match(lines[0]) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func isGeneratedVisualStudio6(_, ext string, content []byte) bool { | |||
if ext != ".dsp" { | |||
return false | |||
} | |||
for _, l := range getLines(content, 3) { | |||
if bytes.Contains(l, []byte("# Microsoft Developer Studio Generated Build File")) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
var haxeExtensions = map[string]struct{}{ | |||
".js": {}, | |||
".py": {}, | |||
".lua": {}, | |||
".cpp": {}, | |||
".h": {}, | |||
".java": {}, | |||
".cs": {}, | |||
".php": {}, | |||
} | |||
func isGeneratedHaxe(_, ext string, content []byte) bool { | |||
if _, ok := haxeExtensions[ext]; !ok { | |||
return false | |||
} | |||
for _, l := range getLines(content, 3) { | |||
if bytes.Contains(l, []byte("Generated by Haxe")) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
var ( | |||
doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`) | |||
htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`) | |||
htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`) | |||
orgModeMetaRegex = regex.MustCompile(`org\s+mode`) | |||
) | |||
func isGeneratedHTML(_, ext string, content []byte) bool { | |||
if ext != ".html" && ext != ".htm" && ext != ".xhtml" { | |||
return false | |||
} | |||
lines := getLines(content, 30) | |||
// Pkgdown | |||
if len(lines) >= 2 { | |||
for _, l := range lines[:2] { | |||
if bytes.Contains(l, []byte("<!-- Generated by pkgdown: do not edit by hand -->")) { | |||
return true | |||
} | |||
} | |||
} | |||
// Mandoc | |||
if len(lines) > 2 && | |||
bytes.HasPrefix(lines[2], []byte("<!-- This is an automatically generated file.")) { | |||
return true | |||
} | |||
// Doxygen | |||
for _, l := range lines { | |||
if doxygenRegex.Match(l) { | |||
return true | |||
} | |||
} | |||
// HTML tag: <meta name="generator" content="" /> | |||
part := bytes.ToLower(bytes.Join(lines, []byte{' '})) | |||
part = bytes.ReplaceAll(part, []byte{'\n'}, []byte{}) | |||
part = bytes.ReplaceAll(part, []byte{'\r'}, []byte{}) | |||
matches := htmlMetaRegex.FindAll(part, -1) | |||
if len(matches) == 0 { | |||
return false | |||
} | |||
for _, m := range matches { | |||
var name, value, content string | |||
ms := htmlMetaContentRegex.FindAllStringSubmatch(string(m), -1) | |||
for _, m := range ms { | |||
switch m[1] { | |||
case "name": | |||
name = m[2] | |||
case "value": | |||
value = m[2] | |||
case "content": | |||
content = m[2] | |||
} | |||
} | |||
var val = value | |||
if val == "" { | |||
val = content | |||
} | |||
name = strings.Trim(name, `"'`) | |||
val = strings.Trim(val, `"'`) | |||
if name != "generator" || val == "" { | |||
continue | |||
} | |||
if strings.Contains(val, "jlatex2html") || | |||
strings.Contains(val, "latex2html") || | |||
strings.Contains(val, "groff") || | |||
strings.Contains(val, "makeinfo") || | |||
strings.Contains(val, "texi2html") || | |||
strings.Contains(val, "ronn") || | |||
orgModeMetaRegex.MatchString(val) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func isGeneratedJooq(_, ext string, content []byte) bool { | |||
if ext != ".java" { | |||
return false | |||
} | |||
for _, l := range getLines(content, 2) { | |||
if bytes.Contains(l, []byte("This file is generated by jOOQ.")) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func getFirstLine(content []byte) []byte { | |||
lines := getLines(content, 1) | |||
if len(lines) > 0 { | |||
return lines[0] | |||
} | |||
return nil | |||
} | |||
// getLines returns up to the first n lines of content, split on '\n' and
// without the separators. With a negative n it returns up to the last -n
// lines instead, ordered last-first.
//
// When scanning from the end, any segment that starts at or after the final
// byte of content is skipped. This drops the empty segment after a trailing
// newline, and also a final line that is only one byte long.
//
// The returned slices alias content.
func getLines(content []byte, n int) [][]byte {
	var lines [][]byte

	if n >= 0 {
		for start := 0; start < len(content) && len(lines) < n; {
			end := len(content)
			if i := bytes.IndexByte(content[start:], '\n'); i >= 0 {
				end = start + i
			}
			lines = append(lines, content[start:end])
			start = end + 1
		}
		return lines
	}

	limit := -n
	lastIdx := len(content) - 1
	for end := len(content); end > 0 && len(lines) < limit; {
		nl := bytes.LastIndexByte(content[:end], '\n')
		if start := nl + 1; start < lastIdx {
			lines = append(lines, content[start:end])
		}
		// Continue before the newline; -1 ends the loop.
		end = nl
	}
	return lines
}
// forEachLine invokes cb once per '\n'-separated line of content, in order,
// passing the line without its separator. A trailing newline does not produce
// an extra empty line, and empty content produces no calls. The slices passed
// to cb alias content.
func forEachLine(content []byte, cb func([]byte)) {
	for start := 0; start < len(content); {
		end := len(content)
		if i := bytes.IndexByte(content[start:], '\n'); i >= 0 {
			end = start + i
		}
		cb(content[start:end])
		start = end + 1
	}
}
// countAppearancesInLine returns the sum, over all targets, of the number of
// non-overlapping occurrences of each target in line (as counted by
// bytes.Count).
func countAppearancesInLine(line []byte, targets ...string) int {
	total := 0
	for i := range targets {
		total += bytes.Count(line, []byte(targets[i]))
	}
	return total
}
@@ -0,0 +1,17 @@ | |||
package data | |||
import "github.com/go-enry/go-enry/v2/regex" | |||
// TestMatchers is a hand-made collection of regexps used by the function `enry.IsTest` | |||
// to identify test files in different languages. Each pattern is written against | |||
// a slash-separated file path. | |||
var TestMatchers = []regex.EnryRegexp{ | |||
regex.MustCompile(`(^|/)tests/.*Test\.php$`), | |||
regex.MustCompile(`(^|/)test/.*Test(s?)\.java$`), | |||
regex.MustCompile(`(^|/)test(/|/.*/)Test.*\.java$`), | |||
regex.MustCompile(`(^|/)test/.*(Test(s?)|Spec(s?))\.scala$`), | |||
regex.MustCompile(`(^|/)test_.*\.py$`), | |||
regex.MustCompile(`(^|/).*_test\.go$`), | |||
regex.MustCompile(`(^|/).*_(test|spec)\.rb$`), | |||
regex.MustCompile(`(^|/).*Test(s?)\.cs$`), | |||
regex.MustCompile(`(^|/).*\.(test|spec)\.(ts|tsx|js)$`), | |||
} |
@@ -3,167 +3,167 @@ | |||
package data | |||
import "gopkg.in/toqueteos/substring.v1" | |||
import "github.com/go-enry/go-enry/v2/regex" | |||
// VendorMatchers combines the path patterns below, each built with | |||
// substring.Regexp, into a single matcher via substring.Or. | |||
// NOTE(review): presumably a path counts as vendored when any one pattern | |||
// matches - confirm against the caller and the substring package. | |||
var VendorMatchers = substring.Or( | |||
substring.Regexp(`(^|/)cache/`), | |||
substring.Regexp(`^[Dd]ependencies/`), | |||
substring.Regexp(`(^|/)dist/`), | |||
substring.Regexp(`^deps/`), | |||
substring.Regexp(`(^|/)configure$`), | |||
substring.Regexp(`(^|/)config.guess$`), | |||
substring.Regexp(`(^|/)config.sub$`), | |||
substring.Regexp(`(^|/)aclocal.m4`), | |||
substring.Regexp(`(^|/)libtool.m4`), | |||
substring.Regexp(`(^|/)ltoptions.m4`), | |||
substring.Regexp(`(^|/)ltsugar.m4`), | |||
substring.Regexp(`(^|/)ltversion.m4`), | |||
substring.Regexp(`(^|/)lt~obsolete.m4`), | |||
substring.Regexp(`dotnet-install\.(ps1|sh)$`), | |||
substring.Regexp(`cpplint.py`), | |||
substring.Regexp(`node_modules/`), | |||
substring.Regexp(`(^|/)\.yarn/releases/`), | |||
substring.Regexp(`(^|/)_esy$`), | |||
substring.Regexp(`bower_components/`), | |||
substring.Regexp(`^rebar$`), | |||
substring.Regexp(`erlang.mk`), | |||
substring.Regexp(`Godeps/_workspace/`), | |||
substring.Regexp(`(^|/)testdata/`), | |||
substring.Regexp(`.indent.pro`), | |||
substring.Regexp(`(\.|-)min\.(js|css)$`), | |||
substring.Regexp(`([^\s]*)import\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)font-?awesome\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)font-?awesome/.*\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)foundation\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)normalize\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)skeleton\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)animate\.(css|less|scss|styl)$`), | |||
substring.Regexp(`(^|/)materialize\.(css|less|scss|styl|js)$`), | |||
substring.Regexp(`(^|/)select2/.*\.(css|scss|js)$`), | |||
substring.Regexp(`(^|/)bulma\.(css|sass|scss)$`), | |||
substring.Regexp(`(3rd|[Tt]hird)[-_]?[Pp]arty/`), | |||
substring.Regexp(`vendors?/`), | |||
substring.Regexp(`extern(al)?/`), | |||
substring.Regexp(`(^|/)[Vv]+endor/`), | |||
substring.Regexp(`^debian/`), | |||
substring.Regexp(`run.n$`), | |||
substring.Regexp(`bootstrap-datepicker/`), | |||
substring.Regexp(`(^|/)jquery([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`), | |||
substring.Regexp(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`), | |||
substring.Regexp(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`), | |||
substring.Regexp(`jquery.fn.gantt.js`), | |||
substring.Regexp(`jquery.fancybox.(js|css)`), | |||
substring.Regexp(`fuelux.js`), | |||
substring.Regexp(`(^|/)jquery\.fileupload(-\w+)?\.js$`), | |||
substring.Regexp(`jquery.dataTables.js`), | |||
substring.Regexp(`bootbox.js`), | |||
substring.Regexp(`pdf.worker.js`), | |||
substring.Regexp(`(^|/)slick\.\w+.js$`), | |||
substring.Regexp(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`), | |||
substring.Regexp(`leaflet.draw-src.js`), | |||
substring.Regexp(`leaflet.draw.css`), | |||
substring.Regexp(`Control.FullScreen.css`), | |||
substring.Regexp(`Control.FullScreen.js`), | |||
substring.Regexp(`leaflet.spin.js`), | |||
substring.Regexp(`wicket-leaflet.js`), | |||
substring.Regexp(`.sublime-project`), | |||
substring.Regexp(`.sublime-workspace`), | |||
substring.Regexp(`.vscode`), | |||
substring.Regexp(`(^|/)prototype(.*)\.js$`), | |||
substring.Regexp(`(^|/)effects\.js$`), | |||
substring.Regexp(`(^|/)controls\.js$`), | |||
substring.Regexp(`(^|/)dragdrop\.js$`), | |||
substring.Regexp(`(.*?)\.d\.ts$`), | |||
substring.Regexp(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)dojo\.js$`), | |||
substring.Regexp(`(^|/)MochiKit\.js$`), | |||
substring.Regexp(`(^|/)yahoo-([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)yui([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)ckeditor\.js$`), | |||
substring.Regexp(`(^|/)tiny_mce([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)tiny_mce/(langs|plugins|themes|utils)`), | |||
substring.Regexp(`(^|/)ace-builds/`), | |||
substring.Regexp(`(^|/)fontello(.*?)\.css$`), | |||
substring.Regexp(`(^|/)MathJax/`), | |||
substring.Regexp(`(^|/)Chart\.js$`), | |||
substring.Regexp(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`), | |||
substring.Regexp(`(^|/)shBrush([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)shCore\.js$`), | |||
substring.Regexp(`(^|/)shLegacy\.js$`), | |||
substring.Regexp(`(^|/)angular([^.]*)\.js$`), | |||
substring.Regexp(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)react(-[^.]*)?\.js$`), | |||
substring.Regexp(`(^|/)flow-typed/.*\.js$`), | |||
substring.Regexp(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`), | |||
substring.Regexp(`(^|/)modernizr\.custom\.\d+\.js$`), | |||
substring.Regexp(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`), | |||
substring.Regexp(`(^|/)docs?/_?(build|themes?|templates?|static)/`), | |||
substring.Regexp(`(^|/)admin_media/`), | |||
substring.Regexp(`(^|/)env/`), | |||
substring.Regexp(`^fabfile\.py$`), | |||
substring.Regexp(`^waf$`), | |||
substring.Regexp(`^.osx$`), | |||
substring.Regexp(`\.xctemplate/`), | |||
substring.Regexp(`\.imageset/`), | |||
substring.Regexp(`(^|/)Carthage/`), | |||
substring.Regexp(`(^|/)Sparkle/`), | |||
substring.Regexp(`Crashlytics.framework/`), | |||
substring.Regexp(`Fabric.framework/`), | |||
substring.Regexp(`BuddyBuildSDK.framework/`), | |||
substring.Regexp(`Realm.framework`), | |||
substring.Regexp(`RealmSwift.framework`), | |||
substring.Regexp(`gitattributes$`), | |||
substring.Regexp(`gitignore$`), | |||
substring.Regexp(`gitmodules$`), | |||
substring.Regexp(`(^|/)gradlew$`), | |||
substring.Regexp(`(^|/)gradlew\.bat$`), | |||
substring.Regexp(`(^|/)gradle/wrapper/`), | |||
substring.Regexp(`(^|/)mvnw$`), | |||
substring.Regexp(`(^|/)mvnw\.cmd$`), | |||
substring.Regexp(`(^|/)\.mvn/wrapper/`), | |||
substring.Regexp(`-vsdoc\.js$`), | |||
substring.Regexp(`\.intellisense\.js$`), | |||
substring.Regexp(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`), | |||
substring.Regexp(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`), | |||
substring.Regexp(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`), | |||
substring.Regexp(`^[Pp]ackages\/.+\.\d+\/`), | |||
substring.Regexp(`(^|/)extjs/.*?\.js$`), | |||
substring.Regexp(`(^|/)extjs/.*?\.xml$`), | |||
substring.Regexp(`(^|/)extjs/.*?\.txt$`), | |||
substring.Regexp(`(^|/)extjs/.*?\.html$`), | |||
substring.Regexp(`(^|/)extjs/.*?\.properties$`), | |||
substring.Regexp(`(^|/)extjs/.sencha/`), | |||
substring.Regexp(`(^|/)extjs/docs/`), | |||
substring.Regexp(`(^|/)extjs/builds/`), | |||
substring.Regexp(`(^|/)extjs/cmd/`), | |||
substring.Regexp(`(^|/)extjs/examples/`), | |||
substring.Regexp(`(^|/)extjs/locale/`), | |||
substring.Regexp(`(^|/)extjs/packages/`), | |||
substring.Regexp(`(^|/)extjs/plugins/`), | |||
substring.Regexp(`(^|/)extjs/resources/`), | |||
substring.Regexp(`(^|/)extjs/src/`), | |||
substring.Regexp(`(^|/)extjs/welcome/`), | |||
substring.Regexp(`(^|/)html5shiv\.js$`), | |||
substring.Regexp(`^[Tt]ests?/fixtures/`), | |||
substring.Regexp(`^[Ss]pecs?/fixtures/`), | |||
substring.Regexp(`(^|/)cordova([^.]*)\.js$`), | |||
substring.Regexp(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`), | |||
substring.Regexp(`foundation(\..*)?\.js$`), | |||
substring.Regexp(`^Vagrantfile$`), | |||
substring.Regexp(`.[Dd][Ss]_[Ss]tore$`), | |||
substring.Regexp(`^vignettes/`), | |||
substring.Regexp(`^inst/extdata/`), | |||
substring.Regexp(`octicons.css`), | |||
substring.Regexp(`sprockets-octicons.scss`), | |||
substring.Regexp(`(^|/)activator$`), | |||
substring.Regexp(`(^|/)activator\.bat$`), | |||
substring.Regexp(`proguard.pro`), | |||
substring.Regexp(`proguard-rules.pro`), | |||
substring.Regexp(`^puphpet/`), | |||
substring.Regexp(`(^|/)\.google_apis/`), | |||
substring.Regexp(`^Jenkinsfile$`), | |||
) | |||
// VendorMatchers is a list of compiled regular expressions written against | |||
// slash-separated file paths. | |||
// NOTE(review): presumably a path counts as vendored when any one of these | |||
// patterns matches - confirm against the caller. | |||
var VendorMatchers = []regex.EnryRegexp{ | |||
regex.MustCompile(`(^|/)cache/`), | |||
regex.MustCompile(`^[Dd]ependencies/`), | |||
regex.MustCompile(`(^|/)dist/`), | |||
regex.MustCompile(`^deps/`), | |||
regex.MustCompile(`(^|/)configure$`), | |||
regex.MustCompile(`(^|/)config.guess$`), | |||
regex.MustCompile(`(^|/)config.sub$`), | |||
regex.MustCompile(`(^|/)aclocal.m4`), | |||
regex.MustCompile(`(^|/)libtool.m4`), | |||
regex.MustCompile(`(^|/)ltoptions.m4`), | |||
regex.MustCompile(`(^|/)ltsugar.m4`), | |||
regex.MustCompile(`(^|/)ltversion.m4`), | |||
regex.MustCompile(`(^|/)lt~obsolete.m4`), | |||
regex.MustCompile(`dotnet-install\.(ps1|sh)$`), | |||
regex.MustCompile(`cpplint.py`), | |||
regex.MustCompile(`node_modules/`), | |||
regex.MustCompile(`(^|/)\.yarn/releases/`), | |||
regex.MustCompile(`(^|/)_esy$`), | |||
regex.MustCompile(`bower_components/`), | |||
regex.MustCompile(`^rebar$`), | |||
regex.MustCompile(`erlang.mk`), | |||
regex.MustCompile(`Godeps/_workspace/`), | |||
regex.MustCompile(`(^|/)testdata/`), | |||
regex.MustCompile(`.indent.pro`), | |||
regex.MustCompile(`(\.|-)min\.(js|css)$`), | |||
regex.MustCompile(`([^\s]*)import\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)bootstrap([^.]*)\.(js|css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)custom\.bootstrap([^\s]*)(js|css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)font-?awesome\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)font-?awesome/.*\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)foundation\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)normalize\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)skeleton\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)[Bb]ourbon/.*\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)animate\.(css|less|scss|styl)$`), | |||
regex.MustCompile(`(^|/)materialize\.(css|less|scss|styl|js)$`), | |||
regex.MustCompile(`(^|/)select2/.*\.(css|scss|js)$`), | |||
regex.MustCompile(`(^|/)bulma\.(css|sass|scss)$`), | |||
regex.MustCompile(`(3rd|[Tt]hird)[-_]?[Pp]arty/`), | |||
regex.MustCompile(`vendors?/`), | |||
regex.MustCompile(`extern(al)?/`), | |||
regex.MustCompile(`(^|/)[Vv]+endor/`), | |||
regex.MustCompile(`^debian/`), | |||
regex.MustCompile(`run.n$`), | |||
regex.MustCompile(`bootstrap-datepicker/`), | |||
regex.MustCompile(`(^|/)jquery([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)jquery\-\d\.\d+(\.\d+)?\.js$`), | |||
regex.MustCompile(`(^|/)jquery\-ui(\-\d\.\d+(\.\d+)?)?(\.\w+)?\.(js|css)$`), | |||
regex.MustCompile(`(^|/)jquery\.(ui|effects)\.([^.]*)\.(js|css)$`), | |||
regex.MustCompile(`jquery.fn.gantt.js`), | |||
regex.MustCompile(`jquery.fancybox.(js|css)`), | |||
regex.MustCompile(`fuelux.js`), | |||
regex.MustCompile(`(^|/)jquery\.fileupload(-\w+)?\.js$`), | |||
regex.MustCompile(`jquery.dataTables.js`), | |||
regex.MustCompile(`bootbox.js`), | |||
regex.MustCompile(`pdf.worker.js`), | |||
regex.MustCompile(`(^|/)slick\.\w+.js$`), | |||
regex.MustCompile(`(^|/)Leaflet\.Coordinates-\d+\.\d+\.\d+\.src\.js$`), | |||
regex.MustCompile(`leaflet.draw-src.js`), | |||
regex.MustCompile(`leaflet.draw.css`), | |||
regex.MustCompile(`Control.FullScreen.css`), | |||
regex.MustCompile(`Control.FullScreen.js`), | |||
regex.MustCompile(`leaflet.spin.js`), | |||
regex.MustCompile(`wicket-leaflet.js`), | |||
regex.MustCompile(`.sublime-project`), | |||
regex.MustCompile(`.sublime-workspace`), | |||
regex.MustCompile(`.vscode`), | |||
regex.MustCompile(`(^|/)prototype(.*)\.js$`), | |||
regex.MustCompile(`(^|/)effects\.js$`), | |||
regex.MustCompile(`(^|/)controls\.js$`), | |||
regex.MustCompile(`(^|/)dragdrop\.js$`), | |||
regex.MustCompile(`(.*?)\.d\.ts$`), | |||
regex.MustCompile(`(^|/)mootools([^.]*)\d+\.\d+.\d+([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)dojo\.js$`), | |||
regex.MustCompile(`(^|/)MochiKit\.js$`), | |||
regex.MustCompile(`(^|/)yahoo-([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)yui([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)ckeditor\.js$`), | |||
regex.MustCompile(`(^|/)tiny_mce([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)tiny_mce/(langs|plugins|themes|utils)`), | |||
regex.MustCompile(`(^|/)ace-builds/`), | |||
regex.MustCompile(`(^|/)fontello(.*?)\.css$`), | |||
regex.MustCompile(`(^|/)MathJax/`), | |||
regex.MustCompile(`(^|/)Chart\.js$`), | |||
regex.MustCompile(`(^|/)[Cc]ode[Mm]irror/(\d+\.\d+/)?(lib|mode|theme|addon|keymap|demo)`), | |||
regex.MustCompile(`(^|/)shBrush([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)shCore\.js$`), | |||
regex.MustCompile(`(^|/)shLegacy\.js$`), | |||
regex.MustCompile(`(^|/)angular([^.]*)\.js$`), | |||
regex.MustCompile(`(^|\/)d3(\.v\d+)?([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)react(-[^.]*)?\.js$`), | |||
regex.MustCompile(`(^|/)flow-typed/.*\.js$`), | |||
regex.MustCompile(`(^|/)modernizr\-\d\.\d+(\.\d+)?\.js$`), | |||
regex.MustCompile(`(^|/)modernizr\.custom\.\d+\.js$`), | |||
regex.MustCompile(`(^|/)knockout-(\d+\.){3}(debug\.)?js$`), | |||
regex.MustCompile(`(^|/)docs?/_?(build|themes?|templates?|static)/`), | |||
regex.MustCompile(`(^|/)admin_media/`), | |||
regex.MustCompile(`(^|/)env/`), | |||
regex.MustCompile(`^fabfile\.py$`), | |||
regex.MustCompile(`^waf$`), | |||
regex.MustCompile(`^.osx$`), | |||
regex.MustCompile(`\.xctemplate/`), | |||
regex.MustCompile(`\.imageset/`), | |||
regex.MustCompile(`(^|/)Carthage/`), | |||
regex.MustCompile(`(^|/)Sparkle/`), | |||
regex.MustCompile(`Crashlytics.framework/`), | |||
regex.MustCompile(`Fabric.framework/`), | |||
regex.MustCompile(`BuddyBuildSDK.framework/`), | |||
regex.MustCompile(`Realm.framework`), | |||
regex.MustCompile(`RealmSwift.framework`), | |||
regex.MustCompile(`gitattributes$`), | |||
regex.MustCompile(`gitignore$`), | |||
regex.MustCompile(`gitmodules$`), | |||
regex.MustCompile(`(^|/)gradlew$`), | |||
regex.MustCompile(`(^|/)gradlew\.bat$`), | |||
regex.MustCompile(`(^|/)gradle/wrapper/`), | |||
regex.MustCompile(`(^|/)mvnw$`), | |||
regex.MustCompile(`(^|/)mvnw\.cmd$`), | |||
regex.MustCompile(`(^|/)\.mvn/wrapper/`), | |||
regex.MustCompile(`-vsdoc\.js$`), | |||
regex.MustCompile(`\.intellisense\.js$`), | |||
regex.MustCompile(`(^|/)jquery([^.]*)\.validate(\.unobtrusive)?\.js$`), | |||
regex.MustCompile(`(^|/)jquery([^.]*)\.unobtrusive\-ajax\.js$`), | |||
regex.MustCompile(`(^|/)[Mm]icrosoft([Mm]vc)?([Aa]jax|[Vv]alidation)(\.debug)?\.js$`), | |||
regex.MustCompile(`^[Pp]ackages\/.+\.\d+\/`), | |||
regex.MustCompile(`(^|/)extjs/.*?\.js$`), | |||
regex.MustCompile(`(^|/)extjs/.*?\.xml$`), | |||
regex.MustCompile(`(^|/)extjs/.*?\.txt$`), | |||
regex.MustCompile(`(^|/)extjs/.*?\.html$`), | |||
regex.MustCompile(`(^|/)extjs/.*?\.properties$`), | |||
regex.MustCompile(`(^|/)extjs/.sencha/`), | |||
regex.MustCompile(`(^|/)extjs/docs/`), | |||
regex.MustCompile(`(^|/)extjs/builds/`), | |||
regex.MustCompile(`(^|/)extjs/cmd/`), | |||
regex.MustCompile(`(^|/)extjs/examples/`), | |||
regex.MustCompile(`(^|/)extjs/locale/`), | |||
regex.MustCompile(`(^|/)extjs/packages/`), | |||
regex.MustCompile(`(^|/)extjs/plugins/`), | |||
regex.MustCompile(`(^|/)extjs/resources/`), | |||
regex.MustCompile(`(^|/)extjs/src/`), | |||
regex.MustCompile(`(^|/)extjs/welcome/`), | |||
regex.MustCompile(`(^|/)html5shiv\.js$`), | |||
regex.MustCompile(`^[Tt]ests?/fixtures/`), | |||
regex.MustCompile(`^[Ss]pecs?/fixtures/`), | |||
regex.MustCompile(`(^|/)cordova([^.]*)\.js$`), | |||
regex.MustCompile(`(^|/)cordova\-\d\.\d(\.\d)?\.js$`), | |||
regex.MustCompile(`foundation(\..*)?\.js$`), | |||
regex.MustCompile(`^Vagrantfile$`), | |||
regex.MustCompile(`.[Dd][Ss]_[Ss]tore$`), | |||
regex.MustCompile(`^vignettes/`), | |||
regex.MustCompile(`^inst/extdata/`), | |||
regex.MustCompile(`octicons.css`), | |||
regex.MustCompile(`sprockets-octicons.scss`), | |||
regex.MustCompile(`(^|/)activator$`), | |||
regex.MustCompile(`(^|/)activator\.bat$`), | |||
regex.MustCompile(`proguard.pro`), | |||
regex.MustCompile(`proguard-rules.pro`), | |||
regex.MustCompile(`^puphpet/`), | |||
regex.MustCompile(`(^|/)\.google_apis/`), | |||
regex.MustCompile(`^Jenkinsfile$`), | |||
} |
@@ -3,9 +3,7 @@ module github.com/go-enry/go-enry/v2 | |||
go 1.14 | |||
require ( | |||
github.com/go-enry/go-oniguruma v1.2.0 | |||
github.com/go-enry/go-oniguruma v1.2.1 | |||
github.com/stretchr/testify v1.3.0 | |||
github.com/toqueteos/trie v1.0.0 // indirect | |||
gopkg.in/toqueteos/substring.v1 v1.0.2 | |||
gopkg.in/yaml.v2 v2.2.8 | |||
) |
@@ -2,17 +2,15 @@ github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8 | |||
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= | |||
github.com/go-enry/go-oniguruma v1.2.0 h1:oBO9XC1IDT9+AoWW5oFsa/7gFeOPacEqDbyXZKWXuDs= | |||
github.com/go-enry/go-oniguruma v1.2.0/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= | |||
github.com/go-enry/go-oniguruma v1.2.1 h1:k8aAMuJfMrqm/56SG2lV9Cfti6tC4x8673aHCcBk+eo= | |||
github.com/go-enry/go-oniguruma v1.2.1/go.mod h1:bWDhYP+S6xZQgiRL7wlTScFYBe023B6ilRZbCAD5Hf4= | |||
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM= | |||
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= | |||
github.com/stretchr/objx v0.1.0 h1:4G4v2dO3VZwixGIRoQ5Lfboy6nUhCyYzaqnIAPPhYs4= | |||
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= | |||
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q= | |||
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= | |||
github.com/toqueteos/trie v1.0.0 h1:8i6pXxNUXNRAqP246iibb7w/pSFquNTQ+uNfriG7vlk= | |||
github.com/toqueteos/trie v1.0.0/go.mod h1:Ywk48QhEqhU1+DwhMkJ2x7eeGxDHiGkAdc9+0DYcbsM= | |||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM= | |||
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= | |||
gopkg.in/toqueteos/substring.v1 v1.0.2 h1:urLqCeMm6x/eTuQa1oZerNw8N1KNOIp5hD5kGL7lFsE= | |||
gopkg.in/toqueteos/substring.v1 v1.0.2/go.mod h1:Eb2Z1UYehlVK8LYW2WBVR2rwbujsz3aX8XDrM1vbNew= | |||
gopkg.in/yaml.v2 v2.2.8 h1:obN1ZagJSUGI0Ek/LBmuj4SNLPfIny3KsKFopxRdj10= | |||
gopkg.in/yaml.v2 v2.2.8/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= |
@@ -6,12 +6,18 @@ import ( | |||
"strings" | |||
"github.com/go-enry/go-enry/v2/data" | |||
"github.com/go-enry/go-enry/v2/regex" | |||
) | |||
const binSniffLen = 8000 | |||
var configurationLanguages = map[string]bool{ | |||
"XML": true, "JSON": true, "TOML": true, "YAML": true, "INI": true, "SQL": true, | |||
var configurationLanguages = map[string]struct{}{ | |||
"XML": {}, | |||
"JSON": {}, | |||
"TOML": {}, | |||
"YAML": {}, | |||
"INI": {}, | |||
"SQL": {}, | |||
} | |||
// IsConfiguration tells if filename is in one of the configuration languages. | |||
@@ -46,7 +52,7 @@ func GetMIMEType(path string, language string) string { | |||
// IsDocumentation returns whether or not path is a documentation path. | |||
func IsDocumentation(path string) bool { | |||
return data.DocumentationMatchers.Match(path) | |||
return matchRegexSlice(data.DocumentationMatchers, path) | |||
} | |||
// IsDotFile returns whether or not path has dot as a prefix. | |||
@@ -57,7 +63,12 @@ func IsDotFile(path string) bool { | |||
// IsVendor returns whether or not path is a vendor path. | |||
func IsVendor(path string) bool { | |||
return data.VendorMatchers.Match(path) | |||
return matchRegexSlice(data.VendorMatchers, path) | |||
} | |||
// IsTest returns whether or not path is a test path. | |||
func IsTest(path string) bool { | |||
return matchRegexSlice(data.TestMatchers, path) | |||
} | |||
// IsBinary detects if data is a binary value based on: | |||
@@ -86,3 +97,37 @@ func GetColor(language string) string { | |||
return "#cccccc" | |||
} | |||
func matchRegexSlice(exprs []regex.EnryRegexp, str string) bool { | |||
for _, expr := range exprs { | |||
if expr.MatchString(str) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
// IsGenerated returns whether the file with the given path and content is a | |||
// generated file. | |||
func IsGenerated(path string, content []byte) bool { | |||
ext := strings.ToLower(filepath.Ext(path)) | |||
if _, ok := data.GeneratedCodeExtensions[ext]; ok { | |||
return true | |||
} | |||
for _, m := range data.GeneratedCodeNameMatchers { | |||
if m(path) { | |||
return true | |||
} | |||
} | |||
path = strings.ToLower(path) | |||
for _, m := range data.GeneratedCodeMatchers { | |||
if m(path, ext, content) { | |||
return true | |||
} | |||
} | |||
return false | |||
} |
@@ -7,7 +7,7 @@ | |||
#include "chelper.h" | |||
int NewOnigRegex( char *pattern, int pattern_length, int option, | |||
OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) { | |||
OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer) { | |||
int ret = ONIG_NORMAL; | |||
int error_msg_len = 0; | |||
@@ -23,8 +23,6 @@ int NewOnigRegex( char *pattern, int pattern_length, int option, | |||
memset(*error_buffer, 0, ONIG_MAX_ERROR_MESSAGE_LEN * sizeof(char)); | |||
*region = onig_region_new(); | |||
ret = onig_new(regex, pattern_start, pattern_end, (OnigOptionType)(option), *encoding, OnigDefaultSyntax, *error_info); | |||
if (ret != ONIG_NORMAL) { | |||
@@ -38,9 +36,10 @@ int NewOnigRegex( char *pattern, int pattern_length, int option, | |||
} | |||
int SearchOnigRegex( void *str, int str_length, int offset, int option, | |||
OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) { | |||
OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures) { | |||
int ret = ONIG_MISMATCH; | |||
int error_msg_len = 0; | |||
OnigRegion *region; | |||
#ifdef BENCHMARK_CHELP | |||
struct timeval tim1, tim2; | |||
long t; | |||
@@ -55,6 +54,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option, | |||
gettimeofday(&tim1, NULL); | |||
#endif | |||
region = onig_region_new(); | |||
ret = onig_search(regex, str_start, str_end, search_start, search_end, region, option); | |||
if (ret < 0 && error_buffer != NULL) { | |||
error_msg_len = onig_error_code_to_str((unsigned char*)(error_buffer), ret, error_info); | |||
@@ -74,6 +75,8 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option, | |||
*numCaptures = count; | |||
} | |||
onig_region_free(region, 1); | |||
#ifdef BENCHMARK_CHELP | |||
gettimeofday(&tim2, NULL); | |||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; | |||
@@ -83,9 +86,10 @@ int SearchOnigRegex( void *str, int str_length, int offset, int option, | |||
} | |||
int MatchOnigRegex(void *str, int str_length, int offset, int option, | |||
OnigRegex regex, OnigRegion *region) { | |||
OnigRegex regex) { | |||
int ret = ONIG_MISMATCH; | |||
int error_msg_len = 0; | |||
OnigRegion *region; | |||
#ifdef BENCHMARK_CHELP | |||
struct timeval tim1, tim2; | |||
long t; | |||
@@ -98,7 +102,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option, | |||
#ifdef BENCHMARK_CHELP | |||
gettimeofday(&tim1, NULL); | |||
#endif | |||
region = onig_region_new(); | |||
ret = onig_match(regex, str_start, str_end, search_start, region, option); | |||
onig_region_free(region, 1); | |||
#ifdef BENCHMARK_CHELP | |||
gettimeofday(&tim2, NULL); | |||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; | |||
@@ -108,8 +114,9 @@ int MatchOnigRegex(void *str, int str_length, int offset, int option, | |||
} | |||
int LookupOnigCaptureByName(char *name, int name_length, | |||
OnigRegex regex, OnigRegion *region) { | |||
OnigRegex regex) { | |||
int ret = ONIGERR_UNDEFINED_NAME_REFERENCE; | |||
OnigRegion *region; | |||
#ifdef BENCHMARK_CHELP | |||
struct timeval tim1, tim2; | |||
long t; | |||
@@ -119,7 +126,9 @@ int LookupOnigCaptureByName(char *name, int name_length, | |||
#ifdef BENCHMARK_CHELP | |||
gettimeofday(&tim1, NULL); | |||
#endif | |||
region = onig_region_new(); | |||
ret = onig_name_to_backref_number(regex, name_start, name_end, region); | |||
onig_region_free(region, 1); | |||
#ifdef BENCHMARK_CHELP | |||
gettimeofday(&tim2, NULL); | |||
t = (tim2.tv_sec - tim1.tv_sec) * 1000000 + tim2.tv_usec - tim1.tv_usec; | |||
@@ -181,4 +190,3 @@ int GetCaptureNames(OnigRegex reg, void *buffer, int bufferSize, int* groupNumbe | |||
onig_foreach_name(reg, name_callback, (void* )&groupInfo); | |||
return groupInfo.bufferOffset; | |||
} | |||
@@ -1,14 +1,14 @@ | |||
#include <oniguruma.h> | |||
extern int NewOnigRegex( char *pattern, int pattern_length, int option, | |||
OnigRegex *regex, OnigRegion **region, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer); | |||
OnigRegex *regex, OnigEncoding *encoding, OnigErrorInfo **error_info, char **error_buffer); | |||
extern int SearchOnigRegex( void *str, int str_length, int offset, int option, | |||
OnigRegex regex, OnigRegion *region, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures); | |||
OnigRegex regex, OnigErrorInfo *error_info, char *error_buffer, int *captures, int *numCaptures); | |||
extern int MatchOnigRegex( void *str, int str_length, int offset, int option, | |||
OnigRegex regex, OnigRegion *region); | |||
OnigRegex regex); | |||
extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex, OnigRegion *region); | |||
extern int LookupOnigCaptureByName(char *name, int name_length, OnigRegex regex); | |||
extern int GetCaptureNames(OnigRegex regex, void *buffer, int bufferSize, int* groupNumbers); |
@@ -14,7 +14,6 @@ import ( | |||
"errors" | |||
"fmt" | |||
"io" | |||
"log" | |||
"runtime" | |||
"strconv" | |||
"sync" | |||
@@ -22,62 +21,52 @@ import ( | |||
"unsafe" | |||
) | |||
type strRange []int | |||
const numMatchStartSize = 4 | |||
const numReadBufferStartSize = 256 | |||
var mutex sync.Mutex | |||
type MatchData struct { | |||
count int | |||
indexes [][]int32 | |||
} | |||
type NamedGroupInfo map[string]int | |||
type Regexp struct { | |||
pattern string | |||
regex C.OnigRegex | |||
region *C.OnigRegion | |||
encoding C.OnigEncoding | |||
errorInfo *C.OnigErrorInfo | |||
errorBuf *C.char | |||
matchData *MatchData | |||
pattern string | |||
regex C.OnigRegex | |||
encoding C.OnigEncoding | |||
errorInfo *C.OnigErrorInfo | |||
errorBuf *C.char | |||
numCaptures int32 | |||
namedGroupInfo NamedGroupInfo | |||
} | |||
// NewRegexp creates and initializes a new Regexp with the given pattern and option. | |||
func NewRegexp(pattern string, option int) (re *Regexp, err error) { | |||
func NewRegexp(pattern string, option int) (*Regexp, error) { | |||
return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_UTF8}, option) | |||
} | |||
// NewRegexpASCII is equivalent to NewRegexp, but with the encoding restricted to ASCII. | |||
func NewRegexpASCII(pattern string, option int) (re *Regexp, err error) { | |||
func NewRegexpASCII(pattern string, option int) (*Regexp, error) { | |||
return initRegexp(&Regexp{pattern: pattern, encoding: C.ONIG_ENCODING_ASCII}, option) | |||
} | |||
func initRegexp(re *Regexp, option int) (*Regexp, error) { | |||
var err error | |||
patternCharPtr := C.CString(re.pattern) | |||
defer C.free(unsafe.Pointer(patternCharPtr)) | |||
mutex.Lock() | |||
defer mutex.Unlock() | |||
errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.region, &re.encoding, &re.errorInfo, &re.errorBuf) | |||
errorCode := C.NewOnigRegex(patternCharPtr, C.int(len(re.pattern)), C.int(option), &re.regex, &re.encoding, &re.errorInfo, &re.errorBuf) | |||
if errorCode != C.ONIG_NORMAL { | |||
err = errors.New(C.GoString(re.errorBuf)) | |||
} else { | |||
err = nil | |||
numCapturesInPattern := int(C.onig_number_of_captures(re.regex)) + 1 | |||
re.matchData = &MatchData{} | |||
re.matchData.indexes = make([][]int32, numMatchStartSize) | |||
for i := 0; i < numMatchStartSize; i++ { | |||
re.matchData.indexes[i] = make([]int32, numCapturesInPattern*2) | |||
} | |||
re.namedGroupInfo = re.getNamedGroupInfo() | |||
runtime.SetFinalizer(re, (*Regexp).Free) | |||
return re, errors.New(C.GoString(re.errorBuf)) | |||
} | |||
return re, err | |||
re.numCaptures = int32(C.onig_number_of_captures(re.regex)) + 1 | |||
re.namedGroupInfo = re.getNamedGroupInfo() | |||
runtime.SetFinalizer(re, (*Regexp).Free) | |||
return re, nil | |||
} | |||
func Compile(str string) (*Regexp, error) { | |||
@@ -89,6 +78,7 @@ func MustCompile(str string) *Regexp { | |||
if error != nil { | |||
panic("regexp: compiling " + str + ": " + error.Error()) | |||
} | |||
return regexp | |||
} | |||
@@ -101,6 +91,7 @@ func MustCompileWithOption(str string, option int) *Regexp { | |||
if error != nil { | |||
panic("regexp: compiling " + str + ": " + error.Error()) | |||
} | |||
return regexp | |||
} | |||
@@ -110,6 +101,7 @@ func MustCompileASCII(str string) *Regexp { | |||
if error != nil { | |||
panic("regexp: compiling " + str + ": " + error.Error()) | |||
} | |||
return regexp | |||
} | |||
@@ -119,10 +111,6 @@ func (re *Regexp) Free() { | |||
C.onig_free(re.regex) | |||
re.regex = nil | |||
} | |||
if re.region != nil { | |||
C.onig_region_free(re.region, 1) | |||
re.region = nil | |||
} | |||
mutex.Unlock() | |||
if re.errorInfo != nil { | |||
C.free(unsafe.Pointer(re.errorInfo)) | |||
@@ -134,149 +122,149 @@ func (re *Regexp) Free() { | |||
} | |||
} | |||
func (re *Regexp) getNamedGroupInfo() (namedGroupInfo NamedGroupInfo) { | |||
func (re *Regexp) getNamedGroupInfo() NamedGroupInfo { | |||
numNamedGroups := int(C.onig_number_of_names(re.regex)) | |||
//when any named capture exisits, there is no numbered capture even if there are unnamed captures | |||
if numNamedGroups > 0 { | |||
namedGroupInfo = make(map[string]int) | |||
//try to get the names | |||
bufferSize := len(re.pattern) * 2 | |||
nameBuffer := make([]byte, bufferSize) | |||
groupNumbers := make([]int32, numNamedGroups) | |||
bufferPtr := unsafe.Pointer(&nameBuffer[0]) | |||
numbersPtr := unsafe.Pointer(&groupNumbers[0]) | |||
length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr))) | |||
if length > 0 { | |||
namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";")) | |||
if len(namesAsBytes) != numNamedGroups { | |||
log.Fatalf("the number of named groups (%d) does not match the number names found (%d)\n", numNamedGroups, len(namesAsBytes)) | |||
} | |||
for i, nameAsBytes := range namesAsBytes { | |||
name := string(nameAsBytes) | |||
namedGroupInfo[name] = int(groupNumbers[i]) | |||
} | |||
} else { | |||
log.Fatalf("could not get the capture group names from %q", re.String()) | |||
} | |||
// when any named capture exists, there is no numbered capture even if | |||
// there are unnamed captures. | |||
if numNamedGroups == 0 { | |||
return nil | |||
} | |||
return | |||
} | |||
func (re *Regexp) groupNameToId(name string) (id int) { | |||
if re.namedGroupInfo == nil { | |||
id = ONIGERR_UNDEFINED_NAME_REFERENCE | |||
} else { | |||
id = re.namedGroupInfo[name] | |||
namedGroupInfo := make(map[string]int) | |||
//try to get the names | |||
bufferSize := len(re.pattern) * 2 | |||
nameBuffer := make([]byte, bufferSize) | |||
groupNumbers := make([]int32, numNamedGroups) | |||
bufferPtr := unsafe.Pointer(&nameBuffer[0]) | |||
numbersPtr := unsafe.Pointer(&groupNumbers[0]) | |||
length := int(C.GetCaptureNames(re.regex, bufferPtr, (C.int)(bufferSize), (*C.int)(numbersPtr))) | |||
if length == 0 { | |||
panic(fmt.Errorf("could not get the capture group names from %q", re.String())) | |||
} | |||
return | |||
} | |||
func (re *Regexp) processMatch(numCaptures int) (match []int32) { | |||
if numCaptures <= 0 { | |||
panic("cannot have 0 captures when processing a match") | |||
namesAsBytes := bytes.Split(nameBuffer[:length], ([]byte)(";")) | |||
if len(namesAsBytes) != numNamedGroups { | |||
panic(fmt.Errorf( | |||
"the number of named groups (%d) does not match the number names found (%d)", | |||
numNamedGroups, len(namesAsBytes), | |||
)) | |||
} | |||
for i, nameAsBytes := range namesAsBytes { | |||
name := string(nameAsBytes) | |||
namedGroupInfo[name] = int(groupNumbers[i]) | |||
} | |||
matchData := re.matchData | |||
return matchData.indexes[matchData.count][:numCaptures*2] | |||
} | |||
func (re *Regexp) ClearMatchData() { | |||
matchData := re.matchData | |||
matchData.count = 0 | |||
return namedGroupInfo | |||
} | |||
func (re *Regexp) find(b []byte, n int, offset int) (match []int) { | |||
func (re *Regexp) find(b []byte, n int, offset int) []int { | |||
match := make([]int, re.numCaptures*2) | |||
if n == 0 { | |||
b = []byte{0} | |||
} | |||
ptr := unsafe.Pointer(&b[0]) | |||
matchData := re.matchData | |||
capturesPtr := unsafe.Pointer(&(matchData.indexes[matchData.count][0])) | |||
numCaptures := int32(0) | |||
bytesPtr := unsafe.Pointer(&b[0]) | |||
// captures contains two pairs of ints, start and end, so we need list | |||
// twice the size of the capture groups. | |||
captures := make([]C.int, re.numCaptures*2) | |||
capturesPtr := unsafe.Pointer(&captures[0]) | |||
var numCaptures int32 | |||
numCapturesPtr := unsafe.Pointer(&numCaptures) | |||
pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr))) | |||
if pos >= 0 { | |||
if numCaptures <= 0 { | |||
panic("cannot have 0 captures when processing a match") | |||
} | |||
match2 := matchData.indexes[matchData.count][:numCaptures*2] | |||
match = make([]int, len(match2)) | |||
for i := range match2 { | |||
match[i] = int(match2[i]) | |||
} | |||
numCapturesInPattern := int32(C.onig_number_of_captures(re.regex)) + 1 | |||
if numCapturesInPattern != numCaptures { | |||
log.Fatalf("expected %d captures but got %d\n", numCapturesInPattern, numCaptures) | |||
} | |||
pos := int(C.SearchOnigRegex( | |||
bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), | |||
re.regex, re.errorInfo, (*C.char)(nil), (*C.int)(capturesPtr), (*C.int)(numCapturesPtr), | |||
)) | |||
if pos < 0 { | |||
return nil | |||
} | |||
if numCaptures <= 0 { | |||
panic("cannot have 0 captures when processing a match") | |||
} | |||
if re.numCaptures != numCaptures { | |||
panic(fmt.Errorf("expected %d captures but got %d", re.numCaptures, numCaptures)) | |||
} | |||
for i := range captures { | |||
match[i] = int(captures[i]) | |||
} | |||
return | |||
return match | |||
} | |||
// getCapture returns the sub-slice b[beg:end] for a capture group. A negative
// beg or end yields nil instead of a slice. The result shares its backing
// array with b.
func getCapture(b []byte, beg int, end int) []byte {
	if beg >= 0 && end >= 0 {
		return b[beg:end]
	}

	return nil
}
func (re *Regexp) match(b []byte, n int, offset int) bool { | |||
re.ClearMatchData() | |||
if n == 0 { | |||
b = []byte{0} | |||
} | |||
ptr := unsafe.Pointer(&b[0]) | |||
pos := int(C.SearchOnigRegex((ptr), C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), re.regex, re.region, re.errorInfo, (*C.char)(nil), (*C.int)(nil), (*C.int)(nil))) | |||
bytesPtr := unsafe.Pointer(&b[0]) | |||
pos := int(C.SearchOnigRegex( | |||
bytesPtr, C.int(n), C.int(offset), C.int(ONIG_OPTION_DEFAULT), | |||
re.regex, re.errorInfo, nil, nil, nil, | |||
)) | |||
return pos >= 0 | |||
} | |||
func (re *Regexp) findAll(b []byte, n int) (matches [][]int) { | |||
re.ClearMatchData() | |||
func (re *Regexp) findAll(b []byte, n int) [][]int { | |||
if n < 0 { | |||
n = len(b) | |||
} | |||
matchData := re.matchData | |||
offset := 0 | |||
capture := make([][]int, 0, numMatchStartSize) | |||
var offset int | |||
for offset <= n { | |||
if matchData.count >= len(matchData.indexes) { | |||
length := len(matchData.indexes[0]) | |||
matchData.indexes = append(matchData.indexes, make([]int32, length)) | |||
} | |||
if match := re.find(b, n, offset); len(match) > 0 { | |||
matchData.count += 1 | |||
//move offset to the ending index of the current match and prepare to find the next non-overlapping match | |||
offset = match[1] | |||
//if match[0] == match[1], it means the current match does not advance the search. we need to exit the loop to avoid getting stuck here. | |||
if match[0] == match[1] { | |||
if offset < n && offset >= 0 { | |||
//there are more bytes, so move offset by a word | |||
_, width := utf8.DecodeRune(b[offset:]) | |||
offset += width | |||
} else { | |||
//search is over, exit loop | |||
break | |||
} | |||
} | |||
} else { | |||
match := re.find(b, n, offset) | |||
if match == nil { | |||
break | |||
} | |||
} | |||
matches2 := matchData.indexes[:matchData.count] | |||
matches = make([][]int, len(matches2)) | |||
for i, v := range matches2 { | |||
matches[i] = make([]int, len(v)) | |||
for j, v2 := range v { | |||
matches[i][j] = int(v2) | |||
capture = append(capture, match) | |||
// move offset to the ending index of the current match and prepare to | |||
// find the next non-overlapping match. | |||
offset = match[1] | |||
// if match[0] == match[1], it means the current match does not advance | |||
// the search. we need to exit the loop to avoid getting stuck here. | |||
if match[0] == match[1] { | |||
if offset < n && offset >= 0 { | |||
//there are more bytes, so move offset by a word | |||
_, width := utf8.DecodeRune(b[offset:]) | |||
offset += width | |||
} else { | |||
//search is over, exit loop | |||
break | |||
} | |||
} | |||
} | |||
return | |||
return capture | |||
} | |||
func (re *Regexp) FindIndex(b []byte) []int { | |||
re.ClearMatchData() | |||
match := re.find(b, len(b), 0) | |||
if len(match) == 0 { | |||
return nil | |||
} | |||
return match[:2] | |||
} | |||
@@ -285,21 +273,21 @@ func (re *Regexp) Find(b []byte) []byte { | |||
if loc == nil { | |||
return nil | |||
} | |||
return getCapture(b, loc[0], loc[1]) | |||
} | |||
func (re *Regexp) FindString(s string) string { | |||
b := []byte(s) | |||
mb := re.Find(b) | |||
mb := re.Find([]byte(s)) | |||
if mb == nil { | |||
return "" | |||
} | |||
return string(mb) | |||
} | |||
func (re *Regexp) FindStringIndex(s string) []int { | |||
b := []byte(s) | |||
return re.FindIndex(b) | |||
return re.FindIndex([]byte(s)) | |||
} | |||
func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { | |||
@@ -307,6 +295,7 @@ func (re *Regexp) FindAllIndex(b []byte, n int) [][]int { | |||
if len(matches) == 0 { | |||
return nil | |||
} | |||
return matches | |||
} | |||
@@ -315,10 +304,12 @@ func (re *Regexp) FindAll(b []byte, n int) [][]byte { | |||
if matches == nil { | |||
return nil | |||
} | |||
matchBytes := make([][]byte, 0, len(matches)) | |||
for _, match := range matches { | |||
matchBytes = append(matchBytes, getCapture(b, match[0], match[1])) | |||
} | |||
return matchBytes | |||
} | |||
@@ -328,6 +319,7 @@ func (re *Regexp) FindAllString(s string, n int) []string { | |||
if matches == nil { | |||
return nil | |||
} | |||
matchStrings := make([]string, 0, len(matches)) | |||
for _, match := range matches { | |||
m := getCapture(b, match[0], match[1]) | |||
@@ -337,51 +329,50 @@ func (re *Regexp) FindAllString(s string, n int) []string { | |||
matchStrings = append(matchStrings, string(m)) | |||
} | |||
} | |||
return matchStrings | |||
} | |||
func (re *Regexp) FindAllStringIndex(s string, n int) [][]int { | |||
b := []byte(s) | |||
return re.FindAllIndex(b, n) | |||
} | |||
func (re *Regexp) findSubmatchIndex(b []byte) (match []int) { | |||
re.ClearMatchData() | |||
match = re.find(b, len(b), 0) | |||
return | |||
return re.FindAllIndex([]byte(s), n) | |||
} | |||
func (re *Regexp) FindSubmatchIndex(b []byte) []int { | |||
match := re.findSubmatchIndex(b) | |||
match := re.find(b, len(b), 0) | |||
if len(match) == 0 { | |||
return nil | |||
} | |||
return match | |||
} | |||
func (re *Regexp) FindSubmatch(b []byte) [][]byte { | |||
match := re.findSubmatchIndex(b) | |||
match := re.FindSubmatchIndex(b) | |||
if match == nil { | |||
return nil | |||
} | |||
length := len(match) / 2 | |||
if length == 0 { | |||
return nil | |||
} | |||
results := make([][]byte, 0, length) | |||
for i := 0; i < length; i++ { | |||
results = append(results, getCapture(b, match[2*i], match[2*i+1])) | |||
} | |||
return results | |||
} | |||
func (re *Regexp) FindStringSubmatch(s string) []string { | |||
b := []byte(s) | |||
match := re.findSubmatchIndex(b) | |||
match := re.FindSubmatchIndex(b) | |||
if match == nil { | |||
return nil | |||
} | |||
length := len(match) / 2 | |||
if length == 0 { | |||
return nil | |||
@@ -396,12 +387,12 @@ func (re *Regexp) FindStringSubmatch(s string) []string { | |||
results = append(results, string(cap)) | |||
} | |||
} | |||
return results | |||
} | |||
func (re *Regexp) FindStringSubmatchIndex(s string) []int { | |||
b := []byte(s) | |||
return re.FindSubmatchIndex(b) | |||
return re.FindSubmatchIndex([]byte(s)) | |||
} | |||
func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { | |||
@@ -409,6 +400,7 @@ func (re *Regexp) FindAllSubmatchIndex(b []byte, n int) [][]int { | |||
if len(matches) == 0 { | |||
return nil | |||
} | |||
return matches | |||
} | |||
@@ -417,6 +409,7 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { | |||
if len(matches) == 0 { | |||
return nil | |||
} | |||
allCapturedBytes := make([][][]byte, 0, len(matches)) | |||
for _, match := range matches { | |||
length := len(match) / 2 | |||
@@ -424,6 +417,7 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { | |||
for i := 0; i < length; i++ { | |||
capturedBytes = append(capturedBytes, getCapture(b, match[2*i], match[2*i+1])) | |||
} | |||
allCapturedBytes = append(allCapturedBytes, capturedBytes) | |||
} | |||
@@ -432,10 +426,12 @@ func (re *Regexp) FindAllSubmatch(b []byte, n int) [][][]byte { | |||
func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { | |||
b := []byte(s) | |||
matches := re.findAll(b, n) | |||
if len(matches) == 0 { | |||
return nil | |||
} | |||
allCapturedStrings := make([][]string, 0, len(matches)) | |||
for _, match := range matches { | |||
length := len(match) / 2 | |||
@@ -448,14 +444,15 @@ func (re *Regexp) FindAllStringSubmatch(s string, n int) [][]string { | |||
capturedStrings = append(capturedStrings, string(cap)) | |||
} | |||
} | |||
allCapturedStrings = append(allCapturedStrings, capturedStrings) | |||
} | |||
return allCapturedStrings | |||
} | |||
func (re *Regexp) FindAllStringSubmatchIndex(s string, n int) [][]int { | |||
b := []byte(s) | |||
return re.FindAllSubmatchIndex(b, n) | |||
return re.FindAllSubmatchIndex([]byte(s), n) | |||
} | |||
func (re *Regexp) Match(b []byte) bool { | |||
@@ -463,44 +460,25 @@ func (re *Regexp) Match(b []byte) bool { | |||
} | |||
func (re *Regexp) MatchString(s string) bool { | |||
b := []byte(s) | |||
return re.Match(b) | |||
return re.Match([]byte(s)) | |||
} | |||
func (re *Regexp) NumSubexp() int { | |||
return (int)(C.onig_number_of_captures(re.regex)) | |||
} | |||
func (re *Regexp) getNamedCapture(name []byte, capturedBytes [][]byte) []byte { | |||
nameStr := string(name) | |||
capNum := re.groupNameToId(nameStr) | |||
if capNum < 0 || capNum >= len(capturedBytes) { | |||
panic(fmt.Sprintf("capture group name (%q) has error\n", nameStr)) | |||
} | |||
return capturedBytes[capNum] | |||
} | |||
func (re *Regexp) getNumberedCapture(num int, capturedBytes [][]byte) []byte { | |||
//when named capture groups exist, numbered capture groups returns "" | |||
if re.namedGroupInfo == nil && num <= (len(capturedBytes)-1) && num >= 0 { | |||
return capturedBytes[num] | |||
} | |||
return ([]byte)("") | |||
} | |||
func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) []byte { | |||
replLen := len(repl) | |||
newRepl := make([]byte, 0, replLen*3) | |||
inEscapeMode := false | |||
inGroupNameMode := false | |||
groupName := make([]byte, 0, replLen) | |||
for index := 0; index < replLen; index += 1 { | |||
var inGroupNameMode, inEscapeMode bool | |||
for index := 0; index < replLen; index++ { | |||
ch := repl[index] | |||
if inGroupNameMode && ch == byte('<') { | |||
} else if inGroupNameMode && ch == byte('>') { | |||
inGroupNameMode = false | |||
groupNameStr := string(groupName) | |||
capBytes := capturedBytes[groupNameStr] | |||
capBytes := capturedBytes[string(groupName)] | |||
newRepl = append(newRepl, capBytes...) | |||
groupName = groupName[:0] //reset the name | |||
} else if inGroupNameMode { | |||
@@ -512,7 +490,7 @@ func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) | |||
} else if inEscapeMode && ch == byte('k') && (index+1) < replLen && repl[index+1] == byte('<') { | |||
inGroupNameMode = true | |||
inEscapeMode = false | |||
index += 1 //bypass the next char '<' | |||
index++ //bypass the next char '<' | |||
} else if inEscapeMode { | |||
newRepl = append(newRepl, '\\') | |||
newRepl = append(newRepl, ch) | |||
@@ -523,6 +501,7 @@ func fillCapturedValues(repl []byte, _ []byte, capturedBytes map[string][]byte) | |||
inEscapeMode = !inEscapeMode | |||
} | |||
} | |||
return newRepl | |||
} | |||
@@ -532,10 +511,12 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map | |||
if len(matches) == 0 { | |||
return src | |||
} | |||
dest := make([]byte, 0, srcLen) | |||
for i, match := range matches { | |||
length := len(match) / 2 | |||
capturedBytes := make(map[string][]byte) | |||
if re.namedGroupInfo == nil { | |||
for j := 0; j < length; j++ { | |||
capturedBytes[strconv.Itoa(j)] = getCapture(src, match[2*j], match[2*j+1]) | |||
@@ -545,6 +526,7 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map | |||
capturedBytes[name] = getCapture(src, match[2*j], match[2*j+1]) | |||
} | |||
} | |||
matchBytes := getCapture(src, match[0], match[1]) | |||
newRepl := replFunc(repl, matchBytes, capturedBytes) | |||
prevEnd := 0 | |||
@@ -552,15 +534,19 @@ func (re *Regexp) replaceAll(src, repl []byte, replFunc func([]byte, []byte, map | |||
prevMatch := matches[i-1][:2] | |||
prevEnd = prevMatch[1] | |||
} | |||
if match[0] > prevEnd && prevEnd >= 0 && match[0] <= srcLen { | |||
dest = append(dest, src[prevEnd:match[0]]...) | |||
} | |||
dest = append(dest, newRepl...) | |||
} | |||
lastEnd := matches[len(matches)-1][1] | |||
if lastEnd < srcLen && lastEnd >= 0 { | |||
dest = append(dest, src[lastEnd:]...) | |||
} | |||
return dest | |||
} | |||
@@ -569,7 +555,7 @@ func (re *Regexp) ReplaceAll(src, repl []byte) []byte { | |||
} | |||
func (re *Regexp) ReplaceAllFunc(src []byte, repl func([]byte) []byte) []byte { | |||
return re.replaceAll(src, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { | |||
return re.replaceAll(src, nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { | |||
return repl(matchBytes) | |||
}) | |||
} | |||
@@ -579,43 +565,44 @@ func (re *Regexp) ReplaceAllString(src, repl string) string { | |||
} | |||
func (re *Regexp) ReplaceAllStringFunc(src string, repl func(string) string) string { | |||
srcB := []byte(src) | |||
destB := re.replaceAll(srcB, []byte(""), func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { | |||
return string(re.replaceAll([]byte(src), nil, func(_ []byte, matchBytes []byte, _ map[string][]byte) []byte { | |||
return []byte(repl(string(matchBytes))) | |||
}) | |||
return string(destB) | |||
})) | |||
} | |||
func (re *Regexp) String() string { | |||
return re.pattern | |||
} | |||
func grow_buffer(b []byte, offset int, n int) []byte { | |||
func growBuffer(b []byte, offset int, n int) []byte { | |||
if offset+n > cap(b) { | |||
buf := make([]byte, 2*cap(b)+n) | |||
copy(buf, b[:offset]) | |||
return buf | |||
} | |||
return b | |||
} | |||
func fromReader(r io.RuneReader) []byte { | |||
b := make([]byte, numReadBufferStartSize) | |||
offset := 0 | |||
var err error = nil | |||
for err == nil { | |||
var offset int | |||
for { | |||
rune, runeWidth, err := r.ReadRune() | |||
if err == nil { | |||
b = grow_buffer(b, offset, runeWidth) | |||
writeWidth := utf8.EncodeRune(b[offset:], rune) | |||
if runeWidth != writeWidth { | |||
panic("reading rune width not equal to the written rune width") | |||
} | |||
offset += writeWidth | |||
} else { | |||
if err != nil { | |||
break | |||
} | |||
b = growBuffer(b, offset, runeWidth) | |||
writeWidth := utf8.EncodeRune(b[offset:], rune) | |||
if runeWidth != writeWidth { | |||
panic("reading rune width not equal to the written rune width") | |||
} | |||
offset += writeWidth | |||
} | |||
return b[:offset] | |||
} | |||
@@ -644,25 +631,25 @@ func MatchString(pattern string, s string) (matched bool, error error) { | |||
if err != nil { | |||
return false, err | |||
} | |||
return re.MatchString(s), nil | |||
} | |||
func (re *Regexp) Gsub(src, repl string) string { | |||
srcBytes := ([]byte)(src) | |||
replBytes := ([]byte)(repl) | |||
replaced := re.replaceAll(srcBytes, replBytes, fillCapturedValues) | |||
return string(replaced) | |||
return string(re.replaceAll([]byte(src), []byte(repl), fillCapturedValues)) | |||
} | |||
func (re *Regexp) GsubFunc(src string, replFunc func(string, map[string]string) string) string { | |||
srcBytes := ([]byte)(src) | |||
replaced := re.replaceAll(srcBytes, nil, func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte { | |||
capturedStrings := make(map[string]string) | |||
for name, capBytes := range capturedBytes { | |||
capturedStrings[name] = string(capBytes) | |||
} | |||
matchString := string(matchBytes) | |||
return ([]byte)(replFunc(matchString, capturedStrings)) | |||
}) | |||
replaced := re.replaceAll([]byte(src), nil, | |||
func(_ []byte, matchBytes []byte, capturedBytes map[string][]byte) []byte { | |||
capturedStrings := make(map[string]string) | |||
for name, capBytes := range capturedBytes { | |||
capturedStrings[name] = string(capBytes) | |||
} | |||
matchString := string(matchBytes) | |||
return ([]byte)(replFunc(matchString, capturedStrings)) | |||
}, | |||
) | |||
return string(replaced) | |||
} |
@@ -1,22 +0,0 @@ | |||
Copyright (c) 2013 Caleb Spare | |||
MIT License | |||
Permission is hereby granted, free of charge, to any person obtaining | |||
a copy of this software and associated documentation files (the | |||
"Software"), to deal in the Software without restriction, including | |||
without limitation the rights to use, copy, modify, merge, publish, | |||
distribute, sublicense, and/or sell copies of the Software, and to | |||
permit persons to whom the Software is furnished to do so, subject to | |||
the following conditions: | |||
The above copyright notice and this permission notice shall be | |||
included in all copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, | |||
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF | |||
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND | |||
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE | |||
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION | |||
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION | |||
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
@@ -1,7 +0,0 @@ | |||
# Trie | |||
[![GoDoc](http://godoc.org/github.com/toqueteos/trie?status.png)](http://godoc.org/github.com/toqueteos/trie) | |||
This is a fork of https://github.com/cespare/go-trie that adds the `PrefixIndex` method. | |||
It's required for https://github.com/toqueteos/substring. |
@@ -1 +0,0 @@ | |||
module github.com/toqueteos/trie |
@@ -1,102 +0,0 @@ | |||
// Package trie is an implementation of a trie (prefix tree) data structure over byte slices. It provides a | |||
// small and simple API for usage as a set as well as a 'Node' API for walking the trie. | |||
package trie | |||
// A Trie is a a prefix tree. | |||
type Trie struct { | |||
root *Node | |||
} | |||
// New construct a new, empty Trie ready for use. | |||
func New() *Trie { | |||
return &Trie{ | |||
root: &Node{}, | |||
} | |||
} | |||
// Insert puts b into the Trie. It returns true if the element was not previously in t. | |||
func (t *Trie) Insert(b []byte) bool { | |||
n := t.root | |||
for _, c := range b { | |||
next, ok := n.Walk(c) | |||
if !ok { | |||
next = &Node{} | |||
n.branches[c] = next | |||
n.hasChildren = true | |||
} | |||
n = next | |||
} | |||
if n.terminal { | |||
return false | |||
} | |||
n.terminal = true | |||
return true | |||
} | |||
// Contains checks t for membership of b. | |||
func (t *Trie) Contains(b []byte) bool { | |||
n := t.root | |||
for _, c := range b { | |||
next, ok := n.Walk(c) | |||
if !ok { | |||
return false | |||
} | |||
n = next | |||
} | |||
return n.terminal | |||
} | |||
// PrefixIndex walks through `b` until a prefix is found (terminal node) or it is exhausted. | |||
func (t *Trie) PrefixIndex(b []byte) int { | |||
var idx int | |||
n := t.root | |||
for _, c := range b { | |||
next, ok := n.Walk(c) | |||
if !ok { | |||
return -1 | |||
} | |||
if next.terminal { | |||
return idx | |||
} | |||
n = next | |||
idx++ | |||
} | |||
if !n.terminal { | |||
idx = -1 | |||
} | |||
return idx | |||
} | |||
// Root returns the root node of a Trie. A valid Trie (i.e., constructed with New), always has a non-nil root | |||
// node. | |||
func (t *Trie) Root() *Node { | |||
return t.root | |||
} | |||
// Node is a single vertex of the trie. It has one outgoing edge slot per
// possible byte value.
type Node struct {
	branches    [256]*Node
	terminal    bool
	hasChildren bool
}

// Walk follows the edge labelled c. It returns the child node and true when
// that edge exists, and nil and false otherwise.
func (n *Node) Walk(c byte) (next *Node, ok bool) {
	if child := n.branches[c]; child != nil {
		return child, true
	}
	return nil, false
}

// Terminal reports whether the path from the root to n spells an element of
// the set. For example, a terminal root means []byte{} is in the trie.
func (n *Node) Terminal() bool {
	return n.terminal
}

// Leaf reports whether n has no children, i.e. the path ending at n is not a
// proper prefix of any other element of the trie.
func (n *Node) Leaf() bool {
	if n.hasChildren {
		return false
	}
	return true
}
@@ -1,24 +0,0 @@ | |||
# Compiled Object files, Static and Dynamic libs (Shared Objects) | |||
*.o | |||
*.a | |||
*.so | |||
# Folders | |||
_obj | |||
_test | |||
# Architecture specific extensions/prefixes | |||
*.[568vq] | |||
[568vq].out | |||
*.cgo1.go | |||
*.cgo2.c | |||
_cgo_defun.c | |||
_cgo_gotypes.go | |||
_cgo_export.* | |||
_testmain.go | |||
*.exe | |||
*.test | |||
*.prof |
@@ -1,11 +0,0 @@ | |||
language: go | |||
go: | |||
- 1.2 | |||
- 1.3 | |||
- 1.4 | |||
- tip | |||
script: | |||
- go get launchpad.net/gocheck | |||
- go test |
@@ -1,22 +0,0 @@ | |||
The MIT License (MIT) | |||
Copyright (c) 2015 Carlos Cobo | |||
Permission is hereby granted, free of charge, to any person obtaining a copy | |||
of this software and associated documentation files (the "Software"), to deal | |||
in the Software without restriction, including without limitation the rights | |||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell | |||
copies of the Software, and to permit persons to whom the Software is | |||
furnished to do so, subject to the following conditions: | |||
The above copyright notice and this permission notice shall be included in all | |||
copies or substantial portions of the Software. | |||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR | |||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | |||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE | |||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER | |||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, | |||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE | |||
SOFTWARE. | |||
@@ -1,80 +0,0 @@ | |||
# substring [![Build Status](https://travis-ci.org/toqueteos/substring.png?branch=master)](https://travis-ci.org/toqueteos/substring) [![GoDoc](http://godoc.org/github.com/toqueteos/substring?status.png)](http://godoc.org/github.com/toqueteos/substring) [![GitHub release](https://img.shields.io/github/release/toqueteos/substring.svg)](https://github.com/toqueteos/substring/releases) | |||
Simple and composable alternative to [regexp](http://golang.org/pkg/regexp/) package for fast substring searches. | |||
## Installation | |||
The recommended way to install substring: | |||
``` | |||
go get -t gopkg.in/toqueteos/substring.v1 | |||
``` | |||
The `-t` flag is for fetching [gocheck](https://gopkg.in/check.v1), required for tests and benchmarks. | |||
## Examples | |||
A basic example with two matchers: | |||
```go | |||
package main | |||
import ( | |||
"fmt" | |||
"regexp" | |||
"gopkg.in/toqueteos/substring.v1" | |||
) | |||
func main() { | |||
m1 := substring.After("assets/", substring.Or( | |||
substring.Has("jquery"), | |||
substring.Has("angular"), | |||
substring.Suffixes(".js", ".css", ".html"), | |||
)) | |||
fmt.Println(m1.Match("assets/angular/foo/bar")) //Prints: true | |||
fmt.Println(m1.Match("assets/js/file.js")) //Prints: true | |||
fmt.Println(m1.Match("assets/style/bar.css")) //Prints: true | |||
fmt.Println(m1.Match("assets/foo/bar.html")) //Prints: false | |||
fmt.Println(m1.Match("assets/js/qux.json")) //Prints: false | |||
fmt.Println(m1.Match("core/file.html")) //Prints: false | |||
fmt.Println(m1.Match("foobar/that.jsx")) //Prints: false | |||
m2 := substring.After("vendor/", substring.Suffixes(".css", ".js", ".less")) | |||
fmt.Println(m2.Match("foo/vendor/bar/qux.css")) //Prints: true | |||
fmt.Println(m2.Match("foo/var/qux.less")) //Prints: false | |||
re := regexp.MustCompile(`vendor\/.*\.(css|js|less)$`) | |||
fmt.Println(re.MatchString("foo/vendor/bar/qux.css")) //Prints: true | |||
fmt.Println(re.MatchString("foo/var/qux.less")) //Prints: false | |||
} | |||
``` | |||
## How fast? | |||
It may vary depending on your use case but 1~2 orders of magnitude faster than `regexp` is pretty common. | |||
Test it out for yourself by running `go test -check.b`! | |||
``` | |||
$ go test -check.b | |||
PASS: lib_test.go:18: LibSuite.BenchmarkExample1 10000000 221 ns/op | |||
PASS: lib_test.go:23: LibSuite.BenchmarkExample2 10000000 229 ns/op | |||
PASS: lib_test.go:28: LibSuite.BenchmarkExample3 10000000 216 ns/op | |||
PASS: lib_test.go:33: LibSuite.BenchmarkExample4 10000000 208 ns/op | |||
PASS: lib_test.go:38: LibSuite.BenchmarkExample5 20000000 82.1 ns/op | |||
PASS: lib_test.go:48: LibSuite.BenchmarkExampleRe1 500000 4136 ns/op | |||
PASS: lib_test.go:53: LibSuite.BenchmarkExampleRe2 500000 5222 ns/op | |||
PASS: lib_test.go:58: LibSuite.BenchmarkExampleRe3 500000 5116 ns/op | |||
PASS: lib_test.go:63: LibSuite.BenchmarkExampleRe4 500000 4020 ns/op | |||
PASS: lib_test.go:68: LibSuite.BenchmarkExampleRe5 10000000 226 ns/op | |||
OK: 10 passed | |||
PASS | |||
ok gopkg.in/toqueteos/substring.v1 23.471s | |||
``` | |||
License | |||
------- | |||
MIT, see [LICENSE](LICENSE) |
@@ -1,229 +0,0 @@ | |||
package substring | |||
import ( | |||
"bytes" | |||
"regexp" | |||
"github.com/toqueteos/trie" | |||
) | |||
// BytesMatcher is the common interface of the byte-slice matchers in this
// file.
type BytesMatcher interface {
	// Match reports whether b satisfies the matcher.
	Match(b []byte) bool
	// MatchIndex returns an index derived from the match; the matchers in
	// this file return -1 when b does not match.
	MatchIndex(b []byte) int
}

// regexpBytes matches byte slices against a compiled regular expression.
type regexpBytes struct{ re *regexp.Regexp }

// BytesRegexp compiles pat and wraps it in a matcher. It panics when pat is
// not a valid regular expression, because it uses regexp.MustCompile.
func BytesRegexp(pat string) *regexpBytes { return &regexpBytes{regexp.MustCompile(pat)} }

// Match reports whether the expression matches anywhere in b.
func (m *regexpBytes) Match(b []byte) bool { return m.re.Match(b) }

// MatchIndex returns the end offset of the leftmost match in b, or -1 when
// there is no match.
func (m *regexpBytes) MatchIndex(b []byte) int {
	if loc := m.re.FindIndex(b); loc != nil {
		return loc[1]
	}
	return -1
}
// exactBytes matches only input that is byte-for-byte equal to pat.
type exactBytes struct{ pat []byte }

// BytesExact returns a matcher that accepts exactly the bytes of pat.
func BytesExact(pat string) *exactBytes { return &exactBytes{[]byte(pat)} }

// Match reports whether b has the same length and contents as the pattern.
func (m *exactBytes) Match(b []byte) bool {
	return bytes.Equal(m.pat, b)
}

// MatchIndex returns len(b) when b equals the pattern and -1 otherwise.
func (m *exactBytes) MatchIndex(b []byte) int {
	if !m.Match(b) {
		return -1
	}
	return len(b)
}
// anyBytes looks for the input inside the pattern, i.e. the pattern is the
// haystack and the argument to Match is the needle.
type anyBytes struct {
	pat []byte
}

// BytesAny returns a matcher that accepts any input occurring inside pat.
func BytesAny(pat string) *anyBytes { return &anyBytes{pat: []byte(pat)} }

// Match reports whether b occurs within the pattern.
func (m *anyBytes) Match(b []byte) bool { return bytes.Contains(m.pat, b) }

// MatchIndex returns the offset in the pattern just past the first
// occurrence of b, or -1 when b does not occur in the pattern.
func (m *anyBytes) MatchIndex(b []byte) int {
	start := bytes.Index(m.pat, b)
	if start < 0 {
		return -1
	}
	return start + len(b)
}
// hasBytes looks for the pattern inside the input.
type hasBytes struct {
	pat []byte
}

// BytesHas returns a matcher that accepts any input containing pat.
func BytesHas(pat string) *hasBytes { return &hasBytes{pat: []byte(pat)} }

// Match reports whether the pattern occurs within b.
func (m *hasBytes) Match(b []byte) bool { return bytes.Contains(b, m.pat) }

// MatchIndex returns the offset in b just past the first occurrence of the
// pattern, or -1 when the pattern does not occur in b.
func (m *hasBytes) MatchIndex(b []byte) int {
	start := bytes.Index(b, m.pat)
	if start < 0 {
		return -1
	}
	return start + len(m.pat)
}
// prefixBytes matches input that starts with a fixed pattern.
type prefixBytes struct{ pat []byte }

// BytesPrefix returns a matcher that accepts input beginning with pat.
func BytesPrefix(pat string) *prefixBytes { return &prefixBytes{pat: []byte(pat)} }

// Match reports whether b begins with the pattern.
func (m *prefixBytes) Match(b []byte) bool { return bytes.HasPrefix(b, m.pat) }

// MatchIndex returns the pattern length when b begins with the pattern and
// -1 otherwise.
func (m *prefixBytes) MatchIndex(b []byte) int {
	if !bytes.HasPrefix(b, m.pat) {
		return -1
	}
	return len(m.pat)
}
// prefixes | |||
type prefixesBytes struct { | |||
t *trie.Trie | |||
} | |||
func BytesPrefixes(pats ...string) *prefixesBytes { | |||
t := trie.New() | |||
for _, pat := range pats { | |||
t.Insert([]byte(pat)) | |||
} | |||
return &prefixesBytes{t} | |||
} | |||
func (m *prefixesBytes) Match(b []byte) bool { return m.t.PrefixIndex(b) >= 0 } | |||
func (m *prefixesBytes) MatchIndex(b []byte) int { | |||
if idx := m.t.PrefixIndex(b); idx >= 0 { | |||
return idx | |||
} | |||
return -1 | |||
} | |||
// suffixBytes matches input that ends with a fixed pattern.
type suffixBytes struct{ pat []byte }

// BytesSuffix returns a matcher that accepts input ending with pat.
func BytesSuffix(pat string) *suffixBytes { return &suffixBytes{pat: []byte(pat)} }

// Match reports whether b ends with the pattern.
func (m *suffixBytes) Match(b []byte) bool { return bytes.HasSuffix(b, m.pat) }

// MatchIndex returns the pattern length (not an offset into b) when b ends
// with the pattern, and -1 otherwise.
func (m *suffixBytes) MatchIndex(b []byte) int {
	if !bytes.HasSuffix(b, m.pat) {
		return -1
	}
	return len(m.pat)
}
// suffixes | |||
type suffixesBytes struct { | |||
t *trie.Trie | |||
} | |||
func BytesSuffixes(pats ...string) *suffixesBytes { | |||
t := trie.New() | |||
for _, pat := range pats { | |||
t.Insert(reverse([]byte(pat))) | |||
} | |||
return &suffixesBytes{t} | |||
} | |||
func (m *suffixesBytes) Match(b []byte) bool { | |||
return m.t.PrefixIndex(reverse(b)) >= 0 | |||
} | |||
func (m *suffixesBytes) MatchIndex(b []byte) int { | |||
if idx := m.t.PrefixIndex(reverse(b)); idx >= 0 { | |||
return idx | |||
} | |||
return -1 | |||
} | |||
// after | |||
type afterBytes struct { | |||
first []byte | |||
matcher BytesMatcher | |||
} | |||
func BytesAfter(first string, m BytesMatcher) *afterBytes { return &afterBytes{[]byte(first), m} } | |||
func (a *afterBytes) Match(b []byte) bool { | |||
if idx := bytes.Index(b, a.first); idx >= 0 { | |||
return a.matcher.Match(b[idx+len(a.first):]) | |||
} | |||
return false | |||
} | |||
func (a *afterBytes) MatchIndex(b []byte) int { | |||
if idx := bytes.Index(b, a.first); idx >= 0 { | |||
return idx + a.matcher.MatchIndex(b[idx:]) | |||
} | |||
return -1 | |||
} | |||
// and, returns true iff all matchers return true | |||
type andBytes struct{ matchers []BytesMatcher } | |||
func BytesAnd(m ...BytesMatcher) *andBytes { return &andBytes{m} } | |||
func (a *andBytes) Match(b []byte) bool { | |||
for _, m := range a.matchers { | |||
if !m.Match(b) { | |||
return false | |||
} | |||
} | |||
return true | |||
} | |||
func (a *andBytes) MatchIndex(b []byte) int { | |||
longest := 0 | |||
for _, m := range a.matchers { | |||
if idx := m.MatchIndex(b); idx < 0 { | |||
return -1 | |||
} else if idx > longest { | |||
longest = idx | |||
} | |||
} | |||
return longest | |||
} | |||
// or, returns true iff any matcher returns true | |||
type orBytes struct{ matchers []BytesMatcher } | |||
func BytesOr(m ...BytesMatcher) *orBytes { return &orBytes{m} } | |||
func (o *orBytes) Match(b []byte) bool { | |||
for _, m := range o.matchers { | |||
if m.Match(b) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func (o *orBytes) MatchIndex(b []byte) int { | |||
for _, m := range o.matchers { | |||
if idx := m.MatchIndex(b); idx >= 0 { | |||
return idx | |||
} | |||
} | |||
return -1 | |||
} | |||
type suffixGroupBytes struct { | |||
suffix BytesMatcher | |||
matchers []BytesMatcher | |||
} | |||
func BytesSuffixGroup(s string, m ...BytesMatcher) *suffixGroupBytes { | |||
return &suffixGroupBytes{BytesSuffix(s), m} | |||
} | |||
func (sg *suffixGroupBytes) Match(b []byte) bool { | |||
if sg.suffix.Match(b) { | |||
return BytesOr(sg.matchers...).Match(b) | |||
} | |||
return false | |||
} | |||
func (sg *suffixGroupBytes) MatchIndex(b []byte) int { | |||
if sg.suffix.MatchIndex(b) >= 0 { | |||
return BytesOr(sg.matchers...).MatchIndex(b) | |||
} | |||
return -1 | |||
} |
@@ -1,10 +0,0 @@ | |||
package substring | |||
// reverse is a helper for the Suffixes matchers. It returns a new slice
// holding the bytes of b in reverse order.
//
// The input is left untouched: reversing in place would modify slices owned
// by callers of the byte-based suffix matchers, which pass their argument
// straight through.
func reverse(b []byte) []byte {
	n := len(b)
	out := make([]byte, n)
	for i, c := range b {
		out[n-1-i] = c
	}
	return out
}
@@ -1,216 +0,0 @@ | |||
package substring | |||
import ( | |||
"regexp" | |||
"strings" | |||
"github.com/toqueteos/trie" | |||
) | |||
// StringsMatcher is the common interface of the string matchers in this
// file.
type StringsMatcher interface {
	// Match reports whether s satisfies the matcher.
	Match(s string) bool
	// MatchIndex returns an index derived from the match; the matchers in
	// this file return -1 when s does not match.
	MatchIndex(s string) int
}

// regexpString matches strings against a compiled regular expression.
type regexpString struct{ re *regexp.Regexp }

// Regexp compiles pat and wraps it in a matcher. It panics when pat is not a
// valid regular expression, because it uses regexp.MustCompile.
func Regexp(pat string) *regexpString { return &regexpString{regexp.MustCompile(pat)} }

// Match reports whether the expression matches anywhere in s.
func (m *regexpString) Match(s string) bool { return m.re.MatchString(s) }

// MatchIndex returns the end offset of the leftmost match in s, or -1 when
// there is no match.
func (m *regexpString) MatchIndex(s string) int {
	if loc := m.re.FindStringIndex(s); loc != nil {
		return loc[1]
	}
	return -1
}
// exactString matches only input equal to pat.
type exactString struct{ pat string }

// Exact returns a matcher that accepts exactly the string pat.
func Exact(pat string) *exactString { return &exactString{pat: pat} }

// Match reports whether s equals the pattern.
func (m *exactString) Match(s string) bool { return s == m.pat }

// MatchIndex returns len(s) when s equals the pattern and -1 otherwise.
func (m *exactString) MatchIndex(s string) int {
	if s != m.pat {
		return -1
	}
	return len(s)
}
// anyString looks for the input inside the pattern, i.e. the pattern is the
// haystack and the argument to Match is the needle.
type anyString struct{ pat string }

// Any returns a matcher that accepts any input occurring inside pat.
func Any(pat string) *anyString { return &anyString{pat: pat} }

// Match reports whether s occurs within the pattern.
func (m *anyString) Match(s string) bool {
	return strings.Contains(m.pat, s)
}

// MatchIndex returns the offset in the pattern just past the first
// occurrence of s, or -1 when s does not occur in the pattern.
func (m *anyString) MatchIndex(s string) int {
	start := strings.Index(m.pat, s)
	if start < 0 {
		return -1
	}
	return start + len(s)
}
// hasString looks for the pattern inside the input.
type hasString struct{ pat string }

// Has returns a matcher that accepts any input containing pat.
func Has(pat string) *hasString { return &hasString{pat: pat} }

// Match reports whether the pattern occurs within s.
func (m *hasString) Match(s string) bool {
	return strings.Contains(s, m.pat)
}

// MatchIndex returns the offset in s just past the first occurrence of the
// pattern, or -1 when the pattern does not occur in s.
func (m *hasString) MatchIndex(s string) int {
	start := strings.Index(s, m.pat)
	if start < 0 {
		return -1
	}
	return start + len(m.pat)
}
// prefixString matches input that starts with a fixed pattern.
type prefixString struct{ pat string }

// Prefix returns a matcher that accepts input beginning with pat.
func Prefix(pat string) *prefixString { return &prefixString{pat: pat} }

// Match reports whether s begins with the pattern.
func (m *prefixString) Match(s string) bool { return strings.HasPrefix(s, m.pat) }

// MatchIndex returns the pattern length when s begins with the pattern and
// -1 otherwise.
func (m *prefixString) MatchIndex(s string) int {
	if !strings.HasPrefix(s, m.pat) {
		return -1
	}
	return len(m.pat)
}
// prefixes | |||
type prefixesString struct{ t *trie.Trie } | |||
func Prefixes(pats ...string) *prefixesString { | |||
t := trie.New() | |||
for _, pat := range pats { | |||
t.Insert([]byte(pat)) | |||
} | |||
return &prefixesString{t} | |||
} | |||
func (m *prefixesString) Match(s string) bool { return m.t.PrefixIndex([]byte(s)) >= 0 } | |||
func (m *prefixesString) MatchIndex(s string) int { | |||
if idx := m.t.PrefixIndex([]byte(s)); idx >= 0 { | |||
return idx | |||
} | |||
return -1 | |||
} | |||
// suffixString matches input that ends with a fixed pattern.
type suffixString struct{ pat string }

// Suffix returns a matcher that accepts input ending with pat.
func Suffix(pat string) *suffixString { return &suffixString{pat: pat} }

// Match reports whether s ends with the pattern.
func (m *suffixString) Match(s string) bool { return strings.HasSuffix(s, m.pat) }

// MatchIndex returns the pattern length (not an offset into s) when s ends
// with the pattern, and -1 otherwise.
func (m *suffixString) MatchIndex(s string) int {
	if !strings.HasSuffix(s, m.pat) {
		return -1
	}
	return len(m.pat)
}
// suffixes | |||
type suffixesString struct{ t *trie.Trie } | |||
func Suffixes(pats ...string) *suffixesString { | |||
t := trie.New() | |||
for _, pat := range pats { | |||
t.Insert(reverse([]byte(pat))) | |||
} | |||
return &suffixesString{t} | |||
} | |||
func (m *suffixesString) Match(s string) bool { | |||
return m.t.PrefixIndex(reverse([]byte(s))) >= 0 | |||
} | |||
func (m *suffixesString) MatchIndex(s string) int { | |||
if idx := m.t.PrefixIndex(reverse([]byte(s))); idx >= 0 { | |||
return idx | |||
} | |||
return -1 | |||
} | |||
// after | |||
type afterString struct { | |||
first string | |||
matcher StringsMatcher | |||
} | |||
func After(first string, m StringsMatcher) *afterString { | |||
return &afterString{first, m} | |||
} | |||
func (a *afterString) Match(s string) bool { | |||
if idx := strings.Index(s, a.first); idx >= 0 { | |||
return a.matcher.Match(s[idx+len(a.first):]) | |||
} | |||
return false | |||
} | |||
func (a *afterString) MatchIndex(s string) int { | |||
if idx := strings.Index(s, a.first); idx >= 0 { | |||
return idx + a.matcher.MatchIndex(s[idx+len(a.first):]) | |||
} | |||
return -1 | |||
} | |||
// and, returns true iff all matchers return true | |||
type andString struct{ matchers []StringsMatcher } | |||
func And(m ...StringsMatcher) *andString { return &andString{m} } | |||
func (a *andString) Match(s string) bool { | |||
for _, m := range a.matchers { | |||
if !m.Match(s) { | |||
return false | |||
} | |||
} | |||
return true | |||
} | |||
func (a *andString) MatchIndex(s string) int { | |||
longest := 0 | |||
for _, m := range a.matchers { | |||
if idx := m.MatchIndex(s); idx < 0 { | |||
return -1 | |||
} else if idx > longest { | |||
longest = idx | |||
} | |||
} | |||
return longest | |||
} | |||
// or, returns true iff any matcher returns true | |||
type orString struct{ matchers []StringsMatcher } | |||
func Or(m ...StringsMatcher) *orString { return &orString{m} } | |||
func (o *orString) Match(s string) bool { | |||
for _, m := range o.matchers { | |||
if m.Match(s) { | |||
return true | |||
} | |||
} | |||
return false | |||
} | |||
func (o *orString) MatchIndex(s string) int { | |||
for _, m := range o.matchers { | |||
if idx := m.MatchIndex(s); idx >= 0 { | |||
return idx | |||
} | |||
} | |||
return -1 | |||
} | |||
type suffixGroupString struct { | |||
suffix StringsMatcher | |||
matchers []StringsMatcher | |||
} | |||
func SuffixGroup(s string, m ...StringsMatcher) *suffixGroupString { | |||
return &suffixGroupString{Suffix(s), m} | |||
} | |||
func (sg *suffixGroupString) Match(s string) bool { | |||
if sg.suffix.Match(s) { | |||
return Or(sg.matchers...).Match(s) | |||
} | |||
return false | |||
} | |||
func (sg *suffixGroupString) MatchIndex(s string) int { | |||
if sg.suffix.MatchIndex(s) >= 0 { | |||
return Or(sg.matchers...).MatchIndex(s) | |||
} | |||
return -1 | |||
} |
@@ -202,7 +202,7 @@ github.com/gliderlabs/ssh | |||
# github.com/glycerine/go-unsnap-stream v0.0.0-20190901134440-81cf024a9e0a | |||
## explicit | |||
github.com/glycerine/go-unsnap-stream | |||
# github.com/go-enry/go-enry/v2 v2.3.0 | |||
# github.com/go-enry/go-enry/v2 v2.5.2 | |||
## explicit | |||
github.com/go-enry/go-enry/v2 | |||
github.com/go-enry/go-enry/v2/data | |||
@@ -210,7 +210,7 @@ github.com/go-enry/go-enry/v2/data/rule | |||
github.com/go-enry/go-enry/v2/internal/tokenizer | |||
github.com/go-enry/go-enry/v2/internal/tokenizer/flex | |||
github.com/go-enry/go-enry/v2/regex | |||
# github.com/go-enry/go-oniguruma v1.2.0 | |||
# github.com/go-enry/go-oniguruma v1.2.1 | |||
github.com/go-enry/go-oniguruma | |||
# github.com/go-git/gcfg v1.5.0 | |||
github.com/go-git/gcfg | |||
@@ -614,8 +614,6 @@ github.com/syndtr/goleveldb/leveldb/util | |||
# github.com/tinylib/msgp v1.1.2 | |||
## explicit | |||
github.com/tinylib/msgp/msgp | |||
# github.com/toqueteos/trie v1.0.0 | |||
github.com/toqueteos/trie | |||
# github.com/toqueteos/webbrowser v1.2.0 | |||
github.com/toqueteos/webbrowser | |||
# github.com/tstranex/u2f v1.0.0 | |||
@@ -836,8 +834,6 @@ gopkg.in/ldap.v3 | |||
# gopkg.in/testfixtures.v2 v2.5.0 | |||
## explicit | |||
gopkg.in/testfixtures.v2 | |||
# gopkg.in/toqueteos/substring.v1 v1.0.2 | |||
gopkg.in/toqueteos/substring.v1 | |||
# gopkg.in/warnings.v0 v0.1.2 | |||
gopkg.in/warnings.v0 | |||
# gopkg.in/yaml.v2 v2.2.8 |