12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823 |
- package data
-
- import (
- "bytes"
- "strings"
-
- "github.com/go-enry/go-enry/v2/regex"
- )
-
// GeneratedCodeExtensions is the set of file extensions that mark a file as
// generated on their own, with no need to look at its name or content.
var GeneratedCodeExtensions = map[string]struct{}{
	// Xcode files
	".xcuserstate":     {},
	".xcworkspacedata": {},
	".nib":             {},
}
-
- // GeneratedCodeNameMatcher is a function that tells whether the file with the
- // given name is generated.
- type GeneratedCodeNameMatcher func(string) bool
-
- func nameMatches(pattern string) GeneratedCodeNameMatcher {
- r := regex.MustCompile(pattern)
- return func(name string) bool {
- return r.MatchString(name)
- }
- }
-
- func nameContains(pattern string) GeneratedCodeNameMatcher {
- return func(name string) bool {
- return strings.Contains(name, pattern)
- }
- }
-
- func nameEndsWith(pattern string) GeneratedCodeNameMatcher {
- return func(name string) bool {
- return strings.HasSuffix(name, pattern)
- }
- }
-
- // GeneratedCodeNameMatchers are all the matchers that check whether the code
- // is generated based only on the file name.
- var GeneratedCodeNameMatchers = []GeneratedCodeNameMatcher{
- // Cocoa pods
- nameMatches(`(^Pods|\/Pods)\/`),
-
- // Carthage build
- nameMatches(`(^|\/)Carthage\/Build\/`),
-
- // NET designer file
- nameMatches(`(?i)\.designer\.(cs|vb)$`),
-
- // Generated NET specflow feature file
- nameEndsWith(".feature.cs"),
-
- // Node modules
- nameContains("node_modules/"),
-
- // Go vendor
- nameMatches(`vendor\/([-0-9A-Za-z]+\.)+(com|edu|gov|in|me|net|org|fm|io)`),
-
- // Go lock
- nameEndsWith("Gopkg.lock"),
- nameEndsWith("glide.lock"),
-
- // Esy lock
- nameMatches(`(^|\/)(\w+\.)?esy.lock$`),
-
- // NPM shrinkwrap
- nameEndsWith("npm-shrinkwrap.json"),
-
- // NPM package lock
- nameEndsWith("package-lock.json"),
-
- // Yarn plugnplay
- nameMatches(`(^|\/)\.pnp\.(c|m)?js$`),
-
- // Godeps
- nameContains("Godeps/"),
-
- // Composer lock
- nameEndsWith("composer.lock"),
-
- // Generated by zephir
- nameMatches(`.\.zep\.(?:c|h|php)$`),
-
- // Cargo lock
- nameEndsWith("Cargo.lock"),
-
- // Pipenv lock
- nameEndsWith("Pipfile.lock"),
-
- // GraphQL relay
- nameContains("__generated__/"),
- }
-
- // GeneratedCodeMatcher checks whether the file with the given data is
- // generated code.
- type GeneratedCodeMatcher func(path, ext string, content []byte) bool
-
- // GeneratedCodeMatchers is the list of all generated code matchers that
- // rely on checking the content of the file to make the guess.
- var GeneratedCodeMatchers = []GeneratedCodeMatcher{
- isMinifiedFile,
- hasSourceMapReference,
- isSourceMap,
- isCompiledCoffeeScript,
- isGeneratedNetDocfile,
- isGeneratedJavaScriptPEGParser,
- isGeneratedPostScript,
- isGeneratedGo,
- isGeneratedProtobuf,
- isGeneratedJavaScriptProtocolBuffer,
- isGeneratedApacheThrift,
- isGeneratedJNIHeader,
- isVCRCassette,
- isCompiledCythonFile,
- isGeneratedModule,
- isGeneratedUnity3DMeta,
- isGeneratedRacc,
- isGeneratedJFlex,
- isGeneratedGrammarKit,
- isGeneratedRoxygen2,
- isGeneratedJison,
- isGeneratedGRPCCpp,
- isGeneratedDart,
- isGeneratedPerlPPPortHeader,
- isGeneratedGameMakerStudio,
- isGeneratedGimp,
- isGeneratedVisualStudio6,
- isGeneratedHaxe,
- isGeneratedHTML,
- isGeneratedJooq,
- }
-
// canBeMinified reports whether ext is one of the extensions checked for
// minification: ".js" or ".css". The comparison is exact and case-sensitive.
func canBeMinified(ext string) bool {
	switch ext {
	case ".js", ".css":
		return true
	default:
		return false
	}
}
-
- // isMinifiedFile returns whether the file may be minified.
- // We consider a minified file any css or js file whose average number of chars
- // per line is more than 110.
- func isMinifiedFile(path, ext string, content []byte) bool {
- if !canBeMinified(ext) {
- return false
- }
-
- var chars, lines uint64
- forEachLine(content, func(line []byte) {
- chars += uint64(len(line))
- lines++
- })
-
- if lines == 0 {
- return false
- }
-
- return chars/lines > 110
- }
-
- var sourceMapRegex = regex.MustCompile(`^\/[*\/][\#@] source(?:Mapping)?URL|sourceURL=`)
-
- // hasSourceMapReference returns whether the file contains a reference to a
- // source-map file.
- func hasSourceMapReference(_ string, ext string, content []byte) bool {
- if !canBeMinified(ext) {
- return false
- }
-
- for _, line := range getLines(content, -2) {
- if sourceMapRegex.Match(line) {
- return true
- }
- }
-
- return false
- }
-
- var sourceMapRegexps = []regex.EnryRegexp{
- regex.MustCompile(`^{"version":\d+,`),
- regex.MustCompile(`^\/\*\* Begin line maps\. \*\*\/{`),
- }
-
- // isSourceMap returns whether the file itself is a source map.
- func isSourceMap(path, _ string, content []byte) bool {
- if strings.HasSuffix(path, ".js.map") || strings.HasSuffix(path, ".css.map") {
- return true
- }
-
- firstLine := getFirstLine(content)
- if len(firstLine) == 0 {
- return false
- }
-
- for _, r := range sourceMapRegexps {
- if r.Match(firstLine) {
- return true
- }
- }
-
- return false
- }
-
- func isCompiledCoffeeScript(path, ext string, content []byte) bool {
- if ext != ".js" {
- return false
- }
-
- firstLine := getFirstLine(content)
- lastLines := getLines(content, -2)
- if len(lastLines) < 2 {
- return false
- }
-
- if string(firstLine) == "(function() {" &&
- string(lastLines[1]) == "}).call(this);" &&
- string(lastLines[0]) == "" {
- score := 0
-
- forEachLine(content, func(line []byte) {
- if bytes.Contains(line, []byte("var ")) {
- // Underscored temp vars are likely to be Coffee
- score += 1 * countAppearancesInLine(line, "_fn", "_i", "_len", "_ref", "_results")
-
- // bind and extend functions are very Coffee specific
- score += 3 * countAppearancesInLine(line, "__bind", "__extends", "__hasProp", "__indexOf", "__slice")
- }
- })
-
- // Require a score of 3. This is fairly abritrary. Consider tweaking later.
- // See: https://github.com/github/linguist/blob/master/lib/linguist/generated.rb#L176-L213
- return score >= 3
- }
-
- return false
- }
-
// isGeneratedNetDocfile reports whether an .xml file looks like a generated
// .NET documentation file. After splitting on '\n', the file needs more than
// three lines, "<doc>" on the second line, "<assembly>" on the third, and
// "</doc>" on the second-to-last.
func isGeneratedNetDocfile(_, ext string, content []byte) bool {
	if ext != ".xml" {
		return false
	}

	rows := bytes.Split(content, []byte("\n"))
	total := len(rows)
	if total < 4 {
		return false
	}

	if !bytes.Contains(rows[1], []byte("<doc>")) {
		return false
	}
	if !bytes.Contains(rows[2], []byte("<assembly>")) {
		return false
	}
	return bytes.Contains(rows[total-2], []byte("</doc>"))
}
-
- var pegJavaScriptGeneratedRegex = regex.MustCompile(`^(?:[^\/]|\/[^\*])*\/\*(?:[^\*]|\*[^\/])*Generated by PEG.js`)
-
- func isGeneratedJavaScriptPEGParser(_, ext string, content []byte) bool {
- if ext != ".js" {
- return false
- }
-
- // PEG.js-generated parsers include a comment near the top of the file
- // that marks them as such.
- return pegJavaScriptGeneratedRegex.Match(bytes.Join(getLines(content, 5), []byte("")))
- }
-
- var postScriptType1And42Regex = regex.MustCompile(`(\n|\r\n|\r)\s*(?:currentfile eexec\s+|\/sfnts\s+\[)`)
-
- var postScriptRegexes = []regex.EnryRegexp{
- regex.MustCompile(`[0-9]|draw|mpage|ImageMagick|inkscape|MATLAB`),
- regex.MustCompile(`PCBNEW|pnmtops|\(Unknown\)|Serif Affinity|Filterimage -tops`),
- }
-
- func isGeneratedPostScript(_, ext string, content []byte) bool {
- if ext != ".ps" && ext != ".eps" && ext != ".pfa" {
- return false
- }
-
- // Type 1 and Type 42 fonts converted to PostScript are stored as hex-encoded byte streams; these
- // streams are always preceded the `eexec` operator (if Type 1), or the `/sfnts` key (if Type 42).
- if postScriptType1And42Regex.Match(content) {
- return true
- }
-
- // We analyze the "%%Creator:" comment, which contains the author/generator
- // of the file. If there is one, it should be in one of the first few lines.
- var creator []byte
- for _, line := range getLines(content, 10) {
- if bytes.HasPrefix(line, []byte("%%Creator: ")) {
- creator = line
- break
- }
- }
-
- if len(creator) == 0 {
- return false
- }
-
- // EAGLE doesn't include a version number when it generates PostScript.
- // However, it does prepend its name to the document's "%%Title" field.
- if bytes.Contains(creator, []byte("EAGLE")) {
- for _, line := range getLines(content, 5) {
- if bytes.HasPrefix(line, []byte("%%Title: EAGLE Drawing ")) {
- return true
- }
- }
- }
-
- // Most generators write their version number, while human authors' or companies'
- // names don't contain numbers. So look if the line contains digits. Also
- // look for some special cases without version numbers.
- for _, r := range postScriptRegexes {
- if r.Match(creator) {
- return true
- }
- }
-
- return false
- }
-
- func isGeneratedGo(_, ext string, content []byte) bool {
- if ext != ".go" {
- return false
- }
-
- lines := getLines(content, 40)
- if len(lines) <= 1 {
- return false
- }
-
- for _, line := range lines {
- if bytes.Contains(line, []byte("Code generated by")) {
- return true
- }
- }
-
- return false
- }
-
- var protoExtensions = map[string]struct{}{
- ".py": {},
- ".java": {},
- ".h": {},
- ".cc": {},
- ".cpp": {},
- ".m": {},
- ".rb": {},
- ".php": {},
- }
-
- func isGeneratedProtobuf(_, ext string, content []byte) bool {
- if _, ok := protoExtensions[ext]; !ok {
- return false
- }
-
- lines := getLines(content, 3)
- if len(lines) <= 1 {
- return false
- }
-
- for _, line := range lines {
- if bytes.Contains(line, []byte("Generated by the protocol buffer compiler. DO NOT EDIT!")) {
- return true
- }
- }
-
- return false
- }
-
- func isGeneratedJavaScriptProtocolBuffer(_, ext string, content []byte) bool {
- if ext != ".js" {
- return false
- }
-
- lines := getLines(content, 6)
- if len(lines) < 6 {
- return false
- }
-
- return bytes.Contains(lines[5], []byte("GENERATED CODE -- DO NOT EDIT!"))
- }
-
- var apacheThriftExtensions = map[string]struct{}{
- ".rb": {},
- ".py": {},
- ".go": {},
- ".js": {},
- ".m": {},
- ".java": {},
- ".h": {},
- ".cc": {},
- ".cpp": {},
- ".php": {},
- }
-
- func isGeneratedApacheThrift(_, ext string, content []byte) bool {
- if _, ok := apacheThriftExtensions[ext]; !ok {
- return false
- }
-
- for _, line := range getLines(content, 6) {
- if bytes.Contains(line, []byte("Autogenerated by Thrift Compiler")) {
- return true
- }
- }
-
- return false
- }
-
- func isGeneratedJNIHeader(_, ext string, content []byte) bool {
- if ext != ".h" {
- return false
- }
-
- lines := getLines(content, 2)
- if len(lines) < 2 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("/* DO NOT EDIT THIS FILE - it is machine generated */")) &&
- bytes.Contains(lines[1], []byte("#include <jni.h>"))
- }
-
- func isVCRCassette(_, ext string, content []byte) bool {
- if ext != ".yml" {
- return false
- }
-
- lines := getLines(content, -2)
- if len(lines) < 2 {
- return false
- }
-
- return bytes.Contains(lines[1], []byte("recorded_with: VCR"))
- }
-
- func isCompiledCythonFile(_, ext string, content []byte) bool {
- if ext != ".c" && ext != ".cpp" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("Generated by Cython"))
- }
-
- func isGeneratedModule(_, ext string, content []byte) bool {
- if ext != ".mod" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("PCBNEW-LibModule-V")) ||
- bytes.Contains(lines[0], []byte("GFORTRAN module version '"))
- }
-
- func isGeneratedUnity3DMeta(_, ext string, content []byte) bool {
- if ext != ".meta" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("fileFormatVersion: "))
- }
-
- func isGeneratedRacc(_, ext string, content []byte) bool {
- if ext != ".rb" {
- return false
- }
-
- lines := getLines(content, 3)
- if len(lines) < 3 {
- return false
- }
-
- return bytes.HasPrefix(lines[2], []byte("# This file is automatically generated by Racc"))
- }
-
- func isGeneratedJFlex(_, ext string, content []byte) bool {
- if ext != ".java" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.HasPrefix(lines[0], []byte("/* The following code was generated by JFlex "))
- }
-
- func isGeneratedGrammarKit(_, ext string, content []byte) bool {
- if ext != ".java" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("// This is a generated file. Not intended for manual editing."))
- }
-
- func isGeneratedRoxygen2(_, ext string, content []byte) bool {
- if ext != ".rd" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("% Generated by roxygen2: do not edit by hand"))
- }
-
- func isGeneratedJison(_, ext string, content []byte) bool {
- if ext != ".js" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("/* parser generated by jison ")) ||
- bytes.Contains(lines[0], []byte("/* generated by jison-lex "))
- }
-
- func isGeneratedGRPCCpp(_, ext string, content []byte) bool {
- switch ext {
- case ".cpp", ".hpp", ".h", ".cc":
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return bytes.Contains(lines[0], []byte("// Generated by the gRPC"))
- default:
- return false
- }
- }
-
- var dartRegex = regex.MustCompile(`generated code\W{2,3}do not modify`)
-
- func isGeneratedDart(_, ext string, content []byte) bool {
- if ext != ".dart" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- return dartRegex.Match(bytes.ToLower(lines[0]))
- }
-
- func isGeneratedPerlPPPortHeader(name, _ string, content []byte) bool {
- if !strings.HasSuffix(name, "ppport.h") {
- return false
- }
-
- lines := getLines(content, 10)
- if len(lines) < 10 {
- return false
- }
-
- return bytes.Contains(lines[8], []byte("Automatically created by Devel::PPPort"))
- }
-
- var (
- gameMakerStudioFirstLineRegex = regex.MustCompile(`^\d\.\d\.\d.+\|\{`)
- gameMakerStudioThirdLineRegex = regex.MustCompile(`\"modelName\"\:\s*\"GM`)
- )
-
- func isGeneratedGameMakerStudio(_, ext string, content []byte) bool {
- if ext != ".yy" && ext != ".yyp" {
- return false
- }
-
- lines := getLines(content, 3)
- if len(lines) < 3 {
- return false
- }
-
- return gameMakerStudioThirdLineRegex.Match(lines[2]) ||
- gameMakerStudioFirstLineRegex.Match(lines[0])
- }
-
- var gimpRegexes = []regex.EnryRegexp{
- regex.MustCompile(`\/\* GIMP [a-zA-Z0-9\- ]+ C\-Source image dump \(.+?\.c\) \*\/`),
- regex.MustCompile(`\/\* GIMP header image file format \([a-zA-Z0-9\- ]+\)\: .+?\.h \*\/`),
- }
-
- func isGeneratedGimp(_, ext string, content []byte) bool {
- if ext != ".c" && ext != ".h" {
- return false
- }
-
- lines := getLines(content, 1)
- if len(lines) < 1 {
- return false
- }
-
- for _, r := range gimpRegexes {
- if r.Match(lines[0]) {
- return true
- }
- }
-
- return false
- }
-
- func isGeneratedVisualStudio6(_, ext string, content []byte) bool {
- if ext != ".dsp" {
- return false
- }
-
- for _, l := range getLines(content, 3) {
- if bytes.Contains(l, []byte("# Microsoft Developer Studio Generated Build File")) {
- return true
- }
- }
-
- return false
- }
-
- var haxeExtensions = map[string]struct{}{
- ".js": {},
- ".py": {},
- ".lua": {},
- ".cpp": {},
- ".h": {},
- ".java": {},
- ".cs": {},
- ".php": {},
- }
-
- func isGeneratedHaxe(_, ext string, content []byte) bool {
- if _, ok := haxeExtensions[ext]; !ok {
- return false
- }
-
- for _, l := range getLines(content, 3) {
- if bytes.Contains(l, []byte("Generated by Haxe")) {
- return true
- }
- }
-
- return false
- }
-
- var (
- doxygenRegex = regex.MustCompile(`<!--\s+Generated by Doxygen\s+[.0-9]+\s*-->`)
- htmlMetaRegex = regex.MustCompile(`<meta(\s+[^>]+)>`)
- htmlMetaContentRegex = regex.MustCompile(`\s+(name|content|value)\s*=\s*("[^"]+"|'[^']+'|[^\s"']+)`)
- orgModeMetaRegex = regex.MustCompile(`org\s+mode`)
- )
-
- func isGeneratedHTML(_, ext string, content []byte) bool {
- if ext != ".html" && ext != ".htm" && ext != ".xhtml" {
- return false
- }
-
- lines := getLines(content, 30)
-
- // Pkgdown
- if len(lines) >= 2 {
- for _, l := range lines[:2] {
- if bytes.Contains(l, []byte("<!-- Generated by pkgdown: do not edit by hand -->")) {
- return true
- }
- }
- }
-
- // Mandoc
- if len(lines) > 2 &&
- bytes.HasPrefix(lines[2], []byte("<!-- This is an automatically generated file.")) {
- return true
- }
-
- // Doxygen
- for _, l := range lines {
- if doxygenRegex.Match(l) {
- return true
- }
- }
-
- // HTML tag: <meta name="generator" content="" />
- part := bytes.ToLower(bytes.Join(lines, []byte{' '}))
- part = bytes.ReplaceAll(part, []byte{'\n'}, []byte{})
- part = bytes.ReplaceAll(part, []byte{'\r'}, []byte{})
- matches := htmlMetaRegex.FindAll(part, -1)
- if len(matches) == 0 {
- return false
- }
-
- for _, m := range matches {
- var name, value, content string
- ms := htmlMetaContentRegex.FindAllStringSubmatch(string(m), -1)
- for _, m := range ms {
- switch m[1] {
- case "name":
- name = m[2]
- case "value":
- value = m[2]
- case "content":
- content = m[2]
- }
- }
-
- var val = value
- if val == "" {
- val = content
- }
-
- name = strings.Trim(name, `"'`)
- val = strings.Trim(val, `"'`)
-
- if name != "generator" || val == "" {
- continue
- }
-
- if strings.Contains(val, "jlatex2html") ||
- strings.Contains(val, "latex2html") ||
- strings.Contains(val, "groff") ||
- strings.Contains(val, "makeinfo") ||
- strings.Contains(val, "texi2html") ||
- strings.Contains(val, "ronn") ||
- orgModeMetaRegex.MatchString(val) {
- return true
- }
- }
-
- return false
- }
-
- func isGeneratedJooq(_, ext string, content []byte) bool {
- if ext != ".java" {
- return false
- }
-
- for _, l := range getLines(content, 2) {
- if bytes.Contains(l, []byte("This file is generated by jOOQ.")) {
- return true
- }
- }
-
- return false
- }
-
- func getFirstLine(content []byte) []byte {
- lines := getLines(content, 1)
- if len(lines) > 0 {
- return lines[0]
- }
- return nil
- }
-
// getLines returns up to the first n lines of content when n is positive, or
// up to the last -n lines in reverse order (last line first) when n is
// negative. With n == 0 it returns nil.
//
// Lines are delimited by '\n', which is not part of the returned slices. The
// returned slices alias content; nothing is copied.
//
// When reading from the end, an empty line that starts at one of the final
// two byte offsets of content is skipped and does not count towards the
// limit. That covers the empty remainder after a trailing newline and a blank
// line right before that trailing newline. A non-empty final line is always
// returned, even when it is a single byte with no trailing newline.
func getLines(content []byte, n int) [][]byte {
	var result [][]byte

	if n < 0 {
		limit := -n
		for pos := len(content); pos > 0 && len(result) < limit; {
			nlpos := bytes.LastIndexByte(content[:pos], '\n')
			start := nlpos + 1

			// Keep the line unless it is an empty piece at the very end of
			// the content. Checking start < pos ensures a one-byte final
			// line is not mistaken for trailing emptiness.
			if start < len(content)-1 || start < pos {
				result = append(result, content[start:pos])
			}

			// Continue before the newline just found; -1 ends the loop.
			pos = nlpos
		}
		return result
	}

	for pos := 0; pos < len(content) && len(result) < n; {
		end := len(content)
		if offset := bytes.IndexByte(content[pos:], '\n'); offset >= 0 {
			end = pos + offset
		}

		result = append(result, content[pos:end])
		pos = end + 1
	}

	return result
}
-
// forEachLine calls cb once for every '\n'-delimited line of content, in
// order. The newline is not included in the slice handed to cb, and each slice
// aliases content. Empty content produces no call, and a trailing newline does
// not produce an extra empty line.
func forEachLine(content []byte, cb func([]byte)) {
	for start := 0; start < len(content); {
		end := len(content)
		if offset := bytes.IndexByte(content[start:], '\n'); offset >= 0 {
			end = start + offset
		}

		cb(content[start:end])
		start = end + 1
	}
}
-
// countAppearancesInLine returns the sum, over all targets, of the number of
// non-overlapping occurrences of each target in line.
func countAppearancesInLine(line []byte, targets ...string) int {
	total := 0
	for i := range targets {
		total += bytes.Count(line, []byte(targets[i]))
	}
	return total
}
|