123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592 |
- // Copyright 2019 The Gitea Authors. All rights reserved.
- // SPDX-License-Identifier: MIT
-
- package references
-
- import (
- "bytes"
- "net/url"
- "regexp"
- "strconv"
- "strings"
- "sync"
-
- "code.gitea.io/gitea/modules/log"
- "code.gitea.io/gitea/modules/markup/mdstripper"
- "code.gitea.io/gitea/modules/setting"
-
- "github.com/yuin/goldmark/util"
- )
-
- var (
- // validNamePattern performs only the most basic validation for user or repository names
- // Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters.
- validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)
-
- // NOTE: All below regex matching do not perform any extra validation.
- // Thus a link is produced even if the linked entity does not exist.
- // While fast, this is also incorrect and lead to false positives.
- // TODO: fix invalid linking issue
-
- // mentionPattern matches all mentions in the form of "@user" or "@org/team"
- mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
- // issueNumericPattern matches string that references to a numeric issue, e.g. #1287
- issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\')([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
- // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234
- issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
- // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository
- // e.g. org/repo#12345
- crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
- // crossReferenceCommitPattern matches a string that references a commit in a different repository
- // e.g. go-gitea/gitea@d8a994ef, go-gitea/gitea@d8a994ef243349f321568f9e36d5c3f444b99cae (7-40 characters)
- crossReferenceCommitPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+)/([0-9a-zA-Z-_\.]+)@([0-9a-f]{7,40})(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
- // spaceTrimmedPattern let's find the trailing space
- spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)
- // timeLogPattern matches string for time tracking
- timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
-
- issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
- issueKeywordsOnce sync.Once
-
- giteaHostInit sync.Once
- giteaHost string
- giteaIssuePullPattern *regexp.Regexp
-
- actionStrings = []string{
- "none",
- "closes",
- "reopens",
- "neutered",
- }
- )
-
- // XRefAction represents the kind of effect a cross reference has once is resolved
- type XRefAction int64
-
- const (
- // XRefActionNone means the cross-reference is simply a comment
- XRefActionNone XRefAction = iota // 0
- // XRefActionCloses means the cross-reference should close an issue if it is resolved
- XRefActionCloses // 1
- // XRefActionReopens means the cross-reference should reopen an issue if it is resolved
- XRefActionReopens // 2
- // XRefActionNeutered means the cross-reference will no longer affect the source
- XRefActionNeutered // 3
- )
-
- func (a XRefAction) String() string {
- return actionStrings[a]
- }
-
- // IssueReference contains an unverified cross-reference to a local issue or pull request
- type IssueReference struct {
- Index int64
- Owner string
- Name string
- Action XRefAction
- TimeLog string
- }
-
- // RenderizableReference contains an unverified cross-reference to with rendering information
- // The IsPull member means that a `!num` reference was used instead of `#num`.
- // This kind of reference is used to make pulls available when an external issue tracker
- // is used. Otherwise, `#` and `!` are completely interchangeable.
- type RenderizableReference struct {
- Issue string
- Owner string
- Name string
- CommitSha string
- IsPull bool
- RefLocation *RefSpan
- Action XRefAction
- ActionLocation *RefSpan
- }
-
- type rawReference struct {
- index int64
- owner string
- name string
- isPull bool
- action XRefAction
- issue string
- refLocation *RefSpan
- actionLocation *RefSpan
- timeLog string
- }
-
- func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
- refarr := make([]IssueReference, len(reflist))
- for i, r := range reflist {
- refarr[i] = IssueReference{
- Index: r.index,
- Owner: r.owner,
- Name: r.name,
- Action: r.action,
- TimeLog: r.timeLog,
- }
- }
- return refarr
- }
-
- // RefSpan is the position where the reference was found within the parsed text
- type RefSpan struct {
- Start int
- End int
- }
-
- func makeKeywordsPat(words []string) *regexp.Regexp {
- acceptedWords := parseKeywords(words)
- if len(acceptedWords) == 0 {
- // Never match
- return nil
- }
- return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
- }
-
- func parseKeywords(words []string) []string {
- acceptedWords := make([]string, 0, 5)
- wordPat := regexp.MustCompile(`^[\pL]+$`)
- for _, word := range words {
- word = strings.ToLower(strings.TrimSpace(word))
- // Accept Unicode letter class runes (a-z, á, à, ä, )
- if wordPat.MatchString(word) {
- acceptedWords = append(acceptedWords, word)
- } else {
- log.Info("Invalid keyword: %s", word)
- }
- }
- return acceptedWords
- }
-
- func newKeywords() {
- issueKeywordsOnce.Do(func() {
- // Delay initialization until after the settings module is initialized
- doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
- })
- }
-
- func doNewKeywords(close, reopen []string) {
- issueCloseKeywordsPat = makeKeywordsPat(close)
- issueReopenKeywordsPat = makeKeywordsPat(reopen)
- }
-
- // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
- func getGiteaHostName() string {
- giteaHostInit.Do(func() {
- if uapp, err := url.Parse(setting.AppURL); err == nil {
- giteaHost = strings.ToLower(uapp.Host)
- giteaIssuePullPattern = regexp.MustCompile(
- `(\s|^|\(|\[)` +
- regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +
- `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
- `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
- } else {
- giteaHost = ""
- giteaIssuePullPattern = nil
- }
- })
- return giteaHost
- }
-
- // getGiteaIssuePullPattern
- func getGiteaIssuePullPattern() *regexp.Regexp {
- getGiteaHostName()
- return giteaIssuePullPattern
- }
-
- // FindAllMentionsMarkdown matches mention patterns in given content and
- // returns a list of found unvalidated user names **not including** the @ prefix.
- func FindAllMentionsMarkdown(content string) []string {
- bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
- locations := FindAllMentionsBytes(bcontent)
- mentions := make([]string, len(locations))
- for i, val := range locations {
- mentions[i] = string(bcontent[val.Start+1 : val.End])
- }
- return mentions
- }
-
- // FindAllMentionsBytes matches mention patterns in given content
- // and returns a list of locations for the unvalidated user names, including the @ prefix.
- func FindAllMentionsBytes(content []byte) []RefSpan {
- // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
- // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
- // from the second reference will be "eaten" by the first one:
- // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`
- ret := make([]RefSpan, 0, 5)
- pos := 0
- for {
- match := mentionPattern.FindSubmatchIndex(content[pos:])
- if match == nil {
- break
- }
- ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
- notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
- if notrail == nil {
- pos = match[3] + pos
- } else {
- pos = match[3] + pos + notrail[1] - notrail[3]
- }
- }
- return ret
- }
-
- // FindFirstMentionBytes matches the first mention in then given content
- // and returns the location of the unvalidated user name, including the @ prefix.
- func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
- mention := mentionPattern.FindSubmatchIndex(content)
- if mention == nil {
- return false, RefSpan{}
- }
- return true, RefSpan{Start: mention[2], End: mention[3]}
- }
-
- // FindAllIssueReferencesMarkdown strips content from markdown markup
- // and returns a list of unvalidated references found in it.
- func FindAllIssueReferencesMarkdown(content string) []IssueReference {
- return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
- }
-
- func findAllIssueReferencesMarkdown(content string) []*rawReference {
- bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
- return findAllIssueReferencesBytes(bcontent, links)
- }
-
- func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
- // We will iterate through the content, rewrite and simplify full references.
- //
- // We want to transform something like:
- //
- // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
- // https://ourgitea.com/git/owner/repo/pulls/123456789
- //
- // Into something like:
- //
- // this is a #123456789, foo
- // !123456789
-
- pos := 0
- for {
- // re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
- match := re.FindSubmatchIndex((*contentBytes)[pos:])
- if match == nil {
- break
- }
- // match is a bunch of indices into the content from pos onwards so
- // to simplify things let's just add pos to all of the indices in match
- for i := range match {
- match[i] += pos
- }
-
- // match[0]-match[1] is whole string
- // match[2]-match[3] is preamble
-
- // move the position to the end of the preamble
- pos = match[3]
-
- // match[4]-match[5] is owner/repo
- // now copy the owner/repo to end of the preamble
- endPos := pos + match[5] - match[4]
- copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])
-
- // move the current position to the end of the newly copied owner/repo
- pos = endPos
-
- // Now set the issue/pull marker:
- //
- // match[6]-match[7] == 'issues'
- (*contentBytes)[pos] = '#'
- if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
- (*contentBytes)[pos] = '!'
- }
- pos++
-
- // Then add the issue/pull number
- //
- // match[8]-match[9] is the number
- endPos = pos + match[9] - match[8]
- copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])
-
- // Now copy what's left at the end of the string to the new end position
- copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
- // now we reset the length
-
- // our new section has length endPos - match[3]
- // our old section has length match[9] - match[3]
- *contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
- pos = endPos
- }
- }
-
- // FindAllIssueReferences returns a list of unvalidated references found in a string.
- func FindAllIssueReferences(content string) []IssueReference {
- // Need to convert fully qualified html references to local system to #/! short codes
- contentBytes := []byte(content)
- if re := getGiteaIssuePullPattern(); re != nil {
- convertFullHTMLReferencesToShortRefs(re, &contentBytes)
- } else {
- log.Debug("No GiteaIssuePullPattern pattern")
- }
- return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
- }
-
- // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
- func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) {
- match := issueNumericPattern.FindStringSubmatchIndex(content)
- if match == nil {
- if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
- return false, nil
- }
- }
- r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly)
- if r == nil {
- return false, nil
- }
-
- return true, &RenderizableReference{
- Issue: r.issue,
- Owner: r.owner,
- Name: r.name,
- IsPull: r.isPull,
- RefLocation: r.refLocation,
- Action: r.action,
- ActionLocation: r.actionLocation,
- }
- }
-
- // FindRenderizableCommitCrossReference returns the first unvalidated commit cross reference found in a string.
- func FindRenderizableCommitCrossReference(content string) (bool, *RenderizableReference) {
- m := crossReferenceCommitPattern.FindStringSubmatchIndex(content)
- if len(m) < 8 {
- return false, nil
- }
-
- return true, &RenderizableReference{
- Owner: content[m[2]:m[3]],
- Name: content[m[4]:m[5]],
- CommitSha: content[m[6]:m[7]],
- RefLocation: &RefSpan{Start: m[2], End: m[7]},
- }
- }
-
- // FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string.
- func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {
- match := pattern.FindStringSubmatchIndex(content)
- if len(match) < 4 {
- return false, nil
- }
-
- action, location := findActionKeywords([]byte(content), match[2])
-
- return true, &RenderizableReference{
- Issue: content[match[2]:match[3]],
- RefLocation: &RefSpan{Start: match[0], End: match[1]},
- Action: action,
- ActionLocation: location,
- IsPull: false,
- }
- }
-
- // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
- func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
- match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
- if match == nil {
- return false, nil
- }
-
- action, location := findActionKeywords([]byte(content), match[2])
-
- return true, &RenderizableReference{
- Issue: content[match[2]:match[3]],
- RefLocation: &RefSpan{Start: match[2], End: match[3]},
- Action: action,
- ActionLocation: location,
- IsPull: false,
- }
- }
-
- // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
- func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
- ret := make([]*rawReference, 0, 10)
- pos := 0
-
- // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
- // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
- // from the second reference will be "eaten" by the first one:
- // ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`
- for {
- match := issueNumericPattern.FindSubmatchIndex(content[pos:])
- if match == nil {
- break
- }
- if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
- ret = append(ret, ref)
- }
- notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
- if notrail == nil {
- pos = match[3] + pos
- } else {
- pos = match[3] + pos + notrail[1] - notrail[3]
- }
- }
-
- pos = 0
-
- for {
- match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
- if match == nil {
- break
- }
- if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
- ret = append(ret, ref)
- }
- notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
- if notrail == nil {
- pos = match[3] + pos
- } else {
- pos = match[3] + pos + notrail[1] - notrail[3]
- }
- }
-
- localhost := getGiteaHostName()
- for _, link := range links {
- if u, err := url.Parse(link); err == nil {
- // Note: we're not attempting to match the URL scheme (http/https)
- host := strings.ToLower(u.Host)
- if host != "" && host != localhost {
- continue
- }
- parts := strings.Split(u.EscapedPath(), "/")
- // /user/repo/issues/3
- if len(parts) != 5 || parts[0] != "" {
- continue
- }
- var sep string
- if parts[3] == "issues" {
- sep = "#"
- } else if parts[3] == "pulls" {
- sep = "!"
- } else {
- continue
- }
- // Note: closing/reopening keywords not supported with URLs
- bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
- if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
- ref.refLocation = nil
- ret = append(ret, ref)
- }
- }
- }
-
- if len(ret) == 0 {
- return ret
- }
-
- pos = 0
-
- for {
- match := timeLogPattern.FindSubmatchIndex(content[pos:])
- if match == nil {
- break
- }
-
- timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])
-
- var f *rawReference
- for _, ref := range ret {
- if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {
- f = ref
- }
- }
-
- pos = match[1] + pos
-
- if f == nil {
- f = ret[0]
- }
-
- if len(f.timeLog) == 0 {
- f.timeLog = timeLogEntry
- }
- }
-
- return ret
- }
-
- func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {
- sep := bytes.IndexAny(content[start:end], "#!")
- if sep < 0 {
- return nil
- }
- isPull := content[start+sep] == '!'
- if prOnly && !isPull {
- return nil
- }
- repo := string(content[start : start+sep])
- issue := string(content[start+sep+1 : end])
- index, err := strconv.ParseInt(issue, 10, 64)
- if err != nil {
- return nil
- }
- if repo == "" {
- if fromLink {
- // Markdown links must specify owner/repo
- return nil
- }
- action, location := findActionKeywords(content, start)
- return &rawReference{
- index: index,
- action: action,
- issue: issue,
- isPull: isPull,
- refLocation: &RefSpan{Start: start, End: end},
- actionLocation: location,
- }
- }
- parts := strings.Split(strings.ToLower(repo), "/")
- if len(parts) != 2 {
- return nil
- }
- owner, name := parts[0], parts[1]
- if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
- return nil
- }
- action, location := findActionKeywords(content, start)
- return &rawReference{
- index: index,
- owner: owner,
- name: name,
- action: action,
- issue: issue,
- isPull: isPull,
- refLocation: &RefSpan{Start: start, End: end},
- actionLocation: location,
- }
- }
-
- func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
- newKeywords()
- var m []int
- if issueCloseKeywordsPat != nil {
- m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
- if m != nil {
- return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
- }
- }
- if issueReopenKeywordsPat != nil {
- m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
- if m != nil {
- return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
- }
- }
- return XRefActionNone, nil
- }
-
- // IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
- func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {
- if extTracker {
- // External issues cannot be automatically closed
- return false
- }
- return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
- }
|