You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

references.go 12KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package references
  5. import (
  6. "net/url"
  7. "regexp"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "code.gitea.io/gitea/modules/log"
  12. "code.gitea.io/gitea/modules/markup/mdstripper"
  13. "code.gitea.io/gitea/modules/setting"
  14. )
  var (
  	// validNamePattern performs only the most basic validation for user or repository names.
  	// It accepts lower-case alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters only,
  	// so candidates are expected to be lower-cased before they are matched against it.
  	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

  	// NOTE: None of the patterns below perform any extra validation.
  	// Thus a link is produced even if the linked entity does not exist.
  	// While fast, this is also incorrect and leads to false positives.
  	// TODO: fix invalid linking issue

  	// mentionPattern matches all mentions in the form of "@user"
  	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
  	// issueNumericPattern matches a string that references a numeric issue, e.g. #1287
  	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([#!][0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`)
  	// issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
  	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
  	// crossReferenceIssueNumericPattern matches a string that references a numeric issue in a
  	// different repository, e.g. gogits/gogs#12345
  	// A trailing ':' is accepted as a terminator, exactly as for the two patterns above, so that
  	// "owner/repo#12: some text" is recognised the same way "#12: some text" is.
  	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`)

  	// Keyword patterns are built lazily (guarded by issueKeywordsOnce); either may stay nil,
  	// which means "never match".
  	issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
  	issueKeywordsOnce                             sync.Once

  	// giteaHost caches the local host name; it is filled in once, guarded by giteaHostInit.
  	giteaHostInit sync.Once
  	giteaHost     string
  )
  // XRefAction describes the effect a cross reference has once it is resolved.
  type XRefAction int64

  // The possible XRefAction values, numbered consecutively from zero.
  const (
  	// XRefActionNone (0): the cross-reference is simply a comment.
  	XRefActionNone XRefAction = iota
  	// XRefActionCloses (1): the cross-reference should close an issue if it is resolved.
  	XRefActionCloses
  	// XRefActionReopens (2): the cross-reference should reopen an issue if it is resolved.
  	XRefActionReopens
  	// XRefActionNeutered (3): the cross-reference will no longer affect the source.
  	XRefActionNeutered
  )
  49. // IssueReference contains an unverified cross-reference to a local issue or pull request
  50. type IssueReference struct {
  51. Index int64
  52. Owner string
  53. Name string
  54. Action XRefAction
  55. }
  56. // RenderizableReference contains an unverified cross-reference to with rendering information
  57. // The IsPull member means that a `!num` reference was used instead of `#num`.
  58. // This kind of reference is used to make pulls available when an external issue tracker
  59. // is used. Otherwise, `#` and `!` are completely interchangeable.
  60. type RenderizableReference struct {
  61. Issue string
  62. Owner string
  63. Name string
  64. IsPull bool
  65. RefLocation *RefSpan
  66. Action XRefAction
  67. ActionLocation *RefSpan
  68. }
  69. type rawReference struct {
  70. index int64
  71. owner string
  72. name string
  73. isPull bool
  74. action XRefAction
  75. issue string
  76. refLocation *RefSpan
  77. actionLocation *RefSpan
  78. }
  79. func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
  80. refarr := make([]IssueReference, len(reflist))
  81. for i, r := range reflist {
  82. refarr[i] = IssueReference{
  83. Index: r.index,
  84. Owner: r.owner,
  85. Name: r.name,
  86. Action: r.action,
  87. }
  88. }
  89. return refarr
  90. }
  // RefSpan marks where a reference was found within the parsed text:
  // Start is the offset of its first element and End the offset just past its last.
  type RefSpan struct {
  	Start, End int
  }
  96. func makeKeywordsPat(words []string) *regexp.Regexp {
  97. acceptedWords := parseKeywords(words)
  98. if len(acceptedWords) == 0 {
  99. // Never match
  100. return nil
  101. }
  102. return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
  103. }
  104. func parseKeywords(words []string) []string {
  105. acceptedWords := make([]string, 0, 5)
  106. wordPat := regexp.MustCompile(`^[\pL]+$`)
  107. for _, word := range words {
  108. word = strings.ToLower(strings.TrimSpace(word))
  109. // Accept Unicode letter class runes (a-z, á, à, ä, )
  110. if wordPat.MatchString(word) {
  111. acceptedWords = append(acceptedWords, word)
  112. } else {
  113. log.Info("Invalid keyword: %s", word)
  114. }
  115. }
  116. return acceptedWords
  117. }
  118. func newKeywords() {
  119. issueKeywordsOnce.Do(func() {
  120. // Delay initialization until after the settings module is initialized
  121. doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
  122. })
  123. }
  124. func doNewKeywords(close []string, reopen []string) {
  125. issueCloseKeywordsPat = makeKeywordsPat(close)
  126. issueReopenKeywordsPat = makeKeywordsPat(reopen)
  127. }
  128. // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
  129. func getGiteaHostName() string {
  130. giteaHostInit.Do(func() {
  131. if uapp, err := url.Parse(setting.AppURL); err == nil {
  132. giteaHost = strings.ToLower(uapp.Host)
  133. } else {
  134. giteaHost = ""
  135. }
  136. })
  137. return giteaHost
  138. }
  139. // FindAllMentionsMarkdown matches mention patterns in given content and
  140. // returns a list of found unvalidated user names **not including** the @ prefix.
  141. func FindAllMentionsMarkdown(content string) []string {
  142. bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
  143. locations := FindAllMentionsBytes(bcontent)
  144. mentions := make([]string, len(locations))
  145. for i, val := range locations {
  146. mentions[i] = string(bcontent[val.Start+1 : val.End])
  147. }
  148. return mentions
  149. }
  150. // FindAllMentionsBytes matches mention patterns in given content
  151. // and returns a list of locations for the unvalidated user names, including the @ prefix.
  152. func FindAllMentionsBytes(content []byte) []RefSpan {
  153. mentions := mentionPattern.FindAllSubmatchIndex(content, -1)
  154. ret := make([]RefSpan, len(mentions))
  155. for i, val := range mentions {
  156. ret[i] = RefSpan{Start: val[2], End: val[3]}
  157. }
  158. return ret
  159. }
  160. // FindFirstMentionBytes matches the first mention in then given content
  161. // and returns the location of the unvalidated user name, including the @ prefix.
  162. func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
  163. mention := mentionPattern.FindSubmatchIndex(content)
  164. if mention == nil {
  165. return false, RefSpan{}
  166. }
  167. return true, RefSpan{Start: mention[2], End: mention[3]}
  168. }
  169. // FindAllIssueReferencesMarkdown strips content from markdown markup
  170. // and returns a list of unvalidated references found in it.
  171. func FindAllIssueReferencesMarkdown(content string) []IssueReference {
  172. return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
  173. }
  174. func findAllIssueReferencesMarkdown(content string) []*rawReference {
  175. bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
  176. return findAllIssueReferencesBytes(bcontent, links)
  177. }
  178. // FindAllIssueReferences returns a list of unvalidated references found in a string.
  179. func FindAllIssueReferences(content string) []IssueReference {
  180. return rawToIssueReferenceList(findAllIssueReferencesBytes([]byte(content), []string{}))
  181. }
  182. // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
  183. func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) {
  184. match := issueNumericPattern.FindStringSubmatchIndex(content)
  185. if match == nil {
  186. if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
  187. return false, nil
  188. }
  189. }
  190. r := getCrossReference([]byte(content), match[2], match[3], false, prOnly)
  191. if r == nil {
  192. return false, nil
  193. }
  194. return true, &RenderizableReference{
  195. Issue: r.issue,
  196. Owner: r.owner,
  197. Name: r.name,
  198. IsPull: r.isPull,
  199. RefLocation: r.refLocation,
  200. Action: r.action,
  201. ActionLocation: r.actionLocation,
  202. }
  203. }
  204. // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
  205. func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
  206. match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
  207. if match == nil {
  208. return false, nil
  209. }
  210. action, location := findActionKeywords([]byte(content), match[2])
  211. return true, &RenderizableReference{
  212. Issue: string(content[match[2]:match[3]]),
  213. RefLocation: &RefSpan{Start: match[2], End: match[3]},
  214. Action: action,
  215. ActionLocation: location,
  216. IsPull: false,
  217. }
  218. }
  219. // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
  220. func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
  221. ret := make([]*rawReference, 0, 10)
  222. matches := issueNumericPattern.FindAllSubmatchIndex(content, -1)
  223. for _, match := range matches {
  224. if ref := getCrossReference(content, match[2], match[3], false, false); ref != nil {
  225. ret = append(ret, ref)
  226. }
  227. }
  228. matches = crossReferenceIssueNumericPattern.FindAllSubmatchIndex(content, -1)
  229. for _, match := range matches {
  230. if ref := getCrossReference(content, match[2], match[3], false, false); ref != nil {
  231. ret = append(ret, ref)
  232. }
  233. }
  234. localhost := getGiteaHostName()
  235. for _, link := range links {
  236. if u, err := url.Parse(link); err == nil {
  237. // Note: we're not attempting to match the URL scheme (http/https)
  238. host := strings.ToLower(u.Host)
  239. if host != "" && host != localhost {
  240. continue
  241. }
  242. parts := strings.Split(u.EscapedPath(), "/")
  243. // /user/repo/issues/3
  244. if len(parts) != 5 || parts[0] != "" {
  245. continue
  246. }
  247. var sep string
  248. if parts[3] == "issues" {
  249. sep = "#"
  250. } else if parts[3] == "pulls" {
  251. sep = "!"
  252. } else {
  253. continue
  254. }
  255. // Note: closing/reopening keywords not supported with URLs
  256. bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
  257. if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
  258. ref.refLocation = nil
  259. ret = append(ret, ref)
  260. }
  261. }
  262. }
  263. return ret
  264. }
  265. func getCrossReference(content []byte, start, end int, fromLink bool, prOnly bool) *rawReference {
  266. refid := string(content[start:end])
  267. sep := strings.IndexAny(refid, "#!")
  268. if sep < 0 {
  269. return nil
  270. }
  271. isPull := refid[sep] == '!'
  272. if prOnly && !isPull {
  273. return nil
  274. }
  275. repo := refid[:sep]
  276. issue := refid[sep+1:]
  277. index, err := strconv.ParseInt(issue, 10, 64)
  278. if err != nil {
  279. return nil
  280. }
  281. if repo == "" {
  282. if fromLink {
  283. // Markdown links must specify owner/repo
  284. return nil
  285. }
  286. action, location := findActionKeywords(content, start)
  287. return &rawReference{
  288. index: index,
  289. action: action,
  290. issue: issue,
  291. isPull: isPull,
  292. refLocation: &RefSpan{Start: start, End: end},
  293. actionLocation: location,
  294. }
  295. }
  296. parts := strings.Split(strings.ToLower(repo), "/")
  297. if len(parts) != 2 {
  298. return nil
  299. }
  300. owner, name := parts[0], parts[1]
  301. if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
  302. return nil
  303. }
  304. action, location := findActionKeywords(content, start)
  305. return &rawReference{
  306. index: index,
  307. owner: owner,
  308. name: name,
  309. action: action,
  310. issue: issue,
  311. isPull: isPull,
  312. refLocation: &RefSpan{Start: start, End: end},
  313. actionLocation: location,
  314. }
  315. }
  316. func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
  317. newKeywords()
  318. var m []int
  319. if issueCloseKeywordsPat != nil {
  320. m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
  321. if m != nil {
  322. return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
  323. }
  324. }
  325. if issueReopenKeywordsPat != nil {
  326. m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
  327. if m != nil {
  328. return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
  329. }
  330. }
  331. return XRefActionNone, nil
  332. }
  333. // IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
  334. func IsXrefActionable(ref *RenderizableReference, extTracker bool, alphaNum bool) bool {
  335. if extTracker {
  336. // External issues cannot be automatically closed
  337. return false
  338. }
  339. return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
  340. }