You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

references.go 13KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package references
  5. import (
  6. "net/url"
  7. "regexp"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "code.gitea.io/gitea/modules/log"
  12. "code.gitea.io/gitea/modules/markup/mdstripper"
  13. "code.gitea.io/gitea/modules/setting"
  14. )
  var (
  	// validNamePattern is a deliberately minimal check for user or repository
  	// names: lowercase alphanumerics, dash ('-'), underscore ('_') and dot ('.').
  	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

  	// NOTE: None of the patterns below verify that the referenced entity
  	// exists, so a link is produced even when its target is missing.
  	// This is fast, but it is incorrect and leads to false positives.
  	// TODO: fix invalid linking issue

  	// mentionPattern matches user mentions written as "@user".
  	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)

	// issueNumericPattern matches a reference to a numeric issue, e.g. #1287
  	// (or !1287 for a pull request).
  	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

  	// issueAlphanumericPattern matches a reference to an alphanumeric issue,
  	// e.g. ABC-1234.
  	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)

  	// crossReferenceIssueNumericPattern matches a reference to a numeric issue
  	// that lives in a different repository, e.g. gogits/gogs#12345.
  	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

  	// spaceTrimmedPattern lets us find a trailing space. It declares no
  	// capture groups.
  	spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)

  	// Close/reopen keyword patterns, populated once by newKeywords.
  	// A nil pattern means no keyword is configured for that action.
  	issueCloseKeywordsPat  *regexp.Regexp
  	issueReopenKeywordsPat *regexp.Regexp
  	issueKeywordsOnce      sync.Once

  	// Local host name, populated once by getGiteaHostName.
  	giteaHostInit sync.Once
  	giteaHost     string
  )
  // XRefAction is the kind of effect a cross-reference has once it is resolved.
  type XRefAction int64

  // Possible XRefAction values, numbered consecutively from zero.
  const (
  	// XRefActionNone: the cross-reference is simply a comment.
  	XRefActionNone XRefAction = iota // 0
  	// XRefActionCloses: the cross-reference should close an issue if it is resolved.
  	XRefActionCloses // 1
  	// XRefActionReopens: the cross-reference should reopen an issue if it is resolved.
  	XRefActionReopens // 2
  	// XRefActionNeutered: the cross-reference will no longer affect the source.
  	XRefActionNeutered // 3
  )
  51. // IssueReference contains an unverified cross-reference to a local issue or pull request
  52. type IssueReference struct {
  53. Index int64
  54. Owner string
  55. Name string
  56. Action XRefAction
  57. }
  58. // RenderizableReference contains an unverified cross-reference to with rendering information
  59. // The IsPull member means that a `!num` reference was used instead of `#num`.
  60. // This kind of reference is used to make pulls available when an external issue tracker
  61. // is used. Otherwise, `#` and `!` are completely interchangeable.
  62. type RenderizableReference struct {
  63. Issue string
  64. Owner string
  65. Name string
  66. IsPull bool
  67. RefLocation *RefSpan
  68. Action XRefAction
  69. ActionLocation *RefSpan
  70. }
  71. type rawReference struct {
  72. index int64
  73. owner string
  74. name string
  75. isPull bool
  76. action XRefAction
  77. issue string
  78. refLocation *RefSpan
  79. actionLocation *RefSpan
  80. }
  81. func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
  82. refarr := make([]IssueReference, len(reflist))
  83. for i, r := range reflist {
  84. refarr[i] = IssueReference{
  85. Index: r.index,
  86. Owner: r.owner,
  87. Name: r.name,
  88. Action: r.action,
  89. }
  90. }
  91. return refarr
  92. }
  // RefSpan is the position, within the parsed text, where a reference was found.
  type RefSpan struct {
  	Start, End int
  }
  98. func makeKeywordsPat(words []string) *regexp.Regexp {
  99. acceptedWords := parseKeywords(words)
  100. if len(acceptedWords) == 0 {
  101. // Never match
  102. return nil
  103. }
  104. return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
  105. }
  106. func parseKeywords(words []string) []string {
  107. acceptedWords := make([]string, 0, 5)
  108. wordPat := regexp.MustCompile(`^[\pL]+$`)
  109. for _, word := range words {
  110. word = strings.ToLower(strings.TrimSpace(word))
  111. // Accept Unicode letter class runes (a-z, á, à, ä, )
  112. if wordPat.MatchString(word) {
  113. acceptedWords = append(acceptedWords, word)
  114. } else {
  115. log.Info("Invalid keyword: %s", word)
  116. }
  117. }
  118. return acceptedWords
  119. }
  120. func newKeywords() {
  121. issueKeywordsOnce.Do(func() {
  122. // Delay initialization until after the settings module is initialized
  123. doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
  124. })
  125. }
  126. func doNewKeywords(close []string, reopen []string) {
  127. issueCloseKeywordsPat = makeKeywordsPat(close)
  128. issueReopenKeywordsPat = makeKeywordsPat(reopen)
  129. }
  130. // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
  131. func getGiteaHostName() string {
  132. giteaHostInit.Do(func() {
  133. if uapp, err := url.Parse(setting.AppURL); err == nil {
  134. giteaHost = strings.ToLower(uapp.Host)
  135. } else {
  136. giteaHost = ""
  137. }
  138. })
  139. return giteaHost
  140. }
  141. // FindAllMentionsMarkdown matches mention patterns in given content and
  142. // returns a list of found unvalidated user names **not including** the @ prefix.
  143. func FindAllMentionsMarkdown(content string) []string {
  144. bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
  145. locations := FindAllMentionsBytes(bcontent)
  146. mentions := make([]string, len(locations))
  147. for i, val := range locations {
  148. mentions[i] = string(bcontent[val.Start+1 : val.End])
  149. }
  150. return mentions
  151. }
  152. // FindAllMentionsBytes matches mention patterns in given content
  153. // and returns a list of locations for the unvalidated user names, including the @ prefix.
  154. func FindAllMentionsBytes(content []byte) []RefSpan {
  155. // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
  156. // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
  157. // from the second reference will be "eaten" by the first one:
  158. // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`
  159. ret := make([]RefSpan, 0, 5)
  160. pos := 0
  161. for {
  162. match := mentionPattern.FindSubmatchIndex(content[pos:])
  163. if match == nil {
  164. break
  165. }
  166. ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
  167. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  168. if notrail == nil {
  169. pos = match[3] + pos
  170. } else {
  171. pos = match[3] + pos + notrail[1] - notrail[3]
  172. }
  173. }
  174. return ret
  175. }
  176. // FindFirstMentionBytes matches the first mention in then given content
  177. // and returns the location of the unvalidated user name, including the @ prefix.
  178. func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
  179. mention := mentionPattern.FindSubmatchIndex(content)
  180. if mention == nil {
  181. return false, RefSpan{}
  182. }
  183. return true, RefSpan{Start: mention[2], End: mention[3]}
  184. }
  185. // FindAllIssueReferencesMarkdown strips content from markdown markup
  186. // and returns a list of unvalidated references found in it.
  187. func FindAllIssueReferencesMarkdown(content string) []IssueReference {
  188. return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
  189. }
  190. func findAllIssueReferencesMarkdown(content string) []*rawReference {
  191. bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
  192. return findAllIssueReferencesBytes(bcontent, links)
  193. }
  194. // FindAllIssueReferences returns a list of unvalidated references found in a string.
  195. func FindAllIssueReferences(content string) []IssueReference {
  196. return rawToIssueReferenceList(findAllIssueReferencesBytes([]byte(content), []string{}))
  197. }
  198. // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
  199. func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) {
  200. match := issueNumericPattern.FindStringSubmatchIndex(content)
  201. if match == nil {
  202. if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
  203. return false, nil
  204. }
  205. }
  206. r := getCrossReference([]byte(content), match[2], match[3], false, prOnly)
  207. if r == nil {
  208. return false, nil
  209. }
  210. return true, &RenderizableReference{
  211. Issue: r.issue,
  212. Owner: r.owner,
  213. Name: r.name,
  214. IsPull: r.isPull,
  215. RefLocation: r.refLocation,
  216. Action: r.action,
  217. ActionLocation: r.actionLocation,
  218. }
  219. }
  220. // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
  221. func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
  222. match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
  223. if match == nil {
  224. return false, nil
  225. }
  226. action, location := findActionKeywords([]byte(content), match[2])
  227. return true, &RenderizableReference{
  228. Issue: string(content[match[2]:match[3]]),
  229. RefLocation: &RefSpan{Start: match[2], End: match[3]},
  230. Action: action,
  231. ActionLocation: location,
  232. IsPull: false,
  233. }
  234. }
  235. // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
  236. func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
  237. ret := make([]*rawReference, 0, 10)
  238. pos := 0
  239. // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
  240. // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
  241. // from the second reference will be "eaten" by the first one:
  242. // ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`
  243. for {
  244. match := issueNumericPattern.FindSubmatchIndex(content[pos:])
  245. if match == nil {
  246. break
  247. }
  248. if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
  249. ret = append(ret, ref)
  250. }
  251. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  252. if notrail == nil {
  253. pos = match[3] + pos
  254. } else {
  255. pos = match[3] + pos + notrail[1] - notrail[3]
  256. }
  257. }
  258. pos = 0
  259. for {
  260. match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
  261. if match == nil {
  262. break
  263. }
  264. if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
  265. ret = append(ret, ref)
  266. }
  267. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  268. if notrail == nil {
  269. pos = match[3] + pos
  270. } else {
  271. pos = match[3] + pos + notrail[1] - notrail[3]
  272. }
  273. }
  274. localhost := getGiteaHostName()
  275. for _, link := range links {
  276. if u, err := url.Parse(link); err == nil {
  277. // Note: we're not attempting to match the URL scheme (http/https)
  278. host := strings.ToLower(u.Host)
  279. if host != "" && host != localhost {
  280. continue
  281. }
  282. parts := strings.Split(u.EscapedPath(), "/")
  283. // /user/repo/issues/3
  284. if len(parts) != 5 || parts[0] != "" {
  285. continue
  286. }
  287. var sep string
  288. if parts[3] == "issues" {
  289. sep = "#"
  290. } else if parts[3] == "pulls" {
  291. sep = "!"
  292. } else {
  293. continue
  294. }
  295. // Note: closing/reopening keywords not supported with URLs
  296. bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
  297. if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
  298. ref.refLocation = nil
  299. ret = append(ret, ref)
  300. }
  301. }
  302. }
  303. return ret
  304. }
  305. func getCrossReference(content []byte, start, end int, fromLink bool, prOnly bool) *rawReference {
  306. refid := string(content[start:end])
  307. sep := strings.IndexAny(refid, "#!")
  308. if sep < 0 {
  309. return nil
  310. }
  311. isPull := refid[sep] == '!'
  312. if prOnly && !isPull {
  313. return nil
  314. }
  315. repo := refid[:sep]
  316. issue := refid[sep+1:]
  317. index, err := strconv.ParseInt(issue, 10, 64)
  318. if err != nil {
  319. return nil
  320. }
  321. if repo == "" {
  322. if fromLink {
  323. // Markdown links must specify owner/repo
  324. return nil
  325. }
  326. action, location := findActionKeywords(content, start)
  327. return &rawReference{
  328. index: index,
  329. action: action,
  330. issue: issue,
  331. isPull: isPull,
  332. refLocation: &RefSpan{Start: start, End: end},
  333. actionLocation: location,
  334. }
  335. }
  336. parts := strings.Split(strings.ToLower(repo), "/")
  337. if len(parts) != 2 {
  338. return nil
  339. }
  340. owner, name := parts[0], parts[1]
  341. if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
  342. return nil
  343. }
  344. action, location := findActionKeywords(content, start)
  345. return &rawReference{
  346. index: index,
  347. owner: owner,
  348. name: name,
  349. action: action,
  350. issue: issue,
  351. isPull: isPull,
  352. refLocation: &RefSpan{Start: start, End: end},
  353. actionLocation: location,
  354. }
  355. }
  356. func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
  357. newKeywords()
  358. var m []int
  359. if issueCloseKeywordsPat != nil {
  360. m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
  361. if m != nil {
  362. return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
  363. }
  364. }
  365. if issueReopenKeywordsPat != nil {
  366. m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
  367. if m != nil {
  368. return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
  369. }
  370. }
  371. return XRefActionNone, nil
  372. }
  373. // IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
  374. func IsXrefActionable(ref *RenderizableReference, extTracker bool, alphaNum bool) bool {
  375. if extTracker {
  376. // External issues cannot be automatically closed
  377. return false
  378. }
  379. return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
  380. }