You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

references.go 11KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package references
  5. import (
  6. "net/url"
  7. "regexp"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "code.gitea.io/gitea/modules/log"
  12. "code.gitea.io/gitea/modules/markup/mdstripper"
  13. "code.gitea.io/gitea/modules/setting"
  14. )
var (
	// validNamePattern performs only the most basic validation for user or repository names.
	// A name may contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters.
	// The pattern is lowercase-only: getCrossReference lowercases names before matching them.
	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

	// NOTE: None of the patterns below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// Every pattern also consumes one delimiter on each side of its first
	// capture group; only the capture group is the reference itself.
	// TODO: fix invalid linking issue

	// mentionPattern matches all mentions in the form of "@user"
	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
	// issueNumericPattern matches strings that reference a numeric issue, e.g. #1287
	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`)
	// issueAlphanumericPattern matches strings that reference an alphanumeric issue, e.g. ABC-1234
	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
	// crossReferenceIssueNumericPattern matches strings that reference a numeric issue in a different repository
	// e.g. gogits/gogs#12345
	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)

	// issueCloseKeywordsPat and issueReopenKeywordsPat match a closing/reopening
	// keyword located right before a reference. They are assigned by doNewKeywords
	// and are nil when no valid keyword is configured.
	issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
	// issueKeywordsOnce ensures newKeywords builds the keyword patterns only once.
	issueKeywordsOnce sync.Once

	// giteaHostInit ensures getGiteaHostName computes giteaHost only once.
	giteaHostInit sync.Once
	// giteaHost caches the lowercased host taken from setting.AppURL.
	giteaHost string
)
// XRefAction represents the kind of effect a cross reference has once it is resolved
type XRefAction int64
// Possible values for XRefAction.
const (
	// XRefActionNone means the cross-reference is simply a comment
	XRefActionNone XRefAction = iota // 0
	// XRefActionCloses means the cross-reference should close an issue if it is resolved
	XRefActionCloses // 1
	// XRefActionReopens means the cross-reference should reopen an issue if it is resolved
	XRefActionReopens // 2
	// XRefActionNeutered means the cross-reference will no longer affect the source
	XRefActionNeutered // 3
)
// IssueReference contains an unverified cross-reference to a local issue or pull request
type IssueReference struct {
	// Index is the issue number parsed from the reference.
	Index int64
	// Owner is the lowercased repository owner; empty when the reference names no repository.
	Owner string
	// Name is the lowercased repository name; empty when the reference names no repository.
	Name string
	// Action is the effect requested by a keyword found right before the reference.
	Action XRefAction
}
// RenderizableReference contains an unverified cross-reference together with rendering information
type RenderizableReference struct {
	// Issue is the issue identifier as text: the digits after '#' for numeric
	// references, or the whole match (e.g. ABC-1234) for alphanumeric ones.
	Issue string
	// Owner is the lowercased repository owner; empty when the reference names no repository.
	Owner string
	// Name is the lowercased repository name; empty when the reference names no repository.
	Name string
	// RefLocation is the position of the reference within the parsed text.
	RefLocation *RefSpan
	// Action is the effect requested by a keyword found right before the reference.
	Action XRefAction
	// ActionLocation is the position of that keyword; nil when no keyword was found.
	ActionLocation *RefSpan
}
// rawReference is the internal result of parsing a single reference. It holds
// the data needed to build both IssueReference and RenderizableReference.
type rawReference struct {
	// index is the issue number parsed from the reference.
	index int64
	// owner and name are lowercased; both are empty when the reference names no repository.
	owner string
	name  string
	// action is the effect requested by a keyword found right before the reference.
	action XRefAction
	// issue is the issue number as text (the part after '#').
	issue string
	// refLocation is the position of the reference; nil for references taken from links.
	refLocation *RefSpan
	// actionLocation is the position of the action keyword; nil when none was found.
	actionLocation *RefSpan
}
  74. func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
  75. refarr := make([]IssueReference, len(reflist))
  76. for i, r := range reflist {
  77. refarr[i] = IssueReference{
  78. Index: r.index,
  79. Owner: r.owner,
  80. Name: r.name,
  81. Action: r.action,
  82. }
  83. }
  84. return refarr
  85. }
// RefSpan is the position where the reference was found within the parsed text
type RefSpan struct {
	// Start is the offset of the first byte of the span.
	Start int
	// End is the offset just past the last byte of the span.
	End int
}
  91. func makeKeywordsPat(words []string) *regexp.Regexp {
  92. acceptedWords := parseKeywords(words)
  93. if len(acceptedWords) == 0 {
  94. // Never match
  95. return nil
  96. }
  97. return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
  98. }
  99. func parseKeywords(words []string) []string {
  100. acceptedWords := make([]string, 0, 5)
  101. wordPat := regexp.MustCompile(`^[\pL]+$`)
  102. for _, word := range words {
  103. word = strings.ToLower(strings.TrimSpace(word))
  104. // Accept Unicode letter class runes (a-z, á, à, ä, )
  105. if wordPat.MatchString(word) {
  106. acceptedWords = append(acceptedWords, word)
  107. } else {
  108. log.Info("Invalid keyword: %s", word)
  109. }
  110. }
  111. return acceptedWords
  112. }
  113. func newKeywords() {
  114. issueKeywordsOnce.Do(func() {
  115. // Delay initialization until after the settings module is initialized
  116. doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
  117. })
  118. }
  119. func doNewKeywords(close []string, reopen []string) {
  120. issueCloseKeywordsPat = makeKeywordsPat(close)
  121. issueReopenKeywordsPat = makeKeywordsPat(reopen)
  122. }
  123. // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
  124. func getGiteaHostName() string {
  125. giteaHostInit.Do(func() {
  126. if uapp, err := url.Parse(setting.AppURL); err == nil {
  127. giteaHost = strings.ToLower(uapp.Host)
  128. } else {
  129. giteaHost = ""
  130. }
  131. })
  132. return giteaHost
  133. }
  134. // FindAllMentionsMarkdown matches mention patterns in given content and
  135. // returns a list of found unvalidated user names **not including** the @ prefix.
  136. func FindAllMentionsMarkdown(content string) []string {
  137. bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
  138. locations := FindAllMentionsBytes(bcontent)
  139. mentions := make([]string, len(locations))
  140. for i, val := range locations {
  141. mentions[i] = string(bcontent[val.Start+1 : val.End])
  142. }
  143. return mentions
  144. }
  145. // FindAllMentionsBytes matches mention patterns in given content
  146. // and returns a list of locations for the unvalidated user names, including the @ prefix.
  147. func FindAllMentionsBytes(content []byte) []RefSpan {
  148. mentions := mentionPattern.FindAllSubmatchIndex(content, -1)
  149. ret := make([]RefSpan, len(mentions))
  150. for i, val := range mentions {
  151. ret[i] = RefSpan{Start: val[2], End: val[3]}
  152. }
  153. return ret
  154. }
  155. // FindFirstMentionBytes matches the first mention in then given content
  156. // and returns the location of the unvalidated user name, including the @ prefix.
  157. func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
  158. mention := mentionPattern.FindSubmatchIndex(content)
  159. if mention == nil {
  160. return false, RefSpan{}
  161. }
  162. return true, RefSpan{Start: mention[2], End: mention[3]}
  163. }
  164. // FindAllIssueReferencesMarkdown strips content from markdown markup
  165. // and returns a list of unvalidated references found in it.
  166. func FindAllIssueReferencesMarkdown(content string) []IssueReference {
  167. return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
  168. }
  169. func findAllIssueReferencesMarkdown(content string) []*rawReference {
  170. bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
  171. return findAllIssueReferencesBytes(bcontent, links)
  172. }
  173. // FindAllIssueReferences returns a list of unvalidated references found in a string.
  174. func FindAllIssueReferences(content string) []IssueReference {
  175. return rawToIssueReferenceList(findAllIssueReferencesBytes([]byte(content), []string{}))
  176. }
  177. // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
  178. func FindRenderizableReferenceNumeric(content string) (bool, *RenderizableReference) {
  179. match := issueNumericPattern.FindStringSubmatchIndex(content)
  180. if match == nil {
  181. if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
  182. return false, nil
  183. }
  184. }
  185. r := getCrossReference([]byte(content), match[2], match[3], false)
  186. if r == nil {
  187. return false, nil
  188. }
  189. return true, &RenderizableReference{
  190. Issue: r.issue,
  191. Owner: r.owner,
  192. Name: r.name,
  193. RefLocation: r.refLocation,
  194. Action: r.action,
  195. ActionLocation: r.actionLocation,
  196. }
  197. }
  198. // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
  199. func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
  200. match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
  201. if match == nil {
  202. return false, nil
  203. }
  204. action, location := findActionKeywords([]byte(content), match[2])
  205. return true, &RenderizableReference{
  206. Issue: string(content[match[2]:match[3]]),
  207. RefLocation: &RefSpan{Start: match[2], End: match[3]},
  208. Action: action,
  209. ActionLocation: location,
  210. }
  211. }
  212. // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
  213. func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
  214. ret := make([]*rawReference, 0, 10)
  215. matches := issueNumericPattern.FindAllSubmatchIndex(content, -1)
  216. for _, match := range matches {
  217. if ref := getCrossReference(content, match[2], match[3], false); ref != nil {
  218. ret = append(ret, ref)
  219. }
  220. }
  221. matches = crossReferenceIssueNumericPattern.FindAllSubmatchIndex(content, -1)
  222. for _, match := range matches {
  223. if ref := getCrossReference(content, match[2], match[3], false); ref != nil {
  224. ret = append(ret, ref)
  225. }
  226. }
  227. localhost := getGiteaHostName()
  228. for _, link := range links {
  229. if u, err := url.Parse(link); err == nil {
  230. // Note: we're not attempting to match the URL scheme (http/https)
  231. host := strings.ToLower(u.Host)
  232. if host != "" && host != localhost {
  233. continue
  234. }
  235. parts := strings.Split(u.EscapedPath(), "/")
  236. // /user/repo/issues/3
  237. if len(parts) != 5 || parts[0] != "" {
  238. continue
  239. }
  240. if parts[3] != "issues" && parts[3] != "pulls" {
  241. continue
  242. }
  243. // Note: closing/reopening keywords not supported with URLs
  244. bytes := []byte(parts[1] + "/" + parts[2] + "#" + parts[4])
  245. if ref := getCrossReference(bytes, 0, len(bytes), true); ref != nil {
  246. ref.refLocation = nil
  247. ret = append(ret, ref)
  248. }
  249. }
  250. }
  251. return ret
  252. }
  253. func getCrossReference(content []byte, start, end int, fromLink bool) *rawReference {
  254. refid := string(content[start:end])
  255. parts := strings.Split(refid, "#")
  256. if len(parts) != 2 {
  257. return nil
  258. }
  259. repo, issue := parts[0], parts[1]
  260. index, err := strconv.ParseInt(issue, 10, 64)
  261. if err != nil {
  262. return nil
  263. }
  264. if repo == "" {
  265. if fromLink {
  266. // Markdown links must specify owner/repo
  267. return nil
  268. }
  269. action, location := findActionKeywords(content, start)
  270. return &rawReference{
  271. index: index,
  272. action: action,
  273. issue: issue,
  274. refLocation: &RefSpan{Start: start, End: end},
  275. actionLocation: location,
  276. }
  277. }
  278. parts = strings.Split(strings.ToLower(repo), "/")
  279. if len(parts) != 2 {
  280. return nil
  281. }
  282. owner, name := parts[0], parts[1]
  283. if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
  284. return nil
  285. }
  286. action, location := findActionKeywords(content, start)
  287. return &rawReference{
  288. index: index,
  289. owner: owner,
  290. name: name,
  291. action: action,
  292. issue: issue,
  293. refLocation: &RefSpan{Start: start, End: end},
  294. actionLocation: location,
  295. }
  296. }
  297. func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
  298. newKeywords()
  299. var m []int
  300. if issueCloseKeywordsPat != nil {
  301. m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
  302. if m != nil {
  303. return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
  304. }
  305. }
  306. if issueReopenKeywordsPat != nil {
  307. m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
  308. if m != nil {
  309. return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
  310. }
  311. }
  312. return XRefActionNone, nil
  313. }