You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

references.go 17KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package references
  5. import (
  6. "net/url"
  7. "regexp"
  8. "strconv"
  9. "strings"
  10. "sync"
  11. "code.gitea.io/gitea/modules/log"
  12. "code.gitea.io/gitea/modules/markup/mdstripper"
  13. "code.gitea.io/gitea/modules/setting"
  14. )
var (
	// validNamePattern performs only the most basic validation for user or repository names.
	// A name may contain only lowercase alphanumeric, dash ('-'), underscore ('_') and dot ('.')
	// characters; uppercase input has to be lowercased before it is matched.
	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

	// NOTE: None of the regular expressions below perform any extra validation.
	// Thus a link is produced even if the linked entity does not exist.
	// While fast, this is also incorrect and leads to false positives.
	// TODO: fix invalid linking issue

	// mentionPattern matches all mentions in the form of "@user" or "@org/team".
	// Capture group 1 is the mention itself, including the '@' prefix.
	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)
	// issueNumericPattern matches a string that references a numeric issue, e.g. #1287
	// (or !1287 for a pull request). Capture group 1 is the reference itself.
	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
	// issueAlphanumericPattern matches a string that references an alphanumeric issue, e.g. ABC-1234
	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
	// crossReferenceIssueNumericPattern matches a string that references a numeric issue in a different repository
	// e.g. gogits/gogs#12345
	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
	// spaceTrimmedPattern lets us find the trailing space.
	// NOTE(review): the only group in this pattern is non-capturing, so FindSubmatchIndex
	// yields just the two whole-match indices; code indexing beyond [1] would be out of range.
	spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)
	// timeLogPattern matches a time tracking entry: an '@' followed by one or more
	// <number><unit> pairs, where the number may carry a '.' or ',' fraction and the
	// unit is one of w, d, m or h (e.g. @1h or @1.5d30m).
	timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

	// issueCloseKeywordsPat and issueReopenKeywordsPat are built by doNewKeywords;
	// either one stays nil when no valid keyword was supplied for it.
	issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
	// issueKeywordsOnce makes newKeywords build the keyword patterns only once.
	issueKeywordsOnce sync.Once

	// giteaHostInit guards the one-time initialization of giteaHost and
	// giteaIssuePullPattern performed by getGiteaHostName.
	giteaHostInit sync.Once
	// giteaHost is the lowercased host part of setting.AppURL ("" if it cannot be parsed).
	giteaHost string
	// giteaIssuePullPattern matches full issue/pull URLs below setting.AppURL
	// (nil if setting.AppURL cannot be parsed).
	giteaIssuePullPattern *regexp.Regexp
)
// XRefAction represents the kind of effect a cross reference has once it is resolved
type XRefAction int64

const (
	// XRefActionNone means the cross-reference is simply a comment
	XRefActionNone XRefAction = iota // 0
	// XRefActionCloses means the cross-reference should close an issue if it is resolved
	XRefActionCloses // 1
	// XRefActionReopens means the cross-reference should reopen an issue if it is resolved
	XRefActionReopens // 2
	// XRefActionNeutered means the cross-reference will no longer affect the source
	XRefActionNeutered // 3
)
// IssueReference contains an unverified cross-reference to a local issue or pull request
type IssueReference struct {
	// Index is the issue/pull number parsed from the reference.
	Index int64

	// Owner is the lowercased repository owner; empty when the reference names no repository.
	Owner string

	// Name is the lowercased repository name; empty when the reference names no repository.
	Name string

	// Action is the effect requested by a keyword placed right before the reference.
	Action XRefAction

	// TimeLog is the time tracking entry attached to the reference, without its '@' prefix.
	TimeLog string
}
// RenderizableReference contains an unverified cross-reference together with rendering information.
// The IsPull member means that a `!num` reference was used instead of `#num`.
// This kind of reference is used to make pulls available when an external issue tracker
// is used. Otherwise, `#` and `!` are completely interchangeable.
type RenderizableReference struct {
	// Issue is the issue identifier as written: the digits following `#`/`!` for numeric
	// references, or the whole token (e.g. ABC-1234) for alphanumeric ones.
	Issue string

	// Owner is the lowercased repository owner; empty when the reference names no repository.
	Owner string

	// Name is the lowercased repository name; empty when the reference names no repository.
	Name string

	// IsPull is true when the reference was written with `!` instead of `#`.
	IsPull bool

	// RefLocation is the span of the reference within the searched content.
	RefLocation *RefSpan

	// Action is the effect requested by a keyword placed right before the reference.
	Action XRefAction

	// ActionLocation is the span of that keyword, or nil when no keyword was found.
	ActionLocation *RefSpan
}
// rawReference is the package-internal result of parsing a single reference.
// It carries every piece of data needed to build either an IssueReference
// or a RenderizableReference.
type rawReference struct {
	// index is the parsed issue/pull number.
	index int64

	// owner and name are the lowercased repository coordinates ("" when none were given).
	owner string
	name  string

	// isPull is true when the reference used `!` rather than `#`.
	isPull bool

	// action is the effect requested by a keyword placed right before the reference.
	action XRefAction

	// issue is the issue number exactly as written (the text after `#`/`!`).
	issue string

	// refLocation is the span of the reference in the parsed content;
	// it is nil for references that were extracted from links.
	refLocation *RefSpan

	// actionLocation is the span of the action keyword, or nil when there is none.
	actionLocation *RefSpan

	// timeLog is the time tracking entry attached to this reference, without the '@' prefix.
	timeLog string
}
  86. func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
  87. refarr := make([]IssueReference, len(reflist))
  88. for i, r := range reflist {
  89. refarr[i] = IssueReference{
  90. Index: r.index,
  91. Owner: r.owner,
  92. Name: r.name,
  93. Action: r.action,
  94. TimeLog: r.timeLog,
  95. }
  96. }
  97. return refarr
  98. }
// RefSpan is the position where the reference was found within the parsed text.
// Start and End are byte offsets usable as slice bounds: content[Start:End]
// yields the reference, so End is exclusive.
type RefSpan struct {
	Start int
	End   int
}
  104. func makeKeywordsPat(words []string) *regexp.Regexp {
  105. acceptedWords := parseKeywords(words)
  106. if len(acceptedWords) == 0 {
  107. // Never match
  108. return nil
  109. }
  110. return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
  111. }
  112. func parseKeywords(words []string) []string {
  113. acceptedWords := make([]string, 0, 5)
  114. wordPat := regexp.MustCompile(`^[\pL]+$`)
  115. for _, word := range words {
  116. word = strings.ToLower(strings.TrimSpace(word))
  117. // Accept Unicode letter class runes (a-z, á, à, ä, )
  118. if wordPat.MatchString(word) {
  119. acceptedWords = append(acceptedWords, word)
  120. } else {
  121. log.Info("Invalid keyword: %s", word)
  122. }
  123. }
  124. return acceptedWords
  125. }
  126. func newKeywords() {
  127. issueKeywordsOnce.Do(func() {
  128. // Delay initialization until after the settings module is initialized
  129. doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
  130. })
  131. }
  132. func doNewKeywords(close []string, reopen []string) {
  133. issueCloseKeywordsPat = makeKeywordsPat(close)
  134. issueReopenKeywordsPat = makeKeywordsPat(reopen)
  135. }
  136. // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
  137. func getGiteaHostName() string {
  138. giteaHostInit.Do(func() {
  139. if uapp, err := url.Parse(setting.AppURL); err == nil {
  140. giteaHost = strings.ToLower(uapp.Host)
  141. giteaIssuePullPattern = regexp.MustCompile(
  142. `(\s|^|\(|\[)` +
  143. regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +
  144. `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
  145. `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
  146. } else {
  147. giteaHost = ""
  148. giteaIssuePullPattern = nil
  149. }
  150. })
  151. return giteaHost
  152. }
  153. // getGiteaIssuePullPattern
  154. func getGiteaIssuePullPattern() *regexp.Regexp {
  155. getGiteaHostName()
  156. return giteaIssuePullPattern
  157. }
  158. // FindAllMentionsMarkdown matches mention patterns in given content and
  159. // returns a list of found unvalidated user names **not including** the @ prefix.
  160. func FindAllMentionsMarkdown(content string) []string {
  161. bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
  162. locations := FindAllMentionsBytes(bcontent)
  163. mentions := make([]string, len(locations))
  164. for i, val := range locations {
  165. mentions[i] = string(bcontent[val.Start+1 : val.End])
  166. }
  167. return mentions
  168. }
  169. // FindAllMentionsBytes matches mention patterns in given content
  170. // and returns a list of locations for the unvalidated user names, including the @ prefix.
  171. func FindAllMentionsBytes(content []byte) []RefSpan {
  172. // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
  173. // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
  174. // from the second reference will be "eaten" by the first one:
  175. // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`
  176. ret := make([]RefSpan, 0, 5)
  177. pos := 0
  178. for {
  179. match := mentionPattern.FindSubmatchIndex(content[pos:])
  180. if match == nil {
  181. break
  182. }
  183. ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
  184. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  185. if notrail == nil {
  186. pos = match[3] + pos
  187. } else {
  188. pos = match[3] + pos + notrail[1] - notrail[3]
  189. }
  190. }
  191. return ret
  192. }
  193. // FindFirstMentionBytes matches the first mention in then given content
  194. // and returns the location of the unvalidated user name, including the @ prefix.
  195. func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
  196. mention := mentionPattern.FindSubmatchIndex(content)
  197. if mention == nil {
  198. return false, RefSpan{}
  199. }
  200. return true, RefSpan{Start: mention[2], End: mention[3]}
  201. }
  202. // FindAllIssueReferencesMarkdown strips content from markdown markup
  203. // and returns a list of unvalidated references found in it.
  204. func FindAllIssueReferencesMarkdown(content string) []IssueReference {
  205. return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
  206. }
  207. func findAllIssueReferencesMarkdown(content string) []*rawReference {
  208. bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
  209. return findAllIssueReferencesBytes(bcontent, links)
  210. }
  211. func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
  212. // We will iterate through the content, rewrite and simplify full references.
  213. //
  214. // We want to transform something like:
  215. //
  216. // this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
  217. // https://ourgitea.com/git/owner/repo/pulls/123456789
  218. //
  219. // Into something like:
  220. //
  221. // this is a #123456789, foo
  222. // !123456789
  223. pos := 0
  224. for {
  225. // re looks for something like: (\s|^|\(|\[)https://ourgitea.com/git/(owner/repo)/(issues)/(123456789)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)
  226. match := re.FindSubmatchIndex((*contentBytes)[pos:])
  227. if match == nil {
  228. break
  229. }
  230. // match is a bunch of indices into the content from pos onwards so
  231. // to simplify things let's just add pos to all of the indices in match
  232. for i := range match {
  233. match[i] += pos
  234. }
  235. // match[0]-match[1] is whole string
  236. // match[2]-match[3] is preamble
  237. // move the position to the end of the preamble
  238. pos = match[3]
  239. // match[4]-match[5] is owner/repo
  240. // now copy the owner/repo to end of the preamble
  241. endPos := pos + match[5] - match[4]
  242. copy((*contentBytes)[pos:endPos], (*contentBytes)[match[4]:match[5]])
  243. // move the current position to the end of the newly copied owner/repo
  244. pos = endPos
  245. // Now set the issue/pull marker:
  246. //
  247. // match[6]-match[7] == 'issues'
  248. (*contentBytes)[pos] = '#'
  249. if string((*contentBytes)[match[6]:match[7]]) == "pulls" {
  250. (*contentBytes)[pos] = '!'
  251. }
  252. pos++
  253. // Then add the issue/pull number
  254. //
  255. // match[8]-match[9] is the number
  256. endPos = pos + match[9] - match[8]
  257. copy((*contentBytes)[pos:endPos], (*contentBytes)[match[8]:match[9]])
  258. // Now copy what's left at the end of the string to the new end position
  259. copy((*contentBytes)[endPos:], (*contentBytes)[match[9]:])
  260. // now we reset the length
  261. // our new section has length endPos - match[3]
  262. // our old section has length match[9] - match[3]
  263. *contentBytes = (*contentBytes)[:len(*contentBytes)-match[9]+endPos]
  264. pos = endPos
  265. }
  266. }
  267. // FindAllIssueReferences returns a list of unvalidated references found in a string.
  268. func FindAllIssueReferences(content string) []IssueReference {
  269. // Need to convert fully qualified html references to local system to #/! short codes
  270. contentBytes := []byte(content)
  271. if re := getGiteaIssuePullPattern(); re != nil {
  272. convertFullHTMLReferencesToShortRefs(re, &contentBytes)
  273. } else {
  274. log.Debug("No GiteaIssuePullPattern pattern")
  275. }
  276. return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
  277. }
  278. // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
  279. func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) {
  280. match := issueNumericPattern.FindStringSubmatchIndex(content)
  281. if match == nil {
  282. if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
  283. return false, nil
  284. }
  285. }
  286. r := getCrossReference([]byte(content), match[2], match[3], false, prOnly)
  287. if r == nil {
  288. return false, nil
  289. }
  290. return true, &RenderizableReference{
  291. Issue: r.issue,
  292. Owner: r.owner,
  293. Name: r.name,
  294. IsPull: r.isPull,
  295. RefLocation: r.refLocation,
  296. Action: r.action,
  297. ActionLocation: r.actionLocation,
  298. }
  299. }
  300. // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
  301. func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
  302. match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
  303. if match == nil {
  304. return false, nil
  305. }
  306. action, location := findActionKeywords([]byte(content), match[2])
  307. return true, &RenderizableReference{
  308. Issue: string(content[match[2]:match[3]]),
  309. RefLocation: &RefSpan{Start: match[2], End: match[3]},
  310. Action: action,
  311. ActionLocation: location,
  312. IsPull: false,
  313. }
  314. }
  315. // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
  316. func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
  317. ret := make([]*rawReference, 0, 10)
  318. pos := 0
  319. // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
  320. // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
  321. // from the second reference will be "eaten" by the first one:
  322. // ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`
  323. for {
  324. match := issueNumericPattern.FindSubmatchIndex(content[pos:])
  325. if match == nil {
  326. break
  327. }
  328. if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
  329. ret = append(ret, ref)
  330. }
  331. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  332. if notrail == nil {
  333. pos = match[3] + pos
  334. } else {
  335. pos = match[3] + pos + notrail[1] - notrail[3]
  336. }
  337. }
  338. pos = 0
  339. for {
  340. match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
  341. if match == nil {
  342. break
  343. }
  344. if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
  345. ret = append(ret, ref)
  346. }
  347. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  348. if notrail == nil {
  349. pos = match[3] + pos
  350. } else {
  351. pos = match[3] + pos + notrail[1] - notrail[3]
  352. }
  353. }
  354. localhost := getGiteaHostName()
  355. for _, link := range links {
  356. if u, err := url.Parse(link); err == nil {
  357. // Note: we're not attempting to match the URL scheme (http/https)
  358. host := strings.ToLower(u.Host)
  359. if host != "" && host != localhost {
  360. continue
  361. }
  362. parts := strings.Split(u.EscapedPath(), "/")
  363. // /user/repo/issues/3
  364. if len(parts) != 5 || parts[0] != "" {
  365. continue
  366. }
  367. var sep string
  368. if parts[3] == "issues" {
  369. sep = "#"
  370. } else if parts[3] == "pulls" {
  371. sep = "!"
  372. } else {
  373. continue
  374. }
  375. // Note: closing/reopening keywords not supported with URLs
  376. bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
  377. if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
  378. ref.refLocation = nil
  379. ret = append(ret, ref)
  380. }
  381. }
  382. }
  383. if len(ret) == 0 {
  384. return ret
  385. }
  386. pos = 0
  387. for {
  388. match := timeLogPattern.FindSubmatchIndex(content[pos:])
  389. if match == nil {
  390. break
  391. }
  392. timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])
  393. var f *rawReference
  394. for _, ref := range ret {
  395. if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {
  396. f = ref
  397. }
  398. }
  399. pos = match[1] + pos
  400. if f == nil {
  401. f = ret[0]
  402. }
  403. if len(f.timeLog) == 0 {
  404. f.timeLog = timeLogEntry
  405. }
  406. }
  407. return ret
  408. }
  409. func getCrossReference(content []byte, start, end int, fromLink bool, prOnly bool) *rawReference {
  410. refid := string(content[start:end])
  411. sep := strings.IndexAny(refid, "#!")
  412. if sep < 0 {
  413. return nil
  414. }
  415. isPull := refid[sep] == '!'
  416. if prOnly && !isPull {
  417. return nil
  418. }
  419. repo := refid[:sep]
  420. issue := refid[sep+1:]
  421. index, err := strconv.ParseInt(issue, 10, 64)
  422. if err != nil {
  423. return nil
  424. }
  425. if repo == "" {
  426. if fromLink {
  427. // Markdown links must specify owner/repo
  428. return nil
  429. }
  430. action, location := findActionKeywords(content, start)
  431. return &rawReference{
  432. index: index,
  433. action: action,
  434. issue: issue,
  435. isPull: isPull,
  436. refLocation: &RefSpan{Start: start, End: end},
  437. actionLocation: location,
  438. }
  439. }
  440. parts := strings.Split(strings.ToLower(repo), "/")
  441. if len(parts) != 2 {
  442. return nil
  443. }
  444. owner, name := parts[0], parts[1]
  445. if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
  446. return nil
  447. }
  448. action, location := findActionKeywords(content, start)
  449. return &rawReference{
  450. index: index,
  451. owner: owner,
  452. name: name,
  453. action: action,
  454. issue: issue,
  455. isPull: isPull,
  456. refLocation: &RefSpan{Start: start, End: end},
  457. actionLocation: location,
  458. }
  459. }
  460. func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
  461. newKeywords()
  462. var m []int
  463. if issueCloseKeywordsPat != nil {
  464. m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
  465. if m != nil {
  466. return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
  467. }
  468. }
  469. if issueReopenKeywordsPat != nil {
  470. m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
  471. if m != nil {
  472. return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
  473. }
  474. }
  475. return XRefActionNone, nil
  476. }
  477. // IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
  478. func IsXrefActionable(ref *RenderizableReference, extTracker bool, alphaNum bool) bool {
  479. if extTracker {
  480. // External issues cannot be automatically closed
  481. return false
  482. }
  483. return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
  484. }