You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

references.go 18KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574
  1. // Copyright 2019 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package references
  5. import (
  6. "bytes"
  7. "net/url"
  8. "regexp"
  9. "strconv"
  10. "strings"
  11. "sync"
  12. "code.gitea.io/gitea/modules/log"
  13. "code.gitea.io/gitea/modules/markup/mdstripper"
  14. "code.gitea.io/gitea/modules/setting"
  15. "github.com/yuin/goldmark/util"
  16. )
  // Patterns used to locate mentions, issue references and time logs.
  //
  // NOTE: none of these patterns validates that the referenced entity exists,
  // so a link is produced even for a user, repository or issue that does not.
  // This is fast, but it leads to false positives.
  // TODO: fix invalid linking issue
  var (
  	// validNamePattern is a minimal sanity check for an already lower-cased
  	// user or repository name: alphanumerics, dash ('-'), underscore ('_')
  	// and dot ('.') only.
  	validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)

  	// mentionPattern matches mentions written as "@user" or "@org/team".
  	mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_]+\/?[0-9a-zA-Z-_]+|@[0-9a-zA-Z-_][0-9a-zA-Z-_.]+\/?[0-9a-zA-Z-_.]+[0-9a-zA-Z-_])(?:\s|[:,;.?!]\s|[:,;.?!]?$|\)|\])`)

	// issueNumericPattern matches a numeric issue or pull reference,
	// e.g. #1287 or !1287.
  	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[|\')([#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

  	// issueAlphanumericPattern matches an alphanumeric issue reference,
  	// e.g. ABC-1234.
  	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)

  	// crossReferenceIssueNumericPattern matches a numeric reference that
  	// names another repository, e.g. gogits/gogs#12345.
  	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+[#!][0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)

  	// spaceTrimmedPattern lets us find the trailing space.
  	spaceTrimmedPattern = regexp.MustCompile(`(?:.*[0-9a-zA-Z-_])\s`)

  	// timeLogPattern matches a time-tracking entry such as @1h or @1h30m.
  	timeLogPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@([0-9]+([\.,][0-9]+)?(w|d|m|h))+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
  )

  // Lazily initialised state; each group is filled in under its sync.Once.
  var (
  	// Close/reopen keyword patterns, built by doNewKeywords.
  	issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
  	issueKeywordsOnce                             sync.Once

  	// Local host name and full-URL issue/pull pattern, built by
  	// getGiteaHostName.
  	giteaHostInit         sync.Once
  	giteaHost             string
  	giteaIssuePullPattern *regexp.Regexp
  )

  // actionStrings holds the textual name of each XRefAction, indexed by its
  // numeric value.
  var actionStrings = []string{
  	"none",
  	"closes",
  	"reopens",
  	"neutered",
  }
  // XRefAction is the kind of effect a cross-reference has once it is resolved.
  type XRefAction int64

  // The known cross-reference actions. The numeric values are fixed.
  const (
  	// XRefActionNone: the cross-reference is simply a comment.
  	XRefActionNone XRefAction = 0
  	// XRefActionCloses: the cross-reference should close an issue if it is resolved.
  	XRefActionCloses XRefAction = 1
  	// XRefActionReopens: the cross-reference should reopen an issue if it is resolved.
  	XRefActionReopens XRefAction = 2
  	// XRefActionNeutered: the cross-reference will no longer affect the source.
  	XRefActionNeutered XRefAction = 3
  )
  62. func (a XRefAction) String() string {
  63. return actionStrings[a]
  64. }
  65. // IssueReference contains an unverified cross-reference to a local issue or pull request
  66. type IssueReference struct {
  67. Index int64
  68. Owner string
  69. Name string
  70. Action XRefAction
  71. TimeLog string
  72. }
  73. // RenderizableReference contains an unverified cross-reference to with rendering information
  74. // The IsPull member means that a `!num` reference was used instead of `#num`.
  75. // This kind of reference is used to make pulls available when an external issue tracker
  76. // is used. Otherwise, `#` and `!` are completely interchangeable.
  77. type RenderizableReference struct {
  78. Issue string
  79. Owner string
  80. Name string
  81. IsPull bool
  82. RefLocation *RefSpan
  83. Action XRefAction
  84. ActionLocation *RefSpan
  85. }
  86. type rawReference struct {
  87. index int64
  88. owner string
  89. name string
  90. isPull bool
  91. action XRefAction
  92. issue string
  93. refLocation *RefSpan
  94. actionLocation *RefSpan
  95. timeLog string
  96. }
  97. func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
  98. refarr := make([]IssueReference, len(reflist))
  99. for i, r := range reflist {
  100. refarr[i] = IssueReference{
  101. Index: r.index,
  102. Owner: r.owner,
  103. Name: r.name,
  104. Action: r.action,
  105. TimeLog: r.timeLog,
  106. }
  107. }
  108. return refarr
  109. }
  // RefSpan is the position at which a reference was found within the parsed
  // text, expressed as a pair of offsets usable as text[Start:End].
  type RefSpan struct {
  	Start, End int
  }
  115. func makeKeywordsPat(words []string) *regexp.Regexp {
  116. acceptedWords := parseKeywords(words)
  117. if len(acceptedWords) == 0 {
  118. // Never match
  119. return nil
  120. }
  121. return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(acceptedWords, `|`) + `):? $`)
  122. }
  123. func parseKeywords(words []string) []string {
  124. acceptedWords := make([]string, 0, 5)
  125. wordPat := regexp.MustCompile(`^[\pL]+$`)
  126. for _, word := range words {
  127. word = strings.ToLower(strings.TrimSpace(word))
  128. // Accept Unicode letter class runes (a-z, á, à, ä, )
  129. if wordPat.MatchString(word) {
  130. acceptedWords = append(acceptedWords, word)
  131. } else {
  132. log.Info("Invalid keyword: %s", word)
  133. }
  134. }
  135. return acceptedWords
  136. }
  137. func newKeywords() {
  138. issueKeywordsOnce.Do(func() {
  139. // Delay initialization until after the settings module is initialized
  140. doNewKeywords(setting.Repository.PullRequest.CloseKeywords, setting.Repository.PullRequest.ReopenKeywords)
  141. })
  142. }
  143. func doNewKeywords(close, reopen []string) {
  144. issueCloseKeywordsPat = makeKeywordsPat(close)
  145. issueReopenKeywordsPat = makeKeywordsPat(reopen)
  146. }
  147. // getGiteaHostName returns a normalized string with the local host name, with no scheme or port information
  148. func getGiteaHostName() string {
  149. giteaHostInit.Do(func() {
  150. if uapp, err := url.Parse(setting.AppURL); err == nil {
  151. giteaHost = strings.ToLower(uapp.Host)
  152. giteaIssuePullPattern = regexp.MustCompile(
  153. `(\s|^|\(|\[)` +
  154. regexp.QuoteMeta(strings.TrimSpace(setting.AppURL)) +
  155. `([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+)/` +
  156. `((?:issues)|(?:pulls))/([0-9]+)(?:\s|$|\)|\]|[:;,.?!]\s|[:;,.?!]$)`)
  157. } else {
  158. giteaHost = ""
  159. giteaIssuePullPattern = nil
  160. }
  161. })
  162. return giteaHost
  163. }
  164. // getGiteaIssuePullPattern
  165. func getGiteaIssuePullPattern() *regexp.Regexp {
  166. getGiteaHostName()
  167. return giteaIssuePullPattern
  168. }
  169. // FindAllMentionsMarkdown matches mention patterns in given content and
  170. // returns a list of found unvalidated user names **not including** the @ prefix.
  171. func FindAllMentionsMarkdown(content string) []string {
  172. bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
  173. locations := FindAllMentionsBytes(bcontent)
  174. mentions := make([]string, len(locations))
  175. for i, val := range locations {
  176. mentions[i] = string(bcontent[val.Start+1 : val.End])
  177. }
  178. return mentions
  179. }
  180. // FindAllMentionsBytes matches mention patterns in given content
  181. // and returns a list of locations for the unvalidated user names, including the @ prefix.
  182. func FindAllMentionsBytes(content []byte) []RefSpan {
  183. // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
  184. // trailing spaces (\s@mention,\s), so if we get two consecutive references, the space
  185. // from the second reference will be "eaten" by the first one:
  186. // ...\s@mention1\s@mention2\s... --> ...`\s@mention1\s`, (not) `@mention2,\s...`
  187. ret := make([]RefSpan, 0, 5)
  188. pos := 0
  189. for {
  190. match := mentionPattern.FindSubmatchIndex(content[pos:])
  191. if match == nil {
  192. break
  193. }
  194. ret = append(ret, RefSpan{Start: match[2] + pos, End: match[3] + pos})
  195. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  196. if notrail == nil {
  197. pos = match[3] + pos
  198. } else {
  199. pos = match[3] + pos + notrail[1] - notrail[3]
  200. }
  201. }
  202. return ret
  203. }
  204. // FindFirstMentionBytes matches the first mention in then given content
  205. // and returns the location of the unvalidated user name, including the @ prefix.
  206. func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
  207. mention := mentionPattern.FindSubmatchIndex(content)
  208. if mention == nil {
  209. return false, RefSpan{}
  210. }
  211. return true, RefSpan{Start: mention[2], End: mention[3]}
  212. }
  213. // FindAllIssueReferencesMarkdown strips content from markdown markup
  214. // and returns a list of unvalidated references found in it.
  215. func FindAllIssueReferencesMarkdown(content string) []IssueReference {
  216. return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
  217. }
  218. func findAllIssueReferencesMarkdown(content string) []*rawReference {
  219. bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
  220. return findAllIssueReferencesBytes(bcontent, links)
  221. }
  // convertFullHTMLReferencesToShortRefs rewrites, in place, every full
  // issue/pull URL matched by re into its short form, shrinking *contentBytes
  // accordingly.
  //
  // re is expected to capture, in order: the preamble, "owner/repo", the kind
  // ("issues" or "pulls") and the number. Text such as
  //
  //	this is a https://ourgitea.com/git/owner/repo/issues/123456789, foo
  //	https://ourgitea.com/git/owner/repo/pulls/123456789
  //
  // becomes
  //
  //	this is a owner/repo#123456789, foo
  //	owner/repo!123456789
  func convertFullHTMLReferencesToShortRefs(re *regexp.Regexp, contentBytes *[]byte) {
  	cursor := 0
  	for {
  		loc := re.FindSubmatchIndex((*contentBytes)[cursor:])
  		if loc == nil {
  			return
  		}
  		buf := *contentBytes

  		// Translate the match, which is relative to cursor, into absolute
  		// offsets into buf.
  		preambleEnd := loc[3] + cursor
  		repoStart, repoEnd := loc[4]+cursor, loc[5]+cursor
  		kindStart, kindEnd := loc[6]+cursor, loc[7]+cursor
  		numStart, numEnd := loc[8]+cursor, loc[9]+cursor

  		// Pick the marker first; the kind text lies after everything that is
  		// overwritten below until the tail is moved.
  		marker := byte('#')
  		if string(buf[kindStart:kindEnd]) == "pulls" {
  			marker = '!'
  		}

  		// Assemble "owner/repo" + marker + number right after the preamble.
  		// Every destination starts at or before its source, and copy handles
  		// overlapping ranges.
  		w := preambleEnd
  		w += copy(buf[w:], buf[repoStart:repoEnd])
  		buf[w] = marker
  		w++
  		w += copy(buf[w:], buf[numStart:numEnd])

  		// Pull the remainder of the text up behind the short reference and
  		// drop the now-unused tail.
  		tail := copy(buf[w:], buf[numEnd:])
  		*contentBytes = buf[:w+tail]

  		// Resume right after the short reference so that whatever follows it
  		// can serve as the preamble of the next match.
  		cursor = w
  	}
  }
  278. // FindAllIssueReferences returns a list of unvalidated references found in a string.
  279. func FindAllIssueReferences(content string) []IssueReference {
  280. // Need to convert fully qualified html references to local system to #/! short codes
  281. contentBytes := []byte(content)
  282. if re := getGiteaIssuePullPattern(); re != nil {
  283. convertFullHTMLReferencesToShortRefs(re, &contentBytes)
  284. } else {
  285. log.Debug("No GiteaIssuePullPattern pattern")
  286. }
  287. return rawToIssueReferenceList(findAllIssueReferencesBytes(contentBytes, []string{}))
  288. }
  289. // FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
  290. func FindRenderizableReferenceNumeric(content string, prOnly bool) (bool, *RenderizableReference) {
  291. match := issueNumericPattern.FindStringSubmatchIndex(content)
  292. if match == nil {
  293. if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
  294. return false, nil
  295. }
  296. }
  297. r := getCrossReference(util.StringToReadOnlyBytes(content), match[2], match[3], false, prOnly)
  298. if r == nil {
  299. return false, nil
  300. }
  301. return true, &RenderizableReference{
  302. Issue: r.issue,
  303. Owner: r.owner,
  304. Name: r.name,
  305. IsPull: r.isPull,
  306. RefLocation: r.refLocation,
  307. Action: r.action,
  308. ActionLocation: r.actionLocation,
  309. }
  310. }
  311. // FindRenderizableReferenceRegexp returns the first regexp unvalidated references found in a string.
  312. func FindRenderizableReferenceRegexp(content string, pattern *regexp.Regexp) (bool, *RenderizableReference) {
  313. match := pattern.FindStringSubmatchIndex(content)
  314. if len(match) < 4 {
  315. return false, nil
  316. }
  317. action, location := findActionKeywords([]byte(content), match[2])
  318. return true, &RenderizableReference{
  319. Issue: content[match[2]:match[3]],
  320. RefLocation: &RefSpan{Start: match[0], End: match[1]},
  321. Action: action,
  322. ActionLocation: location,
  323. IsPull: false,
  324. }
  325. }
  326. // FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string.
  327. func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
  328. match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
  329. if match == nil {
  330. return false, nil
  331. }
  332. action, location := findActionKeywords([]byte(content), match[2])
  333. return true, &RenderizableReference{
  334. Issue: content[match[2]:match[3]],
  335. RefLocation: &RefSpan{Start: match[2], End: match[3]},
  336. Action: action,
  337. ActionLocation: location,
  338. IsPull: false,
  339. }
  340. }
  341. // FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice.
  342. func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
  343. ret := make([]*rawReference, 0, 10)
  344. pos := 0
  345. // Sadly we can't use FindAllSubmatchIndex because our pattern checks for starting and
  346. // trailing spaces (\s#ref,\s), so if we get two consecutive references, the space
  347. // from the second reference will be "eaten" by the first one:
  348. // ...\s#ref1\s#ref2\s... --> ...`\s#ref1\s`, (not) `#ref2,\s...`
  349. for {
  350. match := issueNumericPattern.FindSubmatchIndex(content[pos:])
  351. if match == nil {
  352. break
  353. }
  354. if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
  355. ret = append(ret, ref)
  356. }
  357. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  358. if notrail == nil {
  359. pos = match[3] + pos
  360. } else {
  361. pos = match[3] + pos + notrail[1] - notrail[3]
  362. }
  363. }
  364. pos = 0
  365. for {
  366. match := crossReferenceIssueNumericPattern.FindSubmatchIndex(content[pos:])
  367. if match == nil {
  368. break
  369. }
  370. if ref := getCrossReference(content, match[2]+pos, match[3]+pos, false, false); ref != nil {
  371. ret = append(ret, ref)
  372. }
  373. notrail := spaceTrimmedPattern.FindSubmatchIndex(content[match[2]+pos : match[3]+pos])
  374. if notrail == nil {
  375. pos = match[3] + pos
  376. } else {
  377. pos = match[3] + pos + notrail[1] - notrail[3]
  378. }
  379. }
  380. localhost := getGiteaHostName()
  381. for _, link := range links {
  382. if u, err := url.Parse(link); err == nil {
  383. // Note: we're not attempting to match the URL scheme (http/https)
  384. host := strings.ToLower(u.Host)
  385. if host != "" && host != localhost {
  386. continue
  387. }
  388. parts := strings.Split(u.EscapedPath(), "/")
  389. // /user/repo/issues/3
  390. if len(parts) != 5 || parts[0] != "" {
  391. continue
  392. }
  393. var sep string
  394. if parts[3] == "issues" {
  395. sep = "#"
  396. } else if parts[3] == "pulls" {
  397. sep = "!"
  398. } else {
  399. continue
  400. }
  401. // Note: closing/reopening keywords not supported with URLs
  402. bytes := []byte(parts[1] + "/" + parts[2] + sep + parts[4])
  403. if ref := getCrossReference(bytes, 0, len(bytes), true, false); ref != nil {
  404. ref.refLocation = nil
  405. ret = append(ret, ref)
  406. }
  407. }
  408. }
  409. if len(ret) == 0 {
  410. return ret
  411. }
  412. pos = 0
  413. for {
  414. match := timeLogPattern.FindSubmatchIndex(content[pos:])
  415. if match == nil {
  416. break
  417. }
  418. timeLogEntry := string(content[match[2]+pos+1 : match[3]+pos])
  419. var f *rawReference
  420. for _, ref := range ret {
  421. if ref.refLocation != nil && ref.refLocation.End < match[2]+pos && (f == nil || f.refLocation.End < ref.refLocation.End) {
  422. f = ref
  423. }
  424. }
  425. pos = match[1] + pos
  426. if f == nil {
  427. f = ret[0]
  428. }
  429. if len(f.timeLog) == 0 {
  430. f.timeLog = timeLogEntry
  431. }
  432. }
  433. return ret
  434. }
  435. func getCrossReference(content []byte, start, end int, fromLink, prOnly bool) *rawReference {
  436. sep := bytes.IndexAny(content[start:end], "#!")
  437. if sep < 0 {
  438. return nil
  439. }
  440. isPull := content[start+sep] == '!'
  441. if prOnly && !isPull {
  442. return nil
  443. }
  444. repo := string(content[start : start+sep])
  445. issue := string(content[start+sep+1 : end])
  446. index, err := strconv.ParseInt(issue, 10, 64)
  447. if err != nil {
  448. return nil
  449. }
  450. if repo == "" {
  451. if fromLink {
  452. // Markdown links must specify owner/repo
  453. return nil
  454. }
  455. action, location := findActionKeywords(content, start)
  456. return &rawReference{
  457. index: index,
  458. action: action,
  459. issue: issue,
  460. isPull: isPull,
  461. refLocation: &RefSpan{Start: start, End: end},
  462. actionLocation: location,
  463. }
  464. }
  465. parts := strings.Split(strings.ToLower(repo), "/")
  466. if len(parts) != 2 {
  467. return nil
  468. }
  469. owner, name := parts[0], parts[1]
  470. if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
  471. return nil
  472. }
  473. action, location := findActionKeywords(content, start)
  474. return &rawReference{
  475. index: index,
  476. owner: owner,
  477. name: name,
  478. action: action,
  479. issue: issue,
  480. isPull: isPull,
  481. refLocation: &RefSpan{Start: start, End: end},
  482. actionLocation: location,
  483. }
  484. }
  485. func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
  486. newKeywords()
  487. var m []int
  488. if issueCloseKeywordsPat != nil {
  489. m = issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
  490. if m != nil {
  491. return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
  492. }
  493. }
  494. if issueReopenKeywordsPat != nil {
  495. m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
  496. if m != nil {
  497. return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
  498. }
  499. }
  500. return XRefActionNone, nil
  501. }
  502. // IsXrefActionable returns true if the xref action is actionable (i.e. produces a result when resolved)
  503. func IsXrefActionable(ref *RenderizableReference, extTracker bool) bool {
  504. if extTracker {
  505. // External issues cannot be automatically closed
  506. return false
  507. }
  508. return ref.Action == XRefActionCloses || ref.Action == XRefActionReopens
  509. }