You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

html.go 32KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677787980818283848586878889909192939495969798991001011021031041051061071081091101111121131141151161171181191201211221231241251261271281291301311321331341351361371381391401411421431441451461471481491501511521531541551561571581591601611621631641651661671681691701711721731741751761771781791801811821831841851861871881891901911921931941951961971981992002012022032042052062072082092102112122132142152162172182192202212222232242252262272282292302312322332342352362372382392402412422432442452462472482492502512522532542552562572582592602612622632642652662672682692702712722732742752762772782792802812822832842852862872882892902912922932942952962972982993003013023033043053063073083093103113123133143153163173183193203213223233243253263273283293303313323333343353363373383393403413423433443453463473483493503513523533543553563573583593603613623633643653663673683693703713723733743753763773783793803813823833843853863873883893903913923933943953963973983994004014024034044054064074084094104114124134144154164174184194204214224234244254264274284294304314324334344354364374384394404414424434444454464474484494504514524534544554564574584594604614624634644654664674684694704714724734744754764774784794804814824834844854864874884894904914924934944954964974984995005015025035045055065075085095105115125135145155165175185195205215225235245255265275285295305315325335345355365375385395405415425435445455465475485495505515525535545555565575585595605615625635645655665675685695705715725735745755765775785795805815825835845855865875885895905915925935945955965975985996006016026036046056066076086096106116126136146156166176186196206216226236246256266276286296306316326336346356366376386396406416426436446456466476486496506516526536546556566576586596606616626636646656666676686696706716726736746756766776786796806816826836846856866876886896906916926936946956966976986997007017027037047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132
  1. // Copyright 2017 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package markup
  5. import (
  6. "bytes"
  7. "io"
  8. "net/url"
  9. "path"
  10. "path/filepath"
  11. "regexp"
  12. "strings"
  13. "sync"
  14. "code.gitea.io/gitea/modules/base"
  15. "code.gitea.io/gitea/modules/emoji"
  16. "code.gitea.io/gitea/modules/git"
  17. "code.gitea.io/gitea/modules/log"
  18. "code.gitea.io/gitea/modules/markup/common"
  19. "code.gitea.io/gitea/modules/references"
  20. "code.gitea.io/gitea/modules/setting"
  21. "code.gitea.io/gitea/modules/util"
  22. "github.com/unknwon/com"
  23. "golang.org/x/net/html"
  24. "golang.org/x/net/html/atom"
  25. "mvdan.cc/xurls/v2"
  26. )
  27. // Issue name styles
  28. const (
  29. IssueNameStyleNumeric = "numeric"
  30. IssueNameStyleAlphanumeric = "alphanumeric"
  31. )
  32. var (
  33. // NOTE: All below regex matching do not perform any extra validation.
  34. // Thus a link is produced even if the linked entity does not exist.
  35. // While fast, this is also incorrect and lead to false positives.
  36. // TODO: fix invalid linking issue
  37. // sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
  38. // Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
  39. // so that abbreviated hash links can be used as well. This matches git and github useability.
  40. sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-f]{7,40})(?:\s|$|\)|\]|[.,](\s|$))`)
  41. // shortLinkPattern matches short but difficult to parse [[name|link|arg=test]] syntax
  42. shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)
  43. // anySHA1Pattern allows to split url containing SHA into parts
  44. anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})(/[^#\s]+)?(#\S+)?`)
  45. validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)
  46. // While this email regex is definitely not perfect and I'm sure you can come up
  47. // with edge cases, it is still accepted by the CommonMark specification, as
  48. // well as the HTML5 spec:
  49. // http://spec.commonmark.org/0.28/#email-address
  50. // https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
  51. emailRegex = regexp.MustCompile("(?:\\s|^|\\(|\\[)([a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9]{2,}(?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)+)(?:\\s|$|\\)|\\]|\\.(\\s|$))")
  52. // blackfriday extensions create IDs like fn:user-content-footnote
  53. blackfridayExtRegex = regexp.MustCompile(`[^:]*:user-content-`)
  54. // EmojiShortCodeRegex find emoji by alias like :smile:
  55. EmojiShortCodeRegex = regexp.MustCompile(`:[\w\+\-]+:`)
  56. )
  57. // CSS class for action keywords (e.g. "closes: #1")
  58. const keywordClass = "issue-keyword"
  59. // IsLink reports whether link fits valid format.
  60. func IsLink(link []byte) bool {
  61. return isLink(link)
  62. }
  63. // isLink reports whether link fits valid format.
  64. func isLink(link []byte) bool {
  65. return validLinksPattern.Match(link)
  66. }
  67. func isLinkStr(link string) bool {
  68. return validLinksPattern.MatchString(link)
  69. }
  70. // regexp for full links to issues/pulls
  71. var issueFullPattern *regexp.Regexp
  72. // Once for to prevent races
  73. var issueFullPatternOnce sync.Once
  74. func getIssueFullPattern() *regexp.Regexp {
  75. issueFullPatternOnce.Do(func() {
  76. issueFullPattern = regexp.MustCompile(regexp.QuoteMeta(setting.AppURL) +
  77. `\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#](\S+)?)?\b`)
  78. })
  79. return issueFullPattern
  80. }
  81. // CustomLinkURLSchemes allows for additional schemes to be detected when parsing links within text
  82. func CustomLinkURLSchemes(schemes []string) {
  83. schemes = append(schemes, "http", "https")
  84. withAuth := make([]string, 0, len(schemes))
  85. validScheme := regexp.MustCompile(`^[a-z]+$`)
  86. for _, s := range schemes {
  87. if !validScheme.MatchString(s) {
  88. continue
  89. }
  90. without := false
  91. for _, sna := range xurls.SchemesNoAuthority {
  92. if s == sna {
  93. without = true
  94. break
  95. }
  96. }
  97. if without {
  98. s += ":"
  99. } else {
  100. s += "://"
  101. }
  102. withAuth = append(withAuth, s)
  103. }
  104. common.LinkRegex, _ = xurls.StrictMatchingScheme(strings.Join(withAuth, "|"))
  105. }
  106. // IsSameDomain checks if given url string has the same hostname as current Gitea instance
  107. func IsSameDomain(s string) bool {
  108. if strings.HasPrefix(s, "/") {
  109. return true
  110. }
  111. if uapp, err := url.Parse(setting.AppURL); err == nil {
  112. if u, err := url.Parse(s); err == nil {
  113. return u.Host == uapp.Host
  114. }
  115. return false
  116. }
  117. return false
  118. }
  119. type postProcessError struct {
  120. context string
  121. err error
  122. }
  123. func (p *postProcessError) Error() string {
  124. return "PostProcess: " + p.context + ", " + p.err.Error()
  125. }
  126. type processor func(ctx *RenderContext, node *html.Node)
  127. var defaultProcessors = []processor{
  128. fullIssuePatternProcessor,
  129. fullSha1PatternProcessor,
  130. shortLinkProcessor,
  131. linkProcessor,
  132. mentionProcessor,
  133. issueIndexPatternProcessor,
  134. sha1CurrentPatternProcessor,
  135. emailAddressProcessor,
  136. emojiProcessor,
  137. emojiShortCodeProcessor,
  138. }
  139. // PostProcess does the final required transformations to the passed raw HTML
  140. // data, and ensures its validity. Transformations include: replacing links and
  141. // emails with HTML links, parsing shortlinks in the format of [[Link]], like
  142. // MediaWiki, linking issues in the format #ID, and mentions in the format
  143. // @user, and others.
  144. func PostProcess(
  145. ctx *RenderContext,
  146. input io.Reader,
  147. output io.Writer,
  148. ) error {
  149. return postProcess(ctx, defaultProcessors, input, output)
  150. }
  151. var commitMessageProcessors = []processor{
  152. fullIssuePatternProcessor,
  153. fullSha1PatternProcessor,
  154. linkProcessor,
  155. mentionProcessor,
  156. issueIndexPatternProcessor,
  157. sha1CurrentPatternProcessor,
  158. emailAddressProcessor,
  159. emojiProcessor,
  160. emojiShortCodeProcessor,
  161. }
  162. // RenderCommitMessage will use the same logic as PostProcess, but will disable
  163. // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
  164. // set, which changes every text node into a link to the passed default link.
  165. func RenderCommitMessage(
  166. ctx *RenderContext,
  167. content string,
  168. ) (string, error) {
  169. var procs = commitMessageProcessors
  170. if ctx.DefaultLink != "" {
  171. // we don't have to fear data races, because being
  172. // commitMessageProcessors of fixed len and cap, every time we append
  173. // something to it the slice is realloc+copied, so append always
  174. // generates the slice ex-novo.
  175. procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink))
  176. }
  177. return renderProcessString(ctx, procs, content)
  178. }
  179. var commitMessageSubjectProcessors = []processor{
  180. fullIssuePatternProcessor,
  181. fullSha1PatternProcessor,
  182. linkProcessor,
  183. mentionProcessor,
  184. issueIndexPatternProcessor,
  185. sha1CurrentPatternProcessor,
  186. emojiShortCodeProcessor,
  187. emojiProcessor,
  188. }
  189. var emojiProcessors = []processor{
  190. emojiShortCodeProcessor,
  191. emojiProcessor,
  192. }
  193. // RenderCommitMessageSubject will use the same logic as PostProcess and
  194. // RenderCommitMessage, but will disable the shortLinkProcessor and
  195. // emailAddressProcessor, will add a defaultLinkProcessor if defaultLink is set,
  196. // which changes every text node into a link to the passed default link.
  197. func RenderCommitMessageSubject(
  198. ctx *RenderContext,
  199. content string,
  200. ) (string, error) {
  201. var procs = commitMessageSubjectProcessors
  202. if ctx.DefaultLink != "" {
  203. // we don't have to fear data races, because being
  204. // commitMessageSubjectProcessors of fixed len and cap, every time we
  205. // append something to it the slice is realloc+copied, so append always
  206. // generates the slice ex-novo.
  207. procs = append(procs, genDefaultLinkProcessor(ctx.DefaultLink))
  208. }
  209. return renderProcessString(ctx, procs, content)
  210. }
  211. // RenderIssueTitle to process title on individual issue/pull page
  212. func RenderIssueTitle(
  213. ctx *RenderContext,
  214. title string,
  215. ) (string, error) {
  216. return renderProcessString(ctx, []processor{
  217. issueIndexPatternProcessor,
  218. sha1CurrentPatternProcessor,
  219. emojiShortCodeProcessor,
  220. emojiProcessor,
  221. }, title)
  222. }
  223. func renderProcessString(ctx *RenderContext, procs []processor, content string) (string, error) {
  224. var buf strings.Builder
  225. if err := postProcess(ctx, procs, strings.NewReader(content), &buf); err != nil {
  226. return "", err
  227. }
  228. return buf.String(), nil
  229. }
  230. // RenderDescriptionHTML will use similar logic as PostProcess, but will
  231. // use a single special linkProcessor.
  232. func RenderDescriptionHTML(
  233. ctx *RenderContext,
  234. content string,
  235. ) (string, error) {
  236. return renderProcessString(ctx, []processor{
  237. descriptionLinkProcessor,
  238. emojiShortCodeProcessor,
  239. emojiProcessor,
  240. }, content)
  241. }
  242. // RenderEmoji for when we want to just process emoji and shortcodes
  243. // in various places it isn't already run through the normal markdown processor
  244. func RenderEmoji(
  245. content string,
  246. ) (string, error) {
  247. return renderProcessString(&RenderContext{}, emojiProcessors, content)
  248. }
  249. var tagCleaner = regexp.MustCompile(`<((?:/?\w+/\w+)|(?:/[\w ]+/)|(/?[hH][tT][mM][lL]\b)|(/?[hH][eE][aA][dD]\b))`)
  250. var nulCleaner = strings.NewReplacer("\000", "")
  251. func postProcess(ctx *RenderContext, procs []processor, input io.Reader, output io.Writer) error {
  252. defer ctx.Cancel()
  253. // FIXME: don't read all content to memory
  254. rawHTML, err := io.ReadAll(input)
  255. if err != nil {
  256. return err
  257. }
  258. res := bytes.NewBuffer(make([]byte, 0, len(rawHTML)+50))
  259. // prepend "<html><body>"
  260. _, _ = res.WriteString("<html><body>")
  261. // Strip out nuls - they're always invalid
  262. _, _ = res.Write(tagCleaner.ReplaceAll([]byte(nulCleaner.Replace(string(rawHTML))), []byte("&lt;$1")))
  263. // close the tags
  264. _, _ = res.WriteString("</body></html>")
  265. // parse the HTML
  266. node, err := html.Parse(res)
  267. if err != nil {
  268. return &postProcessError{"invalid HTML", err}
  269. }
  270. if node.Type == html.DocumentNode {
  271. node = node.FirstChild
  272. }
  273. visitNode(ctx, procs, node, true)
  274. newNodes := make([]*html.Node, 0, 5)
  275. if node.Data == "html" {
  276. node = node.FirstChild
  277. for node != nil && node.Data != "body" {
  278. node = node.NextSibling
  279. }
  280. }
  281. if node != nil {
  282. if node.Data == "body" {
  283. child := node.FirstChild
  284. for child != nil {
  285. newNodes = append(newNodes, child)
  286. child = child.NextSibling
  287. }
  288. } else {
  289. newNodes = append(newNodes, node)
  290. }
  291. }
  292. // Render everything to buf.
  293. for _, node := range newNodes {
  294. err = html.Render(output, node)
  295. if err != nil {
  296. return &postProcessError{"error rendering processed HTML", err}
  297. }
  298. }
  299. return nil
  300. }
  301. func visitNode(ctx *RenderContext, procs []processor, node *html.Node, visitText bool) {
  302. // Add user-content- to IDs if they don't already have them
  303. for idx, attr := range node.Attr {
  304. if attr.Key == "id" && !(strings.HasPrefix(attr.Val, "user-content-") || blackfridayExtRegex.MatchString(attr.Val)) {
  305. node.Attr[idx].Val = "user-content-" + attr.Val
  306. }
  307. if attr.Key == "class" && attr.Val == "emoji" {
  308. visitText = false
  309. }
  310. }
  311. // We ignore code, pre and already generated links.
  312. switch node.Type {
  313. case html.TextNode:
  314. if visitText {
  315. textNode(ctx, procs, node)
  316. }
  317. case html.ElementNode:
  318. if node.Data == "img" {
  319. for i, attr := range node.Attr {
  320. if attr.Key != "src" {
  321. continue
  322. }
  323. if len(attr.Val) > 0 && !isLinkStr(attr.Val) && !strings.HasPrefix(attr.Val, "data:image/") {
  324. prefix := ctx.URLPrefix
  325. if ctx.IsWiki {
  326. prefix = util.URLJoin(prefix, "wiki", "raw")
  327. }
  328. prefix = strings.Replace(prefix, "/src/", "/media/", 1)
  329. attr.Val = util.URLJoin(prefix, attr.Val)
  330. }
  331. node.Attr[i] = attr
  332. }
  333. } else if node.Data == "a" {
  334. visitText = false
  335. } else if node.Data == "code" || node.Data == "pre" {
  336. return
  337. } else if node.Data == "i" {
  338. for _, attr := range node.Attr {
  339. if attr.Key != "class" {
  340. continue
  341. }
  342. classes := strings.Split(attr.Val, " ")
  343. for i, class := range classes {
  344. if class == "icon" {
  345. classes[0], classes[i] = classes[i], classes[0]
  346. attr.Val = strings.Join(classes, " ")
  347. // Remove all children of icons
  348. child := node.FirstChild
  349. for child != nil {
  350. node.RemoveChild(child)
  351. child = node.FirstChild
  352. }
  353. break
  354. }
  355. }
  356. }
  357. }
  358. for n := node.FirstChild; n != nil; n = n.NextSibling {
  359. visitNode(ctx, procs, n, visitText)
  360. }
  361. }
  362. // ignore everything else
  363. }
  364. // textNode runs the passed node through various processors, in order to handle
  365. // all kinds of special links handled by the post-processing.
  366. func textNode(ctx *RenderContext, procs []processor, node *html.Node) {
  367. for _, processor := range procs {
  368. processor(ctx, node)
  369. }
  370. }
  371. // createKeyword() renders a highlighted version of an action keyword
  372. func createKeyword(content string) *html.Node {
  373. span := &html.Node{
  374. Type: html.ElementNode,
  375. Data: atom.Span.String(),
  376. Attr: []html.Attribute{},
  377. }
  378. span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass})
  379. text := &html.Node{
  380. Type: html.TextNode,
  381. Data: content,
  382. }
  383. span.AppendChild(text)
  384. return span
  385. }
  386. func createEmoji(content, class, name string) *html.Node {
  387. span := &html.Node{
  388. Type: html.ElementNode,
  389. Data: atom.Span.String(),
  390. Attr: []html.Attribute{},
  391. }
  392. if class != "" {
  393. span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: class})
  394. }
  395. if name != "" {
  396. span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: name})
  397. }
  398. text := &html.Node{
  399. Type: html.TextNode,
  400. Data: content,
  401. }
  402. span.AppendChild(text)
  403. return span
  404. }
  405. func createCustomEmoji(alias string) *html.Node {
  406. span := &html.Node{
  407. Type: html.ElementNode,
  408. Data: atom.Span.String(),
  409. Attr: []html.Attribute{},
  410. }
  411. span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: "emoji"})
  412. span.Attr = append(span.Attr, html.Attribute{Key: "aria-label", Val: alias})
  413. img := &html.Node{
  414. Type: html.ElementNode,
  415. DataAtom: atom.Img,
  416. Data: "img",
  417. Attr: []html.Attribute{},
  418. }
  419. img.Attr = append(img.Attr, html.Attribute{Key: "alt", Val: ":" + alias + ":"})
  420. img.Attr = append(img.Attr, html.Attribute{Key: "src", Val: setting.StaticURLPrefix + "/assets/img/emoji/" + alias + ".png"})
  421. span.AppendChild(img)
  422. return span
  423. }
  424. func createLink(href, content, class string) *html.Node {
  425. a := &html.Node{
  426. Type: html.ElementNode,
  427. Data: atom.A.String(),
  428. Attr: []html.Attribute{{Key: "href", Val: href}},
  429. }
  430. if class != "" {
  431. a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
  432. }
  433. text := &html.Node{
  434. Type: html.TextNode,
  435. Data: content,
  436. }
  437. a.AppendChild(text)
  438. return a
  439. }
  440. func createCodeLink(href, content, class string) *html.Node {
  441. a := &html.Node{
  442. Type: html.ElementNode,
  443. Data: atom.A.String(),
  444. Attr: []html.Attribute{{Key: "href", Val: href}},
  445. }
  446. if class != "" {
  447. a.Attr = append(a.Attr, html.Attribute{Key: "class", Val: class})
  448. }
  449. text := &html.Node{
  450. Type: html.TextNode,
  451. Data: content,
  452. }
  453. code := &html.Node{
  454. Type: html.ElementNode,
  455. Data: atom.Code.String(),
  456. Attr: []html.Attribute{{Key: "class", Val: "nohighlight"}},
  457. }
  458. code.AppendChild(text)
  459. a.AppendChild(code)
  460. return a
  461. }
  462. // replaceContent takes text node, and in its content it replaces a section of
  463. // it with the specified newNode.
  464. func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
  465. replaceContentList(node, i, j, []*html.Node{newNode})
  466. }
  467. // replaceContentList takes text node, and in its content it replaces a section of
  468. // it with the specified newNodes. An example to visualize how this can work can
  469. // be found here: https://play.golang.org/p/5zP8NnHZ03s
  470. func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
  471. // get the data before and after the match
  472. before := node.Data[:i]
  473. after := node.Data[j:]
  474. // Replace in the current node the text, so that it is only what it is
  475. // supposed to have.
  476. node.Data = before
  477. // Get the current next sibling, before which we place the replaced data,
  478. // and after that we place the new text node.
  479. nextSibling := node.NextSibling
  480. for _, n := range newNodes {
  481. node.Parent.InsertBefore(n, nextSibling)
  482. }
  483. if after != "" {
  484. node.Parent.InsertBefore(&html.Node{
  485. Type: html.TextNode,
  486. Data: after,
  487. }, nextSibling)
  488. }
  489. }
  490. func mentionProcessor(ctx *RenderContext, node *html.Node) {
  491. start := 0
  492. next := node.NextSibling
  493. for node != nil && node != next && start < len(node.Data) {
  494. // We replace only the first mention; other mentions will be addressed later
  495. found, loc := references.FindFirstMentionBytes([]byte(node.Data[start:]))
  496. if !found {
  497. return
  498. }
  499. loc.Start += start
  500. loc.End += start
  501. mention := node.Data[loc.Start:loc.End]
  502. var teams string
  503. teams, ok := ctx.Metas["teams"]
  504. // FIXME: util.URLJoin may not be necessary here:
  505. // - setting.AppURL is defined to have a terminal '/' so unless mention[1:]
  506. // is an AppSubURL link we can probably fallback to concatenation.
  507. // team mention should follow @orgName/teamName style
  508. if ok && strings.Contains(mention, "/") {
  509. mentionOrgAndTeam := strings.Split(mention, "/")
  510. if mentionOrgAndTeam[0][1:] == ctx.Metas["org"] && strings.Contains(teams, ","+strings.ToLower(mentionOrgAndTeam[1])+",") {
  511. replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, "org", ctx.Metas["org"], "teams", mentionOrgAndTeam[1]), mention, "mention"))
  512. node = node.NextSibling.NextSibling
  513. start = 0
  514. continue
  515. }
  516. start = loc.End
  517. continue
  518. }
  519. replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
  520. node = node.NextSibling.NextSibling
  521. start = 0
  522. }
  523. }
  524. func shortLinkProcessor(ctx *RenderContext, node *html.Node) {
  525. shortLinkProcessorFull(ctx, node, false)
  526. }
  527. func shortLinkProcessorFull(ctx *RenderContext, node *html.Node, noLink bool) {
  528. next := node.NextSibling
  529. for node != nil && node != next {
  530. m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
  531. if m == nil {
  532. return
  533. }
  534. content := node.Data[m[2]:m[3]]
  535. tail := node.Data[m[4]:m[5]]
  536. props := make(map[string]string)
  537. // MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
  538. // It makes page handling terrible, but we prefer GitHub syntax
  539. // And fall back to MediaWiki only when it is obvious from the look
  540. // Of text and link contents
  541. sl := strings.Split(content, "|")
  542. for _, v := range sl {
  543. if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
  544. // There is no equal in this argument; this is a mandatory arg
  545. if props["name"] == "" {
  546. if isLinkStr(v) {
  547. // If we clearly see it is a link, we save it so
  548. // But first we need to ensure, that if both mandatory args provided
  549. // look like links, we stick to GitHub syntax
  550. if props["link"] != "" {
  551. props["name"] = props["link"]
  552. }
  553. props["link"] = strings.TrimSpace(v)
  554. } else {
  555. props["name"] = v
  556. }
  557. } else {
  558. props["link"] = strings.TrimSpace(v)
  559. }
  560. } else {
  561. // There is an equal; optional argument.
  562. sep := strings.IndexByte(v, '=')
  563. key, val := v[:sep], html.UnescapeString(v[sep+1:])
  564. // When parsing HTML, x/net/html will change all quotes which are
  565. // not used for syntax into UTF-8 quotes. So checking val[0] won't
  566. // be enough, since that only checks a single byte.
  567. if len(val) > 1 {
  568. if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
  569. (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
  570. const lenQuote = len("‘")
  571. val = val[lenQuote : len(val)-lenQuote]
  572. } else if (strings.HasPrefix(val, "\"") && strings.HasSuffix(val, "\"")) ||
  573. (strings.HasPrefix(val, "'") && strings.HasSuffix(val, "'")) {
  574. val = val[1 : len(val)-1]
  575. } else if strings.HasPrefix(val, "'") && strings.HasSuffix(val, "’") {
  576. const lenQuote = len("‘")
  577. val = val[1 : len(val)-lenQuote]
  578. }
  579. }
  580. props[key] = val
  581. }
  582. }
  583. var name, link string
  584. if props["link"] != "" {
  585. link = props["link"]
  586. } else if props["name"] != "" {
  587. link = props["name"]
  588. }
  589. if props["title"] != "" {
  590. name = props["title"]
  591. } else if props["name"] != "" {
  592. name = props["name"]
  593. } else {
  594. name = link
  595. }
  596. name += tail
  597. image := false
  598. switch ext := filepath.Ext(link); ext {
  599. // fast path: empty string, ignore
  600. case "":
  601. // leave image as false
  602. case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
  603. image = true
  604. }
  605. childNode := &html.Node{}
  606. linkNode := &html.Node{
  607. FirstChild: childNode,
  608. LastChild: childNode,
  609. Type: html.ElementNode,
  610. Data: "a",
  611. DataAtom: atom.A,
  612. }
  613. childNode.Parent = linkNode
  614. absoluteLink := isLinkStr(link)
  615. if !absoluteLink {
  616. if image {
  617. link = strings.ReplaceAll(link, " ", "+")
  618. } else {
  619. link = strings.ReplaceAll(link, " ", "-")
  620. }
  621. if !strings.Contains(link, "/") {
  622. link = url.PathEscape(link)
  623. }
  624. }
  625. urlPrefix := ctx.URLPrefix
  626. if image {
  627. if !absoluteLink {
  628. if IsSameDomain(urlPrefix) {
  629. urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
  630. }
  631. if ctx.IsWiki {
  632. link = util.URLJoin("wiki", "raw", link)
  633. }
  634. link = util.URLJoin(urlPrefix, link)
  635. }
  636. title := props["title"]
  637. if title == "" {
  638. title = props["alt"]
  639. }
  640. if title == "" {
  641. title = path.Base(name)
  642. }
  643. alt := props["alt"]
  644. if alt == "" {
  645. alt = name
  646. }
  647. // make the childNode an image - if we can, we also place the alt
  648. childNode.Type = html.ElementNode
  649. childNode.Data = "img"
  650. childNode.DataAtom = atom.Img
  651. childNode.Attr = []html.Attribute{
  652. {Key: "src", Val: link},
  653. {Key: "title", Val: title},
  654. {Key: "alt", Val: alt},
  655. }
  656. if alt == "" {
  657. childNode.Attr = childNode.Attr[:2]
  658. }
  659. } else {
  660. if !absoluteLink {
  661. if ctx.IsWiki {
  662. link = util.URLJoin("wiki", link)
  663. }
  664. link = util.URLJoin(urlPrefix, link)
  665. }
  666. childNode.Type = html.TextNode
  667. childNode.Data = name
  668. }
  669. if noLink {
  670. linkNode = childNode
  671. } else {
  672. linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
  673. }
  674. replaceContent(node, m[0], m[1], linkNode)
  675. node = node.NextSibling.NextSibling
  676. }
  677. }
  678. func fullIssuePatternProcessor(ctx *RenderContext, node *html.Node) {
  679. if ctx.Metas == nil {
  680. return
  681. }
  682. next := node.NextSibling
  683. for node != nil && node != next {
  684. m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
  685. if m == nil {
  686. return
  687. }
  688. link := node.Data[m[0]:m[1]]
  689. id := "#" + node.Data[m[2]:m[3]]
  690. // extract repo and org name from matched link like
  691. // http://localhost:3000/gituser/myrepo/issues/1
  692. linkParts := strings.Split(link, "/")
  693. matchOrg := linkParts[len(linkParts)-4]
  694. matchRepo := linkParts[len(linkParts)-3]
  695. if matchOrg == ctx.Metas["user"] && matchRepo == ctx.Metas["repo"] {
  696. // TODO if m[4]:m[5] is not nil, then link is to a comment,
  697. // and we should indicate that in the text somehow
  698. replaceContent(node, m[0], m[1], createLink(link, id, "ref-issue"))
  699. } else {
  700. orgRepoID := matchOrg + "/" + matchRepo + id
  701. replaceContent(node, m[0], m[1], createLink(link, orgRepoID, "ref-issue"))
  702. }
  703. node = node.NextSibling.NextSibling
  704. }
  705. }
  706. func issueIndexPatternProcessor(ctx *RenderContext, node *html.Node) {
  707. if ctx.Metas == nil {
  708. return
  709. }
  710. var (
  711. found bool
  712. ref *references.RenderizableReference
  713. )
  714. next := node.NextSibling
  715. for node != nil && node != next {
  716. _, exttrack := ctx.Metas["format"]
  717. alphanum := ctx.Metas["style"] == IssueNameStyleAlphanumeric
  718. // Repos with external issue trackers might still need to reference local PRs
  719. // We need to concern with the first one that shows up in the text, whichever it is
  720. found, ref = references.FindRenderizableReferenceNumeric(node.Data, exttrack && alphanum)
  721. if exttrack && alphanum {
  722. if found2, ref2 := references.FindRenderizableReferenceAlphanumeric(node.Data); found2 {
  723. if !found || ref2.RefLocation.Start < ref.RefLocation.Start {
  724. found = true
  725. ref = ref2
  726. }
  727. }
  728. }
  729. if !found {
  730. return
  731. }
  732. var link *html.Node
  733. reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
  734. if exttrack && !ref.IsPull {
  735. ctx.Metas["index"] = ref.Issue
  736. link = createLink(com.Expand(ctx.Metas["format"], ctx.Metas), reftext, "ref-issue ref-external-issue")
  737. } else {
  738. // Path determines the type of link that will be rendered. It's unknown at this point whether
  739. // the linked item is actually a PR or an issue. Luckily it's of no real consequence because
  740. // Gitea will redirect on click as appropriate.
  741. path := "issues"
  742. if ref.IsPull {
  743. path = "pulls"
  744. }
  745. if ref.Owner == "" {
  746. link = createLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], path, ref.Issue), reftext, "ref-issue")
  747. } else {
  748. link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, path, ref.Issue), reftext, "ref-issue")
  749. }
  750. }
  751. if ref.Action == references.XRefActionNone {
  752. replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
  753. node = node.NextSibling.NextSibling
  754. continue
  755. }
  756. // Decorate action keywords if actionable
  757. var keyword *html.Node
  758. if references.IsXrefActionable(ref, exttrack, alphanum) {
  759. keyword = createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
  760. } else {
  761. keyword = &html.Node{
  762. Type: html.TextNode,
  763. Data: node.Data[ref.ActionLocation.Start:ref.ActionLocation.End],
  764. }
  765. }
  766. spaces := &html.Node{
  767. Type: html.TextNode,
  768. Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
  769. }
  770. replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
  771. node = node.NextSibling.NextSibling.NextSibling.NextSibling
  772. }
  773. }
  774. // fullSha1PatternProcessor renders SHA containing URLs
  775. func fullSha1PatternProcessor(ctx *RenderContext, node *html.Node) {
  776. if ctx.Metas == nil {
  777. return
  778. }
  779. next := node.NextSibling
  780. for node != nil && node != next {
  781. m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
  782. if m == nil {
  783. return
  784. }
  785. urlFull := node.Data[m[0]:m[1]]
  786. text := base.ShortSha(node.Data[m[2]:m[3]])
  787. // 3rd capture group matches a optional path
  788. subpath := ""
  789. if m[5] > 0 {
  790. subpath = node.Data[m[4]:m[5]]
  791. }
  792. // 4th capture group matches a optional url hash
  793. hash := ""
  794. if m[7] > 0 {
  795. hash = node.Data[m[6]:m[7]][1:]
  796. }
  797. start := m[0]
  798. end := m[1]
  799. // If url ends in '.', it's very likely that it is not part of the
  800. // actual url but used to finish a sentence.
  801. if strings.HasSuffix(urlFull, ".") {
  802. end--
  803. urlFull = urlFull[:len(urlFull)-1]
  804. if hash != "" {
  805. hash = hash[:len(hash)-1]
  806. } else if subpath != "" {
  807. subpath = subpath[:len(subpath)-1]
  808. }
  809. }
  810. if subpath != "" {
  811. text += subpath
  812. }
  813. if hash != "" {
  814. text += " (" + hash + ")"
  815. }
  816. replaceContent(node, start, end, createCodeLink(urlFull, text, "commit"))
  817. node = node.NextSibling.NextSibling
  818. }
  819. }
  820. // emojiShortCodeProcessor for rendering text like :smile: into emoji
  821. func emojiShortCodeProcessor(ctx *RenderContext, node *html.Node) {
  822. start := 0
  823. next := node.NextSibling
  824. for node != nil && node != next && start < len(node.Data) {
  825. m := EmojiShortCodeRegex.FindStringSubmatchIndex(node.Data[start:])
  826. if m == nil {
  827. return
  828. }
  829. m[0] += start
  830. m[1] += start
  831. start = m[1]
  832. alias := node.Data[m[0]:m[1]]
  833. alias = strings.ReplaceAll(alias, ":", "")
  834. converted := emoji.FromAlias(alias)
  835. if converted == nil {
  836. // check if this is a custom reaction
  837. if _, exist := setting.UI.CustomEmojisMap[alias]; exist {
  838. replaceContent(node, m[0], m[1], createCustomEmoji(alias))
  839. node = node.NextSibling.NextSibling
  840. start = 0
  841. continue
  842. }
  843. continue
  844. }
  845. replaceContent(node, m[0], m[1], createEmoji(converted.Emoji, "emoji", converted.Description))
  846. node = node.NextSibling.NextSibling
  847. start = 0
  848. }
  849. }
  850. // emoji processor to match emoji and add emoji class
  851. func emojiProcessor(ctx *RenderContext, node *html.Node) {
  852. start := 0
  853. next := node.NextSibling
  854. for node != nil && node != next && start < len(node.Data) {
  855. m := emoji.FindEmojiSubmatchIndex(node.Data[start:])
  856. if m == nil {
  857. return
  858. }
  859. m[0] += start
  860. m[1] += start
  861. codepoint := node.Data[m[0]:m[1]]
  862. start = m[1]
  863. val := emoji.FromCode(codepoint)
  864. if val != nil {
  865. replaceContent(node, m[0], m[1], createEmoji(codepoint, "emoji", val.Description))
  866. node = node.NextSibling.NextSibling
  867. start = 0
  868. }
  869. }
  870. }
  871. // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
  872. // are assumed to be in the same repository.
  873. func sha1CurrentPatternProcessor(ctx *RenderContext, node *html.Node) {
  874. if ctx.Metas == nil || ctx.Metas["user"] == "" || ctx.Metas["repo"] == "" || ctx.Metas["repoPath"] == "" {
  875. return
  876. }
  877. start := 0
  878. next := node.NextSibling
  879. if ctx.ShaExistCache == nil {
  880. ctx.ShaExistCache = make(map[string]bool)
  881. }
  882. for node != nil && node != next && start < len(node.Data) {
  883. m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data[start:])
  884. if m == nil {
  885. return
  886. }
  887. m[2] += start
  888. m[3] += start
  889. hash := node.Data[m[2]:m[3]]
  890. // The regex does not lie, it matches the hash pattern.
  891. // However, a regex cannot know if a hash actually exists or not.
  892. // We could assume that a SHA1 hash should probably contain alphas AND numerics
  893. // but that is not always the case.
  894. // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
  895. // as used by git and github for linking and thus we have to do similar.
  896. // Because of this, we check to make sure that a matched hash is actually
  897. // a commit in the repository before making it a link.
  898. // check cache first
  899. exist, inCache := ctx.ShaExistCache[hash]
  900. if !inCache {
  901. if ctx.GitRepo == nil {
  902. var err error
  903. ctx.GitRepo, err = git.OpenRepository(ctx.Metas["repoPath"])
  904. if err != nil {
  905. log.Error("unable to open repository: %s Error: %v", ctx.Metas["repoPath"], err)
  906. return
  907. }
  908. ctx.AddCancel(func() {
  909. ctx.GitRepo.Close()
  910. ctx.GitRepo = nil
  911. })
  912. }
  913. exist = ctx.GitRepo.IsObjectExist(hash)
  914. ctx.ShaExistCache[hash] = exist
  915. }
  916. if !exist {
  917. start = m[3]
  918. continue
  919. }
  920. replaceContent(node, m[2], m[3],
  921. createCodeLink(util.URLJoin(setting.AppURL, ctx.Metas["user"], ctx.Metas["repo"], "commit", hash), base.ShortSha(hash), "commit"))
  922. start = 0
  923. node = node.NextSibling.NextSibling
  924. }
  925. }
  926. // emailAddressProcessor replaces raw email addresses with a mailto: link.
  927. func emailAddressProcessor(ctx *RenderContext, node *html.Node) {
  928. next := node.NextSibling
  929. for node != nil && node != next {
  930. m := emailRegex.FindStringSubmatchIndex(node.Data)
  931. if m == nil {
  932. return
  933. }
  934. mail := node.Data[m[2]:m[3]]
  935. replaceContent(node, m[2], m[3], createLink("mailto:"+mail, mail, "mailto"))
  936. node = node.NextSibling.NextSibling
  937. }
  938. }
  939. // linkProcessor creates links for any HTTP or HTTPS URL not captured by
  940. // markdown.
  941. func linkProcessor(ctx *RenderContext, node *html.Node) {
  942. next := node.NextSibling
  943. for node != nil && node != next {
  944. m := common.LinkRegex.FindStringIndex(node.Data)
  945. if m == nil {
  946. return
  947. }
  948. uri := node.Data[m[0]:m[1]]
  949. replaceContent(node, m[0], m[1], createLink(uri, uri, "link"))
  950. node = node.NextSibling.NextSibling
  951. }
  952. }
  953. func genDefaultLinkProcessor(defaultLink string) processor {
  954. return func(ctx *RenderContext, node *html.Node) {
  955. ch := &html.Node{
  956. Parent: node,
  957. Type: html.TextNode,
  958. Data: node.Data,
  959. }
  960. node.Type = html.ElementNode
  961. node.Data = "a"
  962. node.DataAtom = atom.A
  963. node.Attr = []html.Attribute{
  964. {Key: "href", Val: defaultLink},
  965. {Key: "class", Val: "default-link"},
  966. }
  967. node.FirstChild, node.LastChild = ch, ch
  968. }
  969. }
  970. // descriptionLinkProcessor creates links for DescriptionHTML
  971. func descriptionLinkProcessor(ctx *RenderContext, node *html.Node) {
  972. next := node.NextSibling
  973. for node != nil && node != next {
  974. m := common.LinkRegex.FindStringIndex(node.Data)
  975. if m == nil {
  976. return
  977. }
  978. uri := node.Data[m[0]:m[1]]
  979. replaceContent(node, m[0], m[1], createDescriptionLink(uri, uri))
  980. node = node.NextSibling.NextSibling
  981. }
  982. }
  983. func createDescriptionLink(href, content string) *html.Node {
  984. textNode := &html.Node{
  985. Type: html.TextNode,
  986. Data: content,
  987. }
  988. linkNode := &html.Node{
  989. FirstChild: textNode,
  990. LastChild: textNode,
  991. Type: html.ElementNode,
  992. Data: "a",
  993. DataAtom: atom.A,
  994. Attr: []html.Attribute{
  995. {Key: "href", Val: href},
  996. {Key: "target", Val: "_blank"},
  997. {Key: "rel", Val: "noopener noreferrer"},
  998. },
  999. }
  1000. textNode.Parent = linkNode
  1001. return linkNode
  1002. }