You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

html.go 19KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670
  1. // Copyright 2017 The Gitea Authors. All rights reserved.
  2. // Use of this source code is governed by a MIT-style
  3. // license that can be found in the LICENSE file.
  4. package markup
  5. import (
  6. "bytes"
  7. "net/url"
  8. "path"
  9. "path/filepath"
  10. "regexp"
  11. "strings"
  12. "code.gitea.io/gitea/modules/base"
  13. "code.gitea.io/gitea/modules/setting"
  14. "code.gitea.io/gitea/modules/util"
  15. "github.com/Unknwon/com"
  16. "golang.org/x/net/html"
  17. "golang.org/x/net/html/atom"
  18. )
  // Naming styles an issue reference can use.
  const (
  	// IssueNameStyleNumeric selects references of the form "#1287".
  	IssueNameStyleNumeric = "numeric"
  	// IssueNameStyleAlphanumeric selects references of the form "ABC-1234".
  	IssueNameStyleAlphanumeric = "alphanumeric"
  )
  // Patterns used to detect linkable references in rendered text.
  //
  // NOTE: none of these patterns validates that the referenced entity exists,
  // so a link is produced even for non-existent users, issues or commits.
  // This is fast, but it yields false positives.
  // TODO: fix invalid linking issue
  var (
  	// mentionPattern matches a user mention of the form "@user".
  	mentionPattern = regexp.MustCompile(`(?:\s|^|\W)(@[0-9a-zA-Z-_\.]+)`)

  	// issueNumericPattern matches a reference to a numeric issue, e.g. #1287.
  	issueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)(#[0-9]+)\b`)

  	// issueAlphanumericPattern matches a reference to an alphanumeric issue,
  	// e.g. ABC-1234.
  	issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\W)([A-Z]{1,10}-[1-9][0-9]*)\b`)

  	// crossReferenceIssueNumericPattern matches a numeric issue reference that
  	// names another repository, e.g. gogits/gogs#12345.
  	crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)\b`)

  	// sha1CurrentPattern matches a commit SHA such as
  	// d8a994ef243349f321568f9e36d5c3f444b99cae. A full SHA1 is 40 hex digits,
  	// but anything from 7 to 40 digits is accepted so that abbreviated hashes
  	// can be linked too, matching git and GitHub usability.
  	sha1CurrentPattern = regexp.MustCompile(`(?:\s|^|\W)([0-9a-f]{7,40})\b`)

  	// shortLinkPattern matches the short, but awkward to parse,
  	// [[name|link|arg=test]] syntax, plus any word characters directly
  	// following the closing brackets.
  	shortLinkPattern = regexp.MustCompile(`\[\[(.*?)\]\](\w*)`)

  	// anySHA1Pattern splits a URL that contains a full SHA into the hash, the
  	// optional path after it and the optional fragment.
  	anySHA1Pattern = regexp.MustCompile(`https?://(?:\S+/){4}([0-9a-f]{40})/?([^#\s]+)?(?:#(\S+))?`)

  	// validLinksPattern matches strings that start with a scheme followed by
  	// "://".
  	validLinksPattern = regexp.MustCompile(`^[a-z][\w-]+://`)

  	// emailRegex is not a perfect e-mail matcher and edge cases exist, but it
  	// is the expression accepted by both the CommonMark and HTML5 specs:
  	//   http://spec.commonmark.org/0.28/#email-address
  	//   https://html.spec.whatwg.org/multipage/input.html#e-mail-state-(type%3Demail)
  	emailRegex = regexp.MustCompile("[a-zA-Z0-9.!#$%&'*+\\/=?^_`{|}~-]+@[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?(?:\\.[a-zA-Z0-9](?:[a-zA-Z0-9-]{0,61}[a-zA-Z0-9])?)*")

  	// linkRegex matches http/https links and is used for autolinking them.
  	// It is a partly modified version of the expression found in autolink.js.
  	linkRegex = regexp.MustCompile(`(?:(?:http|https):\/\/(?:[\-;:&=\+\$,\w]+@)?[A-Za-z0-9\.\-]+|(?:www\.|[\-;:&=\+\$,\w]+@)[A-Za-z0-9\.\-]+)(?:(?:\/[\+~%\/\.\w\-]*)?\??(?:[\-\+:=&;%@\.\w]*)#?(?:[\.\!\/\\\w]*))?`)
  )
  // issueFullPattern caches the regexp for full links to issues/pulls. It
  // stays nil until getIssueFullPattern compiles it on first use.
  var issueFullPattern *regexp.Regexp
  59. // IsLink reports whether link fits valid format.
  60. func IsLink(link []byte) bool {
  61. return isLink(link)
  62. }
  63. // isLink reports whether link fits valid format.
  64. func isLink(link []byte) bool {
  65. return validLinksPattern.Match(link)
  66. }
  67. func isLinkStr(link string) bool {
  68. return validLinksPattern.MatchString(link)
  69. }
  70. func getIssueFullPattern() *regexp.Regexp {
  71. if issueFullPattern == nil {
  72. appURL := setting.AppURL
  73. if len(appURL) > 0 && appURL[len(appURL)-1] != '/' {
  74. appURL += "/"
  75. }
  76. issueFullPattern = regexp.MustCompile(appURL +
  77. `\w+/\w+/(?:issues|pulls)/((?:\w{1,10}-)?[1-9][0-9]*)([\?|#]\S+.(\S+)?)?\b`)
  78. }
  79. return issueFullPattern
  80. }
  81. // FindAllMentions matches mention patterns in given content
  82. // and returns a list of found user names without @ prefix.
  83. func FindAllMentions(content string) []string {
  84. mentions := mentionPattern.FindAllStringSubmatch(content, -1)
  85. ret := make([]string, len(mentions))
  86. for i, val := range mentions {
  87. ret[i] = val[1][1:]
  88. }
  89. return ret
  90. }
  91. // cutoutVerbosePrefix cutouts URL prefix including sub-path to
  92. // return a clean unified string of request URL path.
  93. func cutoutVerbosePrefix(prefix string) string {
  94. if len(prefix) == 0 || prefix[0] != '/' {
  95. return prefix
  96. }
  97. count := 0
  98. for i := 0; i < len(prefix); i++ {
  99. if prefix[i] == '/' {
  100. count++
  101. }
  102. if count >= 3+setting.AppSubURLDepth {
  103. return prefix[:i]
  104. }
  105. }
  106. return prefix
  107. }
  108. // IsSameDomain checks if given url string has the same hostname as current Gitea instance
  109. func IsSameDomain(s string) bool {
  110. if strings.HasPrefix(s, "/") {
  111. return true
  112. }
  113. if uapp, err := url.Parse(setting.AppURL); err == nil {
  114. if u, err := url.Parse(s); err == nil {
  115. return u.Host == uapp.Host
  116. }
  117. return false
  118. }
  119. return false
  120. }
  // postProcessError is the error type returned by the post-processing step.
  // It pairs a short description of the failing stage with the underlying
  // error.
  type postProcessError struct {
  	context string
  	err     error
  }

  // Error implements the error interface. The message is built from the
  // wrapped error; calling p.Error() here instead would recurse forever.
  func (p *postProcessError) Error() string {
  	if p.err == nil {
  		return "PostProcess: " + p.context
  	}
  	return "PostProcess: " + p.context + ", " + p.err.Error()
  }

  // Unwrap returns the underlying error so callers can inspect the cause.
  func (p *postProcessError) Unwrap() error {
  	return p.err
  }
  128. type processor func(ctx *postProcessCtx, node *html.Node)
  129. var defaultProcessors = []processor{
  130. mentionProcessor,
  131. shortLinkProcessor,
  132. fullIssuePatternProcessor,
  133. issueIndexPatternProcessor,
  134. crossReferenceIssueIndexPatternProcessor,
  135. fullSha1PatternProcessor,
  136. sha1CurrentPatternProcessor,
  137. emailAddressProcessor,
  138. linkProcessor,
  139. }
  140. type postProcessCtx struct {
  141. metas map[string]string
  142. urlPrefix string
  143. isWikiMarkdown bool
  144. // processors used by this context.
  145. procs []processor
  146. // if set to true, when an <a> is found, instead of just returning during
  147. // visitNode, it will recursively visit the node exclusively running
  148. // shortLinkProcessorFull with true.
  149. visitLinksForShortLinks bool
  150. }
  151. // PostProcess does the final required transformations to the passed raw HTML
  152. // data, and ensures its validity. Transformations include: replacing links and
  153. // emails with HTML links, parsing shortlinks in the format of [[Link]], like
  154. // MediaWiki, linking issues in the format #ID, and mentions in the format
  155. // @user, and others.
  156. func PostProcess(
  157. rawHTML []byte,
  158. urlPrefix string,
  159. metas map[string]string,
  160. isWikiMarkdown bool,
  161. ) ([]byte, error) {
  162. // create the context from the parameters
  163. ctx := &postProcessCtx{
  164. metas: metas,
  165. urlPrefix: urlPrefix,
  166. isWikiMarkdown: isWikiMarkdown,
  167. procs: defaultProcessors,
  168. visitLinksForShortLinks: true,
  169. }
  170. return ctx.postProcess(rawHTML)
  171. }
  172. var commitMessageProcessors = []processor{
  173. mentionProcessor,
  174. fullIssuePatternProcessor,
  175. issueIndexPatternProcessor,
  176. crossReferenceIssueIndexPatternProcessor,
  177. fullSha1PatternProcessor,
  178. sha1CurrentPatternProcessor,
  179. emailAddressProcessor,
  180. linkProcessor,
  181. }
  182. // RenderCommitMessage will use the same logic as PostProcess, but will disable
  183. // the shortLinkProcessor and will add a defaultLinkProcessor if defaultLink is
  184. // set, which changes every text node into a link to the passed default link.
  185. func RenderCommitMessage(
  186. rawHTML []byte,
  187. urlPrefix, defaultLink string,
  188. metas map[string]string,
  189. ) ([]byte, error) {
  190. ctx := &postProcessCtx{
  191. metas: metas,
  192. urlPrefix: urlPrefix,
  193. procs: commitMessageProcessors,
  194. }
  195. if defaultLink != "" {
  196. // we don't have to fear data races, because being
  197. // commitMessageProcessors of fixed len and cap, every time we append
  198. // something to it the slice is realloc+copied, so append always
  199. // generates the slice ex-novo.
  200. ctx.procs = append(ctx.procs, genDefaultLinkProcessor(defaultLink))
  201. }
  202. return ctx.postProcess(rawHTML)
  203. }
  // Opening and closing body tags that postProcess wraps around its input and
  // later uses to locate the processed content in the rendered output.
  var (
  	byteBodyTag        = []byte("<body>")
  	byteBodyTagClosing = []byte("</body>")
  )
  206. func (ctx *postProcessCtx) postProcess(rawHTML []byte) ([]byte, error) {
  207. if ctx.procs == nil {
  208. ctx.procs = defaultProcessors
  209. }
  210. // give a generous extra 50 bytes
  211. res := make([]byte, 0, len(rawHTML)+50)
  212. res = append(res, byteBodyTag...)
  213. res = append(res, rawHTML...)
  214. res = append(res, byteBodyTagClosing...)
  215. // parse the HTML
  216. nodes, err := html.ParseFragment(bytes.NewReader(res), nil)
  217. if err != nil {
  218. return nil, &postProcessError{"invalid HTML", err}
  219. }
  220. for _, node := range nodes {
  221. ctx.visitNode(node)
  222. }
  223. // Create buffer in which the data will be placed again. We know that the
  224. // length will be at least that of res; to spare a few alloc+copy, we
  225. // reuse res, resetting its length to 0.
  226. buf := bytes.NewBuffer(res[:0])
  227. // Render everything to buf.
  228. for _, node := range nodes {
  229. err = html.Render(buf, node)
  230. if err != nil {
  231. return nil, &postProcessError{"error rendering processed HTML", err}
  232. }
  233. }
  234. // remove initial parts - because Render creates a whole HTML page.
  235. res = buf.Bytes()
  236. res = res[bytes.Index(res, byteBodyTag)+len(byteBodyTag) : bytes.LastIndex(res, byteBodyTagClosing)]
  237. // Everything done successfully, return parsed data.
  238. return res, nil
  239. }
  240. func (ctx *postProcessCtx) visitNode(node *html.Node) {
  241. // We ignore code, pre and already generated links.
  242. switch node.Type {
  243. case html.TextNode:
  244. ctx.textNode(node)
  245. case html.ElementNode:
  246. if node.Data == "a" || node.Data == "code" || node.Data == "pre" {
  247. if node.Data == "a" && ctx.visitLinksForShortLinks {
  248. ctx.visitNodeForShortLinks(node)
  249. }
  250. return
  251. }
  252. for n := node.FirstChild; n != nil; n = n.NextSibling {
  253. ctx.visitNode(n)
  254. }
  255. }
  256. // ignore everything else
  257. }
  258. func (ctx *postProcessCtx) visitNodeForShortLinks(node *html.Node) {
  259. switch node.Type {
  260. case html.TextNode:
  261. shortLinkProcessorFull(ctx, node, true)
  262. case html.ElementNode:
  263. if node.Data == "code" || node.Data == "pre" {
  264. return
  265. }
  266. for n := node.FirstChild; n != nil; n = n.NextSibling {
  267. ctx.visitNodeForShortLinks(n)
  268. }
  269. }
  270. }
  271. // textNode runs the passed node through various processors, in order to handle
  272. // all kinds of special links handled by the post-processing.
  273. func (ctx *postProcessCtx) textNode(node *html.Node) {
  274. for _, processor := range ctx.procs {
  275. processor(ctx, node)
  276. }
  277. }
  278. func createLink(href, content string) *html.Node {
  279. textNode := &html.Node{
  280. Type: html.TextNode,
  281. Data: content,
  282. }
  283. linkNode := &html.Node{
  284. FirstChild: textNode,
  285. LastChild: textNode,
  286. Type: html.ElementNode,
  287. Data: "a",
  288. DataAtom: atom.A,
  289. Attr: []html.Attribute{
  290. {Key: "href", Val: href},
  291. },
  292. }
  293. textNode.Parent = linkNode
  294. return linkNode
  295. }
  296. // replaceContent takes a text node, and in its content it replaces a section of
  297. // it with the specified newNode. An example to visualize how this can work can
  298. // be found here: https://play.golang.org/p/5zP8NnHZ03s
  299. func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
  300. // get the data before and after the match
  301. before := node.Data[:i]
  302. after := node.Data[j:]
  303. // Replace in the current node the text, so that it is only what it is
  304. // supposed to have.
  305. node.Data = before
  306. // Get the current next sibling, before which we place the replaced data,
  307. // and after that we place the new text node.
  308. nextSibling := node.NextSibling
  309. node.Parent.InsertBefore(newNode, nextSibling)
  310. if after != "" {
  311. node.Parent.InsertBefore(&html.Node{
  312. Type: html.TextNode,
  313. Data: after,
  314. }, nextSibling)
  315. }
  316. }
  317. func mentionProcessor(_ *postProcessCtx, node *html.Node) {
  318. m := mentionPattern.FindStringSubmatchIndex(node.Data)
  319. if m == nil {
  320. return
  321. }
  322. // Replace the mention with a link to the specified user.
  323. mention := node.Data[m[2]:m[3]]
  324. replaceContent(node, m[2], m[3], createLink(util.URLJoin(setting.AppURL, mention[1:]), mention))
  325. }
  326. func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) {
  327. shortLinkProcessorFull(ctx, node, false)
  328. }
  329. func shortLinkProcessorFull(ctx *postProcessCtx, node *html.Node, noLink bool) {
  330. m := shortLinkPattern.FindStringSubmatchIndex(node.Data)
  331. if m == nil {
  332. return
  333. }
  334. content := node.Data[m[2]:m[3]]
  335. tail := node.Data[m[4]:m[5]]
  336. props := make(map[string]string)
  337. // MediaWiki uses [[link|text]], while GitHub uses [[text|link]]
  338. // It makes page handling terrible, but we prefer GitHub syntax
  339. // And fall back to MediaWiki only when it is obvious from the look
  340. // Of text and link contents
  341. sl := strings.Split(content, "|")
  342. for _, v := range sl {
  343. if equalPos := strings.IndexByte(v, '='); equalPos == -1 {
  344. // There is no equal in this argument; this is a mandatory arg
  345. if props["name"] == "" {
  346. if isLinkStr(v) {
  347. // If we clearly see it is a link, we save it so
  348. // But first we need to ensure, that if both mandatory args provided
  349. // look like links, we stick to GitHub syntax
  350. if props["link"] != "" {
  351. props["name"] = props["link"]
  352. }
  353. props["link"] = strings.TrimSpace(v)
  354. } else {
  355. props["name"] = v
  356. }
  357. } else {
  358. props["link"] = strings.TrimSpace(v)
  359. }
  360. } else {
  361. // There is an equal; optional argument.
  362. sep := strings.IndexByte(v, '=')
  363. key, val := v[:sep], html.UnescapeString(v[sep+1:])
  364. // When parsing HTML, x/net/html will change all quotes which are
  365. // not used for syntax into UTF-8 quotes. So checking val[0] won't
  366. // be enough, since that only checks a single byte.
  367. if (strings.HasPrefix(val, "“") && strings.HasSuffix(val, "”")) ||
  368. (strings.HasPrefix(val, "‘") && strings.HasSuffix(val, "’")) {
  369. const lenQuote = len("‘")
  370. val = val[lenQuote : len(val)-lenQuote]
  371. }
  372. props[key] = val
  373. }
  374. }
  375. var name, link string
  376. if props["link"] != "" {
  377. link = props["link"]
  378. } else if props["name"] != "" {
  379. link = props["name"]
  380. }
  381. if props["title"] != "" {
  382. name = props["title"]
  383. } else if props["name"] != "" {
  384. name = props["name"]
  385. } else {
  386. name = link
  387. }
  388. name += tail
  389. image := false
  390. switch ext := filepath.Ext(string(link)); ext {
  391. // fast path: empty string, ignore
  392. case "":
  393. break
  394. case ".jpg", ".jpeg", ".png", ".tif", ".tiff", ".webp", ".gif", ".bmp", ".ico", ".svg":
  395. image = true
  396. }
  397. childNode := &html.Node{}
  398. linkNode := &html.Node{
  399. FirstChild: childNode,
  400. LastChild: childNode,
  401. Type: html.ElementNode,
  402. Data: "a",
  403. DataAtom: atom.A,
  404. }
  405. childNode.Parent = linkNode
  406. absoluteLink := isLinkStr(link)
  407. if !absoluteLink {
  408. if image {
  409. link = strings.Replace(link, " ", "+", -1)
  410. } else {
  411. link = strings.Replace(link, " ", "-", -1)
  412. }
  413. if !strings.Contains(link, "/") {
  414. link = url.PathEscape(link)
  415. }
  416. }
  417. urlPrefix := ctx.urlPrefix
  418. if image {
  419. if !absoluteLink {
  420. if IsSameDomain(urlPrefix) {
  421. urlPrefix = strings.Replace(urlPrefix, "/src/", "/raw/", 1)
  422. }
  423. if ctx.isWikiMarkdown {
  424. link = util.URLJoin("wiki", "raw", link)
  425. }
  426. link = util.URLJoin(urlPrefix, link)
  427. }
  428. title := props["title"]
  429. if title == "" {
  430. title = props["alt"]
  431. }
  432. if title == "" {
  433. title = path.Base(string(name))
  434. }
  435. alt := props["alt"]
  436. if alt == "" {
  437. alt = name
  438. }
  439. // make the childNode an image - if we can, we also place the alt
  440. childNode.Type = html.ElementNode
  441. childNode.Data = "img"
  442. childNode.DataAtom = atom.Img
  443. childNode.Attr = []html.Attribute{
  444. {Key: "src", Val: link},
  445. {Key: "title", Val: title},
  446. {Key: "alt", Val: alt},
  447. }
  448. if alt == "" {
  449. childNode.Attr = childNode.Attr[:2]
  450. }
  451. } else {
  452. if !absoluteLink {
  453. if ctx.isWikiMarkdown {
  454. link = util.URLJoin("wiki", link)
  455. }
  456. link = util.URLJoin(urlPrefix, link)
  457. }
  458. childNode.Type = html.TextNode
  459. childNode.Data = name
  460. }
  461. if noLink {
  462. linkNode = childNode
  463. } else {
  464. linkNode.Attr = []html.Attribute{{Key: "href", Val: link}}
  465. }
  466. replaceContent(node, m[0], m[1], linkNode)
  467. }
  468. func fullIssuePatternProcessor(ctx *postProcessCtx, node *html.Node) {
  469. m := getIssueFullPattern().FindStringSubmatchIndex(node.Data)
  470. if m == nil {
  471. return
  472. }
  473. link := node.Data[m[0]:m[1]]
  474. id := "#" + node.Data[m[2]:m[3]]
  475. // TODO if m[4]:m[5] is not nil, then link is to a comment,
  476. // and we should indicate that in the text somehow
  477. replaceContent(node, m[0], m[1], createLink(link, id))
  478. }
  479. func issueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
  480. prefix := cutoutVerbosePrefix(ctx.urlPrefix)
  481. // default to numeric pattern, unless alphanumeric is requested.
  482. pattern := issueNumericPattern
  483. if ctx.metas["style"] == IssueNameStyleAlphanumeric {
  484. pattern = issueAlphanumericPattern
  485. }
  486. match := pattern.FindStringSubmatchIndex(node.Data)
  487. if match == nil {
  488. return
  489. }
  490. id := node.Data[match[2]:match[3]]
  491. var link *html.Node
  492. if ctx.metas == nil {
  493. link = createLink(util.URLJoin(prefix, "issues", id[1:]), id)
  494. } else {
  495. // Support for external issue tracker
  496. if ctx.metas["style"] == IssueNameStyleAlphanumeric {
  497. ctx.metas["index"] = id
  498. } else {
  499. ctx.metas["index"] = id[1:]
  500. }
  501. link = createLink(com.Expand(ctx.metas["format"], ctx.metas), id)
  502. }
  503. replaceContent(node, match[2], match[3], link)
  504. }
  505. func crossReferenceIssueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
  506. m := crossReferenceIssueNumericPattern.FindStringSubmatchIndex(node.Data)
  507. if m == nil {
  508. return
  509. }
  510. ref := node.Data[m[2]:m[3]]
  511. parts := strings.SplitN(ref, "#", 2)
  512. repo, issue := parts[0], parts[1]
  513. replaceContent(node, m[2], m[3],
  514. createLink(util.URLJoin(setting.AppURL, repo, "issues", issue), ref))
  515. }
  516. // fullSha1PatternProcessor renders SHA containing URLs
  517. func fullSha1PatternProcessor(ctx *postProcessCtx, node *html.Node) {
  518. m := anySHA1Pattern.FindStringSubmatchIndex(node.Data)
  519. if m == nil {
  520. return
  521. }
  522. // take out what's relevant
  523. urlFull := node.Data[m[0]:m[1]]
  524. hash := node.Data[m[2]:m[3]]
  525. var subtree, line string
  526. // optional, we do them depending on the length.
  527. if m[7] > 0 {
  528. line = node.Data[m[6]:m[7]]
  529. }
  530. if m[5] > 0 {
  531. subtree = node.Data[m[4]:m[5]]
  532. }
  533. text := base.ShortSha(hash)
  534. if subtree != "" {
  535. text += "/" + subtree
  536. }
  537. if line != "" {
  538. text += " ("
  539. text += line
  540. text += ")"
  541. }
  542. replaceContent(node, m[0], m[1], createLink(urlFull, text))
  543. }
  544. // sha1CurrentPatternProcessor renders SHA1 strings to corresponding links that
  545. // are assumed to be in the same repository.
  546. func sha1CurrentPatternProcessor(ctx *postProcessCtx, node *html.Node) {
  547. m := sha1CurrentPattern.FindStringSubmatchIndex(node.Data)
  548. if m == nil {
  549. return
  550. }
  551. hash := node.Data[m[2]:m[3]]
  552. // The regex does not lie, it matches the hash pattern.
  553. // However, a regex cannot know if a hash actually exists or not.
  554. // We could assume that a SHA1 hash should probably contain alphas AND numerics
  555. // but that is not always the case.
  556. // Although unlikely, deadbeef and 1234567 are valid short forms of SHA1 hash
  557. // as used by git and github for linking and thus we have to do similar.
  558. replaceContent(node, m[2], m[3],
  559. createLink(util.URLJoin(ctx.urlPrefix, "commit", hash), base.ShortSha(hash)))
  560. }
  561. // emailAddressProcessor replaces raw email addresses with a mailto: link.
  562. func emailAddressProcessor(ctx *postProcessCtx, node *html.Node) {
  563. m := emailRegex.FindStringIndex(node.Data)
  564. if m == nil {
  565. return
  566. }
  567. mail := node.Data[m[0]:m[1]]
  568. replaceContent(node, m[0], m[1], createLink("mailto:"+mail, mail))
  569. }
  570. // linkProcessor creates links for any HTTP or HTTPS URL not captured by
  571. // markdown.
  572. func linkProcessor(ctx *postProcessCtx, node *html.Node) {
  573. m := linkRegex.FindStringIndex(node.Data)
  574. if m == nil {
  575. return
  576. }
  577. uri := node.Data[m[0]:m[1]]
  578. replaceContent(node, m[0], m[1], createLink(uri, uri))
  579. }
  580. func genDefaultLinkProcessor(defaultLink string) processor {
  581. return func(ctx *postProcessCtx, node *html.Node) {
  582. ch := &html.Node{
  583. Parent: node,
  584. Type: html.TextNode,
  585. Data: node.Data,
  586. }
  587. node.Type = html.ElementNode
  588. node.Data = "a"
  589. node.DataAtom = atom.A
  590. node.Attr = []html.Attribute{{Key: "href", Val: defaultLink}}
  591. node.FirstChild, node.LastChild = ch, ch
  592. }
  593. }