aboutsummaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
Diffstat (limited to 'modules')
-rw-r--r-- modules/markup/html.go 124
-rw-r--r-- modules/markup/html_internal_test.go 92
-rw-r--r-- modules/markup/mdstripper/mdstripper.go 260
-rw-r--r-- modules/markup/mdstripper/mdstripper_test.go 71
-rw-r--r-- modules/markup/sanitizer.go 3
-rw-r--r-- modules/references/references.go 322
-rw-r--r-- modules/references/references_test.go 296
7 files changed, 1016 insertions, 152 deletions
diff --git a/modules/markup/html.go b/modules/markup/html.go
index f07993bc4c..fc823b1f30 100644
--- a/modules/markup/html.go
+++ b/modules/markup/html.go
@@ -15,6 +15,7 @@ import (
"code.gitea.io/gitea/modules/base"
"code.gitea.io/gitea/modules/git"
"code.gitea.io/gitea/modules/log"
+ "code.gitea.io/gitea/modules/references"
"code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
@@ -36,17 +37,6 @@ var (
// While fast, this is also incorrect and lead to false positives.
// TODO: fix invalid linking issue
- // mentionPattern matches all mentions in the form of "@user"
- mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`)
-
- // issueNumericPattern matches string that references to a numeric issue, e.g. #1287
- issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`)
- // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234
- issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
- // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository
- // e.g. gogits/gogs#12345
- crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
-
// sha1CurrentPattern matches string that represents a commit SHA, e.g. d8a994ef243349f321568f9e36d5c3f444b99cae
// Although SHA1 hashes are 40 chars long, the regex matches the hash from 7 to 40 chars in length
// so that abbreviated hash links can be used as well. This matches git and github useability.
@@ -70,6 +60,9 @@ var (
linkRegex, _ = xurls.StrictMatchingScheme("https?://")
)
+// keywordClass is the CSS class for action keywords (e.g. "closes: #1")
+const keywordClass = "issue-keyword"
+
// regexp for full links to issues/pulls
var issueFullPattern *regexp.Regexp
@@ -99,17 +92,6 @@ func getIssueFullPattern() *regexp.Regexp {
return issueFullPattern
}
-// FindAllMentions matches mention patterns in given content
-// and returns a list of found user names without @ prefix.
-func FindAllMentions(content string) []string {
- mentions := mentionPattern.FindAllStringSubmatch(content, -1)
- ret := make([]string, len(mentions))
- for i, val := range mentions {
- ret[i] = val[1][1:]
- }
- return ret
-}
-
// IsSameDomain checks if given url string has the same hostname as current Gitea instance
func IsSameDomain(s string) bool {
if strings.HasPrefix(s, "/") {
@@ -142,7 +124,6 @@ var defaultProcessors = []processor{
linkProcessor,
mentionProcessor,
issueIndexPatternProcessor,
- crossReferenceIssueIndexPatternProcessor,
sha1CurrentPatternProcessor,
emailAddressProcessor,
}
@@ -183,7 +164,6 @@ var commitMessageProcessors = []processor{
linkProcessor,
mentionProcessor,
issueIndexPatternProcessor,
- crossReferenceIssueIndexPatternProcessor,
sha1CurrentPatternProcessor,
emailAddressProcessor,
}
@@ -217,7 +197,6 @@ var commitMessageSubjectProcessors = []processor{
linkProcessor,
mentionProcessor,
issueIndexPatternProcessor,
- crossReferenceIssueIndexPatternProcessor,
sha1CurrentPatternProcessor,
}
@@ -330,6 +309,24 @@ func (ctx *postProcessCtx) textNode(node *html.Node) {
}
}
+// createKeyword renders a highlighted version of an action keyword
+func createKeyword(content string) *html.Node {
+ span := &html.Node{
+ Type: html.ElementNode,
+ Data: atom.Span.String(),
+ Attr: []html.Attribute{},
+ }
+ span.Attr = append(span.Attr, html.Attribute{Key: "class", Val: keywordClass})
+
+ text := &html.Node{
+ Type: html.TextNode,
+ Data: content,
+ }
+ span.AppendChild(text)
+
+ return span
+}
+
func createLink(href, content, class string) *html.Node {
a := &html.Node{
Type: html.ElementNode,
@@ -377,10 +374,16 @@ func createCodeLink(href, content, class string) *html.Node {
return a
}
-// replaceContent takes a text node, and in its content it replaces a section of
-// it with the specified newNode. An example to visualize how this can work can
-// be found here: https://play.golang.org/p/5zP8NnHZ03s
+// replaceContent takes a text node, and in its content it replaces a section of
+// it with the specified newNode.
func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
+ replaceContentList(node, i, j, []*html.Node{newNode})
+}
+
+// replaceContentList takes a text node, and in its content it replaces a section of
+// it with the specified newNodes. An example to visualize how this can work can
+// be found here: https://play.golang.org/p/5zP8NnHZ03s
+func replaceContentList(node *html.Node, i, j int, newNodes []*html.Node) {
// get the data before and after the match
before := node.Data[:i]
after := node.Data[j:]
@@ -392,7 +395,9 @@ func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
// Get the current next sibling, before which we place the replaced data,
// and after that we place the new text node.
nextSibling := node.NextSibling
- node.Parent.InsertBefore(newNode, nextSibling)
+ for _, n := range newNodes {
+ node.Parent.InsertBefore(n, nextSibling)
+ }
if after != "" {
node.Parent.InsertBefore(&html.Node{
Type: html.TextNode,
@@ -402,13 +407,13 @@ func replaceContent(node *html.Node, i, j int, newNode *html.Node) {
}
func mentionProcessor(_ *postProcessCtx, node *html.Node) {
- m := mentionPattern.FindStringSubmatchIndex(node.Data)
- if m == nil {
+ // We replace only the first mention; other mentions will be addressed later
+ found, loc := references.FindFirstMentionBytes([]byte(node.Data))
+ if !found {
return
}
- // Replace the mention with a link to the specified user.
- mention := node.Data[m[2]:m[3]]
- replaceContent(node, m[2], m[3], createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
+ mention := node.Data[loc.Start:loc.End]
+ replaceContent(node, loc.Start, loc.End, createLink(util.URLJoin(setting.AppURL, mention[1:]), mention, "mention"))
}
func shortLinkProcessor(ctx *postProcessCtx, node *html.Node) {
@@ -597,45 +602,44 @@ func issueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
if ctx.metas == nil {
return
}
- // default to numeric pattern, unless alphanumeric is requested.
- pattern := issueNumericPattern
+
+ var (
+ found bool
+ ref *references.RenderizableReference
+ )
+
if ctx.metas["style"] == IssueNameStyleAlphanumeric {
- pattern = issueAlphanumericPattern
+ found, ref = references.FindRenderizableReferenceAlphanumeric(node.Data)
+ } else {
+ found, ref = references.FindRenderizableReferenceNumeric(node.Data)
}
-
- match := pattern.FindStringSubmatchIndex(node.Data)
- if match == nil {
+ if !found {
return
}
- id := node.Data[match[2]:match[3]]
var link *html.Node
+ reftext := node.Data[ref.RefLocation.Start:ref.RefLocation.End]
if _, ok := ctx.metas["format"]; ok {
- // Support for external issue tracker
- if ctx.metas["style"] == IssueNameStyleAlphanumeric {
- ctx.metas["index"] = id
- } else {
- ctx.metas["index"] = id[1:]
- }
- link = createLink(com.Expand(ctx.metas["format"], ctx.metas), id, "issue")
+ ctx.metas["index"] = ref.Issue
+ link = createLink(com.Expand(ctx.metas["format"], ctx.metas), reftext, "issue")
+ } else if ref.Owner == "" {
+ link = createLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], "issues", ref.Issue), reftext, "issue")
} else {
- link = createLink(util.URLJoin(setting.AppURL, ctx.metas["user"], ctx.metas["repo"], "issues", id[1:]), id, "issue")
+ link = createLink(util.URLJoin(setting.AppURL, ref.Owner, ref.Name, "issues", ref.Issue), reftext, "issue")
}
- replaceContent(node, match[2], match[3], link)
-}
-func crossReferenceIssueIndexPatternProcessor(ctx *postProcessCtx, node *html.Node) {
- m := crossReferenceIssueNumericPattern.FindStringSubmatchIndex(node.Data)
- if m == nil {
+ if ref.Action == references.XRefActionNone {
+ replaceContent(node, ref.RefLocation.Start, ref.RefLocation.End, link)
return
}
- ref := node.Data[m[2]:m[3]]
- parts := strings.SplitN(ref, "#", 2)
- repo, issue := parts[0], parts[1]
-
- replaceContent(node, m[2], m[3],
- createLink(util.URLJoin(setting.AppURL, repo, "issues", issue), ref, issue))
+ // Decorate action keywords
+ keyword := createKeyword(node.Data[ref.ActionLocation.Start:ref.ActionLocation.End])
+ spaces := &html.Node{
+ Type: html.TextNode,
+ Data: node.Data[ref.ActionLocation.End:ref.RefLocation.Start],
+ }
+ replaceContentList(node, ref.ActionLocation.Start, ref.RefLocation.End, []*html.Node{keyword, spaces, link})
}
// fullSha1PatternProcessor renders SHA containing URLs
diff --git a/modules/markup/html_internal_test.go b/modules/markup/html_internal_test.go
index 2824ce3e68..9722063e17 100644
--- a/modules/markup/html_internal_test.go
+++ b/modules/markup/html_internal_test.go
@@ -239,34 +239,6 @@ func TestRender_FullIssueURLs(t *testing.T) {
`<a href="http://localhost:3000/gogits/gogs/issues/4" class="issue">#4</a>`)
}
-func TestRegExp_issueNumericPattern(t *testing.T) {
- trueTestCases := []string{
- "#1234",
- "#0",
- "#1234567890987654321",
- " #12",
- "#12:",
- "ref: #12: msg",
- }
- falseTestCases := []string{
- "# 1234",
- "# 0",
- "# ",
- "#",
- "#ABC",
- "#1A2B",
- "",
- "ABC",
- }
-
- for _, testCase := range trueTestCases {
- assert.True(t, issueNumericPattern.MatchString(testCase))
- }
- for _, testCase := range falseTestCases {
- assert.False(t, issueNumericPattern.MatchString(testCase))
- }
-}
-
func TestRegExp_sha1CurrentPattern(t *testing.T) {
trueTestCases := []string{
"d8a994ef243349f321568f9e36d5c3f444b99cae",
@@ -325,70 +297,6 @@ func TestRegExp_anySHA1Pattern(t *testing.T) {
}
}
-func TestRegExp_mentionPattern(t *testing.T) {
- trueTestCases := []string{
- "@Unknwon",
- "@ANT_123",
- "@xxx-DiN0-z-A..uru..s-xxx",
- " @lol ",
- " @Te-st",
- "(@gitea)",
- "[@gitea]",
- }
- falseTestCases := []string{
- "@ 0",
- "@ ",
- "@",
- "",
- "ABC",
- "/home/gitea/@gitea",
- "\"@gitea\"",
- }
-
- for _, testCase := range trueTestCases {
- res := mentionPattern.MatchString(testCase)
- assert.True(t, res)
- }
- for _, testCase := range falseTestCases {
- res := mentionPattern.MatchString(testCase)
- assert.False(t, res)
- }
-}
-
-func TestRegExp_issueAlphanumericPattern(t *testing.T) {
- trueTestCases := []string{
- "ABC-1234",
- "A-1",
- "RC-80",
- "ABCDEFGHIJ-1234567890987654321234567890",
- "ABC-123.",
- "(ABC-123)",
- "[ABC-123]",
- "ABC-123:",
- }
- falseTestCases := []string{
- "RC-08",
- "PR-0",
- "ABCDEFGHIJK-1",
- "PR_1",
- "",
- "#ABC",
- "",
- "ABC",
- "GG-",
- "rm-1",
- "/home/gitea/ABC-1234",
- "MY-STRING-ABC-123",
- }
-
- for _, testCase := range trueTestCases {
- assert.True(t, issueAlphanumericPattern.MatchString(testCase))
- }
- for _, testCase := range falseTestCases {
- assert.False(t, issueAlphanumericPattern.MatchString(testCase))
- }
-}
-
func TestRegExp_shortLinkPattern(t *testing.T) {
trueTestCases := []string{
"[[stuff]]",
diff --git a/modules/markup/mdstripper/mdstripper.go b/modules/markup/mdstripper/mdstripper.go
new file mode 100644
index 0000000000..7a901b17a9
--- /dev/null
+++ b/modules/markup/mdstripper/mdstripper.go
@@ -0,0 +1,260 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package mdstripper
+
+import (
+ "bytes"
+
+ "github.com/russross/blackfriday"
+)
+
+// MarkdownStripper extends blackfriday.Renderer
+type MarkdownStripper struct {
+ blackfriday.Renderer
+ links []string
+ coallesce bool
+}
+
+const (
+ blackfridayExtensions = 0 |
+ blackfriday.EXTENSION_NO_INTRA_EMPHASIS |
+ blackfriday.EXTENSION_TABLES |
+ blackfriday.EXTENSION_FENCED_CODE |
+ blackfriday.EXTENSION_STRIKETHROUGH |
+ blackfriday.EXTENSION_NO_EMPTY_LINE_BEFORE_BLOCK |
+ blackfriday.EXTENSION_DEFINITION_LISTS |
+ blackfriday.EXTENSION_FOOTNOTES |
+ blackfriday.EXTENSION_HEADER_IDS |
+ blackfriday.EXTENSION_AUTO_HEADER_IDS |
+ // Not included in modules/markup/markdown/markdown.go;
+ // required here to process inline links
+ blackfriday.EXTENSION_AUTOLINK
+)
+
+//revive:disable:var-naming Implementing the Rendering interface requires breaking some linting rules
+
+// StripMarkdown parses markdown content by removing all markup and code blocks
+// in order to extract links and other references
+func StripMarkdown(rawBytes []byte) (string, []string) {
+ stripper := &MarkdownStripper{
+ links: make([]string, 0, 10),
+ }
+ body := blackfriday.Markdown(rawBytes, stripper, blackfridayExtensions)
+ return string(body), stripper.GetLinks()
+}
+
+// StripMarkdownBytes parses markdown content by removing all markup and code blocks
+// in order to extract links and other references
+func StripMarkdownBytes(rawBytes []byte) ([]byte, []string) {
+ stripper := &MarkdownStripper{
+ links: make([]string, 0, 10),
+ }
+ body := blackfriday.Markdown(rawBytes, stripper, blackfridayExtensions)
+ return body, stripper.GetLinks()
+}
+
+// block-level callbacks
+
+// BlockCode dummy function to proceed with rendering
+func (r *MarkdownStripper) BlockCode(out *bytes.Buffer, text []byte, infoString string) {
+ // Not rendered
+ r.coallesce = false
+}
+
+// BlockQuote dummy function to proceed with rendering
+func (r *MarkdownStripper) BlockQuote(out *bytes.Buffer, text []byte) {
+ // FIXME: perhaps it's better to leave out block quote for this?
+ r.processString(out, text, false)
+}
+
+// BlockHtml dummy function to proceed with rendering
+func (r *MarkdownStripper) BlockHtml(out *bytes.Buffer, text []byte) { //nolint
+ // Not rendered
+ r.coallesce = false
+}
+
+// Header dummy function to proceed with rendering
+func (r *MarkdownStripper) Header(out *bytes.Buffer, text func() bool, level int, id string) {
+ text()
+ r.coallesce = false
+}
+
+// HRule dummy function to proceed with rendering
+func (r *MarkdownStripper) HRule(out *bytes.Buffer) {
+ // Not rendered
+ r.coallesce = false
+}
+
+// List dummy function to proceed with rendering
+func (r *MarkdownStripper) List(out *bytes.Buffer, text func() bool, flags int) {
+ text()
+ r.coallesce = false
+}
+
+// ListItem dummy function to proceed with rendering
+func (r *MarkdownStripper) ListItem(out *bytes.Buffer, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// Paragraph dummy function to proceed with rendering
+func (r *MarkdownStripper) Paragraph(out *bytes.Buffer, text func() bool) {
+ text()
+ r.coallesce = false
+}
+
+// Table dummy function to proceed with rendering
+func (r *MarkdownStripper) Table(out *bytes.Buffer, header []byte, body []byte, columnData []int) {
+ r.processString(out, header, false)
+ r.processString(out, body, false)
+}
+
+// TableRow dummy function to proceed with rendering
+func (r *MarkdownStripper) TableRow(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// TableHeaderCell dummy function to proceed with rendering
+func (r *MarkdownStripper) TableHeaderCell(out *bytes.Buffer, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// TableCell dummy function to proceed with rendering
+func (r *MarkdownStripper) TableCell(out *bytes.Buffer, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// Footnotes dummy function to proceed with rendering
+func (r *MarkdownStripper) Footnotes(out *bytes.Buffer, text func() bool) {
+ text()
+}
+
+// FootnoteItem dummy function to proceed with rendering
+func (r *MarkdownStripper) FootnoteItem(out *bytes.Buffer, name, text []byte, flags int) {
+ r.processString(out, text, false)
+}
+
+// TitleBlock dummy function to proceed with rendering
+func (r *MarkdownStripper) TitleBlock(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// Span-level callbacks
+
+// AutoLink records an automatically detected link for out-of-band processing; nothing is written to the output
+func (r *MarkdownStripper) AutoLink(out *bytes.Buffer, link []byte, kind int) {
+ r.processLink(out, link, []byte{})
+}
+
+// CodeSpan dummy function to proceed with rendering
+func (r *MarkdownStripper) CodeSpan(out *bytes.Buffer, text []byte) {
+ // Not rendered
+ r.coallesce = false
+}
+
+// DoubleEmphasis dummy function to proceed with rendering
+func (r *MarkdownStripper) DoubleEmphasis(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// Emphasis dummy function to proceed with rendering
+func (r *MarkdownStripper) Emphasis(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// Image dummy function to proceed with rendering
+func (r *MarkdownStripper) Image(out *bytes.Buffer, link []byte, title []byte, alt []byte) {
+ // Not rendered
+ r.coallesce = false
+}
+
+// LineBreak dummy function to proceed with rendering
+func (r *MarkdownStripper) LineBreak(out *bytes.Buffer) {
+ // Not rendered
+ r.coallesce = false
+}
+
+// Link records the link target for out-of-band processing; neither the link nor its content is written to the output
+func (r *MarkdownStripper) Link(out *bytes.Buffer, link []byte, title []byte, content []byte) {
+ r.processLink(out, link, content)
+}
+
+// RawHtmlTag dummy function to proceed with rendering
+func (r *MarkdownStripper) RawHtmlTag(out *bytes.Buffer, tag []byte) { //nolint
+ // Not rendered
+ r.coallesce = false
+}
+
+// TripleEmphasis dummy function to proceed with rendering
+func (r *MarkdownStripper) TripleEmphasis(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// StrikeThrough dummy function to proceed with rendering
+func (r *MarkdownStripper) StrikeThrough(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, false)
+}
+
+// FootnoteRef dummy function to proceed with rendering
+func (r *MarkdownStripper) FootnoteRef(out *bytes.Buffer, ref []byte, id int) {
+ // Not rendered
+ r.coallesce = false
+}
+
+// Low-level callbacks
+
+// Entity dummy function to proceed with rendering
+func (r *MarkdownStripper) Entity(out *bytes.Buffer, entity []byte) {
+ // FIXME: literal entities are not parsed; perhaps they should
+ r.coallesce = false
+}
+
+// NormalText writes plain text to the output, coalescing it with an immediately preceding run of normal text
+func (r *MarkdownStripper) NormalText(out *bytes.Buffer, text []byte) {
+ r.processString(out, text, true)
+}
+
+// Header and footer
+
+// DocumentHeader dummy function to proceed with rendering
+func (r *MarkdownStripper) DocumentHeader(out *bytes.Buffer) {
+ r.coallesce = false
+}
+
+// DocumentFooter dummy function to proceed with rendering
+func (r *MarkdownStripper) DocumentFooter(out *bytes.Buffer) {
+ r.coallesce = false
+}
+
+// GetFlags returns rendering flags
+func (r *MarkdownStripper) GetFlags() int {
+ return 0
+}
+
+//revive:enable:var-naming
+
+func doubleSpace(out *bytes.Buffer) {
+ if out.Len() > 0 {
+ out.WriteByte('\n')
+ }
+}
+
+func (r *MarkdownStripper) processString(out *bytes.Buffer, text []byte, coallesce bool) {
+ // Always break-up words
+ if !coallesce || !r.coallesce {
+ doubleSpace(out)
+ }
+ out.Write(text)
+ r.coallesce = coallesce
+}
+func (r *MarkdownStripper) processLink(out *bytes.Buffer, link []byte, content []byte) {
+ // Links are processed out of band
+ r.links = append(r.links, string(link))
+ r.coallesce = false
+}
+
+// GetLinks returns the list of link data collected while parsing
+func (r *MarkdownStripper) GetLinks() []string {
+ return r.links
+}
diff --git a/modules/markup/mdstripper/mdstripper_test.go b/modules/markup/mdstripper/mdstripper_test.go
new file mode 100644
index 0000000000..157fe1975b
--- /dev/null
+++ b/modules/markup/mdstripper/mdstripper_test.go
@@ -0,0 +1,71 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package mdstripper
+
+import (
+ "strings"
+ "testing"
+
+ "github.com/stretchr/testify/assert"
+)
+
+func TestMarkdownStripper(t *testing.T) {
+ type testItem struct {
+ markdown string
+ expectedText []string
+ expectedLinks []string
+ }
+
+ list := []testItem{
+ {
+ `
+## This is a title
+
+This is [one](link) to paradise.
+This **is emphasized**.
+This: should coallesce.
+
+` + "```" + `
+This is a code block.
+This should not appear in the output at all.
+` + "```" + `
+
+* Bullet 1
+* Bullet 2
+
+A HIDDEN ` + "`" + `GHOST` + "`" + ` IN THIS LINE.
+ `,
+ []string{
+ "This is a title",
+ "This is",
+ "to paradise.",
+ "This",
+ "is emphasized",
+ ".",
+ "This: should coallesce.",
+ "Bullet 1",
+ "Bullet 2",
+ "A HIDDEN",
+ "IN THIS LINE.",
+ },
+ []string{
+ "link",
+ }},
+ }
+
+ for _, test := range list {
+ text, links := StripMarkdown([]byte(test.markdown))
+ rawlines := strings.Split(text, "\n")
+ lines := make([]string, 0, len(rawlines))
+ for _, line := range rawlines {
+ line := strings.TrimSpace(line)
+ if line != "" {
+ lines = append(lines, line)
+ }
+ }
+ assert.EqualValues(t, test.expectedText, lines)
+ assert.EqualValues(t, test.expectedLinks, links)
+ }
+}
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index 2ec43cf4fd..fd6f90b2ab 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -38,6 +38,9 @@ func NewSanitizer() {
// Custom URL-Schemes
sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
+
+ // Allow keyword markup
+ sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^` + keywordClass + `$`)).OnElements("span")
})
}
diff --git a/modules/references/references.go b/modules/references/references.go
new file mode 100644
index 0000000000..9c74d0d081
--- /dev/null
+++ b/modules/references/references.go
@@ -0,0 +1,322 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package references
+
+import (
+ "net/url"
+ "regexp"
+ "strconv"
+ "strings"
+ "sync"
+
+ "code.gitea.io/gitea/modules/markup/mdstripper"
+ "code.gitea.io/gitea/modules/setting"
+)
+
+var (
+ // validNamePattern performs only the most basic validation for user or repository names
+ // Names may contain only lowercase alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters; input is lower-cased before matching.
+ validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`)
+
+ // NOTE: All below regex matching do not perform any extra validation.
+ // Thus a link is produced even if the linked entity does not exist.
+ // While fast, this is also incorrect and leads to false positives.
+ // TODO: fix invalid linking issue
+
+ // mentionPattern matches all mentions in the form of "@user"
+ mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`)
+ // issueNumericPattern matches string that references to a numeric issue, e.g. #1287
+ issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`)
+ // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234
+ issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`)
+ // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository
+ // e.g. gogits/gogs#12345
+ crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`)
+
+ // Same as GitHub. See
+ // https://help.github.com/articles/closing-issues-via-commit-messages
+ issueCloseKeywords = []string{"close", "closes", "closed", "fix", "fixes", "fixed", "resolve", "resolves", "resolved"}
+ issueReopenKeywords = []string{"reopen", "reopens", "reopened"}
+
+ issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp
+
+ giteaHostInit sync.Once
+ giteaHost string
+)
+
+// XRefAction represents the kind of effect a cross reference has once it is resolved
+type XRefAction int64
+
+const (
+ // XRefActionNone means the cross-reference is simply a comment
+ XRefActionNone XRefAction = iota // 0
+ // XRefActionCloses means the cross-reference should close an issue if it is resolved
+ XRefActionCloses // 1
+ // XRefActionReopens means the cross-reference should reopen an issue if it is resolved
+ XRefActionReopens // 2
+ // XRefActionNeutered means the cross-reference will no longer affect the source
+ XRefActionNeutered // 3
+)
+
+// IssueReference contains an unverified cross-reference to a local issue or pull request
+type IssueReference struct {
+ Index int64
+ Owner string
+ Name string
+ Action XRefAction
+}
+
+// RenderizableReference contains an unverified cross-reference with rendering information
+type RenderizableReference struct {
+ Issue string
+ Owner string
+ Name string
+ RefLocation *RefSpan
+ Action XRefAction
+ ActionLocation *RefSpan
+}
+
+type rawReference struct {
+ index int64
+ owner string
+ name string
+ action XRefAction
+ issue string
+ refLocation *RefSpan
+ actionLocation *RefSpan
+}
+
+func rawToIssueReferenceList(reflist []*rawReference) []IssueReference {
+ refarr := make([]IssueReference, len(reflist))
+ for i, r := range reflist {
+ refarr[i] = IssueReference{
+ Index: r.index,
+ Owner: r.owner,
+ Name: r.name,
+ Action: r.action,
+ }
+ }
+ return refarr
+}
+
+// RefSpan is the position where the reference was found within the parsed text
+type RefSpan struct {
+ Start int
+ End int
+}
+
+func makeKeywordsPat(keywords []string) *regexp.Regexp {
+ return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(keywords, `|`) + `):? $`)
+}
+
+func init() {
+ issueCloseKeywordsPat = makeKeywordsPat(issueCloseKeywords)
+ issueReopenKeywordsPat = makeKeywordsPat(issueReopenKeywords)
+}
+
+// getGiteaHostName returns the lower-cased host of setting.AppURL (including the port, if any), with no scheme information
+func getGiteaHostName() string {
+ giteaHostInit.Do(func() {
+ if uapp, err := url.Parse(setting.AppURL); err == nil {
+ giteaHost = strings.ToLower(uapp.Host)
+ } else {
+ giteaHost = ""
+ }
+ })
+ return giteaHost
+}
+
+// FindAllMentionsMarkdown matches mention patterns in given content and
+// returns a list of found unvalidated user names **not including** the @ prefix.
+func FindAllMentionsMarkdown(content string) []string {
+ bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content))
+ locations := FindAllMentionsBytes(bcontent)
+ mentions := make([]string, len(locations))
+ for i, val := range locations {
+ mentions[i] = string(bcontent[val.Start+1 : val.End])
+ }
+ return mentions
+}
+
+// FindAllMentionsBytes matches mention patterns in given content
+// and returns a list of locations for the unvalidated user names, including the @ prefix.
+func FindAllMentionsBytes(content []byte) []RefSpan {
+ mentions := mentionPattern.FindAllSubmatchIndex(content, -1)
+ ret := make([]RefSpan, len(mentions))
+ for i, val := range mentions {
+ ret[i] = RefSpan{Start: val[2], End: val[3]}
+ }
+ return ret
+}
+
+// FindFirstMentionBytes matches the first mention in the given content
+// and returns the location of the unvalidated user name, including the @ prefix.
+func FindFirstMentionBytes(content []byte) (bool, RefSpan) {
+ mention := mentionPattern.FindSubmatchIndex(content)
+ if mention == nil {
+ return false, RefSpan{}
+ }
+ return true, RefSpan{Start: mention[2], End: mention[3]}
+}
+
+// FindAllIssueReferencesMarkdown strips content from markdown markup
+// and returns a list of unvalidated references found in it.
+func FindAllIssueReferencesMarkdown(content string) []IssueReference {
+ return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content))
+}
+
+func findAllIssueReferencesMarkdown(content string) []*rawReference {
+ bcontent, links := mdstripper.StripMarkdownBytes([]byte(content))
+ return findAllIssueReferencesBytes(bcontent, links)
+}
+
+// FindAllIssueReferences returns a list of unvalidated references found in a string.
+func FindAllIssueReferences(content string) []IssueReference {
+ return rawToIssueReferenceList(findAllIssueReferencesBytes([]byte(content), []string{}))
+}
+
+// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string.
+func FindRenderizableReferenceNumeric(content string) (bool, *RenderizableReference) {
+ match := issueNumericPattern.FindStringSubmatchIndex(content)
+ if match == nil {
+ if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil {
+ return false, nil
+ }
+ }
+ r := getCrossReference([]byte(content), match[2], match[3], false)
+ if r == nil {
+ return false, nil
+ }
+
+ return true, &RenderizableReference{
+ Issue: r.issue,
+ Owner: r.owner,
+ Name: r.name,
+ RefLocation: r.refLocation,
+ Action: r.action,
+ ActionLocation: r.actionLocation,
+ }
+}
+
+// FindRenderizableReferenceAlphanumeric returns the first unvalidated alphanumeric reference found in a string.
+func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) {
+ match := issueAlphanumericPattern.FindStringSubmatchIndex(content)
+ if match == nil {
+ return false, nil
+ }
+
+ action, location := findActionKeywords([]byte(content), match[2])
+
+ return true, &RenderizableReference{
+ Issue: string(content[match[2]:match[3]]),
+ RefLocation: &RefSpan{Start: match[2], End: match[3]},
+ Action: action,
+ ActionLocation: location,
+ }
+}
+
+// findAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice and in the given links.
+func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference {
+
+ ret := make([]*rawReference, 0, 10)
+
+ matches := issueNumericPattern.FindAllSubmatchIndex(content, -1)
+ for _, match := range matches {
+ if ref := getCrossReference(content, match[2], match[3], false); ref != nil {
+ ret = append(ret, ref)
+ }
+ }
+
+ matches = crossReferenceIssueNumericPattern.FindAllSubmatchIndex(content, -1)
+ for _, match := range matches {
+ if ref := getCrossReference(content, match[2], match[3], false); ref != nil {
+ ret = append(ret, ref)
+ }
+ }
+
+ localhost := getGiteaHostName()
+ for _, link := range links {
+ if u, err := url.Parse(link); err == nil {
+ // Note: we're not attempting to match the URL scheme (http/https)
+ host := strings.ToLower(u.Host)
+ if host != "" && host != localhost {
+ continue
+ }
+ parts := strings.Split(u.EscapedPath(), "/")
+ // /user/repo/issues/3
+ if len(parts) != 5 || parts[0] != "" {
+ continue
+ }
+ if parts[3] != "issues" && parts[3] != "pulls" {
+ continue
+ }
+ // Note: closing/reopening keywords not supported with URLs
+ bytes := []byte(parts[1] + "/" + parts[2] + "#" + parts[4])
+ if ref := getCrossReference(bytes, 0, len(bytes), true); ref != nil {
+ ref.refLocation = nil
+ ret = append(ret, ref)
+ }
+ }
+ }
+
+ return ret
+}
+
+func getCrossReference(content []byte, start, end int, fromLink bool) *rawReference {
+ refid := string(content[start:end])
+ parts := strings.Split(refid, "#")
+ if len(parts) != 2 {
+ return nil
+ }
+ repo, issue := parts[0], parts[1]
+ index, err := strconv.ParseInt(issue, 10, 64)
+ if err != nil {
+ return nil
+ }
+ if repo == "" {
+ if fromLink {
+ // Markdown links must specify owner/repo
+ return nil
+ }
+ action, location := findActionKeywords(content, start)
+ return &rawReference{
+ index: index,
+ action: action,
+ issue: issue,
+ refLocation: &RefSpan{Start: start, End: end},
+ actionLocation: location,
+ }
+ }
+ parts = strings.Split(strings.ToLower(repo), "/")
+ if len(parts) != 2 {
+ return nil
+ }
+ owner, name := parts[0], parts[1]
+ if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) {
+ return nil
+ }
+ action, location := findActionKeywords(content, start)
+ return &rawReference{
+ index: index,
+ owner: owner,
+ name: name,
+ action: action,
+ issue: issue,
+ refLocation: &RefSpan{Start: start, End: end},
+ actionLocation: location,
+ }
+}
+
+func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) {
+ m := issueCloseKeywordsPat.FindSubmatchIndex(content[:start])
+ if m != nil {
+ return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]}
+ }
+ m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start])
+ if m != nil {
+ return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]}
+ }
+ return XRefActionNone, nil
+}
diff --git a/modules/references/references_test.go b/modules/references/references_test.go
new file mode 100644
index 0000000000..f8153ffe36
--- /dev/null
+++ b/modules/references/references_test.go
@@ -0,0 +1,296 @@
+// Copyright 2019 The Gitea Authors. All rights reserved.
+// Use of this source code is governed by a MIT-style
+// license that can be found in the LICENSE file.
+
+package references
+
+import (
+ "testing"
+
+ "code.gitea.io/gitea/modules/setting"
+
+ "github.com/stretchr/testify/assert"
+)
+
+ // TestFindAllIssueReferences feeds a table of Markdown snippets to
+ // FindAllIssueReferencesMarkdown and findAllIssueReferencesMarkdown and compares
+ // the output with the expected references, with setting.AppURL temporarily
+ // overridden. It then checks FindRenderizableReferenceAlphanumeric against a
+ // second, smaller table.
+ func TestFindAllIssueReferences(t *testing.T) {
+ 	// expectedRef carries the values copied into a rawReference for comparison.
+ 	type expectedRef struct {
+ 		Index          int64
+ 		Owner          string
+ 		Name           string
+ 		Issue          string
+ 		Action         XRefAction
+ 		RefLocation    *RefSpan
+ 		ActionLocation *RefSpan
+ 	}
+
+ 	type mdFixture struct {
+ 		input    string
+ 		expected []expectedRef
+ 	}
+
+ 	mdFixtures := []mdFixture{
+ 		{
+ 			"Simply closes: #29 yes",
+ 			[]expectedRef{
+ 				{29, "", "", "29", XRefActionCloses, &RefSpan{Start: 15, End: 18}, &RefSpan{Start: 7, End: 13}},
+ 			},
+ 		},
+ 		{
+ 			"#123 no, this is a title.",
+ 			[]expectedRef{},
+ 		},
+ 		{
+ 			" #124 yes, this is a reference.",
+ 			[]expectedRef{
+ 				{124, "", "", "124", XRefActionNone, &RefSpan{Start: 0, End: 4}, nil},
+ 			},
+ 		},
+ 		{
+ 			"```\nThis is a code block.\n#723 no, it's a code block.```",
+ 			[]expectedRef{},
+ 		},
+ 		{
+ 			"This `#724` no, it's inline code.",
+ 			[]expectedRef{},
+ 		},
+ 		{
+ 			"This user3/repo4#200 yes.",
+ 			[]expectedRef{
+ 				{200, "user3", "repo4", "200", XRefActionNone, &RefSpan{Start: 5, End: 20}, nil},
+ 			},
+ 		},
+ 		{
+ 			"This [one](#919) no, this is a URL fragment.",
+ 			[]expectedRef{},
+ 		},
+ 		{
+ 			"This [two](/user2/repo1/issues/921) yes.",
+ 			[]expectedRef{
+ 				{921, "user2", "repo1", "921", XRefActionNone, nil, nil},
+ 			},
+ 		},
+ 		{
+ 			"This [three](/user2/repo1/pulls/922) yes.",
+ 			[]expectedRef{
+ 				{922, "user2", "repo1", "922", XRefActionNone, nil, nil},
+ 			},
+ 		},
+ 		{
+ 			"This [four](http://gitea.com:3000/user3/repo4/issues/203) yes.",
+ 			[]expectedRef{
+ 				{203, "user3", "repo4", "203", XRefActionNone, nil, nil},
+ 			},
+ 		},
+ 		{
+ 			"This [five](http://github.com/user3/repo4/issues/204) no.",
+ 			[]expectedRef{},
+ 		},
+ 		{
+ 			"This http://gitea.com:3000/user4/repo5/201 no, bad URL.",
+ 			[]expectedRef{},
+ 		},
+ 		{
+ 			"This http://gitea.com:3000/user4/repo5/pulls/202 yes.",
+ 			[]expectedRef{
+ 				{202, "user4", "repo5", "202", XRefActionNone, nil, nil},
+ 			},
+ 		},
+ 		{
+ 			"This http://GiTeA.COM:3000/user4/repo6/pulls/205 yes.",
+ 			[]expectedRef{
+ 				{205, "user4", "repo6", "205", XRefActionNone, nil, nil},
+ 			},
+ 		},
+ 		{
+ 			"Reopens #15 yes",
+ 			[]expectedRef{
+ 				{15, "", "", "15", XRefActionReopens, &RefSpan{Start: 8, End: 11}, &RefSpan{Start: 0, End: 7}},
+ 			},
+ 		},
+ 		{
+ 			"This closes #20 for you yes",
+ 			[]expectedRef{
+ 				{20, "", "", "20", XRefActionCloses, &RefSpan{Start: 12, End: 15}, &RefSpan{Start: 5, End: 11}},
+ 			},
+ 		},
+ 		{
+ 			"Do you fix user6/repo6#300 ? yes",
+ 			[]expectedRef{
+ 				{300, "user6", "repo6", "300", XRefActionCloses, &RefSpan{Start: 11, End: 26}, &RefSpan{Start: 7, End: 10}},
+ 			},
+ 		},
+ 		{
+ 			"For 999 #1235 no keyword, but yes",
+ 			[]expectedRef{
+ 				{1235, "", "", "1235", XRefActionNone, &RefSpan{Start: 8, End: 13}, nil},
+ 			},
+ 		},
+ 		{
+ 			"Which abc. #9434 same as above",
+ 			[]expectedRef{
+ 				{9434, "", "", "9434", XRefActionNone, &RefSpan{Start: 11, End: 16}, nil},
+ 			},
+ 		},
+ 		{
+ 			"This closes #600 and reopens #599",
+ 			[]expectedRef{
+ 				{600, "", "", "600", XRefActionCloses, &RefSpan{Start: 12, End: 16}, &RefSpan{Start: 5, End: 11}},
+ 				{599, "", "", "599", XRefActionReopens, &RefSpan{Start: 29, End: 33}, &RefSpan{Start: 21, End: 28}},
+ 			},
+ 		},
+ 	}
+
+ 	// Remember the configured URL so it can be put back once the Markdown
+ 	// fixtures have run; other tests may rely on it.
+ 	savedAppURL := setting.AppURL
+ 	setting.AppURL = "https://gitea.com:3000/"
+
+ 	for _, fx := range mdFixtures {
+ 		// Keep the slice non-nil even when no references are expected.
+ 		wantRaw := make([]*rawReference, 0, len(fx.expected))
+ 		for _, exp := range fx.expected {
+ 			wantRaw = append(wantRaw, &rawReference{
+ 				index:          exp.Index,
+ 				owner:          exp.Owner,
+ 				name:           exp.Name,
+ 				action:         exp.Action,
+ 				issue:          exp.Issue,
+ 				refLocation:    exp.RefLocation,
+ 				actionLocation: exp.ActionLocation,
+ 			})
+ 		}
+ 		wantRefs := rawToIssueReferenceList(wantRaw)
+
+ 		gotRefs := FindAllIssueReferencesMarkdown(fx.input)
+ 		assert.EqualValues(t, wantRefs, gotRefs, "Failed to parse: {%s}", fx.input)
+
+ 		gotRaw := findAllIssueReferencesMarkdown(fx.input)
+ 		assert.EqualValues(t, wantRaw, gotRaw, "Failed to parse: {%s}", fx.input)
+ 	}
+
+ 	// Put the original URL back before the alphanumeric checks below.
+ 	setting.AppURL = savedAppURL
+
+ 	type alnumFixture struct {
+ 		input          string
+ 		issue          string
+ 		refLocation    *RefSpan
+ 		action         XRefAction
+ 		actionLocation *RefSpan
+ 	}
+
+ 	alnumFixtures := []alnumFixture{
+ 		{
+ 			"This ref ABC-123 is alphanumeric",
+ 			"ABC-123", &RefSpan{Start: 9, End: 16},
+ 			XRefActionNone, nil,
+ 		},
+ 		{
+ 			"This closes ABCD-1234 alphanumeric",
+ 			"ABCD-1234", &RefSpan{Start: 12, End: 21},
+ 			XRefActionCloses, &RefSpan{Start: 5, End: 11},
+ 		},
+ 	}
+
+ 	for _, fx := range alnumFixtures {
+ 		found, ref := FindRenderizableReferenceAlphanumeric(fx.input)
+ 		if fx.issue == "" {
+ 			// An empty expected issue means no reference should be found.
+ 			assert.False(t, found, "Failed to parse: {%s}", fx.input)
+ 			continue
+ 		}
+ 		assert.True(t, found, "Failed to parse: {%s}", fx.input)
+ 		assert.Equal(t, fx.issue, ref.Issue, "Failed to parse: {%s}", fx.input)
+ 		assert.Equal(t, fx.refLocation, ref.RefLocation, "Failed to parse: {%s}", fx.input)
+ 		assert.Equal(t, fx.action, ref.Action, "Failed to parse: {%s}", fx.input)
+ 		assert.Equal(t, fx.actionLocation, ref.ActionLocation, "Failed to parse: {%s}", fx.input)
+ 	}
+ }
+
+ // TestRegExp_mentionPattern checks mentionPattern against strings it must
+ // match and strings it must reject.
+ func TestRegExp_mentionPattern(t *testing.T) {
+ 	shouldMatch := []string{
+ 		"@Unknwon",
+ 		"@ANT_123",
+ 		"@xxx-DiN0-z-A..uru..s-xxx",
+ 		" @lol ",
+ 		" @Te-st",
+ 		"(@gitea)",
+ 		"[@gitea]",
+ 	}
+ 	shouldNotMatch := []string{
+ 		"@ 0",
+ 		"@ ",
+ 		"@",
+ 		"",
+ 		"ABC",
+ 		"/home/gitea/@gitea",
+ 		"\"@gitea\"",
+ 	}
+
+ 	for _, input := range shouldMatch {
+ 		assert.True(t, mentionPattern.MatchString(input))
+ 	}
+ 	for _, input := range shouldNotMatch {
+ 		assert.False(t, mentionPattern.MatchString(input))
+ 	}
+ }
+
+ // TestRegExp_issueNumericPattern checks issueNumericPattern against strings it
+ // must match and strings it must reject.
+ func TestRegExp_issueNumericPattern(t *testing.T) {
+ 	shouldMatch := []string{
+ 		"#1234",
+ 		"#0",
+ 		"#1234567890987654321",
+ 		" #12",
+ 		"#12:",
+ 		"ref: #12: msg",
+ 	}
+ 	shouldNotMatch := []string{
+ 		"# 1234",
+ 		"# 0",
+ 		"# ",
+ 		"#",
+ 		"#ABC",
+ 		"#1A2B",
+ 		"",
+ 		"ABC",
+ 	}
+
+ 	for _, input := range shouldMatch {
+ 		matched := issueNumericPattern.MatchString(input)
+ 		assert.True(t, matched)
+ 	}
+ 	for _, input := range shouldNotMatch {
+ 		matched := issueNumericPattern.MatchString(input)
+ 		assert.False(t, matched)
+ 	}
+ }
+
+ // TestRegExp_issueAlphanumericPattern checks issueAlphanumericPattern against
+ // strings it must match and strings it must reject.
+ func TestRegExp_issueAlphanumericPattern(t *testing.T) {
+ 	shouldMatch := []string{
+ 		"ABC-1234",
+ 		"A-1",
+ 		"RC-80",
+ 		"ABCDEFGHIJ-1234567890987654321234567890",
+ 		"ABC-123.",
+ 		"(ABC-123)",
+ 		"[ABC-123]",
+ 		"ABC-123:",
+ 	}
+ 	shouldNotMatch := []string{
+ 		"RC-08",
+ 		"PR-0",
+ 		"ABCDEFGHIJK-1",
+ 		"PR_1",
+ 		"",
+ 		"#ABC",
+ 		"",
+ 		"ABC",
+ 		"GG-",
+ 		"rm-1",
+ 		"/home/gitea/ABC-1234",
+ 		"MY-STRING-ABC-123",
+ 	}
+
+ 	for _, input := range shouldMatch {
+ 		matched := issueAlphanumericPattern.MatchString(input)
+ 		assert.True(t, matched)
+ 	}
+ 	for _, input := range shouldNotMatch {
+ 		matched := issueAlphanumericPattern.MatchString(input)
+ 		assert.False(t, matched)
+ 	}
+ }