From 15809d81f7d36759f289b941352a9754611c5dba Mon Sep 17 00:00:00 2001 From: guillep2k <18600385+guillep2k@users.noreply.github.com> Date: Sun, 13 Oct 2019 19:29:10 -0300 Subject: Rewrite reference processing code in preparation for opening/closing from comment references (#8261) * Add a markdown stripper for mentions and xrefs * Improve comments * Small code simplification * Move reference code to modules/references * Fix typo * Make MarkdownStripper return [][]byte * Implement preliminary keywords parsing * Add FIXME comment * Fix comment * make fmt * Fix permissions check * Fix text assumptions * Fix imports * Fix lint, fmt * Fix unused import * Add missing export comment * Bypass revive on implemented interface * Move mdstripper into its own package * Support alphanumeric patterns * Refactor FindAllMentions * Move mentions test to references * Parse mentions from reference package * Refactor code to implement renderizable references * Fix typo * Move patterns and tests to the references package * Fix nil reference * Preliminary rendering attempt of closing keywords * Normalize names, comments, general tidy-up * Add CSS style for action keywords * Fix permission for admin and owner * Fix golangci-lint * Fix golangci-lint --- modules/references/references.go | 322 ++++++++++++++++++++++++++++++++++ modules/references/references_test.go | 296 +++++++++++++++++++++++++++++++ 2 files changed, 618 insertions(+) create mode 100644 modules/references/references.go create mode 100644 modules/references/references_test.go (limited to 'modules/references') diff --git a/modules/references/references.go b/modules/references/references.go new file mode 100644 index 0000000000..9c74d0d081 --- /dev/null +++ b/modules/references/references.go @@ -0,0 +1,322 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package references + +import ( + "net/url" + "regexp" + "strconv" + "strings" + "sync" + + "code.gitea.io/gitea/modules/markup/mdstripper" + "code.gitea.io/gitea/modules/setting" +) + +var ( + // validNamePattern performs only the most basic validation for user or repository names + // Repository name should contain only alphanumeric, dash ('-'), underscore ('_') and dot ('.') characters. + validNamePattern = regexp.MustCompile(`^[a-z0-9_.-]+$`) + + // NOTE: All below regex matching do not perform any extra validation. + // Thus a link is produced even if the linked entity does not exist. + // While fast, this is also incorrect and lead to false positives. + // TODO: fix invalid linking issue + + // mentionPattern matches all mentions in the form of "@user" + mentionPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(@[0-9a-zA-Z-_\.]+)(?:\s|$|\)|\])`) + // issueNumericPattern matches string that references to a numeric issue, e.g. #1287 + issueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)(#[0-9]+)(?:\s|$|\)|\]|:|\.(\s|$))`) + // issueAlphanumericPattern matches string that references to an alphanumeric issue, e.g. ABC-1234 + issueAlphanumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([A-Z]{1,10}-[1-9][0-9]*)(?:\s|$|\)|\]|:|\.(\s|$))`) + // crossReferenceIssueNumericPattern matches string that references a numeric issue in a different repository + // e.g. gogits/gogs#12345 + crossReferenceIssueNumericPattern = regexp.MustCompile(`(?:\s|^|\(|\[)([0-9a-zA-Z-_\.]+/[0-9a-zA-Z-_\.]+#[0-9]+)(?:\s|$|\)|\]|\.(\s|$))`) + + // Same as GitHub. See + // https://help.github.com/articles/closing-issues-via-commit-messages + issueCloseKeywords = []string{"close", "closes", "closed", "fix", "fixes", "fixed", "resolve", "resolves", "resolved"} + issueReopenKeywords = []string{"reopen", "reopens", "reopened"} + + issueCloseKeywordsPat, issueReopenKeywordsPat *regexp.Regexp + + giteaHostInit sync.Once + giteaHost string +) + +// XRefAction represents the kind of effect a cross reference has once is resolved +type XRefAction int64 + +const ( + // XRefActionNone means the cross-reference is simply a comment + XRefActionNone XRefAction = iota // 0 + // XRefActionCloses means the cross-reference should close an issue if it is resolved + XRefActionCloses // 1 + // XRefActionReopens means the cross-reference should reopen an issue if it is resolved + XRefActionReopens // 2 + // XRefActionNeutered means the cross-reference will no longer affect the source + XRefActionNeutered // 3 +) + +// IssueReference contains an unverified cross-reference to a local issue or pull request +type IssueReference struct { + Index int64 + Owner string + Name string + Action XRefAction +} + +// RenderizableReference contains an unverified cross-reference to with rendering information +type RenderizableReference struct { + Issue string + Owner string + Name string + RefLocation *RefSpan + Action XRefAction + ActionLocation *RefSpan +} + +type rawReference struct { + index int64 + owner string + name string + action XRefAction + issue string + refLocation *RefSpan + actionLocation *RefSpan +} + +func rawToIssueReferenceList(reflist []*rawReference) []IssueReference { + refarr := make([]IssueReference, len(reflist)) + for i, r := range reflist { + refarr[i] = IssueReference{ + Index: r.index, + Owner: r.owner, + Name: r.name, + Action: r.action, + } + } + return refarr +} + +// RefSpan is the position where the reference was found within the parsed text +type RefSpan struct { + Start int + End int +} + +func makeKeywordsPat(keywords []string) *regexp.Regexp { + return regexp.MustCompile(`(?i)(?:\s|^|\(|\[)(` + strings.Join(keywords, `|`) + `):? $`) +} + +func init() { + issueCloseKeywordsPat = makeKeywordsPat(issueCloseKeywords) + issueReopenKeywordsPat = makeKeywordsPat(issueReopenKeywords) +} + +// getGiteaHostName returns a normalized string with the local host name, with no scheme or port information +func getGiteaHostName() string { + giteaHostInit.Do(func() { + if uapp, err := url.Parse(setting.AppURL); err == nil { + giteaHost = strings.ToLower(uapp.Host) + } else { + giteaHost = "" + } + }) + return giteaHost +} + +// FindAllMentionsMarkdown matches mention patterns in given content and +// returns a list of found unvalidated user names **not including** the @ prefix. +func FindAllMentionsMarkdown(content string) []string { + bcontent, _ := mdstripper.StripMarkdownBytes([]byte(content)) + locations := FindAllMentionsBytes(bcontent) + mentions := make([]string, len(locations)) + for i, val := range locations { + mentions[i] = string(bcontent[val.Start+1 : val.End]) + } + return mentions +} + +// FindAllMentionsBytes matches mention patterns in given content +// and returns a list of locations for the unvalidated user names, including the @ prefix. +func FindAllMentionsBytes(content []byte) []RefSpan { + mentions := mentionPattern.FindAllSubmatchIndex(content, -1) + ret := make([]RefSpan, len(mentions)) + for i, val := range mentions { + ret[i] = RefSpan{Start: val[2], End: val[3]} + } + return ret +} + +// FindFirstMentionBytes matches the first mention in then given content +// and returns the location of the unvalidated user name, including the @ prefix. +func FindFirstMentionBytes(content []byte) (bool, RefSpan) { + mention := mentionPattern.FindSubmatchIndex(content) + if mention == nil { + return false, RefSpan{} + } + return true, RefSpan{Start: mention[2], End: mention[3]} +} + +// FindAllIssueReferencesMarkdown strips content from markdown markup +// and returns a list of unvalidated references found in it. +func FindAllIssueReferencesMarkdown(content string) []IssueReference { + return rawToIssueReferenceList(findAllIssueReferencesMarkdown(content)) +} + +func findAllIssueReferencesMarkdown(content string) []*rawReference { + bcontent, links := mdstripper.StripMarkdownBytes([]byte(content)) + return findAllIssueReferencesBytes(bcontent, links) +} + +// FindAllIssueReferences returns a list of unvalidated references found in a string. +func FindAllIssueReferences(content string) []IssueReference { + return rawToIssueReferenceList(findAllIssueReferencesBytes([]byte(content), []string{})) +} + +// FindRenderizableReferenceNumeric returns the first unvalidated reference found in a string. +func FindRenderizableReferenceNumeric(content string) (bool, *RenderizableReference) { + match := issueNumericPattern.FindStringSubmatchIndex(content) + if match == nil { + if match = crossReferenceIssueNumericPattern.FindStringSubmatchIndex(content); match == nil { + return false, nil + } + } + r := getCrossReference([]byte(content), match[2], match[3], false) + if r == nil { + return false, nil + } + + return true, &RenderizableReference{ + Issue: r.issue, + Owner: r.owner, + Name: r.name, + RefLocation: r.refLocation, + Action: r.action, + ActionLocation: r.actionLocation, + } +} + +// FindRenderizableReferenceAlphanumeric returns the first alphanumeric unvalidated references found in a string. +func FindRenderizableReferenceAlphanumeric(content string) (bool, *RenderizableReference) { + match := issueAlphanumericPattern.FindStringSubmatchIndex(content) + if match == nil { + return false, nil + } + + action, location := findActionKeywords([]byte(content), match[2]) + + return true, &RenderizableReference{ + Issue: string(content[match[2]:match[3]]), + RefLocation: &RefSpan{Start: match[2], End: match[3]}, + Action: action, + ActionLocation: location, + } +} + +// FindAllIssueReferencesBytes returns a list of unvalidated references found in a byte slice. +func findAllIssueReferencesBytes(content []byte, links []string) []*rawReference { + + ret := make([]*rawReference, 0, 10) + + matches := issueNumericPattern.FindAllSubmatchIndex(content, -1) + for _, match := range matches { + if ref := getCrossReference(content, match[2], match[3], false); ref != nil { + ret = append(ret, ref) + } + } + + matches = crossReferenceIssueNumericPattern.FindAllSubmatchIndex(content, -1) + for _, match := range matches { + if ref := getCrossReference(content, match[2], match[3], false); ref != nil { + ret = append(ret, ref) + } + } + + localhost := getGiteaHostName() + for _, link := range links { + if u, err := url.Parse(link); err == nil { + // Note: we're not attempting to match the URL scheme (http/https) + host := strings.ToLower(u.Host) + if host != "" && host != localhost { + continue + } + parts := strings.Split(u.EscapedPath(), "/") + // /user/repo/issues/3 + if len(parts) != 5 || parts[0] != "" { + continue + } + if parts[3] != "issues" && parts[3] != "pulls" { + continue + } + // Note: closing/reopening keywords not supported with URLs + bytes := []byte(parts[1] + "/" + parts[2] + "#" + parts[4]) + if ref := getCrossReference(bytes, 0, len(bytes), true); ref != nil { + ref.refLocation = nil + ret = append(ret, ref) + } + } + } + + return ret +} + +func getCrossReference(content []byte, start, end int, fromLink bool) *rawReference { + refid := string(content[start:end]) + parts := strings.Split(refid, "#") + if len(parts) != 2 { + return nil + } + repo, issue := parts[0], parts[1] + index, err := strconv.ParseInt(issue, 10, 64) + if err != nil { + return nil + } + if repo == "" { + if fromLink { + // Markdown links must specify owner/repo + return nil + } + action, location := findActionKeywords(content, start) + return &rawReference{ + index: index, + action: action, + issue: issue, + refLocation: &RefSpan{Start: start, End: end}, + actionLocation: location, + } + } + parts = strings.Split(strings.ToLower(repo), "/") + if len(parts) != 2 { + return nil + } + owner, name := parts[0], parts[1] + if !validNamePattern.MatchString(owner) || !validNamePattern.MatchString(name) { + return nil + } + action, location := findActionKeywords(content, start) + return &rawReference{ + index: index, + owner: owner, + name: name, + action: action, + issue: issue, + refLocation: &RefSpan{Start: start, End: end}, + actionLocation: location, + } +} + +func findActionKeywords(content []byte, start int) (XRefAction, *RefSpan) { + m := issueCloseKeywordsPat.FindSubmatchIndex(content[:start]) + if m != nil { + return XRefActionCloses, &RefSpan{Start: m[2], End: m[3]} + } + m = issueReopenKeywordsPat.FindSubmatchIndex(content[:start]) + if m != nil { + return XRefActionReopens, &RefSpan{Start: m[2], End: m[3]} + } + return XRefActionNone, nil +} diff --git a/modules/references/references_test.go b/modules/references/references_test.go new file mode 100644 index 0000000000..f8153ffe36 --- /dev/null +++ b/modules/references/references_test.go @@ -0,0 +1,296 @@ +// Copyright 2019 The Gitea Authors. All rights reserved. +// Use of this source code is governed by a MIT-style +// license that can be found in the LICENSE file. + +package references + +import ( + "testing" + + "code.gitea.io/gitea/modules/setting" + + "github.com/stretchr/testify/assert" +) + +func TestFindAllIssueReferences(t *testing.T) { + + type result struct { + Index int64 + Owner string + Name string + Issue string + Action XRefAction + RefLocation *RefSpan + ActionLocation *RefSpan + } + + type testFixture struct { + input string + expected []result + } + + fixtures := []testFixture{ + { + "Simply closes: #29 yes", + []result{ + {29, "", "", "29", XRefActionCloses, &RefSpan{Start: 15, End: 18}, &RefSpan{Start: 7, End: 13}}, + }, + }, + { + "#123 no, this is a title.", + []result{}, + }, + { + " #124 yes, this is a reference.", + []result{ + {124, "", "", "124", XRefActionNone, &RefSpan{Start: 0, End: 4}, nil}, + }, + }, + { + "```\nThis is a code block.\n#723 no, it's a code block.```", + []result{}, + }, + { + "This `#724` no, it's inline code.", + []result{}, + }, + { + "This user3/repo4#200 yes.", + []result{ + {200, "user3", "repo4", "200", XRefActionNone, &RefSpan{Start: 5, End: 20}, nil}, + }, + }, + { + "This [one](#919) no, this is a URL fragment.", + []result{}, + }, + { + "This [two](/user2/repo1/issues/921) yes.", + []result{ + {921, "user2", "repo1", "921", XRefActionNone, nil, nil}, + }, + }, + { + "This [three](/user2/repo1/pulls/922) yes.", + []result{ + {922, "user2", "repo1", "922", XRefActionNone, nil, nil}, + }, + }, + { + "This [four](http://gitea.com:3000/user3/repo4/issues/203) yes.", + []result{ + {203, "user3", "repo4", "203", XRefActionNone, nil, nil}, + }, + }, + { + "This [five](http://github.com/user3/repo4/issues/204) no.", + []result{}, + }, + { + "This http://gitea.com:3000/user4/repo5/201 no, bad URL.", + []result{}, + }, + { + "This http://gitea.com:3000/user4/repo5/pulls/202 yes.", + []result{ + {202, "user4", "repo5", "202", XRefActionNone, nil, nil}, + }, + }, + { + "This http://GiTeA.COM:3000/user4/repo6/pulls/205 yes.", + []result{ + {205, "user4", "repo6", "205", XRefActionNone, nil, nil}, + }, + }, + { + "Reopens #15 yes", + []result{ + {15, "", "", "15", XRefActionReopens, &RefSpan{Start: 8, End: 11}, &RefSpan{Start: 0, End: 7}}, + }, + }, + { + "This closes #20 for you yes", + []result{ + {20, "", "", "20", XRefActionCloses, &RefSpan{Start: 12, End: 15}, &RefSpan{Start: 5, End: 11}}, + }, + }, + { + "Do you fix user6/repo6#300 ? yes", + []result{ + {300, "user6", "repo6", "300", XRefActionCloses, &RefSpan{Start: 11, End: 26}, &RefSpan{Start: 7, End: 10}}, + }, + }, + { + "For 999 #1235 no keyword, but yes", + []result{ + {1235, "", "", "1235", XRefActionNone, &RefSpan{Start: 8, End: 13}, nil}, + }, + }, + { + "Which abc. #9434 same as above", + []result{ + {9434, "", "", "9434", XRefActionNone, &RefSpan{Start: 11, End: 16}, nil}, + }, + }, + { + "This closes #600 and reopens #599", + []result{ + {600, "", "", "600", XRefActionCloses, &RefSpan{Start: 12, End: 16}, &RefSpan{Start: 5, End: 11}}, + {599, "", "", "599", XRefActionReopens, &RefSpan{Start: 29, End: 33}, &RefSpan{Start: 21, End: 28}}, + }, + }, + } + + // Save original value for other tests that may rely on it + prevURL := setting.AppURL + setting.AppURL = "https://gitea.com:3000/" + + for _, fixture := range fixtures { + expraw := make([]*rawReference, len(fixture.expected)) + for i, e := range fixture.expected { + expraw[i] = &rawReference{ + index: e.Index, + owner: e.Owner, + name: e.Name, + action: e.Action, + issue: e.Issue, + refLocation: e.RefLocation, + actionLocation: e.ActionLocation, + } + } + expref := rawToIssueReferenceList(expraw) + refs := FindAllIssueReferencesMarkdown(fixture.input) + assert.EqualValues(t, expref, refs, "Failed to parse: {%s}", fixture.input) + rawrefs := findAllIssueReferencesMarkdown(fixture.input) + assert.EqualValues(t, expraw, rawrefs, "Failed to parse: {%s}", fixture.input) + } + + // Restore for other tests that may rely on the original value + setting.AppURL = prevURL + + type alnumFixture struct { + input string + issue string + refLocation *RefSpan + action XRefAction + actionLocation *RefSpan + } + + alnumFixtures := []alnumFixture{ + { + "This ref ABC-123 is alphanumeric", + "ABC-123", &RefSpan{Start: 9, End: 16}, + XRefActionNone, nil, + }, + { + "This closes ABCD-1234 alphanumeric", + "ABCD-1234", &RefSpan{Start: 12, End: 21}, + XRefActionCloses, &RefSpan{Start: 5, End: 11}, + }, + } + + for _, fixture := range alnumFixtures { + found, ref := FindRenderizableReferenceAlphanumeric(fixture.input) + if fixture.issue == "" { + assert.False(t, found, "Failed to parse: {%s}", fixture.input) + } else { + assert.True(t, found, "Failed to parse: {%s}", fixture.input) + assert.Equal(t, fixture.issue, ref.Issue, "Failed to parse: {%s}", fixture.input) + assert.Equal(t, fixture.refLocation, ref.RefLocation, "Failed to parse: {%s}", fixture.input) + assert.Equal(t, fixture.action, ref.Action, "Failed to parse: {%s}", fixture.input) + assert.Equal(t, fixture.actionLocation, ref.ActionLocation, "Failed to parse: {%s}", fixture.input) + } + } +} + +func TestRegExp_mentionPattern(t *testing.T) { + trueTestCases := []string{ + "@Unknwon", + "@ANT_123", + "@xxx-DiN0-z-A..uru..s-xxx", + " @lol ", + " @Te-st", + "(@gitea)", + "[@gitea]", + } + falseTestCases := []string{ + "@ 0", + "@ ", + "@", + "", + "ABC", + "/home/gitea/@gitea", + "\"@gitea\"", + } + + for _, testCase := range trueTestCases { + res := mentionPattern.MatchString(testCase) + assert.True(t, res) + } + for _, testCase := range falseTestCases { + res := mentionPattern.MatchString(testCase) + assert.False(t, res) + } +} + +func TestRegExp_issueNumericPattern(t *testing.T) { + trueTestCases := []string{ + "#1234", + "#0", + "#1234567890987654321", + " #12", + "#12:", + "ref: #12: msg", + } + falseTestCases := []string{ + "# 1234", + "# 0", + "# ", + "#", + "#ABC", + "#1A2B", + "", + "ABC", + } + + for _, testCase := range trueTestCases { + assert.True(t, issueNumericPattern.MatchString(testCase)) + } + for _, testCase := range falseTestCases { + assert.False(t, issueNumericPattern.MatchString(testCase)) + } +} + +func TestRegExp_issueAlphanumericPattern(t *testing.T) { + trueTestCases := []string{ + "ABC-1234", + "A-1", + "RC-80", + "ABCDEFGHIJ-1234567890987654321234567890", + "ABC-123.", + "(ABC-123)", + "[ABC-123]", + "ABC-123:", + } + falseTestCases := []string{ + "RC-08", + "PR-0", + "ABCDEFGHIJK-1", + "PR_1", + "", + "#ABC", + "", + "ABC", + "GG-", + "rm-1", + "/home/gitea/ABC-1234", + "MY-STRING-ABC-123", + } + + for _, testCase := range trueTestCases { + assert.True(t, issueAlphanumericPattern.MatchString(testCase)) + } + for _, testCase := range falseTestCases { + assert.False(t, issueAlphanumericPattern.MatchString(testCase)) + } +} -- cgit v1.2.3