diff options
author | KN4CK3R <admin@oldschoolhack.me> | 2021-06-23 23:09:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-23 17:09:51 -0400 |
commit | c9c7afda1a80bda7b61ded222163db796132b78f (patch) | |
tree | 2145a5afe7c7a19d450b7def199dc302e1c8d6f2 /modules/markup | |
parent | eb324a9402878a13029116bafa8ccce527796522 (diff) | |
download | gitea-c9c7afda1a80bda7b61ded222163db796132b78f.tar.gz gitea-c9c7afda1a80bda7b61ded222163db796132b78f.zip |
Add sanitizer rules per renderer (#16110)
* Added sanitizer rules per renderer.
* Updated documentation.
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Diffstat (limited to 'modules/markup')
-rw-r--r-- | modules/markup/csv/csv.go | 10 | ||||
-rw-r--r-- | modules/markup/external/external.go | 7 | ||||
-rw-r--r-- | modules/markup/html_test.go | 4 | ||||
-rw-r--r-- | modules/markup/markdown/markdown.go | 9 | ||||
-rw-r--r-- | modules/markup/orgmode/orgmode.go | 6 | ||||
-rw-r--r-- | modules/markup/renderer.go | 56 | ||||
-rw-r--r-- | modules/markup/sanitizer.go | 88 |
7 files changed, 114 insertions, 66 deletions
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 6572b0ee1e..8a4df89511 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -10,6 +10,7 @@ import ( "html" "io" "io/ioutil" + "regexp" "strconv" "code.gitea.io/gitea/modules/csv" @@ -38,6 +39,15 @@ func (Renderer) Extensions() []string { return []string{".csv", ".tsv"} } +// SanitizerRules implements markup.Renderer +func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return []setting.MarkupSanitizerRule{ + {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, + {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + } +} + func writeField(w io.Writer, element, class, field string) error { if _, err := io.WriteString(w, "<"); err != nil { return err diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 62814c9914..c849f505e7 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -30,7 +30,7 @@ func RegisterRenderers() { // Renderer implements markup.Renderer for external tools type Renderer struct { - setting.MarkupRenderer + *setting.MarkupRenderer } // Name returns the external tool name @@ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string { return p.FileExtensions } +// SanitizerRules implements markup.Renderer +func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return p.MarkupSanitizerRules +} + func envMark(envName string) string { if runtime.GOOS == "windows" { return "%" + envName + "%" diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 4cdd5798c8..8c3d2b5395 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -112,7 +112,7 @@ func TestRender_links(t *testing.T) { defaultCustom := setting.Markdown.CustomURLSchemes setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"} - ReplaceSanitizer() + InitializeSanitizer() CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) test( @@ -192,7 +192,7 @@ func TestRender_links(t *testing.T) { // Restore previous settings setting.Markdown.CustomURLSchemes = defaultCustom - ReplaceSanitizer() + InitializeSanitizer() CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) } diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index 87fae2a23b..cac2a180fa 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) } _ = lw.Close() }() - buf := markup.SanitizeReader(rd) + buf := markup.SanitizeReader(rd, "") _, err := io.Copy(output, buf) return err } @@ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error if log.IsDebug() { log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2))) } - ret := markup.SanitizeReader(input) + ret := markup.SanitizeReader(input, "") _, err = io.Copy(output, ret) if err != nil { log.Error("SanitizeReader failed: %v", err) @@ -249,6 +249,11 @@ func (Renderer) Extensions() []string { return setting.Markdown.FileExtensions } +// SanitizerRules implements markup.Renderer +func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return []setting.MarkupSanitizerRule{} +} + // Render implements markup.Renderer func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { return render(ctx, input, output) diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 851fc97f9a..7e9f1f45c5 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" "github.com/alecthomas/chroma" @@ -41,6 +42,11 @@ func (Renderer) Extensions() []string { return []string{".org"} } +// SanitizerRules implements markup.Renderer +func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return []setting.MarkupSanitizerRule{} +} + // Render renders orgmode rawbytes to HTML func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { htmlWriter := org.NewHTMLWriter() diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index d60c8ad710..04619caee3 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -81,6 +81,7 @@ type Renderer interface { Name() string // markup format name Extensions() []string NeedPostProcess() bool + SanitizerRules() []setting.MarkupSanitizerRule Render(ctx *RenderContext, input io.Reader, output io.Writer) error } @@ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr _ = pw.Close() }() - if renderer.NeedPostProcess() { - pr2, pw2 := io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() - - wg.Add(1) - go func() { - buf := SanitizeReader(pr2) - _, err = io.Copy(output, buf) - _ = pr2.Close() - wg.Done() - }() - - wg.Add(1) - go func() { + pr2, pw2 := io.Pipe() + defer func() { + _ = pr2.Close() + _ = pw2.Close() + }() + + wg.Add(1) + go func() { + buf := SanitizeReader(pr2, renderer.Name()) + _, err = io.Copy(output, buf) + _ = pr2.Close() + wg.Done() + }() + + wg.Add(1) + go func() { + if renderer.NeedPostProcess() { err = PostProcess(ctx, pr, pw2) - _ = pr.Close() - _ = pw2.Close() - wg.Done() - }() - } else { - wg.Add(1) - go func() { - buf := SanitizeReader(pr) - _, err = io.Copy(output, buf) - _ = pr.Close() - wg.Done() - }() - } + } else { + _, err = io.Copy(pw2, pr) + } + _ = pr.Close() + _ = pw2.Close() + wg.Done() + }() + if err1 := renderer.Render(ctx, input, pw); err1 != nil { return err1 } diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 5611bd06ad..9342d65de5 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -19,8 +19,9 @@ import ( // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow // any modification to the underlying policies once it's been created. type Sanitizer struct { - policy *bluemonday.Policy - init sync.Once + defaultPolicy *bluemonday.Policy + rendererPolicies map[string]*bluemonday.Policy + init sync.Once } var sanitizer = &Sanitizer{} @@ -30,47 +31,57 @@ var sanitizer = &Sanitizer{} // entire application lifecycle. func NewSanitizer() { sanitizer.init.Do(func() { - ReplaceSanitizer() + InitializeSanitizer() }) } -// ReplaceSanitizer replaces the current sanitizer to account for changes in settings -func ReplaceSanitizer() { - sanitizer.policy = bluemonday.UGCPolicy() +// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings +func InitializeSanitizer() { + sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} + sanitizer.defaultPolicy = createDefaultPolicy() + + for name, renderer := range renderers { + sanitizerRules := renderer.SanitizerRules() + if len(sanitizerRules) > 0 { + policy := createDefaultPolicy() + addSanitizerRules(policy, sanitizerRules) + sanitizer.rendererPolicies[name] = policy + } + } +} + +func createDefaultPolicy() *bluemonday.Policy { + policy := bluemonday.UGCPolicy() // For Chroma markdown plugin - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre") - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") // Checkboxes - sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") - sanitizer.policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") + policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") + policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") // Custom URL-Schemes if len(setting.Markdown.CustomURLSchemes) > 0 { - sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) + policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) } // Allow classes for anchors - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a") // Allow classes for task lists - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") // Allow icons - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") // Allow unlabelled labels - sanitizer.policy.AllowNoAttrs().OnElements("label") + policy.AllowNoAttrs().OnElements("label") // Allow classes for emojis - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") // Allow icons, emojis, chroma syntax and keyword markup on span - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") - - // Allow data tables - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table") - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") // Allow generally safe attributes generalSafeAttrs := []string{"abbr", "accept", "accept-charset", @@ -101,18 +112,29 @@ func ReplaceSanitizer() { "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", } - sanitizer.policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) + policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) - sanitizer.policy.AllowAttrs("itemscope", "itemtype").OnElements("div") + policy.AllowAttrs("itemscope", "itemtype").OnElements("div") // FIXME: Need to handle longdesc in img but there is no easy way to do it // Custom keyword markup - for _, rule := range setting.ExternalSanitizerRules { - if rule.Regexp != nil { - sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) - } else { - sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) + addSanitizerRules(policy, setting.ExternalSanitizerRules) + + return policy +} + +func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { + for _, rule := range rules { + if rule.AllowDataURIImages { + policy.AllowDataURIImages() + } + if rule.Element != "" { + if rule.Regexp != nil { + policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) + } else { + policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) + } } } } @@ -120,11 +142,15 @@ func ReplaceSanitizer() { // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { NewSanitizer() - return sanitizer.policy.Sanitize(s) + return sanitizer.defaultPolicy.Sanitize(s) } // SanitizeReader sanitizes a Reader -func SanitizeReader(r io.Reader) *bytes.Buffer { +func SanitizeReader(r io.Reader, renderer string) *bytes.Buffer { NewSanitizer() - return sanitizer.policy.SanitizeReader(r) + policy, exist := sanitizer.rendererPolicies[renderer] + if !exist { + policy = sanitizer.defaultPolicy + } + return policy.SanitizeReader(r) } |