diff options
author | KN4CK3R <admin@oldschoolhack.me> | 2021-06-23 23:09:51 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-23 17:09:51 -0400 |
commit | c9c7afda1a80bda7b61ded222163db796132b78f (patch) | |
tree | 2145a5afe7c7a19d450b7def199dc302e1c8d6f2 /modules | |
parent | eb324a9402878a13029116bafa8ccce527796522 (diff) | |
download | gitea-c9c7afda1a80bda7b61ded222163db796132b78f.tar.gz gitea-c9c7afda1a80bda7b61ded222163db796132b78f.zip |
Add sanitizer rules per renderer (#16110)
* Added sanitizer rules per renderer.
* Updated documentation.
Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Diffstat (limited to 'modules')
-rw-r--r-- | modules/markup/csv/csv.go | 10 | ||||
-rw-r--r-- | modules/markup/external/external.go | 7 | ||||
-rw-r--r-- | modules/markup/html_test.go | 4 | ||||
-rw-r--r-- | modules/markup/markdown/markdown.go | 9 | ||||
-rw-r--r-- | modules/markup/orgmode/orgmode.go | 6 | ||||
-rw-r--r-- | modules/markup/renderer.go | 56 | ||||
-rw-r--r-- | modules/markup/sanitizer.go | 88 | ||||
-rw-r--r-- | modules/setting/markup.go | 103 |
8 files changed, 173 insertions, 110 deletions
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go index 6572b0ee1e..8a4df89511 100644 --- a/modules/markup/csv/csv.go +++ b/modules/markup/csv/csv.go @@ -10,6 +10,7 @@ import ( "html" "io" "io/ioutil" + "regexp" "strconv" "code.gitea.io/gitea/modules/csv" @@ -38,6 +39,15 @@ func (Renderer) Extensions() []string { return []string{".csv", ".tsv"} } +// SanitizerRules implements markup.Renderer +func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return []setting.MarkupSanitizerRule{ + {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)}, + {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)}, + } +} + func writeField(w io.Writer, element, class, field string) error { if _, err := io.WriteString(w, "<"); err != nil { return err diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go index 62814c9914..c849f505e7 100644 --- a/modules/markup/external/external.go +++ b/modules/markup/external/external.go @@ -30,7 +30,7 @@ func RegisterRenderers() { // Renderer implements markup.Renderer for external tools type Renderer struct { - setting.MarkupRenderer + *setting.MarkupRenderer } // Name returns the external tool name @@ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string { return p.FileExtensions } +// SanitizerRules implements markup.Renderer +func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return p.MarkupSanitizerRules +} + func envMark(envName string) string { if runtime.GOOS == "windows" { return "%" + envName + "%" diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go index 4cdd5798c8..8c3d2b5395 100644 --- a/modules/markup/html_test.go +++ b/modules/markup/html_test.go @@ -112,7 +112,7 @@ func TestRender_links(t *testing.T) { defaultCustom := setting.Markdown.CustomURLSchemes setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"} - ReplaceSanitizer() + InitializeSanitizer() CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) test( @@ -192,7 +192,7 @@ func TestRender_links(t *testing.T) { // Restore previous settings setting.Markdown.CustomURLSchemes = defaultCustom - ReplaceSanitizer() + InitializeSanitizer() CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes) } diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go index 87fae2a23b..cac2a180fa 100644 --- a/modules/markup/markdown/markdown.go +++ b/modules/markup/markdown/markdown.go @@ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer) } _ = lw.Close() }() - buf := markup.SanitizeReader(rd) + buf := markup.SanitizeReader(rd, "") _, err := io.Copy(output, buf) return err } @@ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error if log.IsDebug() { log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2))) } - ret := markup.SanitizeReader(input) + ret := markup.SanitizeReader(input, "") _, err = io.Copy(output, ret) if err != nil { log.Error("SanitizeReader failed: %v", err) @@ -249,6 +249,11 @@ func (Renderer) Extensions() []string { return setting.Markdown.FileExtensions } +// SanitizerRules implements markup.Renderer +func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return []setting.MarkupSanitizerRule{} +} + // Render implements markup.Renderer func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { return render(ctx, input, output) diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go index 851fc97f9a..7e9f1f45c5 100644 --- a/modules/markup/orgmode/orgmode.go +++ b/modules/markup/orgmode/orgmode.go @@ -13,6 +13,7 @@ import ( "code.gitea.io/gitea/modules/highlight" "code.gitea.io/gitea/modules/markup" + "code.gitea.io/gitea/modules/setting" "code.gitea.io/gitea/modules/util" "github.com/alecthomas/chroma" @@ -41,6 +42,11 @@ func (Renderer) Extensions() []string { return []string{".org"} } +// SanitizerRules implements markup.Renderer +func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule { + return []setting.MarkupSanitizerRule{} +} + // Render renders orgmode rawbytes to HTML func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error { htmlWriter := org.NewHTMLWriter() diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go index d60c8ad710..04619caee3 100644 --- a/modules/markup/renderer.go +++ b/modules/markup/renderer.go @@ -81,6 +81,7 @@ type Renderer interface { Name() string // markup format name Extensions() []string NeedPostProcess() bool + SanitizerRules() []setting.MarkupSanitizerRule Render(ctx *RenderContext, input io.Reader, output io.Writer) error } @@ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr _ = pw.Close() }() - if renderer.NeedPostProcess() { - pr2, pw2 := io.Pipe() - defer func() { - _ = pr2.Close() - _ = pw2.Close() - }() - - wg.Add(1) - go func() { - buf := SanitizeReader(pr2) - _, err = io.Copy(output, buf) - _ = pr2.Close() - wg.Done() - }() - - wg.Add(1) - go func() { + pr2, pw2 := io.Pipe() + defer func() { + _ = pr2.Close() + _ = pw2.Close() + }() + + wg.Add(1) + go func() { + buf := SanitizeReader(pr2, renderer.Name()) + _, err = io.Copy(output, buf) + _ = pr2.Close() + wg.Done() + }() + + wg.Add(1) + go func() { + if renderer.NeedPostProcess() { err = PostProcess(ctx, pr, pw2) - _ = pr.Close() - _ = pw2.Close() - wg.Done() - }() - } else { - wg.Add(1) - go func() { - buf := SanitizeReader(pr) - _, err = io.Copy(output, buf) - _ = pr.Close() - wg.Done() - }() - } + } else { + _, err = io.Copy(pw2, pr) + } + _ = pr.Close() + _ = pw2.Close() + wg.Done() + }() + if err1 := renderer.Render(ctx, input, pw); err1 != nil { return err1 } diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 5611bd06ad..9342d65de5 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -19,8 +19,9 @@ import ( // Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow // any modification to the underlying policies once it's been created. type Sanitizer struct { - policy *bluemonday.Policy - init sync.Once + defaultPolicy *bluemonday.Policy + rendererPolicies map[string]*bluemonday.Policy + init sync.Once } var sanitizer = &Sanitizer{} @@ -30,47 +31,57 @@ var sanitizer = &Sanitizer{} // entire application lifecycle. func NewSanitizer() { sanitizer.init.Do(func() { - ReplaceSanitizer() + InitializeSanitizer() }) } -// ReplaceSanitizer replaces the current sanitizer to account for changes in settings -func ReplaceSanitizer() { - sanitizer.policy = bluemonday.UGCPolicy() +// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings +func InitializeSanitizer() { + sanitizer.rendererPolicies = map[string]*bluemonday.Policy{} + sanitizer.defaultPolicy = createDefaultPolicy() + + for name, renderer := range renderers { + sanitizerRules := renderer.SanitizerRules() + if len(sanitizerRules) > 0 { + policy := createDefaultPolicy() + addSanitizerRules(policy, sanitizerRules) + sanitizer.rendererPolicies[name] = policy + } + } +} + +func createDefaultPolicy() *bluemonday.Policy { + policy := bluemonday.UGCPolicy() // For Chroma markdown plugin - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre") - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code") // Checkboxes - sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") - sanitizer.policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") + policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input") + policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input") // Custom URL-Schemes if len(setting.Markdown.CustomURLSchemes) > 0 { - sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) + policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...) } // Allow classes for anchors - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a") // Allow classes for task lists - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li") // Allow icons - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i") // Allow unlabelled labels - sanitizer.policy.AllowNoAttrs().OnElements("label") + policy.AllowNoAttrs().OnElements("label") // Allow classes for emojis - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img") // Allow icons, emojis, chroma syntax and keyword markup on span - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") - - // Allow data tables - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table") - sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td") + policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span") // Allow generally safe attributes generalSafeAttrs := []string{"abbr", "accept", "accept-charset", @@ -101,18 +112,29 @@ func ReplaceSanitizer() { "abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr", } - sanitizer.policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) + policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...) - sanitizer.policy.AllowAttrs("itemscope", "itemtype").OnElements("div") + policy.AllowAttrs("itemscope", "itemtype").OnElements("div") // FIXME: Need to handle longdesc in img but there is no easy way to do it // Custom keyword markup - for _, rule := range setting.ExternalSanitizerRules { - if rule.Regexp != nil { - sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) - } else { - sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) + addSanitizerRules(policy, setting.ExternalSanitizerRules) + + return policy +} + +func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) { + for _, rule := range rules { + if rule.AllowDataURIImages { + policy.AllowDataURIImages() + } + if rule.Element != "" { + if rule.Regexp != nil { + policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) + } else { + policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) + } } } } @@ -120,11 +142,15 @@ func ReplaceSanitizer() { // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. func Sanitize(s string) string { NewSanitizer() - return sanitizer.policy.Sanitize(s) + return sanitizer.defaultPolicy.Sanitize(s) } // SanitizeReader sanitizes a Reader -func SanitizeReader(r io.Reader) *bytes.Buffer { +func SanitizeReader(r io.Reader, renderer string) *bytes.Buffer { NewSanitizer() - return sanitizer.policy.SanitizeReader(r) + policy, exist := sanitizer.rendererPolicies[renderer] + if !exist { + policy = sanitizer.defaultPolicy + } + return policy.SanitizeReader(r) } diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 43df4ce442..31ec1dd2eb 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -15,31 +15,34 @@ import ( // ExternalMarkupRenderers represents the external markup renderers var ( - ExternalMarkupRenderers []MarkupRenderer + ExternalMarkupRenderers []*MarkupRenderer ExternalSanitizerRules []MarkupSanitizerRule ) // MarkupRenderer defines the external parser configured in ini type MarkupRenderer struct { - Enabled bool - MarkupName string - Command string - FileExtensions []string - IsInputFile bool - NeedPostProcess bool + Enabled bool + MarkupName string + Command string + FileExtensions []string + IsInputFile bool + NeedPostProcess bool + MarkupSanitizerRules []MarkupSanitizerRule } // MarkupSanitizerRule defines the policy for whitelisting attributes on // certain elements. type MarkupSanitizerRule struct { - Element string - AllowAttr string - Regexp *regexp.Regexp + Element string + AllowAttr string + Regexp *regexp.Regexp + AllowDataURIImages bool } func newMarkup() { - ExternalMarkupRenderers = make([]MarkupRenderer, 0, 10) + ExternalMarkupRenderers = make([]*MarkupRenderer, 0, 10) ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, 10) + for _, sec := range Cfg.Section("markup").ChildSections() { name := strings.TrimPrefix(sec.Name(), "markup.") if name == "" { @@ -56,50 +59,62 @@ func newMarkup() { } func newMarkupSanitizer(name string, sec *ini.Section) { - haveElement := sec.HasKey("ELEMENT") - haveAttr := sec.HasKey("ALLOW_ATTR") - haveRegexp := sec.HasKey("REGEXP") - - if !haveElement && !haveAttr && !haveRegexp { - log.Warn("Skipping empty section: markup.%s.", name) - return + rule, ok := createMarkupSanitizerRule(name, sec) + if ok { + if strings.HasPrefix(name, "sanitizer.") { + names := strings.SplitN(strings.TrimPrefix(name, "sanitizer."), ".", 2) + name = names[0] + } + for _, renderer := range ExternalMarkupRenderers { + if name == renderer.MarkupName { + renderer.MarkupSanitizerRules = append(renderer.MarkupSanitizerRules, rule) + return + } + } + ExternalSanitizerRules = append(ExternalSanitizerRules, rule) } +} - if !haveElement || !haveAttr || !haveRegexp { - log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name) - return +func createMarkupSanitizerRule(name string, sec *ini.Section) (MarkupSanitizerRule, bool) { + var rule MarkupSanitizerRule + + ok := false + if sec.HasKey("ALLOW_DATA_URI_IMAGES") { + rule.AllowDataURIImages = sec.Key("ALLOW_DATA_URI_IMAGES").MustBool(false) + ok = true } - elements := sec.Key("ELEMENT").Value() - allowAttrs := sec.Key("ALLOW_ATTR").Value() - regexpStr := sec.Key("REGEXP").Value() + if sec.HasKey("ELEMENT") || sec.HasKey("ALLOW_ATTR") { + rule.Element = sec.Key("ELEMENT").Value() + rule.AllowAttr = sec.Key("ALLOW_ATTR").Value() - if regexpStr == "" { - rule := MarkupSanitizerRule{ - Element: elements, - AllowAttr: allowAttrs, - Regexp: nil, + if rule.Element == "" || rule.AllowAttr == "" { + log.Error("Missing required values from markup.%s. Must have ELEMENT and ALLOW_ATTR defined!", name) + return rule, false } - ExternalSanitizerRules = append(ExternalSanitizerRules, rule) - return - } + regexpStr := sec.Key("REGEXP").Value() + if regexpStr != "" { + // Validate when parsing the config that this is a valid regular + // expression. Then we can use regexp.MustCompile(...) later. + compiled, err := regexp.Compile(regexpStr) + if err != nil { + log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err) + return rule, false + } + + rule.Regexp = compiled + } - // Validate when parsing the config that this is a valid regular - // expression. Then we can use regexp.MustCompile(...) later. - compiled, err := regexp.Compile(regexpStr) - if err != nil { - log.Error("In module.%s: REGEXP (%s) at definition %d failed to compile: %v", regexpStr, name, err) - return + ok = true } - rule := MarkupSanitizerRule{ - Element: elements, - AllowAttr: allowAttrs, - Regexp: compiled, + if !ok { + log.Error("Missing required keys from markup.%s. Must have ELEMENT and ALLOW_ATTR or ALLOW_DATA_URI_IMAGES defined!", name) + return rule, false } - ExternalSanitizerRules = append(ExternalSanitizerRules, rule) + return rule, true } func newMarkupRenderer(name string, sec *ini.Section) { @@ -126,7 +141,7 @@ func newMarkupRenderer(name string, sec *ini.Section) { return } - ExternalMarkupRenderers = append(ExternalMarkupRenderers, MarkupRenderer{ + ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{ Enabled: sec.Key("ENABLED").MustBool(false), MarkupName: name, FileExtensions: exts, |