diff options
author | Alexander Scheel <alexander.m.scheel@gmail.com> | 2019-12-07 14:49:04 -0500 |
---|---|---|
committer | techknowlogick <techknowlogick@gitea.io> | 2019-12-07 14:49:04 -0500 |
commit | ee7df7ba8c5e6a4b32b0c4048d2b535d8df3cbe9 (patch) | |
tree | 73229ccd7b291bc1c48fa2aed78cdf1dd7100b6f /modules | |
parent | cecc31951c1b12864e13a2dd148a5e96c74d9a5c (diff) | |
download | gitea-ee7df7ba8c5e6a4b32b0c4048d2b535d8df3cbe9.tar.gz gitea-ee7df7ba8c5e6a4b32b0c4048d2b535d8df3cbe9.zip |
Markdown: Sanitizer Configuration (#9075)
* Support custom sanitization policy
Allowing the gitea administrator to configure sanitization policy allows
them to couple external renderers and custom templates to support more
markup. In particular, the `pandoc` renderer allows generating KaTeX
annotations, wrapping them in `<span>` elements with class `math` and
either `inline` or `display` (depending on whether inline or
block mode was requested).
This iteration gives the administrator whitelisting powers; carefully
crafted regexes will thus let through only the desired attributes
necessary to support their custom markup.
Resolves: #9054
Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
* Document new sanitization configuration
- Adds basic documentation to app.ini.sample,
- Adds an example to the Configuration Cheat Sheet, and
- Adds extended information to External Renderers section.
Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
* Drop extraneous length check in newMarkupSanitizer(...)
Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
* Fix plural ELEMENT and ALLOW_ATTR in docs
These were left over from their initial names. Make them singular to
conform with the current expectations.
Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
Diffstat (limited to 'modules')
-rw-r--r-- | modules/markup/sanitizer.go | 9 | ||||
-rw-r--r-- | modules/setting/markup.go | 119 |
2 files changed, 106 insertions, 22 deletions
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go index 0ebb3ff88b..f7789a9e56 100644 --- a/modules/markup/sanitizer.go +++ b/modules/markup/sanitizer.go @@ -50,6 +50,15 @@ func ReplaceSanitizer() { // Allow <kbd> tags for keyboard shortcut styling sanitizer.policy.AllowElements("kbd") + + // Custom keyword markup + for _, rule := range setting.ExternalSanitizerRules { + if rule.Regexp != nil { + sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element) + } else { + sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element) + } + } } // Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist. diff --git a/modules/setting/markup.go b/modules/setting/markup.go index 41f3cdd3a1..75e6d651bd 100644 --- a/modules/setting/markup.go +++ b/modules/setting/markup.go @@ -9,11 +9,14 @@ import ( "strings" "code.gitea.io/gitea/modules/log" + + "gopkg.in/ini.v1" ) // ExternalMarkupParsers represents the external markup parsers var ( - ExternalMarkupParsers []MarkupParser + ExternalMarkupParsers []MarkupParser + ExternalSanitizerRules []MarkupSanitizerRule ) // MarkupParser defines the external parser configured in ini @@ -25,8 +28,15 @@ type MarkupParser struct { IsInputFile bool } +// MarkupSanitizerRule defines the policy for whitelisting attributes on +// certain elements. +type MarkupSanitizerRule struct { + Element string + AllowAttr string + Regexp *regexp.Regexp +} + func newMarkup() { - extensionReg := regexp.MustCompile(`\.\w`) for _, sec := range Cfg.Section("markup").ChildSections() { name := strings.TrimPrefix(sec.Name(), "markup.") if name == "" { @@ -34,33 +44,98 @@ func newMarkup() { continue } - extensions := sec.Key("FILE_EXTENSIONS").Strings(",") - var exts = make([]string, 0, len(extensions)) - for _, extension := range extensions { - if !extensionReg.MatchString(extension) { - log.Warn(sec.Name() + " file extension " + extension + " is invalid. 
Extension ignored") - } else { - exts = append(exts, extension) - } + if name == "sanitizer" { + newMarkupSanitizer(name, sec) + } else { + newMarkupRenderer(name, sec) } + } +} + +func newMarkupSanitizer(name string, sec *ini.Section) { + haveElement := sec.HasKey("ELEMENT") + haveAttr := sec.HasKey("ALLOW_ATTR") + haveRegexp := sec.HasKey("REGEXP") + + if !haveElement && !haveAttr && !haveRegexp { + log.Warn("Skipping empty section: markup.%s.", name) + return + } + + if !haveElement || !haveAttr || !haveRegexp { + log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name) + return + } + + elements := sec.Key("ELEMENT").ValueWithShadows() + allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows() + regexps := sec.Key("REGEXP").ValueWithShadows() + + if len(elements) != len(allowAttrs) || + len(elements) != len(regexps) { + log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps)) + return + } - if len(exts) == 0 { - log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored") + ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements)) + + for index, pattern := range regexps { + if pattern == "" { + rule := MarkupSanitizerRule{ + Element: elements[index], + AllowAttr: allowAttrs[index], + Regexp: nil, + } + ExternalSanitizerRules = append(ExternalSanitizerRules, rule) continue } - command := sec.Key("RENDER_COMMAND").MustString("") - if command == "" { - log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored") + // Validate when parsing the config that this is a valid regular + // expression. Then we can use regexp.MustCompile(...) later. 
+ compiled, err := regexp.Compile(pattern) + if err != nil { + log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err) continue } - ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{ - Enabled: sec.Key("ENABLED").MustBool(false), - MarkupName: name, - FileExtensions: exts, - Command: command, - IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), - }) + rule := MarkupSanitizerRule{ + Element: elements[index], + AllowAttr: allowAttrs[index], + Regexp: compiled, + } + ExternalSanitizerRules = append(ExternalSanitizerRules, rule) + } +} + +func newMarkupRenderer(name string, sec *ini.Section) { + extensionReg := regexp.MustCompile(`\.\w`) + + extensions := sec.Key("FILE_EXTENSIONS").Strings(",") + var exts = make([]string, 0, len(extensions)) + for _, extension := range extensions { + if !extensionReg.MatchString(extension) { + log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored") + } else { + exts = append(exts, extension) + } + } + + if len(exts) == 0 { + log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored") + return } + + command := sec.Key("RENDER_COMMAND").MustString("") + if command == "" { + log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored") + return + } + + ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{ + Enabled: sec.Key("ENABLED").MustBool(false), + MarkupName: name, + FileExtensions: exts, + Command: command, + IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false), + }) } |