summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorAlexander Scheel <alexander.m.scheel@gmail.com>2019-12-07 14:49:04 -0500
committertechknowlogick <techknowlogick@gitea.io>2019-12-07 14:49:04 -0500
commitee7df7ba8c5e6a4b32b0c4048d2b535d8df3cbe9 (patch)
tree73229ccd7b291bc1c48fa2aed78cdf1dd7100b6f
parentcecc31951c1b12864e13a2dd148a5e96c74d9a5c (diff)
downloadgitea-ee7df7ba8c5e6a4b32b0c4048d2b535d8df3cbe9.tar.gz
gitea-ee7df7ba8c5e6a4b32b0c4048d2b535d8df3cbe9.zip
Markdown: Sanitizer Configuration (#9075)
* Support custom sanitization policy Allowing the gitea administrator to configure sanitization policy allows them to couple external renders and custom templates to support more markup. In particular, the `pandoc` renderer allows generating KaTeX annotations, wrapping them in `<span>` elements with class `math` and either `inline` or `display` (depending on whether or not inline or block mode was requested). This iteration gives the administrator whitelisting powers; carefully crafted regexes will thus let through only the desired attributes necessary to support their custom markup. Resolves: #9054 Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Document new sanitization configuration - Adds basic documentation to app.ini.sample, - Adds an example to the Configuration Cheat Sheet, and - Adds extended information to External Renderers section. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Drop extraneous length check in newMarkupSanitizer(...) Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com> * Fix plural ELEMENT and ALLOW_ATTR in docs These were left over from their initial names. Make them singular to conform with the current expectations. Signed-off-by: Alexander Scheel <alexander.m.scheel@gmail.com>
-rw-r--r--custom/conf/app.ini.sample6
-rw-r--r--docs/content/doc/advanced/config-cheat-sheet.en-us.md18
-rw-r--r--docs/content/doc/advanced/external-renderers.en-us.md18
-rw-r--r--modules/markup/sanitizer.go9
-rw-r--r--modules/setting/markup.go119
5 files changed, 148 insertions, 22 deletions
diff --git a/custom/conf/app.ini.sample b/custom/conf/app.ini.sample
index 8d11cfc293..050a0db730 100644
--- a/custom/conf/app.ini.sample
+++ b/custom/conf/app.ini.sample
@@ -877,6 +877,12 @@ SHOW_FOOTER_VERSION = true
; Show template execution time in the footer
SHOW_FOOTER_TEMPLATE_LOAD_TIME = true
+[markup.sanitizer]
+; The following keys can be used multiple times to define sanitization policy rules.
+;ELEMENT = span
+;ALLOW_ATTR = class
+;REGEXP = ^(info|warning|error)$
+
[markup.asciidoc]
ENABLED = false
; List of file extensions that should be rendered by an external command
diff --git a/docs/content/doc/advanced/config-cheat-sheet.en-us.md b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
index 9f02e888cf..0d7a641b19 100644
--- a/docs/content/doc/advanced/config-cheat-sheet.en-us.md
+++ b/docs/content/doc/advanced/config-cheat-sheet.en-us.md
@@ -578,6 +578,24 @@ Two special environment variables are passed to the render command:
- `GITEA_PREFIX_SRC`, which contains the current URL prefix in the `src` path tree. To be used as prefix for links.
- `GITEA_PREFIX_RAW`, which contains the current URL prefix in the `raw` path tree. To be used as prefix for image paths.
+
+Gitea supports customizing the sanitization policy for rendered HTML. The example below will support KaTeX output from pandoc.
+
+```ini
+[markup.sanitizer]
+; Pandoc renders TeX segments as <span>s with the "math" class, optionally
+; with "inline" or "display" classes depending on context.
+ELEMENT = span
+ALLOW_ATTR = class
+REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
+```
+
+ - `ELEMENT`: The element this policy applies to. Must be non-empty.
+ - `ALLOW_ATTR`: The attribute this policy allows. Must be non-empty.
+ - `REGEXP`: A regex to match the contents of the attribute against. Must be present but may be empty for unconditional whitelisting of this attribute.
+
+You may define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each group of all three forms a single policy entry, so all three keys must be defined the same number of times.
+
## Time (`time`)
- `FORMAT`: Time format to display on UI, e.g. RFC1123 or 2006-01-02 15:04:05
diff --git a/docs/content/doc/advanced/external-renderers.en-us.md b/docs/content/doc/advanced/external-renderers.en-us.md
index a14f344e63..ec1ee63fb6 100644
--- a/docs/content/doc/advanced/external-renderers.en-us.md
+++ b/docs/content/doc/advanced/external-renderers.en-us.md
@@ -68,4 +68,22 @@ RENDER_COMMAND = rst2html.py
IS_INPUT_FILE = false
```
+If your external markup relies on additional classes and attributes on the generated HTML elements, you might need to enable custom sanitizer policies. Gitea uses the [`bluemonday`](https://godoc.org/github.com/microcosm-cc/bluemonday) package as its HTML sanitizer. The example below will support [KaTeX](https://katex.org/) output from [`pandoc`](https://pandoc.org/).
+
+```ini
+[markup.sanitizer]
+; Pandoc renders TeX segments as <span>s with the "math" class, optionally
+; with "inline" or "display" classes depending on context.
+ELEMENT = span
+ALLOW_ATTR = class
+REGEXP = ^\s*((math(\s+|$)|inline(\s+|$)|display(\s+|$)))+
+
+[markup.markdown]
+ENABLED = true
+FILE_EXTENSIONS = .md,.markdown
+RENDER_COMMAND = pandoc -f markdown -t html --katex
+```
+
+You may define `ELEMENT`, `ALLOW_ATTR`, and `REGEXP` multiple times; each group of all three forms a single policy entry. All three must be defined for every entry, but `REGEXP` may be blank to allow unconditional whitelisting of that attribute.
+
Once your configuration changes have been made, restart Gitea to have changes take effect.
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index 0ebb3ff88b..f7789a9e56 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -50,6 +50,15 @@ func ReplaceSanitizer() {
// Allow <kbd> tags for keyboard shortcut styling
sanitizer.policy.AllowElements("kbd")
+
+ // Custom keyword markup
+ for _, rule := range setting.ExternalSanitizerRules {
+ if rule.Regexp != nil {
+ sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
+ } else {
+ sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
+ }
+ }
}
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
diff --git a/modules/setting/markup.go b/modules/setting/markup.go
index 41f3cdd3a1..75e6d651bd 100644
--- a/modules/setting/markup.go
+++ b/modules/setting/markup.go
@@ -9,11 +9,14 @@ import (
"strings"
"code.gitea.io/gitea/modules/log"
+
+ "gopkg.in/ini.v1"
)
// ExternalMarkupParsers represents the external markup parsers
var (
- ExternalMarkupParsers []MarkupParser
+ ExternalMarkupParsers []MarkupParser
+ ExternalSanitizerRules []MarkupSanitizerRule
)
// MarkupParser defines the external parser configured in ini
@@ -25,8 +28,15 @@ type MarkupParser struct {
IsInputFile bool
}
+// MarkupSanitizerRule defines the policy for whitelisting attributes on
+// certain elements.
+type MarkupSanitizerRule struct {
+ Element string
+ AllowAttr string
+ Regexp *regexp.Regexp
+}
+
func newMarkup() {
- extensionReg := regexp.MustCompile(`\.\w`)
for _, sec := range Cfg.Section("markup").ChildSections() {
name := strings.TrimPrefix(sec.Name(), "markup.")
if name == "" {
@@ -34,33 +44,98 @@ func newMarkup() {
continue
}
- extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
- var exts = make([]string, 0, len(extensions))
- for _, extension := range extensions {
- if !extensionReg.MatchString(extension) {
- log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored")
- } else {
- exts = append(exts, extension)
- }
+ if name == "sanitizer" {
+ newMarkupSanitizer(name, sec)
+ } else {
+ newMarkupRenderer(name, sec)
}
+ }
+}
+
+func newMarkupSanitizer(name string, sec *ini.Section) {
+ haveElement := sec.HasKey("ELEMENT")
+ haveAttr := sec.HasKey("ALLOW_ATTR")
+ haveRegexp := sec.HasKey("REGEXP")
+
+ if !haveElement && !haveAttr && !haveRegexp {
+ log.Warn("Skipping empty section: markup.%s.", name)
+ return
+ }
+
+ if !haveElement || !haveAttr || !haveRegexp {
+ log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name)
+ return
+ }
+
+ elements := sec.Key("ELEMENT").ValueWithShadows()
+ allowAttrs := sec.Key("ALLOW_ATTR").ValueWithShadows()
+ regexps := sec.Key("REGEXP").ValueWithShadows()
+
+ if len(elements) != len(allowAttrs) ||
+ len(elements) != len(regexps) {
+ log.Error("All three keys in markup.%s (ELEMENT, ALLOW_ATTR, REGEXP) must be defined the same number of times! Got %d, %d, and %d respectively.", name, len(elements), len(allowAttrs), len(regexps))
+ return
+ }
- if len(exts) == 0 {
- log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
+ ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, len(elements))
+
+ for index, pattern := range regexps {
+ if pattern == "" {
+ rule := MarkupSanitizerRule{
+ Element: elements[index],
+ AllowAttr: allowAttrs[index],
+ Regexp: nil,
+ }
+ ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
continue
}
- command := sec.Key("RENDER_COMMAND").MustString("")
- if command == "" {
- log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
+ // Validate when parsing the config that this is a valid regular
+ // expression. Then we can use regexp.MustCompile(...) later.
+ compiled, err := regexp.Compile(pattern)
+ if err != nil {
+ log.Error("In module.%s: REGEXP at definition %d failed to compile: %v", name, index+1, err)
continue
}
- ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
- Enabled: sec.Key("ENABLED").MustBool(false),
- MarkupName: name,
- FileExtensions: exts,
- Command: command,
- IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false),
- })
+ rule := MarkupSanitizerRule{
+ Element: elements[index],
+ AllowAttr: allowAttrs[index],
+ Regexp: compiled,
+ }
+ ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
+ }
+}
+
+func newMarkupRenderer(name string, sec *ini.Section) {
+ extensionReg := regexp.MustCompile(`\.\w`)
+
+ extensions := sec.Key("FILE_EXTENSIONS").Strings(",")
+ var exts = make([]string, 0, len(extensions))
+ for _, extension := range extensions {
+ if !extensionReg.MatchString(extension) {
+ log.Warn(sec.Name() + " file extension " + extension + " is invalid. Extension ignored")
+ } else {
+ exts = append(exts, extension)
+ }
+ }
+
+ if len(exts) == 0 {
+ log.Warn(sec.Name() + " file extension is empty, markup " + name + " ignored")
+ return
}
+
+ command := sec.Key("RENDER_COMMAND").MustString("")
+ if command == "" {
+ log.Warn(" RENDER_COMMAND is empty, markup " + name + " ignored")
+ return
+ }
+
+ ExternalMarkupParsers = append(ExternalMarkupParsers, MarkupParser{
+ Enabled: sec.Key("ENABLED").MustBool(false),
+ MarkupName: name,
+ FileExtensions: exts,
+ Command: command,
+ IsInputFile: sec.Key("IS_INPUT_FILE").MustBool(false),
+ })
}