summaryrefslogtreecommitdiffstats
path: root/modules
diff options
context:
space:
mode:
authorKN4CK3R <admin@oldschoolhack.me>2021-06-23 23:09:51 +0200
committerGitHub <noreply@github.com>2021-06-23 17:09:51 -0400
commitc9c7afda1a80bda7b61ded222163db796132b78f (patch)
tree2145a5afe7c7a19d450b7def199dc302e1c8d6f2 /modules
parenteb324a9402878a13029116bafa8ccce527796522 (diff)
downloadgitea-c9c7afda1a80bda7b61ded222163db796132b78f.tar.gz
gitea-c9c7afda1a80bda7b61ded222163db796132b78f.zip
Add sanitizer rules per renderer (#16110)
* Added sanitizer rules per renderer. * Updated documentation. Co-authored-by: techknowlogick <techknowlogick@gitea.io>
Diffstat (limited to 'modules')
-rw-r--r--modules/markup/csv/csv.go10
-rw-r--r--modules/markup/external/external.go7
-rw-r--r--modules/markup/html_test.go4
-rw-r--r--modules/markup/markdown/markdown.go9
-rw-r--r--modules/markup/orgmode/orgmode.go6
-rw-r--r--modules/markup/renderer.go56
-rw-r--r--modules/markup/sanitizer.go88
-rw-r--r--modules/setting/markup.go103
8 files changed, 173 insertions, 110 deletions
diff --git a/modules/markup/csv/csv.go b/modules/markup/csv/csv.go
index 6572b0ee1e..8a4df89511 100644
--- a/modules/markup/csv/csv.go
+++ b/modules/markup/csv/csv.go
@@ -10,6 +10,7 @@ import (
"html"
"io"
"io/ioutil"
+ "regexp"
"strconv"
"code.gitea.io/gitea/modules/csv"
@@ -38,6 +39,15 @@ func (Renderer) Extensions() []string {
return []string{".csv", ".tsv"}
}
+// SanitizerRules implements markup.Renderer
+func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
+ return []setting.MarkupSanitizerRule{
+ {Element: "table", AllowAttr: "class", Regexp: regexp.MustCompile(`data-table`)},
+ {Element: "th", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
+ {Element: "td", AllowAttr: "class", Regexp: regexp.MustCompile(`line-num`)},
+ }
+}
+
func writeField(w io.Writer, element, class, field string) error {
if _, err := io.WriteString(w, "<"); err != nil {
return err
diff --git a/modules/markup/external/external.go b/modules/markup/external/external.go
index 62814c9914..c849f505e7 100644
--- a/modules/markup/external/external.go
+++ b/modules/markup/external/external.go
@@ -30,7 +30,7 @@ func RegisterRenderers() {
// Renderer implements markup.Renderer for external tools
type Renderer struct {
- setting.MarkupRenderer
+ *setting.MarkupRenderer
}
// Name returns the external tool name
@@ -48,6 +48,11 @@ func (p *Renderer) Extensions() []string {
return p.FileExtensions
}
+// SanitizerRules implements markup.Renderer
+func (p *Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
+ return p.MarkupSanitizerRules
+}
+
func envMark(envName string) string {
if runtime.GOOS == "windows" {
return "%" + envName + "%"
diff --git a/modules/markup/html_test.go b/modules/markup/html_test.go
index 4cdd5798c8..8c3d2b5395 100644
--- a/modules/markup/html_test.go
+++ b/modules/markup/html_test.go
@@ -112,7 +112,7 @@ func TestRender_links(t *testing.T) {
defaultCustom := setting.Markdown.CustomURLSchemes
setting.Markdown.CustomURLSchemes = []string{"ftp", "magnet"}
- ReplaceSanitizer()
+ InitializeSanitizer()
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
test(
@@ -192,7 +192,7 @@ func TestRender_links(t *testing.T) {
// Restore previous settings
setting.Markdown.CustomURLSchemes = defaultCustom
- ReplaceSanitizer()
+ InitializeSanitizer()
CustomLinkURLSchemes(setting.Markdown.CustomURLSchemes)
}
diff --git a/modules/markup/markdown/markdown.go b/modules/markup/markdown/markdown.go
index 87fae2a23b..cac2a180fa 100644
--- a/modules/markup/markdown/markdown.go
+++ b/modules/markup/markdown/markdown.go
@@ -199,7 +199,7 @@ func actualRender(ctx *markup.RenderContext, input io.Reader, output io.Writer)
}
_ = lw.Close()
}()
- buf := markup.SanitizeReader(rd)
+ buf := markup.SanitizeReader(rd, "")
_, err := io.Copy(output, buf)
return err
}
@@ -215,7 +215,7 @@ func render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error
if log.IsDebug() {
log.Debug("Panic in markdown: %v\n%s", err, string(log.Stack(2)))
}
- ret := markup.SanitizeReader(input)
+ ret := markup.SanitizeReader(input, "")
_, err = io.Copy(output, ret)
if err != nil {
log.Error("SanitizeReader failed: %v", err)
@@ -249,6 +249,11 @@ func (Renderer) Extensions() []string {
return setting.Markdown.FileExtensions
}
+// SanitizerRules implements markup.Renderer
+func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
+ return []setting.MarkupSanitizerRule{}
+}
+
// Render implements markup.Renderer
func (Renderer) Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
return render(ctx, input, output)
diff --git a/modules/markup/orgmode/orgmode.go b/modules/markup/orgmode/orgmode.go
index 851fc97f9a..7e9f1f45c5 100644
--- a/modules/markup/orgmode/orgmode.go
+++ b/modules/markup/orgmode/orgmode.go
@@ -13,6 +13,7 @@ import (
"code.gitea.io/gitea/modules/highlight"
"code.gitea.io/gitea/modules/markup"
+ "code.gitea.io/gitea/modules/setting"
"code.gitea.io/gitea/modules/util"
"github.com/alecthomas/chroma"
@@ -41,6 +42,11 @@ func (Renderer) Extensions() []string {
return []string{".org"}
}
+// SanitizerRules implements markup.Renderer
+func (Renderer) SanitizerRules() []setting.MarkupSanitizerRule {
+ return []setting.MarkupSanitizerRule{}
+}
+
// Render renders orgmode rawbytes to HTML
func Render(ctx *markup.RenderContext, input io.Reader, output io.Writer) error {
htmlWriter := org.NewHTMLWriter()
diff --git a/modules/markup/renderer.go b/modules/markup/renderer.go
index d60c8ad710..04619caee3 100644
--- a/modules/markup/renderer.go
+++ b/modules/markup/renderer.go
@@ -81,6 +81,7 @@ type Renderer interface {
Name() string // markup format name
Extensions() []string
NeedPostProcess() bool
+ SanitizerRules() []setting.MarkupSanitizerRule
Render(ctx *RenderContext, input io.Reader, output io.Writer) error
}
@@ -136,37 +137,32 @@ func render(ctx *RenderContext, renderer Renderer, input io.Reader, output io.Wr
_ = pw.Close()
}()
- if renderer.NeedPostProcess() {
- pr2, pw2 := io.Pipe()
- defer func() {
- _ = pr2.Close()
- _ = pw2.Close()
- }()
-
- wg.Add(1)
- go func() {
- buf := SanitizeReader(pr2)
- _, err = io.Copy(output, buf)
- _ = pr2.Close()
- wg.Done()
- }()
-
- wg.Add(1)
- go func() {
+ pr2, pw2 := io.Pipe()
+ defer func() {
+ _ = pr2.Close()
+ _ = pw2.Close()
+ }()
+
+ wg.Add(1)
+ go func() {
+ buf := SanitizeReader(pr2, renderer.Name())
+ _, err = io.Copy(output, buf)
+ _ = pr2.Close()
+ wg.Done()
+ }()
+
+ wg.Add(1)
+ go func() {
+ if renderer.NeedPostProcess() {
err = PostProcess(ctx, pr, pw2)
- _ = pr.Close()
- _ = pw2.Close()
- wg.Done()
- }()
- } else {
- wg.Add(1)
- go func() {
- buf := SanitizeReader(pr)
- _, err = io.Copy(output, buf)
- _ = pr.Close()
- wg.Done()
- }()
- }
+ } else {
+ _, err = io.Copy(pw2, pr)
+ }
+ _ = pr.Close()
+ _ = pw2.Close()
+ wg.Done()
+ }()
+
if err1 := renderer.Render(ctx, input, pw); err1 != nil {
return err1
}
diff --git a/modules/markup/sanitizer.go b/modules/markup/sanitizer.go
index 5611bd06ad..9342d65de5 100644
--- a/modules/markup/sanitizer.go
+++ b/modules/markup/sanitizer.go
@@ -19,8 +19,9 @@ import (
// Sanitizer is a protection wrapper of *bluemonday.Policy which does not allow
// any modification to the underlying policies once it's been created.
type Sanitizer struct {
- policy *bluemonday.Policy
- init sync.Once
+ defaultPolicy *bluemonday.Policy
+ rendererPolicies map[string]*bluemonday.Policy
+ init sync.Once
}
var sanitizer = &Sanitizer{}
@@ -30,47 +31,57 @@ var sanitizer = &Sanitizer{}
// entire application lifecycle.
func NewSanitizer() {
sanitizer.init.Do(func() {
- ReplaceSanitizer()
+ InitializeSanitizer()
})
}
-// ReplaceSanitizer replaces the current sanitizer to account for changes in settings
-func ReplaceSanitizer() {
- sanitizer.policy = bluemonday.UGCPolicy()
+// InitializeSanitizer (re)initializes the current sanitizer to account for changes in settings
+func InitializeSanitizer() {
+ sanitizer.rendererPolicies = map[string]*bluemonday.Policy{}
+ sanitizer.defaultPolicy = createDefaultPolicy()
+
+ for name, renderer := range renderers {
+ sanitizerRules := renderer.SanitizerRules()
+ if len(sanitizerRules) > 0 {
+ policy := createDefaultPolicy()
+ addSanitizerRules(policy, sanitizerRules)
+ sanitizer.rendererPolicies[name] = policy
+ }
+ }
+}
+
+func createDefaultPolicy() *bluemonday.Policy {
+ policy := bluemonday.UGCPolicy()
// For Chroma markdown plugin
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre")
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`^is-loading$`)).OnElements("pre")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`^(chroma )?language-[\w-]+$`)).OnElements("code")
// Checkboxes
- sanitizer.policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
- sanitizer.policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
+ policy.AllowAttrs("type").Matching(regexp.MustCompile(`^checkbox$`)).OnElements("input")
+ policy.AllowAttrs("checked", "disabled", "data-source-position").OnElements("input")
// Custom URL-Schemes
if len(setting.Markdown.CustomURLSchemes) > 0 {
- sanitizer.policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
+ policy.AllowURLSchemes(setting.Markdown.CustomURLSchemes...)
}
// Allow classes for anchors
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`ref-issue`)).OnElements("a")
// Allow classes for task lists
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`task-list-item`)).OnElements("li")
// Allow icons
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`^icon(\s+[\p{L}\p{N}_-]+)+$`)).OnElements("i")
// Allow unlabelled labels
- sanitizer.policy.AllowNoAttrs().OnElements("label")
+ policy.AllowNoAttrs().OnElements("label")
// Allow classes for emojis
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`emoji`)).OnElements("img")
// Allow icons, emojis, chroma syntax and keyword markup on span
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
-
- // Allow data tables
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`data-table`)).OnElements("table")
- sanitizer.policy.AllowAttrs("class").Matching(regexp.MustCompile(`line-num`)).OnElements("th", "td")
+ policy.AllowAttrs("class").Matching(regexp.MustCompile(`^((icon(\s+[\p{L}\p{N}_-]+)+)|(emoji))$|^([a-z][a-z0-9]{0,2})$|^` + keywordClass + `$`)).OnElements("span")
// Allow generally safe attributes
generalSafeAttrs := []string{"abbr", "accept", "accept-charset",
@@ -101,18 +112,29 @@ func ReplaceSanitizer() {
"abbr", "bdo", "cite", "dfn", "mark", "small", "span", "time", "wbr",
}
- sanitizer.policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
+ policy.AllowAttrs(generalSafeAttrs...).OnElements(generalSafeElements...)
- sanitizer.policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
+ policy.AllowAttrs("itemscope", "itemtype").OnElements("div")
// FIXME: Need to handle longdesc in img but there is no easy way to do it
// Custom keyword markup
- for _, rule := range setting.ExternalSanitizerRules {
- if rule.Regexp != nil {
- sanitizer.policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
- } else {
- sanitizer.policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
+ addSanitizerRules(policy, setting.ExternalSanitizerRules)
+
+ return policy
+}
+
+func addSanitizerRules(policy *bluemonday.Policy, rules []setting.MarkupSanitizerRule) {
+ for _, rule := range rules {
+ if rule.AllowDataURIImages {
+ policy.AllowDataURIImages()
+ }
+ if rule.Element != "" {
+ if rule.Regexp != nil {
+ policy.AllowAttrs(rule.AllowAttr).Matching(rule.Regexp).OnElements(rule.Element)
+ } else {
+ policy.AllowAttrs(rule.AllowAttr).OnElements(rule.Element)
+ }
}
}
}
@@ -120,11 +142,15 @@ func ReplaceSanitizer() {
// Sanitize takes a string that contains a HTML fragment or document and applies policy whitelist.
func Sanitize(s string) string {
NewSanitizer()
- return sanitizer.policy.Sanitize(s)
+ return sanitizer.defaultPolicy.Sanitize(s)
}
// SanitizeReader sanitizes a Reader
-func SanitizeReader(r io.Reader) *bytes.Buffer {
+func SanitizeReader(r io.Reader, renderer string) *bytes.Buffer {
NewSanitizer()
- return sanitizer.policy.SanitizeReader(r)
+ policy, exist := sanitizer.rendererPolicies[renderer]
+ if !exist {
+ policy = sanitizer.defaultPolicy
+ }
+ return policy.SanitizeReader(r)
}
diff --git a/modules/setting/markup.go b/modules/setting/markup.go
index 43df4ce442..31ec1dd2eb 100644
--- a/modules/setting/markup.go
+++ b/modules/setting/markup.go
@@ -15,31 +15,34 @@ import (
// ExternalMarkupRenderers represents the external markup renderers
var (
- ExternalMarkupRenderers []MarkupRenderer
+ ExternalMarkupRenderers []*MarkupRenderer
ExternalSanitizerRules []MarkupSanitizerRule
)
// MarkupRenderer defines the external parser configured in ini
type MarkupRenderer struct {
- Enabled bool
- MarkupName string
- Command string
- FileExtensions []string
- IsInputFile bool
- NeedPostProcess bool
+ Enabled bool
+ MarkupName string
+ Command string
+ FileExtensions []string
+ IsInputFile bool
+ NeedPostProcess bool
+ MarkupSanitizerRules []MarkupSanitizerRule
}
// MarkupSanitizerRule defines the policy for whitelisting attributes on
// certain elements.
type MarkupSanitizerRule struct {
- Element string
- AllowAttr string
- Regexp *regexp.Regexp
+ Element string
+ AllowAttr string
+ Regexp *regexp.Regexp
+ AllowDataURIImages bool
}
func newMarkup() {
- ExternalMarkupRenderers = make([]MarkupRenderer, 0, 10)
+ ExternalMarkupRenderers = make([]*MarkupRenderer, 0, 10)
ExternalSanitizerRules = make([]MarkupSanitizerRule, 0, 10)
+
for _, sec := range Cfg.Section("markup").ChildSections() {
name := strings.TrimPrefix(sec.Name(), "markup.")
if name == "" {
@@ -56,50 +59,62 @@ func newMarkup() {
}
func newMarkupSanitizer(name string, sec *ini.Section) {
- haveElement := sec.HasKey("ELEMENT")
- haveAttr := sec.HasKey("ALLOW_ATTR")
- haveRegexp := sec.HasKey("REGEXP")
-
- if !haveElement && !haveAttr && !haveRegexp {
- log.Warn("Skipping empty section: markup.%s.", name)
- return
+ rule, ok := createMarkupSanitizerRule(name, sec)
+ if ok {
+ if strings.HasPrefix(name, "sanitizer.") {
+ names := strings.SplitN(strings.TrimPrefix(name, "sanitizer."), ".", 2)
+ name = names[0]
+ }
+ for _, renderer := range ExternalMarkupRenderers {
+ if name == renderer.MarkupName {
+ renderer.MarkupSanitizerRules = append(renderer.MarkupSanitizerRules, rule)
+ return
+ }
+ }
+ ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
}
+}
- if !haveElement || !haveAttr || !haveRegexp {
- log.Error("Missing required keys from markup.%s. Must have all three of ELEMENT, ALLOW_ATTR, and REGEXP defined!", name)
- return
+func createMarkupSanitizerRule(name string, sec *ini.Section) (MarkupSanitizerRule, bool) {
+ var rule MarkupSanitizerRule
+
+ ok := false
+ if sec.HasKey("ALLOW_DATA_URI_IMAGES") {
+ rule.AllowDataURIImages = sec.Key("ALLOW_DATA_URI_IMAGES").MustBool(false)
+ ok = true
}
- elements := sec.Key("ELEMENT").Value()
- allowAttrs := sec.Key("ALLOW_ATTR").Value()
- regexpStr := sec.Key("REGEXP").Value()
+ if sec.HasKey("ELEMENT") || sec.HasKey("ALLOW_ATTR") {
+ rule.Element = sec.Key("ELEMENT").Value()
+ rule.AllowAttr = sec.Key("ALLOW_ATTR").Value()
- if regexpStr == "" {
- rule := MarkupSanitizerRule{
- Element: elements,
- AllowAttr: allowAttrs,
- Regexp: nil,
+ if rule.Element == "" || rule.AllowAttr == "" {
+ log.Error("Missing required values from markup.%s. Must have ELEMENT and ALLOW_ATTR defined!", name)
+ return rule, false
}
- ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
- return
- }
+ regexpStr := sec.Key("REGEXP").Value()
+ if regexpStr != "" {
+ // Validate when parsing the config that this is a valid regular
+ // expression. Then we can use regexp.MustCompile(...) later.
+ compiled, err := regexp.Compile(regexpStr)
+ if err != nil {
+ log.Error("In markup.%s: REGEXP (%s) failed to compile: %v", name, regexpStr, err)
+ return rule, false
+ }
+
+ rule.Regexp = compiled
+ }
- // Validate when parsing the config that this is a valid regular
- // expression. Then we can use regexp.MustCompile(...) later.
- compiled, err := regexp.Compile(regexpStr)
- if err != nil {
- log.Error("In module.%s: REGEXP (%s) at definition %d failed to compile: %v", regexpStr, name, err)
- return
+ ok = true
}
- rule := MarkupSanitizerRule{
- Element: elements,
- AllowAttr: allowAttrs,
- Regexp: compiled,
+ if !ok {
+ log.Error("Missing required keys from markup.%s. Must have ELEMENT and ALLOW_ATTR or ALLOW_DATA_URI_IMAGES defined!", name)
+ return rule, false
}
- ExternalSanitizerRules = append(ExternalSanitizerRules, rule)
+ return rule, true
}
func newMarkupRenderer(name string, sec *ini.Section) {
@@ -126,7 +141,7 @@ func newMarkupRenderer(name string, sec *ini.Section) {
return
}
- ExternalMarkupRenderers = append(ExternalMarkupRenderers, MarkupRenderer{
+ ExternalMarkupRenderers = append(ExternalMarkupRenderers, &MarkupRenderer{
Enabled: sec.Key("ENABLED").MustBool(false),
MarkupName: name,
FileExtensions: exts,