author    6543 <6543@obermui.de>  2021-07-04 04:06:10 +0200
committer GitHub <noreply@github.com>  2021-07-04 04:06:10 +0200
commit    fae07cbc8fece383c88ed7b13474a94133c4accf (patch)
tree      65e3279dc5655d22302c9b79c48ecd3d1a06ffcd /vendor/github.com/microcosm-cc/bluemonday/sanitize.go
parent    65ae46bc20f60534ba2590a106a6c86aaa1ecae0 (diff)
Update Vendor (#16325)
* Add Dependency Update Script
* update gitea.com/lunny/levelqueue
* update github.com/PuerkitoBio/goquery
* update github.com/alecthomas/chroma
* update github.com/blevesearch/bleve/v2
* update github.com/caddyserver/certmagic
* update github.com/go-enry/go-enry/v2
* update github.com/go-redis/redis/v8
* update github.com/hashicorp/golang-lru
* update github.com/klauspost/compress
* update github.com/markbates/goth
* update github.com/mholt/archiver/v3
* update github.com/microcosm-cc/bluemonday
* update github.com/minio/minio-go/v7
* update github.com/olivere/elastic/v7
* update github.com/xanzy/go-gitlab
* update github.com/yuin/goldmark
Diffstat (limited to 'vendor/github.com/microcosm-cc/bluemonday/sanitize.go')
-rw-r--r--  vendor/github.com/microcosm-cc/bluemonday/sanitize.go | 296
1 file changed, 197 insertions(+), 99 deletions(-)
diff --git a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
index b462f0990a..9bb87a6879 100644
--- a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
+++ b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
@@ -31,6 +31,7 @@ package bluemonday
import (
"bytes"
+ "fmt"
"io"
"net/url"
"regexp"
@@ -47,10 +48,11 @@ var (
dataAttributeXMLPrefix = regexp.MustCompile("^xml.+")
dataAttributeInvalidChars = regexp.MustCompile("[A-Z;]+")
cssUnicodeChar = regexp.MustCompile(`\\[0-9a-f]{1,6} ?`)
+ dataURIbase64Prefix = regexp.MustCompile(`^data:[^,]*;base64,`)
)
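
A quick illustration of what the new dataURIbase64Prefix pattern accepts; this is a standalone sketch that redefines the same regexp, not part of the diff:

	package main

	import (
		"fmt"
		"regexp"
	)

	func main() {
		// Same pattern as the new dataURIbase64Prefix above.
		re := regexp.MustCompile(`^data:[^,]*;base64,`)
		fmt.Println(re.FindString("data:image/png;base64,iVBORw0KGgo="))
		// -> "data:image/png;base64," (the header up to and including "base64,")
		fmt.Println(re.FindString("data:text/plain,hello") == "") // -> true (not base64, no match)
	}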
// Sanitize takes a string that contains a HTML fragment or document and applies
-// the given policy whitelist.
+// the given policy allowlist.
//
// It returns a HTML string that has been sanitized by the policy or an empty
// string if an error has occurred (most likely as a consequence of extremely
@@ -60,11 +62,11 @@ func (p *Policy) Sanitize(s string) string {
return s
}
- return p.sanitize(strings.NewReader(s)).String()
+ return p.sanitizeWithBuff(strings.NewReader(s)).String()
}
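
For orientation, the entry point touched above is typically used like this; a minimal sketch with the stock UGC policy:

	package main

	import (
		"fmt"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		p := bluemonday.UGCPolicy()
		// Event handlers are stripped; the href survives the allowlist.
		out := p.Sanitize(`<a onclick="evil()" href="http://example.com">link</a>`)
		fmt.Println(out) // <a href="http://example.com" rel="nofollow">link</a>
	}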
// SanitizeBytes takes a []byte that contains a HTML fragment or document and applies
-// the given policy whitelist.
+// the given policy allowlist.
//
// It returns a []byte containing the HTML that has been sanitized by the policy
// or an empty []byte if an error has occurred (most likely as a consequence of
@@ -74,26 +76,32 @@ func (p *Policy) SanitizeBytes(b []byte) []byte {
return b
}
- return p.sanitize(bytes.NewReader(b)).Bytes()
+ return p.sanitizeWithBuff(bytes.NewReader(b)).Bytes()
}
// SanitizeReader takes an io.Reader that contains a HTML fragment or document
-// and applies the given policy whitelist.
+// and applies the given policy allowlist.
//
// It returns a bytes.Buffer containing the HTML that has been sanitized by the
// policy. Errors during sanitization will merely return an empty result.
func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
- return p.sanitize(r)
+ return p.sanitizeWithBuff(r)
+}
+
+// SanitizeReaderToWriter takes an io.Reader that contains a HTML fragment or document
+// and applies the given policy allowlist and writes to the provided writer returning
+// an error if there is one.
+func (p *Policy) SanitizeReaderToWriter(r io.Reader, w io.Writer) error {
+ return p.sanitize(r, w)
}
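
The new streaming variant writes as it tokenizes instead of buffering the whole result, and it surfaces errors rather than collapsing them into an empty buffer. A minimal sketch (the reader and writer choices here are illustrative):

	package main

	import (
		"log"
		"os"
		"strings"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		p := bluemonday.UGCPolicy()
		in := strings.NewReader(`<script>alert(1)</script><p>hello</p>`)
		// Sanitized output goes straight to stdout.
		if err := p.SanitizeReaderToWriter(in, os.Stdout); err != nil {
			log.Fatal(err)
		}
	}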
const escapedURLChars = "'<>\"\r"
-func escapeUrlComponent(val string) string {
- w := bytes.NewBufferString("")
+func escapeUrlComponent(w stringWriterWriter, val string) error {
i := strings.IndexAny(val, escapedURLChars)
for i != -1 {
if _, err := w.WriteString(val[:i]); err != nil {
- return w.String()
+ return err
}
var esc string
switch val[i] {
@@ -114,12 +122,12 @@ func escapeUrlComponent(val string) string {
}
val = val[i+1:]
if _, err := w.WriteString(esc); err != nil {
- return w.String()
+ return err
}
i = strings.IndexAny(val, escapedURLChars)
}
- w.WriteString(val)
- return w.String()
+ _, err := w.WriteString(val)
+ return err
}
// Query represents a query
@@ -205,15 +213,16 @@ func sanitizedURL(val string) (string, error) {
return u.String(), nil
}
-func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) {
+func (p *Policy) writeLinkableBuf(buff stringWriterWriter, token *html.Token) (int, error) {
// do not escape multiple query parameters
- tokenBuff := bytes.NewBufferString("")
- tokenBuff.WriteString("<")
+ tokenBuff := bytes.NewBuffer(make([]byte, 0, 1024)) // This should stay on the stack unless it gets too big
+
+ tokenBuff.WriteByte('<')
tokenBuff.WriteString(token.Data)
for _, attr := range token.Attr {
tokenBuff.WriteByte(' ')
tokenBuff.WriteString(attr.Key)
- tokenBuff.WriteString(`="`)
+ tokenBuff.Write([]byte{'=', '"'})
switch attr.Key {
case "href", "src":
u, ok := p.validURL(attr.Val)
@@ -238,12 +247,27 @@ func (p *Policy) writeLinkableBuf(buff *bytes.Buffer, token *html.Token) {
tokenBuff.WriteString("/")
}
tokenBuff.WriteString(">")
- buff.WriteString(tokenBuff.String())
+ return buff.Write(tokenBuff.Bytes())
}
// Performs the actual sanitization process.
-func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
+func (p *Policy) sanitizeWithBuff(r io.Reader) *bytes.Buffer {
+ var buff bytes.Buffer
+ if err := p.sanitize(r, &buff); err != nil {
+ return &bytes.Buffer{}
+ }
+ return &buff
+}
+
+type asStringWriter struct {
+ io.Writer
+}
+func (a *asStringWriter) WriteString(s string) (int, error) {
+ return a.Write([]byte(s))
+}
+
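
The asStringWriter adapter does by hand what io.WriteString does in the standard library: prefer a WriteString method when the destination has one, and fall back to Write with a []byte conversion otherwise. A standalone sketch of the same idea:

	package main

	import (
		"bytes"
		"fmt"
		"io"
	)

	func main() {
		var buf bytes.Buffer // bytes.Buffer implements io.StringWriter
		// Takes the fast path via buf.WriteString, no []byte allocation.
		n, err := io.WriteString(&buf, "<p>hello</p>")
		fmt.Println(n, err, buf.String()) // 12 <nil> <p>hello</p>
	}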
+func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
// It is possible that the developer has created the policy via:
// p := bluemonday.Policy{}
// rather than:
@@ -252,8 +276,12 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
// would initialize the maps, then we need to do that.
p.init()
+ buff, ok := w.(stringWriterWriter)
+ if !ok {
+ buff = &asStringWriter{w}
+ }
+
var (
- buff bytes.Buffer
skipElementContent bool
skippingElementsCount int64
skipClosingTag bool
@@ -267,11 +295,11 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
err := tokenizer.Err()
if err == io.EOF {
// End of input means end of processing
- return &buff
+ return nil
}
// Raw tokenizer error
- return &bytes.Buffer{}
+ return err
}
token := tokenizer.Token()
@@ -289,6 +317,10 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case html.CommentToken:
// Comments are ignored by default
+ if p.allowComments {
+ // But if allowed then write the comment out as-is
+ buff.WriteString(token.String())
+ }
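
allowComments is driven by the policy builder; assuming the AllowComments() toggle that sets this field (present in this bluemonday release), passthrough looks like:

	package main

	import (
		"fmt"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		p := bluemonday.UGCPolicy()
		p.AllowComments() // without this, comments are silently dropped
		fmt.Println(p.Sanitize("<!-- note --><p>hi</p>"))
		// -> <!-- note --><p>hi</p>
	}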
case html.StartTagToken:
@@ -303,7 +335,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
skippingElementsCount++
}
if p.addSpaces {
- buff.WriteString(" ")
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
}
break
}
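
addSpaces is set through the policy's AddSpaceWhenStrippingTag option, so removing a disallowed tag does not glue the surrounding words together; a sketch:

	package main

	import (
		"fmt"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		p := bluemonday.NewPolicy() // nothing allowlisted
		p.AddSpaceWhenStrippingTag(true)
		fmt.Println(p.Sanitize("foo<br/>bar")) // -> "foo bar" rather than "foobar"
	}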
@@ -318,7 +352,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
skipClosingTag = true
closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
if p.addSpaces {
- buff.WriteString(" ")
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
}
break
}
@@ -327,9 +363,13 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
if !skipElementContent {
// do not escape multiple query parameters
if linkable(token.Data) {
- p.writeLinkableBuf(&buff, &token)
+ if _, err := p.writeLinkableBuf(buff, &token); err != nil {
+ return err
+ }
} else {
- buff.WriteString(token.String())
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
}
}
@@ -345,7 +385,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
skipClosingTag = false
}
if p.addSpaces {
- buff.WriteString(" ")
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
}
break
}
@@ -366,14 +408,18 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
}
if !match {
if p.addSpaces {
- buff.WriteString(" ")
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
}
break
}
}
if !skipElementContent {
- buff.WriteString(token.String())
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
}
case html.SelfClosingTagToken:
@@ -383,7 +429,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
aa, matched := p.matchRegex(token.Data)
if !matched {
if p.addSpaces && !matched {
- buff.WriteString(" ")
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
}
break
}
@@ -396,16 +444,22 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
if p.addSpaces {
- buff.WriteString(" ")
+ if _, err := buff.WriteString(" "); err != nil {
+ return err
+ }
break
}
}
if !skipElementContent {
// do not escape multiple query parameters
if linkable(token.Data) {
- p.writeLinkableBuf(&buff, &token)
+ if _, err := p.writeLinkableBuf(buff, &token); err != nil {
+ return err
+ }
} else {
- buff.WriteString(token.String())
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
}
}
@@ -416,20 +470,26 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
case `script`:
// not encouraged, but if a policy allows JavaScript we
// should not HTML escape it as that would break the output
- buff.WriteString(token.Data)
- case `style`:
+ if _, err := buff.WriteString(token.Data); err != nil {
+ return err
+ }
+ case "style":
// not encouraged, but if a policy allows CSS styles we
// should not HTML escape it as that would break the output
- buff.WriteString(token.Data)
+ if _, err := buff.WriteString(token.Data); err != nil {
+ return err
+ }
default:
// HTML escape the text
- buff.WriteString(token.String())
+ if _, err := buff.WriteString(token.String()); err != nil {
+ return err
+ }
}
}
default:
// A token that didn't exist in the html package when we wrote this
- return &bytes.Buffer{}
+ return fmt.Errorf("unknown token: %v", token)
}
}
}
@@ -440,7 +500,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
func (p *Policy) sanitizeAttrs(
elementName string,
attrs []html.Attribute,
- aps map[string]attrPolicy,
+ aps map[string][]attrPolicy,
) []html.Attribute {
if len(attrs) == 0 {
@@ -465,8 +525,9 @@ func (p *Policy) sanitizeAttrs(
}
// Builds a new attribute slice based on the whether the attribute has been
- // whitelisted explicitly or globally.
+ // allowed explicitly or globally.
cleanAttrs := []html.Attribute{}
+attrsLoop:
for _, htmlAttr := range attrs {
if p.allowDataAttributes {
// If we see a data attribute, let it through.
@@ -489,27 +550,30 @@ func (p *Policy) sanitizeAttrs(
}
// Is there an element specific attribute policy that applies?
- if ap, ok := aps[htmlAttr.Key]; ok {
- if ap.regexp != nil {
- if ap.regexp.MatchString(htmlAttr.Val) {
+ if apl, ok := aps[htmlAttr.Key]; ok {
+ for _, ap := range apl {
+ if ap.regexp != nil {
+ if ap.regexp.MatchString(htmlAttr.Val) {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ continue attrsLoop
+ }
+ } else {
cleanAttrs = append(cleanAttrs, htmlAttr)
- continue
+ continue attrsLoop
}
- } else {
- cleanAttrs = append(cleanAttrs, htmlAttr)
- continue
}
}
// Is there a global attribute policy that applies?
- if ap, ok := p.globalAttrs[htmlAttr.Key]; ok {
-
- if ap.regexp != nil {
- if ap.regexp.MatchString(htmlAttr.Val) {
+ if apl, ok := p.globalAttrs[htmlAttr.Key]; ok {
+ for _, ap := range apl {
+ if ap.regexp != nil {
+ if ap.regexp.MatchString(htmlAttr.Val) {
+ cleanAttrs = append(cleanAttrs, htmlAttr)
+ }
+ } else {
cleanAttrs = append(cleanAttrs, htmlAttr)
}
- } else {
- cleanAttrs = append(cleanAttrs, htmlAttr)
}
}
}
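
The move from map[string]attrPolicy to map[string][]attrPolicy lets several rules coexist for one attribute key, and the labeled continue attrsLoop keeps an attribute as soon as any rule accepts it. Registering stacked rules might look like this (the patterns are illustrative):

	package main

	import (
		"fmt"
		"regexp"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		p := bluemonday.NewPolicy()
		p.AllowElements("a")
		// Two independent rules for the same attribute; previously the
		// second registration would have overwritten the first.
		p.AllowAttrs("class").Matching(regexp.MustCompile(`^btn$`)).OnElements("a")
		p.AllowAttrs("class").Matching(regexp.MustCompile(`^link-[a-z]+$`)).OnElements("a")

		fmt.Println(p.Sanitize(`<a class="link-red">x</a>`)) // class kept
		fmt.Println(p.Sanitize(`<a class="evil">x</a>`))     // class stripped
	}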
@@ -533,7 +597,7 @@ func (p *Policy) sanitizeAttrs(
tmpAttrs := []html.Attribute{}
for _, htmlAttr := range cleanAttrs {
switch elementName {
- case "a", "area", "link":
+ case "a", "area", "base", "link":
if htmlAttr.Key == "href" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
@@ -542,7 +606,7 @@ func (p *Policy) sanitizeAttrs(
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
- case "blockquote", "q":
+ case "blockquote", "del", "ins", "q":
if htmlAttr.Key == "cite" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
@@ -551,7 +615,7 @@ func (p *Policy) sanitizeAttrs(
break
}
tmpAttrs = append(tmpAttrs, htmlAttr)
- case "img", "script":
+ case "audio", "embed", "iframe", "img", "script", "source", "track", "video":
if htmlAttr.Key == "src" {
if u, ok := p.validURL(htmlAttr.Val); ok {
htmlAttr.Val = u
@@ -576,7 +640,7 @@ func (p *Policy) sanitizeAttrs(
// Add rel="nofollow" if a "href" exists
switch elementName {
- case "a", "area", "link":
+ case "a", "area", "base", "link":
var hrefFound bool
var externalLink bool
for _, htmlAttr := range cleanAttrs {
@@ -753,14 +817,14 @@ func (p *Policy) sanitizeAttrs(
func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute {
sps := p.elsAndStyles[elementName]
if len(sps) == 0 {
- sps = map[string]stylePolicy{}
+ sps = map[string][]stylePolicy{}
// check for any matching elements, if we don't already have a policy found
// if multiple matches are found their policies are merged (appended),
// so it is still best to avoid overlapping matchers
for regex, policies := range p.elsMatchingAndStyles {
if regex.MatchString(elementName) {
for k, v := range policies {
- sps[k] = v
+ sps[k] = append(sps[k], v...)
}
}
}
@@ -778,46 +842,51 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At
clean := []string{}
prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"}
+decLoop:
for _, dec := range decs {
- addedProperty := false
tempProperty := strings.ToLower(dec.Property)
tempValue := removeUnicode(strings.ToLower(dec.Value))
for _, i := range prefixes {
tempProperty = strings.TrimPrefix(tempProperty, i)
}
- if sp, ok := sps[tempProperty]; ok {
- if sp.handler != nil {
- if sp.handler(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- addedProperty = true
- }
- } else if len(sp.enum) > 0 {
- if stringInSlice(tempValue, sp.enum) {
- clean = append(clean, dec.Property+": "+dec.Value)
- addedProperty = true
- }
- } else if sp.regexp != nil {
- if sp.regexp.MatchString(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- addedProperty = true
+ if spl, ok := sps[tempProperty]; ok {
+ for _, sp := range spl {
+ if sp.handler != nil {
+ if sp.handler(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if len(sp.enum) > 0 {
+ if stringInSlice(tempValue, sp.enum) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if sp.regexp != nil {
+ if sp.regexp.MatchString(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
}
- continue
}
}
- if sp, ok := p.globalStyles[tempProperty]; ok && !addedProperty {
- if sp.handler != nil {
- if sp.handler(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
- }
- } else if len(sp.enum) > 0 {
- if stringInSlice(tempValue, sp.enum) {
- clean = append(clean, dec.Property+": "+dec.Value)
- }
- } else if sp.regexp != nil {
- if sp.regexp.MatchString(tempValue) {
- clean = append(clean, dec.Property+": "+dec.Value)
+ if spl, ok := p.globalStyles[tempProperty]; ok {
+ for _, sp := range spl {
+ if sp.handler != nil {
+ if sp.handler(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if len(sp.enum) > 0 {
+ if stringInSlice(tempValue, sp.enum) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
+ } else if sp.regexp != nil {
+ if sp.regexp.MatchString(tempValue) {
+ clean = append(clean, dec.Property+": "+dec.Value)
+ continue decLoop
+ }
}
- continue
}
}
}
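
Style policies get the same slice treatment: a declaration is kept as soon as one handler, enum, or regexp rule accepts it (continue decLoop), checked first against the element-specific rules and then the globals. These rules come from the AllowStyles builder; a sketch:

	package main

	import (
		"fmt"
		"regexp"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		p := bluemonday.NewPolicy()
		p.AllowElements("span")
		p.AllowAttrs("style").OnElements("span")
		// Enum-based and regexp-based rules can now stack per property.
		p.AllowStyles("text-align").MatchingEnum("left", "center", "right").OnElements("span")
		p.AllowStyles("color").Matching(regexp.MustCompile(`^#[0-9a-f]{6}$`)).OnElements("span")
		fmt.Println(p.Sanitize(`<span style="color: #ff0000; float: left">x</span>`))
		// expected: <span style="color: #ff0000">x</span> (float is not allowlisted)
	}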
@@ -848,11 +917,28 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
rawurl = strings.TrimSpace(rawurl)
// URLs cannot contain whitespace, unless it is a data-uri
- if (strings.Contains(rawurl, " ") ||
+ if strings.Contains(rawurl, " ") ||
strings.Contains(rawurl, "\t") ||
- strings.Contains(rawurl, "\n")) &&
- !strings.HasPrefix(rawurl, `data:`) {
- return "", false
+ strings.Contains(rawurl, "\n") {
+ if !strings.HasPrefix(rawurl, `data:`) {
+ return "", false
+ }
+
+ // Remove \r and \n from base64 encoded data to pass url.Parse.
+ matched := dataURIbase64Prefix.FindString(rawurl)
+ if matched != "" {
+ rawurl = matched + strings.Replace(
+ strings.Replace(
+ rawurl[len(matched):],
+ "\r",
+ "",
+ -1,
+ ),
+ "\n",
+ "",
+ -1,
+ )
+ }
}
// URLs are valid if they parse
@@ -863,16 +949,21 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
if u.Scheme != "" {
- urlPolicy, ok := p.allowURLSchemes[u.Scheme]
+ urlPolicies, ok := p.allowURLSchemes[u.Scheme]
if !ok {
return "", false
-
}
- if urlPolicy == nil || urlPolicy(u) == true {
+ if len(urlPolicies) == 0 {
return u.String(), true
}
+ for _, urlPolicy := range urlPolicies {
+ if urlPolicy(u) == true {
+ return u.String(), true
+ }
+ }
+
return "", false
}
@@ -890,7 +981,14 @@ func (p *Policy) validURL(rawurl string) (string, bool) {
func linkable(elementName string) bool {
switch elementName {
- case "a", "area", "blockquote", "img", "link", "script":
+ case "a", "area", "base", "link":
+ // elements that allow .href
+ return true
+ case "blockquote", "del", "ins", "q":
+ // elements that allow .cite
+ return true
+ case "audio", "embed", "iframe", "img", "input", "script", "track", "video":
+ // elements that allow .src
return true
default:
return false
@@ -957,14 +1055,14 @@ func removeUnicode(value string) string {
return substitutedValue
}
-func (p *Policy) matchRegex(elementName string) (map[string]attrPolicy, bool) {
- aps := make(map[string]attrPolicy, 0)
+func (p *Policy) matchRegex(elementName string) (map[string][]attrPolicy, bool) {
+ aps := make(map[string][]attrPolicy, 0)
matched := false
for regex, attrs := range p.elsMatchingAndAttrs {
if regex.MatchString(elementName) {
matched = true
for k, v := range attrs {
- aps[k] = v
+ aps[k] = append(aps[k], v...)
}
}
}
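
matchRegex collects attribute policies from every element-name regex that matches, and with the map now holding slices, overlapping matchers merge instead of the last one winning. Assuming the OnElementsMatching builder method that populates elsMatchingAndAttrs, a sketch:

	package main

	import (
		"fmt"
		"regexp"

		"github.com/microcosm-cc/bluemonday"
	)

	func main() {
		headings := regexp.MustCompile(`^h[1-6]$`)
		p := bluemonday.NewPolicy()
		// Both id rules apply to every heading; before this change the
		// second registration for "id" would have clobbered the first.
		p.AllowAttrs("id").Matching(regexp.MustCompile(`^sect-`)).OnElementsMatching(headings)
		p.AllowAttrs("id").Matching(regexp.MustCompile(`^fig-`)).OnElementsMatching(headings)
		fmt.Println(p.Sanitize(`<h2 id="fig-1">caption</h2>`)) // id survives via the second rule
	}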