1 files changed, 535 insertions, 0 deletions
diff --git a/vendor/github.com/microcosm-cc/bluemonday/sanitize.go b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
new file mode 100644
index 0000000000..8983685818
--- /dev/null
+++ b/vendor/github.com/microcosm-cc/bluemonday/sanitize.go
@@ -0,0 +1,535 @@
+// Copyright (c) 2014, David Kitchen <david@buro9.com>
+//
+// All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are met:
+//
+// * Redistributions of source code must retain the above copyright notice, this
+//   list of conditions and the following disclaimer.
+//
+// * Redistributions in binary form must reproduce the above copyright notice,
+//   this list of conditions and the following disclaimer in the documentation
+//   and/or other materials provided with the distribution.
+//
+// * Neither the name of the organisation (Microcosm) nor the names of its
+//   contributors may be used to endorse or promote products derived from
+//   this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
+// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
+// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
+// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
+// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
+// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+package bluemonday
+
+import (
+	"bytes"
+	"io"
+	"net/url"
+	"strings"
+
+	"golang.org/x/net/html"
+)
+
+// Sanitize takes a string that contains a HTML fragment or document and applies
+// the given policy whitelist.
+//
+// It returns a HTML string that has been sanitized by the policy or an empty
+// string if an error has occurred (most likely as a consequence of extremely
+// malformed input)
+func (p *Policy) Sanitize(s string) string {
+	if strings.TrimSpace(s) == "" {
+		return s
+	}
+
+	return p.sanitize(strings.NewReader(s)).String()
+}
+
+// SanitizeBytes takes a []byte that contains a HTML fragment or document and applies
+// the given policy whitelist.
+//
+// It returns a []byte containing the HTML that has been sanitized by the policy
+// or an empty []byte if an error has occurred (most likely as a consequence of
+// extremely malformed input)
+func (p *Policy) SanitizeBytes(b []byte) []byte {
+	if len(bytes.TrimSpace(b)) == 0 {
+		return b
+	}
+
+	return p.sanitize(bytes.NewReader(b)).Bytes()
+}
+
+// SanitizeReader takes an io.Reader that contains a HTML fragment or document
+// and applies the given policy whitelist.
+//
+// It returns a bytes.Buffer containing the HTML that has been sanitized by the
+// policy. Errors during sanitization will merely return an empty result.
+func (p *Policy) SanitizeReader(r io.Reader) *bytes.Buffer {
+	return p.sanitize(r)
+}
+
+// Performs the actual sanitization process.
+func (p *Policy) sanitize(r io.Reader) *bytes.Buffer {
+
+	// It is possible that the developer has created the policy via:
+	//   p := bluemonday.Policy{}
+	// rather than:
+	//   p := bluemonday.NewPolicy()
+	// If this is the case, and if they haven't yet triggered an action that
+	// would initiliaze the maps, then we need to do that.
+	p.init()
+
+	var (
+		buff                     bytes.Buffer
+		skipElementContent       bool
+		skippingElementsCount    int64
+		skipClosingTag           bool
+		closingTagToSkipStack    []string
+		mostRecentlyStartedToken string
+	)
+
+	tokenizer := html.NewTokenizer(r)
+	for {
+		if tokenizer.Next() == html.ErrorToken {
+			err := tokenizer.Err()
+			if err == io.EOF {
+				// End of input means end of processing
+				return &buff
+			}
+
+			// Raw tokenizer error
+			return &bytes.Buffer{}
+		}
+
+		token := tokenizer.Token()
+		switch token.Type {
+		case html.DoctypeToken:
+
+			if p.allowDocType {
+				buff.WriteString(token.String())
+			}
+
+		case html.CommentToken:
+
+			// Comments are ignored by default
+
+		case html.StartTagToken:
+
+			mostRecentlyStartedToken = token.Data
+
+			aps, ok := p.elsAndAttrs[token.Data]
+			if !ok {
+				if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
+					skipElementContent = true
+					skippingElementsCount++
+				}
+				if p.addSpaces {
+					buff.WriteString(" ")
+				}
+				break
+			}
+
+			if len(token.Attr) != 0 {
+				token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+			}
+
+			if len(token.Attr) == 0 {
+				if !p.allowNoAttrs(token.Data) {
+					skipClosingTag = true
+					closingTagToSkipStack = append(closingTagToSkipStack, token.Data)
+					if p.addSpaces {
+						buff.WriteString(" ")
+					}
+					break
+				}
+			}
+
+			if !skipElementContent {
+				buff.WriteString(token.String())
+			}
+
+		case html.EndTagToken:
+
+			if skipClosingTag && closingTagToSkipStack[len(closingTagToSkipStack)-1] == token.Data {
+				closingTagToSkipStack = closingTagToSkipStack[:len(closingTagToSkipStack)-1]
+				if len(closingTagToSkipStack) == 0 {
+					skipClosingTag = false
+				}
+				if p.addSpaces {
+					buff.WriteString(" ")
+				}
+				break
+			}
+
+			if _, ok := p.elsAndAttrs[token.Data]; !ok {
+				if _, ok := p.setOfElementsToSkipContent[token.Data]; ok {
+					skippingElementsCount--
+					if skippingElementsCount == 0 {
+						skipElementContent = false
+					}
+				}
+				if p.addSpaces {
+					buff.WriteString(" ")
+				}
+				break
+			}
+
+			if !skipElementContent {
+				buff.WriteString(token.String())
+			}
+
+		case html.SelfClosingTagToken:
+
+			aps, ok := p.elsAndAttrs[token.Data]
+			if !ok {
+				if p.addSpaces {
+					buff.WriteString(" ")
+				}
+				break
+			}
+
+			if len(token.Attr) != 0 {
+				token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
+			}
+
+			if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
+				if p.addSpaces {
+					buff.WriteString(" ")
+				}
+				break
+			}
+
+			if !skipElementContent {
+				buff.WriteString(token.String())
+			}
+
+		case html.TextToken:
+
+			if !skipElementContent {
+				switch strings.ToLower(mostRecentlyStartedToken) {
+				case "javascript":
+					// not encouraged, but if a policy allows JavaScript we
+					// should not HTML escape it as that would break the output
+					buff.WriteString(token.Data)
+				case "style":
+					// not encouraged, but if a policy allows CSS styles we
+					// should not HTML escape it as that would break the output
+					buff.WriteString(token.Data)
+				default:
+					// HTML escape the text
+					buff.WriteString(token.String())
+				}
+			}
+
+		default:
+			// A token that didn't exist in the html package when we wrote this
+			return &bytes.Buffer{}
+		}
+	}
+}
+
+// sanitizeAttrs takes a set of element attribute policies and the global
+// attribute policies and applies them to the []html.Attribute returning a set
+// of html.Attributes that match the policies
+func (p *Policy) sanitizeAttrs(
+	elementName string,
+	attrs []html.Attribute,
+	aps map[string]attrPolicy,
+) []html.Attribute {
+
+	if len(attrs) == 0 {
+		return attrs
+	}
+
+	// Builds a new attribute slice based on the whether the attribute has been
+	// whitelisted explicitly or globally.
+	cleanAttrs := []html.Attribute{}
+	for _, htmlAttr := range attrs {
+		// Is there an element specific attribute policy that applies?
+		if ap, ok := aps[htmlAttr.Key]; ok {
+			if ap.regexp != nil {
+				if ap.regexp.MatchString(htmlAttr.Val) {
+					cleanAttrs = append(cleanAttrs, htmlAttr)
+					continue
+				}
+			} else {
+				cleanAttrs = append(cleanAttrs, htmlAttr)
+				continue
+			}
+		}
+
+		// Is there a global attribute policy that applies?
+		if ap, ok := p.globalAttrs[htmlAttr.Key]; ok {
+			if ap.regexp != nil {
+				if ap.regexp.MatchString(htmlAttr.Val) {
+					cleanAttrs = append(cleanAttrs, htmlAttr)
+				}
+			} else {
+				cleanAttrs = append(cleanAttrs, htmlAttr)
+			}
+		}
+	}
+
+	if len(cleanAttrs) == 0 {
+		// If nothing was allowed, let's get out of here
+		return cleanAttrs
+	}
+	// cleanAttrs now contains the attributes that are permitted
+
+	if linkable(elementName) {
+		if p.requireParseableURLs {
+			// Ensure URLs are parseable:
+			// - a.href
+			// - area.href
+			// - link.href
+			// - blockquote.cite
+			// - q.cite
+			// - img.src
+			// - script.src
+			tmpAttrs := []html.Attribute{}
+			for _, htmlAttr := range cleanAttrs {
+				switch elementName {
+				case "a", "area", "link":
+					if htmlAttr.Key == "href" {
+						if u, ok := p.validURL(htmlAttr.Val); ok {
+							htmlAttr.Val = u
+							tmpAttrs = append(tmpAttrs, htmlAttr)
+						}
+						break
+					}
+					tmpAttrs = append(tmpAttrs, htmlAttr)
+				case "blockquote", "q":
+					if htmlAttr.Key == "cite" {
+						if u, ok := p.validURL(htmlAttr.Val); ok {
+							htmlAttr.Val = u
+							tmpAttrs = append(tmpAttrs, htmlAttr)
+						}
+						break
+					}
+					tmpAttrs = append(tmpAttrs, htmlAttr)
+				case "img", "script":
+					if htmlAttr.Key == "src" {
+						if u, ok := p.validURL(htmlAttr.Val); ok {
+							htmlAttr.Val = u
+							tmpAttrs = append(tmpAttrs, htmlAttr)
+						}
+						break
+					}
+					tmpAttrs = append(tmpAttrs, htmlAttr)
+				default:
+					tmpAttrs = append(tmpAttrs, htmlAttr)
+				}
+			}
+			cleanAttrs = tmpAttrs
+		}
+
+		if (p.requireNoFollow ||
+			p.requireNoFollowFullyQualifiedLinks ||
+			p.addTargetBlankToFullyQualifiedLinks) &&
+			len(cleanAttrs) > 0 {
+
+			// Add rel="nofollow" if a "href" exists
+			switch elementName {
+			case "a", "area", "link":
+				var hrefFound bool
+				var externalLink bool
+				for _, htmlAttr := range cleanAttrs {
+					if htmlAttr.Key == "href" {
+						hrefFound = true
+
+						u, err := url.Parse(htmlAttr.Val)
+						if err != nil {
+							continue
+						}
+						if u.Host != "" {
+							externalLink = true
+						}
+
+						continue
+					}
+				}
+
+				if hrefFound {
+					var (
+						noFollowFound    bool
+						targetBlankFound bool
+					)
+
+					addNoFollow := (p.requireNoFollow ||
+						externalLink && p.requireNoFollowFullyQualifiedLinks)
+
+					addTargetBlank := (externalLink &&
+						p.addTargetBlankToFullyQualifiedLinks)
+
+					tmpAttrs := []html.Attribute{}
+					for _, htmlAttr := range cleanAttrs {
+
+						var appended bool
+						if htmlAttr.Key == "rel" && addNoFollow {
+
+							if strings.Contains(htmlAttr.Val, "nofollow") {
+								noFollowFound = true
+								tmpAttrs = append(tmpAttrs, htmlAttr)
+								appended = true
+							} else {
+								htmlAttr.Val += " nofollow"
+								noFollowFound = true
+								tmpAttrs = append(tmpAttrs, htmlAttr)
+								appended = true
+							}
+						}
+
+						if elementName == "a" && htmlAttr.Key == "target" {
+							if htmlAttr.Val == "_blank" {
+								targetBlankFound = true
+							}
+							if addTargetBlank && !targetBlankFound {
+								htmlAttr.Val = "_blank"
+								targetBlankFound = true
+								tmpAttrs = append(tmpAttrs, htmlAttr)
+								appended = true
+							}
+						}
+
+						if !appended {
+							tmpAttrs = append(tmpAttrs, htmlAttr)
+						}
+					}
+					if noFollowFound || targetBlankFound {
+						cleanAttrs = tmpAttrs
+					}
+
+					if addNoFollow && !noFollowFound {
+						rel := html.Attribute{}
+						rel.Key = "rel"
+						rel.Val = "nofollow"
+						cleanAttrs = append(cleanAttrs, rel)
+					}
+
+					if elementName == "a" && addTargetBlank && !targetBlankFound {
+						rel := html.Attribute{}
+						rel.Key = "target"
+						rel.Val = "_blank"
+						targetBlankFound = true
+						cleanAttrs = append(cleanAttrs, rel)
+					}
+
+					if targetBlankFound {
+						// target="_blank" has a security risk that allows the
+						// opened window/tab to issue JavaScript calls against
+						// window.opener, which in effect allow the destination
+						// of the link to control the source:
+						// https://dev.to/ben/the-targetblank-vulnerability-by-example
+						//
+						// To mitigate this risk, we need to add a specific rel
+						// attribute if it is not already present.
+						// rel="noopener"
+						//
+						// Unfortunately this is processing the rel twice (we
+						// already looked at it earlier ^^) as we cannot be sure
+						// of the ordering of the href and rel, and whether we
+						// have fully satisfied that we need to do this. This
+						// double processing only happens *if* target="_blank"
+						// is true.
+						var noOpenerAdded bool
+						tmpAttrs := []html.Attribute{}
+						for _, htmlAttr := range cleanAttrs {
+							var appended bool
+							if htmlAttr.Key == "rel" {
+								if strings.Contains(htmlAttr.Val, "noopener") {
+									noOpenerAdded = true
+									tmpAttrs = append(tmpAttrs, htmlAttr)
+								} else {
+									htmlAttr.Val += " noopener"
+									noOpenerAdded = true
+									tmpAttrs = append(tmpAttrs, htmlAttr)
+								}
+
+								appended = true
+							}
+							if !appended {
+								tmpAttrs = append(tmpAttrs, htmlAttr)
+							}
+						}
+						if noOpenerAdded {
+							cleanAttrs = tmpAttrs
+						} else {
+							// rel attr was not found, or else noopener would
+							// have been added already
+							rel := html.Attribute{}
+							rel.Key = "rel"
+							rel.Val = "noopener"
+							cleanAttrs = append(cleanAttrs, rel)
+						}
+
+					}
+				}
+			default:
+			}
+		}
+	}
+
+	return cleanAttrs
+}
+
+func (p *Policy) allowNoAttrs(elementName string) bool {
+	_, ok := p.setOfElementsAllowedWithoutAttrs[elementName]
+	return ok
+}
+
+func (p *Policy) validURL(rawurl string) (string, bool) {
+	if p.requireParseableURLs {
+		// URLs do not contain whitespace
+		if strings.Contains(rawurl, " ") ||
+			strings.Contains(rawurl, "\t") ||
+			strings.Contains(rawurl, "\n") {
+			return "", false
+		}
+
+		u, err := url.Parse(rawurl)
+		if err != nil {
+			return "", false
+		}
+
+		if u.Scheme != "" {
+
+			urlPolicy, ok := p.allowURLSchemes[u.Scheme]
+			if !ok {
+				return "", false
+
+			}
+
+			if urlPolicy == nil || urlPolicy(u) == true {
+				return u.String(), true
+			}
+
+			return "", false
+		}
+
+		if p.allowRelativeURLs {
+			if u.String() != "" {
+				return u.String(), true
+			}
+		}
+
+		return "", false
+	}
+
+	return rawurl, true
+}
+
+func linkable(elementName string) bool {
+	switch elementName {
+	case "a", "area", "blockquote", "img", "link", "script":
+		return true
+	default:
+		return false
+	}
+}