diff options
author | Thomas Boerger <thomas@webhippie.de> | 2016-11-03 23:16:01 +0100 |
---|---|---|
committer | Thomas Boerger <thomas@webhippie.de> | 2016-11-04 08:43:11 +0100 |
commit | 1ebb35b98889ff77299f24d82da426b434b0cca0 (patch) | |
tree | 6dcb814d6df4d11c7e7a0ba6da8a6945628e2c5d /vendor/github.com/microcosm-cc/bluemonday/policy.go | |
parent | 78f86abba45cb35018c58b8bd5f4c48a86cc8634 (diff) | |
download | gitea-1ebb35b98889ff77299f24d82da426b434b0cca0.tar.gz gitea-1ebb35b98889ff77299f24d82da426b434b0cca0.zip |
Added all required dependencies
Diffstat (limited to 'vendor/github.com/microcosm-cc/bluemonday/policy.go')
-rw-r--r-- | vendor/github.com/microcosm-cc/bluemonday/policy.go | 548 |
1 files changed, 548 insertions, 0 deletions
diff --git a/vendor/github.com/microcosm-cc/bluemonday/policy.go b/vendor/github.com/microcosm-cc/bluemonday/policy.go new file mode 100644 index 0000000000..5d28beca36 --- /dev/null +++ b/vendor/github.com/microcosm-cc/bluemonday/policy.go @@ -0,0 +1,548 @@ +// Copyright (c) 2014, David Kitchen <david@buro9.com> +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the organisation (Microcosm) nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package bluemonday + +import ( + "net/url" + "regexp" + "strings" +) + +// Policy encapsulates the whitelist of HTML elements and attributes that will +// be applied to the sanitised HTML. +// +// You should use bluemonday.NewPolicy() to create a blank policy as the +// unexported fields contain maps that need to be initialized. +type Policy struct { + + // Declares whether the maps have been initialized, used as a cheap check to + // ensure that those using Policy{} directly won't cause nil pointer + // exceptions + initialized bool + + // Allows the <!DOCTYPE > tag to exist in the sanitized document + allowDocType bool + + // If true then we add spaces when stripping tags, specifically the closing + // tag is replaced by a space character. + addSpaces bool + + // When true, add rel="nofollow" to HTML anchors + requireNoFollow bool + + // When true, add rel="nofollow" to HTML anchors + // Will add for href="http://foo" + // Will skip for href="/foo" or href="foo" + requireNoFollowFullyQualifiedLinks bool + + // When true add target="_blank" to fully qualified links + // Will add for href="http://foo" + // Will skip for href="/foo" or href="foo" + addTargetBlankToFullyQualifiedLinks bool + + // When true, URLs must be parseable by "net/url" url.Parse() + requireParseableURLs bool + + // When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted + allowRelativeURLs bool + + // map[htmlElementName]map[htmlAttributeName]attrPolicy + elsAndAttrs map[string]map[string]attrPolicy + + // map[htmlAttributeName]attrPolicy + globalAttrs map[string]attrPolicy + + // If urlPolicy is nil, all URLs with matching schema are allowed. + // Otherwise, only the URLs with matching schema and urlPolicy(url) + // returning true are allowed. + allowURLSchemes map[string]urlPolicy + + // If an element has had all attributes removed as a result of a policy + // being applied, then the element would be removed from the output. + // + // However some elements are valid and have strong layout meaning without + // any attributes, i.e. <table>. To prevent those being removed we maintain + // a list of elements that are allowed to have no attributes and that will + // be maintained in the output HTML. + setOfElementsAllowedWithoutAttrs map[string]struct{} + + setOfElementsToSkipContent map[string]struct{} +} + +type attrPolicy struct { + + // optional pattern to match, when not nil the regexp needs to match + // otherwise the attribute is removed + regexp *regexp.Regexp +} + +type attrPolicyBuilder struct { + p *Policy + + attrNames []string + regexp *regexp.Regexp + allowEmpty bool +} + +type urlPolicy func(url *url.URL) (allowUrl bool) + +// init initializes the maps if this has not been done already +func (p *Policy) init() { + if !p.initialized { + p.elsAndAttrs = make(map[string]map[string]attrPolicy) + p.globalAttrs = make(map[string]attrPolicy) + p.allowURLSchemes = make(map[string]urlPolicy) + p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{}) + p.setOfElementsToSkipContent = make(map[string]struct{}) + p.initialized = true + } +} + +// NewPolicy returns a blank policy with nothing whitelisted or permitted. This +// is the recommended way to start building a policy and you should now use +// AllowAttrs() and/or AllowElements() to construct the whitelist of HTML +// elements and attributes. +func NewPolicy() *Policy { + + p := Policy{} + + p.addDefaultElementsWithoutAttrs() + p.addDefaultSkipElementContent() + + return &p +} + +// AllowAttrs takes a range of HTML attribute names and returns an +// attribute policy builder that allows you to specify the pattern and scope of +// the whitelisted attribute. +// +// The attribute policy is only added to the core policy when either Globally() +// or OnElements(...) are called. +func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder { + + p.init() + + abp := attrPolicyBuilder{ + p: p, + allowEmpty: false, + } + + for _, attrName := range attrNames { + abp.attrNames = append(abp.attrNames, strings.ToLower(attrName)) + } + + return &abp +} + +// AllowNoAttrs says that attributes on element are optional. +// +// The attribute policy is only added to the core policy when OnElements(...) +// are called. +func (p *Policy) AllowNoAttrs() *attrPolicyBuilder { + + p.init() + + abp := attrPolicyBuilder{ + p: p, + allowEmpty: true, + } + return &abp +} + +// AllowNoAttrs says that attributes on element are optional. +// +// The attribute policy is only added to the core policy when OnElements(...) +// are called. +func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder { + + abp.allowEmpty = true + + return abp +} + +// Matching allows a regular expression to be applied to a nascent attribute +// policy, and returns the attribute policy. Calling this more than once will +// replace the existing regexp. +func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder { + + abp.regexp = regex + + return abp +} + +// OnElements will bind an attribute policy to a given range of HTML elements +// and return the updated policy +func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy { + + for _, element := range elements { + element = strings.ToLower(element) + + for _, attr := range abp.attrNames { + + if _, ok := abp.p.elsAndAttrs[element]; !ok { + abp.p.elsAndAttrs[element] = make(map[string]attrPolicy) + } + + ap := attrPolicy{} + if abp.regexp != nil { + ap.regexp = abp.regexp + } + + abp.p.elsAndAttrs[element][attr] = ap + } + + if abp.allowEmpty { + abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{} + + if _, ok := abp.p.elsAndAttrs[element]; !ok { + abp.p.elsAndAttrs[element] = make(map[string]attrPolicy) + } + } + } + + return abp.p +} + +// Globally will bind an attribute policy to all HTML elements and return the +// updated policy +func (abp *attrPolicyBuilder) Globally() *Policy { + + for _, attr := range abp.attrNames { + if _, ok := abp.p.globalAttrs[attr]; !ok { + abp.p.globalAttrs[attr] = attrPolicy{} + } + + ap := attrPolicy{} + if abp.regexp != nil { + ap.regexp = abp.regexp + } + + abp.p.globalAttrs[attr] = ap + } + + return abp.p +} + +// AllowElements will append HTML elements to the whitelist without applying an +// attribute policy to those elements (the elements are permitted +// sans-attributes) +func (p *Policy) AllowElements(names ...string) *Policy { + p.init() + + for _, element := range names { + element = strings.ToLower(element) + + if _, ok := p.elsAndAttrs[element]; !ok { + p.elsAndAttrs[element] = make(map[string]attrPolicy) + } + } + + return p +} + +// RequireNoFollowOnLinks will result in all <a> tags having a rel="nofollow" +// added to them if one does not already exist +// +// Note: This requires p.RequireParseableURLs(true) and will enable it. +func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy { + + p.requireNoFollow = require + p.requireParseableURLs = true + + return p +} + +// RequireNoFollowOnFullyQualifiedLinks will result in all <a> tags that point +// to a non-local destination (i.e. starts with a protocol and has a host) +// having a rel="nofollow" added to them if one does not already exist +// +// Note: This requires p.RequireParseableURLs(true) and will enable it. +func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy { + + p.requireNoFollowFullyQualifiedLinks = require + p.requireParseableURLs = true + + return p +} + +// AddTargetBlankToFullyQualifiedLinks will result in all <a> tags that point +// to a non-local destination (i.e. starts with a protocol and has a host) +// having a target="_blank" added to them if one does not already exist +// +// Note: This requires p.RequireParseableURLs(true) and will enable it. +func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy { + + p.addTargetBlankToFullyQualifiedLinks = require + p.requireParseableURLs = true + + return p +} + +// RequireParseableURLs will result in all URLs requiring that they be parseable +// by "net/url" url.Parse() +// This applies to: +// - a.href +// - area.href +// - blockquote.cite +// - img.src +// - link.href +// - script.src +func (p *Policy) RequireParseableURLs(require bool) *Policy { + + p.requireParseableURLs = require + + return p +} + +// AllowRelativeURLs enables RequireParseableURLs and then permits URLs that +// are parseable, have no schema information and url.IsAbs() returns false +// This permits local URLs +func (p *Policy) AllowRelativeURLs(require bool) *Policy { + + p.RequireParseableURLs(true) + p.allowRelativeURLs = require + + return p +} + +// AllowURLSchemes will append URL schemes to the whitelist +// Example: p.AllowURLSchemes("mailto", "http", "https") +func (p *Policy) AllowURLSchemes(schemes ...string) *Policy { + p.init() + + p.RequireParseableURLs(true) + + for _, scheme := range schemes { + scheme = strings.ToLower(scheme) + + // Allow all URLs with matching scheme. + p.allowURLSchemes[scheme] = nil + } + + return p +} + +// AllowURLSchemeWithCustomPolicy will append URL schemes with +// a custom URL policy to the whitelist. +// Only the URLs with matching schema and urlPolicy(url) +// returning true will be allowed. +func (p *Policy) AllowURLSchemeWithCustomPolicy( + scheme string, + urlPolicy func(url *url.URL) (allowUrl bool), +) *Policy { + + p.init() + + p.RequireParseableURLs(true) + + scheme = strings.ToLower(scheme) + + p.allowURLSchemes[scheme] = urlPolicy + + return p +} + +// AllowDocType states whether the HTML sanitised by the sanitizer is allowed to +// contain the HTML DocType tag: <!DOCTYPE HTML> or one of it's variants. +// +// The HTML spec only permits one doctype per document, and as you know how you +// are using the output of this, you know best as to whether we should ignore it +// (default) or not. +// +// If you are sanitizing a HTML fragment the default (false) is fine. +func (p *Policy) AllowDocType(allow bool) *Policy { + + p.allowDocType = allow + + return p +} + +// AddSpaceWhenStrippingTag states whether to add a single space " " when +// removing tags that are not whitelisted by the policy. +// +// This is useful if you expect to strip tags in dense markup and may lose the +// value of whitespace. +// +// For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld" +// with the default value of false, but you may wish to sanitize this to +// " Hello World " by setting AddSpaceWhenStrippingTag to true as this would +// retain the intent of the text. +func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy { + + p.addSpaces = allow + + return p +} + +// SkipElementsContent adds the HTML elements whose tags is needed to be removed +// with it's content. +func (p *Policy) SkipElementsContent(names ...string) *Policy { + + p.init() + + for _, element := range names { + element = strings.ToLower(element) + + if _, ok := p.setOfElementsToSkipContent[element]; !ok { + p.setOfElementsToSkipContent[element] = struct{}{} + } + } + + return p +} + +// AllowElementsContent marks the HTML elements whose content should be +// retained after removing the tag. +func (p *Policy) AllowElementsContent(names ...string) *Policy { + + p.init() + + for _, element := range names { + delete(p.setOfElementsToSkipContent, strings.ToLower(element)) + } + + return p +} + +// addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid +// without any attributes to an internal map. +// i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr +// is mandatory +func (p *Policy) addDefaultElementsWithoutAttrs() { + p.init() + + p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{} + p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{} + +} + +// addDefaultSkipElementContent adds the HTML elements that we should skip +// rendering the character content of, if the element itself is not allowed. +// This is all character data that the end user would not normally see. +// i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or +// anything else until we encounter the closing </script> tag. +func (p *Policy) addDefaultSkipElementContent() { + p.init() + + p.setOfElementsToSkipContent["frame"] = struct{}{} + p.setOfElementsToSkipContent["frameset"] = struct{}{} + p.setOfElementsToSkipContent["iframe"] = struct{}{} + p.setOfElementsToSkipContent["noembed"] = struct{}{} + p.setOfElementsToSkipContent["noframes"] = struct{}{} + p.setOfElementsToSkipContent["noscript"] = struct{}{} + p.setOfElementsToSkipContent["nostyle"] = struct{}{} + p.setOfElementsToSkipContent["object"] = struct{}{} + p.setOfElementsToSkipContent["script"] = struct{}{} + p.setOfElementsToSkipContent["style"] = struct{}{} + p.setOfElementsToSkipContent["title"] = struct{}{} +} |