diff options
Diffstat (limited to 'vendor/github.com/microcosm-cc/bluemonday/helpers.go')
-rw-r--r-- | vendor/github.com/microcosm-cc/bluemonday/helpers.go | 297 |
1 files changed, 297 insertions, 0 deletions
diff --git a/vendor/github.com/microcosm-cc/bluemonday/helpers.go b/vendor/github.com/microcosm-cc/bluemonday/helpers.go new file mode 100644 index 0000000000..dfa5868da8 --- /dev/null +++ b/vendor/github.com/microcosm-cc/bluemonday/helpers.go @@ -0,0 +1,297 @@ +// Copyright (c) 2014, David Kitchen <david@buro9.com> +// +// All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// * Redistributions of source code must retain the above copyright notice, this +// list of conditions and the following disclaimer. +// +// * Redistributions in binary form must reproduce the above copyright notice, +// this list of conditions and the following disclaimer in the documentation +// and/or other materials provided with the distribution. +// +// * Neither the name of the organisation (Microcosm) nor the names of its +// contributors may be used to endorse or promote products derived from +// this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE +// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR +// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER +// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, +// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +package bluemonday + +import ( + "encoding/base64" + "net/url" + "regexp" +) + +// A selection of regular expressions that can be used as .Matching() rules on +// HTML attributes. +var ( + // CellAlign handles the `align` attribute + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-align + CellAlign = regexp.MustCompile(`(?i)^(center|justify|left|right|char)$`) + + // CellVerticalAlign handles the `valign` attribute + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-valign + CellVerticalAlign = regexp.MustCompile(`(?i)^(baseline|bottom|middle|top)$`) + + // Direction handles the `dir` attribute + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/bdo#attr-dir + Direction = regexp.MustCompile(`(?i)^(rtl|ltr)$`) + + // ImageAlign handles the `align` attribute on the `image` tag + // http://www.w3.org/MarkUp/Test/Img/imgtest.html + ImageAlign = regexp.MustCompile( + `(?i)^(left|right|top|texttop|middle|absmiddle|baseline|bottom|absbottom)$`, + ) + + // Integer describes whole positive integers (including 0) used in places + // like td.colspan + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/td#attr-colspan + Integer = regexp.MustCompile(`^[0-9]+$`) + + // ISO8601 according to the W3 group is only a subset of the ISO8601 + // standard: http://www.w3.org/TR/NOTE-datetime + // + // Used in places like time.datetime + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/time#attr-datetime + // + // Matches patterns: + // Year: + // YYYY (eg 1997) + // Year and month: + // YYYY-MM (eg 1997-07) + // Complete date: + // YYYY-MM-DD (eg 1997-07-16) + // Complete date plus hours and minutes: + // YYYY-MM-DDThh:mmTZD (eg 1997-07-16T19:20+01:00) + // Complete date plus hours, minutes and seconds: + // YYYY-MM-DDThh:mm:ssTZD (eg 1997-07-16T19:20:30+01:00) + // Complete date plus hours, minutes, seconds and a decimal fraction of a + // second + // YYYY-MM-DDThh:mm:ss.sTZD (eg 1997-07-16T19:20:30.45+01:00) + ISO8601 = regexp.MustCompile( + `^[0-9]{4}(-[0-9]{2}(-[0-9]{2}([ T][0-9]{2}(:[0-9]{2}){1,2}(.[0-9]{1,6})` + + `?Z?([\+-][0-9]{2}:[0-9]{2})?)?)?)?$`, + ) + + // ListType encapsulates the common value as well as the latest spec + // values for lists + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/ol#attr-type + ListType = regexp.MustCompile(`(?i)^(circle|disc|square|a|A|i|I|1)$`) + + // SpaceSeparatedTokens is used in places like `a.rel` and the common attribute + // `class` which both contain space delimited lists of data tokens + // http://www.w3.org/TR/html-markup/datatypes.html#common.data.tokens-def + // Regexp: \p{L} matches unicode letters, \p{N} matches unicode numbers + SpaceSeparatedTokens = regexp.MustCompile(`^([\s\p{L}\p{N}_-]+)$`) + + // Number is a double value used on HTML5 meter and progress elements + // http://www.whatwg.org/specs/web-apps/current-work/multipage/the-button-element.html#the-meter-element + Number = regexp.MustCompile(`^[-+]?[0-9]*\.?[0-9]+([eE][-+]?[0-9]+)?$`) + + // NumberOrPercent is used predominantly as units of measurement in width + // and height attributes + // https://developer.mozilla.org/en-US/docs/Web/HTML/Element/img#attr-height + NumberOrPercent = regexp.MustCompile(`^[0-9]+[%]?$`) + + // Paragraph of text in an attribute such as *.'title', img.alt, etc + // https://developer.mozilla.org/en-US/docs/Web/HTML/Global_attributes#attr-title + // Note that we are not allowing chars that could close tags like '>' + Paragraph = regexp.MustCompile(`^[\p{L}\p{N}\s\-_',\[\]!\./\\\(\)]*$`) + + // dataURIImagePrefix is used by AllowDataURIImages to define the acceptable + // prefix of data URIs that contain common web image formats. + // + // This is not exported as it's not useful by itself, and only has value + // within the AllowDataURIImages func + dataURIImagePrefix = regexp.MustCompile( + `^image/(gif|jpeg|png|webp);base64,`, + ) +) + +// AllowStandardURLs is a convenience function that will enable rel="nofollow" +// on "a", "area" and "link" (if you have allowed those elements) and will +// ensure that the URL values are parseable and either relative or belong to the +// "mailto", "http", or "https" schemes +func (p *Policy) AllowStandardURLs() { + // URLs must be parseable by net/url.Parse() + p.RequireParseableURLs(true) + + // !url.IsAbs() is permitted + p.AllowRelativeURLs(true) + + // Most common URL schemes only + p.AllowURLSchemes("mailto", "http", "https") + + // For all anchors we will add rel="nofollow" if it does not already exist + // This applies to "a" "area" "link" + p.RequireNoFollowOnLinks(true) +} + +// AllowStandardAttributes will enable "id", "title" and the language specific +// attributes "dir" and "lang" on all elements that are whitelisted +func (p *Policy) AllowStandardAttributes() { + // "dir" "lang" are permitted as both language attributes affect charsets + // and direction of text. + p.AllowAttrs("dir").Matching(Direction).Globally() + p.AllowAttrs( + "lang", + ).Matching(regexp.MustCompile(`[a-zA-Z]{2,20}`)).Globally() + + // "id" is permitted. This is pretty much as some HTML elements require this + // to work well ("dfn" is an example of a "id" being value) + // This does create a risk that JavaScript and CSS within your web page + // might identify the wrong elements. Ensure that you select things + // accurately + p.AllowAttrs("id").Matching( + regexp.MustCompile(`[a-zA-Z0-9\:\-_\.]+`), + ).Globally() + + // "title" is permitted as it improves accessibility. + p.AllowAttrs("title").Matching(Paragraph).Globally() +} + +// AllowStyling presently enables the class attribute globally. +// +// Note: When bluemonday ships a CSS parser and we can safely sanitise that, +// this will also allow sanitized styling of elements via the style attribute. +func (p *Policy) AllowStyling() { + + // "class" is permitted globally + p.AllowAttrs("class").Matching(SpaceSeparatedTokens).Globally() +} + +// AllowImages enables the img element and some popular attributes. It will also +// ensure that URL values are parseable. This helper does not enable data URI +// images, for that you should also use the AllowDataURIImages() helper. +func (p *Policy) AllowImages() { + + // "img" is permitted + p.AllowAttrs("align").Matching(ImageAlign).OnElements("img") + p.AllowAttrs("alt").Matching(Paragraph).OnElements("img") + p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("img") + + // Standard URLs enabled + p.AllowStandardURLs() + p.AllowAttrs("src").OnElements("img") +} + +// AllowDataURIImages permits the use of inline images defined in RFC2397 +// http://tools.ietf.org/html/rfc2397 +// http://en.wikipedia.org/wiki/Data_URI_scheme +// +// Images must have a mimetype matching: +// image/gif +// image/jpeg +// image/png +// image/webp +// +// NOTE: There is a potential security risk to allowing data URIs and you should +// only permit them on content you already trust. +// http://palizine.plynt.com/issues/2010Oct/bypass-xss-filters/ +// https://capec.mitre.org/data/definitions/244.html +func (p *Policy) AllowDataURIImages() { + + // URLs must be parseable by net/url.Parse() + p.RequireParseableURLs(true) + + // Supply a function to validate images contained within data URI + p.AllowURLSchemeWithCustomPolicy( + "data", + func(url *url.URL) (allowUrl bool) { + if url.RawQuery != "" || url.Fragment != "" { + return false + } + + matched := dataURIImagePrefix.FindString(url.Opaque) + if matched == "" { + return false + } + + _, err := base64.StdEncoding.DecodeString(url.Opaque[len(matched):]) + if err != nil { + return false + } + + return true + }, + ) +} + +// AllowLists will enabled ordered and unordered lists, as well as definition +// lists +func (p *Policy) AllowLists() { + // "ol" "ul" are permitted + p.AllowAttrs("type").Matching(ListType).OnElements("ol", "ul") + + // "li" is permitted + p.AllowAttrs("type").Matching(ListType).OnElements("li") + p.AllowAttrs("value").Matching(Integer).OnElements("li") + + // "dl" "dt" "dd" are permitted + p.AllowElements("dl", "dt", "dd") +} + +// AllowTables will enable a rich set of elements and attributes to describe +// HTML tables +func (p *Policy) AllowTables() { + + // "table" is permitted + p.AllowAttrs("height", "width").Matching(NumberOrPercent).OnElements("table") + p.AllowAttrs("summary").Matching(Paragraph).OnElements("table") + + // "caption" is permitted + p.AllowElements("caption") + + // "col" "colgroup" are permitted + p.AllowAttrs("align").Matching(CellAlign).OnElements("col", "colgroup") + p.AllowAttrs("height", "width").Matching( + NumberOrPercent, + ).OnElements("col", "colgroup") + p.AllowAttrs("span").Matching(Integer).OnElements("colgroup", "col") + p.AllowAttrs("valign").Matching( + CellVerticalAlign, + ).OnElements("col", "colgroup") + + // "thead" "tr" are permitted + p.AllowAttrs("align").Matching(CellAlign).OnElements("thead", "tr") + p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("thead", "tr") + + // "td" "th" are permitted + p.AllowAttrs("abbr").Matching(Paragraph).OnElements("td", "th") + p.AllowAttrs("align").Matching(CellAlign).OnElements("td", "th") + p.AllowAttrs("colspan", "rowspan").Matching(Integer).OnElements("td", "th") + p.AllowAttrs("headers").Matching( + SpaceSeparatedTokens, + ).OnElements("td", "th") + p.AllowAttrs("height", "width").Matching( + NumberOrPercent, + ).OnElements("td", "th") + p.AllowAttrs( + "scope", + ).Matching( + regexp.MustCompile(`(?i)(?:row|col)(?:group)?`), + ).OnElements("td", "th") + p.AllowAttrs("valign").Matching(CellVerticalAlign).OnElements("td", "th") + p.AllowAttrs("nowrap").Matching( + regexp.MustCompile(`(?i)|nowrap`), + ).OnElements("td", "th") + + // "tbody" "tfoot" + p.AllowAttrs("align").Matching(CellAlign).OnElements("tbody", "tfoot") + p.AllowAttrs("valign").Matching( + CellVerticalAlign, + ).OnElements("tbody", "tfoot") +} |