You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

policy.go 29KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865
  1. // Copyright (c) 2014, David Kitchen <david@buro9.com>
  2. //
  3. // All rights reserved.
  4. //
  5. // Redistribution and use in source and binary forms, with or without
  6. // modification, are permitted provided that the following conditions are met:
  7. //
  8. // * Redistributions of source code must retain the above copyright notice, this
  9. // list of conditions and the following disclaimer.
  10. //
  11. // * Redistributions in binary form must reproduce the above copyright notice,
  12. // this list of conditions and the following disclaimer in the documentation
  13. // and/or other materials provided with the distribution.
  14. //
  15. // * Neither the name of the organisation (Microcosm) nor the names of its
  16. // contributors may be used to endorse or promote products derived from
  17. // this software without specific prior written permission.
  18. //
  19. // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  20. // AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  21. // IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
  22. // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
  23. // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
  24. // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
  25. // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
  26. // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
  27. // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
  28. // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  29. package bluemonday
  30. //TODO sgutzwiller create map of styles to default handlers
  31. //TODO sgutzwiller create handlers for various attributes
  32. import (
  33. "net/url"
  34. "regexp"
  35. "strings"
  36. "github.com/microcosm-cc/bluemonday/css"
  37. )
  38. // Policy encapsulates the allowlist of HTML elements and attributes that will
  39. // be applied to the sanitised HTML.
  40. //
  41. // You should use bluemonday.NewPolicy() to create a blank policy as the
  42. // unexported fields contain maps that need to be initialized.
  43. type Policy struct {
  44. // Declares whether the maps have been initialized, used as a cheap check to
  45. // ensure that those using Policy{} directly won't cause nil pointer
  46. // exceptions
  47. initialized bool
  48. // If true then we add spaces when stripping tags, specifically the closing
  49. // tag is replaced by a space character.
  50. addSpaces bool
  51. // When true, add rel="nofollow" to HTML a, area, and link tags
  52. requireNoFollow bool
  53. // When true, add rel="nofollow" to HTML a, area, and link tags
  54. // Will add for href="http://foo"
  55. // Will skip for href="/foo" or href="foo"
  56. requireNoFollowFullyQualifiedLinks bool
  57. // When true, add rel="noreferrer" to HTML a, area, and link tags
  58. requireNoReferrer bool
  59. // When true, add rel="noreferrer" to HTML a, area, and link tags
  60. // Will add for href="http://foo"
  61. // Will skip for href="/foo" or href="foo"
  62. requireNoReferrerFullyQualifiedLinks bool
  63. // When true, add crossorigin="anonymous" to HTML audio, img, link, script, and video tags
  64. requireCrossOriginAnonymous bool
  65. // When true add target="_blank" to fully qualified links
  66. // Will add for href="http://foo"
  67. // Will skip for href="/foo" or href="foo"
  68. addTargetBlankToFullyQualifiedLinks bool
  69. // When true, URLs must be parseable by "net/url" url.Parse()
  70. requireParseableURLs bool
  71. // When true, u, _ := url.Parse("url"); !u.IsAbs() is permitted
  72. allowRelativeURLs bool
  73. // When true, allow data attributes.
  74. allowDataAttributes bool
  75. // When true, allow comments.
  76. allowComments bool
  77. // map[htmlElementName]map[htmlAttributeName][]attrPolicy
  78. elsAndAttrs map[string]map[string][]attrPolicy
  79. // elsMatchingAndAttrs stores regex based element matches along with attributes
  80. elsMatchingAndAttrs map[*regexp.Regexp]map[string][]attrPolicy
  81. // map[htmlAttributeName][]attrPolicy
  82. globalAttrs map[string][]attrPolicy
  83. // map[htmlElementName]map[cssPropertyName][]stylePolicy
  84. elsAndStyles map[string]map[string][]stylePolicy
  85. // map[regex]map[cssPropertyName][]stylePolicy
  86. elsMatchingAndStyles map[*regexp.Regexp]map[string][]stylePolicy
  87. // map[cssPropertyName][]stylePolicy
  88. globalStyles map[string][]stylePolicy
  89. // If urlPolicy is nil, all URLs with matching schema are allowed.
  90. // Otherwise, only the URLs with matching schema and urlPolicy(url)
  91. // returning true are allowed.
  92. allowURLSchemes map[string][]urlPolicy
  93. // If an element has had all attributes removed as a result of a policy
  94. // being applied, then the element would be removed from the output.
  95. //
  96. // However some elements are valid and have strong layout meaning without
  97. // any attributes, i.e. <table>. To prevent those being removed we maintain
  98. // a list of elements that are allowed to have no attributes and that will
  99. // be maintained in the output HTML.
  100. setOfElementsAllowedWithoutAttrs map[string]struct{}
  101. // If an element has had all attributes removed as a result of a policy
  102. // being applied, then the element would be removed from the output.
  103. //
  104. // However some elements are valid and have strong layout meaning without
  105. // any attributes, i.e. <table>.
  106. //
  107. // In this case, any element matching a regular expression will be accepted without
  108. // attributes added.
  109. setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp
  110. setOfElementsToSkipContent map[string]struct{}
  111. // Permits fundamentally unsafe elements.
  112. //
  113. // If false (default) then elements such as `style` and `script` will not be
  114. // permitted even if declared in a policy. These elements when combined with
  115. // untrusted input cannot be safely handled by bluemonday at this point in
  116. // time.
  117. //
  118. // If true then `style` and `script` would be permitted by bluemonday if a
  119. // policy declares them. However this is not recommended under any circumstance
  120. // and can lead to XSS being rendered thus defeating the purpose of using a
  121. // HTML sanitizer.
  122. allowUnsafe bool
  123. }
  124. type attrPolicy struct {
  125. // optional pattern to match, when not nil the regexp needs to match
  126. // otherwise the attribute is removed
  127. regexp *regexp.Regexp
  128. }
  129. type stylePolicy struct {
  130. // handler to validate
  131. handler func(string) bool
  132. // optional pattern to match, when not nil the regexp needs to match
  133. // otherwise the property is removed
  134. regexp *regexp.Regexp
  135. // optional list of allowed property values, for properties which
  136. // have a defined list of allowed values; property will be removed
  137. // if the value is not allowed
  138. enum []string
  139. }
  140. type attrPolicyBuilder struct {
  141. p *Policy
  142. attrNames []string
  143. regexp *regexp.Regexp
  144. allowEmpty bool
  145. }
  146. type stylePolicyBuilder struct {
  147. p *Policy
  148. propertyNames []string
  149. regexp *regexp.Regexp
  150. enum []string
  151. handler func(string) bool
  152. }
  153. type urlPolicy func(url *url.URL) (allowUrl bool)
  154. // init initializes the maps if this has not been done already
  155. func (p *Policy) init() {
  156. if !p.initialized {
  157. p.elsAndAttrs = make(map[string]map[string][]attrPolicy)
  158. p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string][]attrPolicy)
  159. p.globalAttrs = make(map[string][]attrPolicy)
  160. p.elsAndStyles = make(map[string]map[string][]stylePolicy)
  161. p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string][]stylePolicy)
  162. p.globalStyles = make(map[string][]stylePolicy)
  163. p.allowURLSchemes = make(map[string][]urlPolicy)
  164. p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
  165. p.setOfElementsToSkipContent = make(map[string]struct{})
  166. p.initialized = true
  167. }
  168. }
  169. // NewPolicy returns a blank policy with nothing allowed or permitted. This
  170. // is the recommended way to start building a policy and you should now use
  171. // AllowAttrs() and/or AllowElements() to construct the allowlist of HTML
  172. // elements and attributes.
  173. func NewPolicy() *Policy {
  174. p := Policy{}
  175. p.addDefaultElementsWithoutAttrs()
  176. p.addDefaultSkipElementContent()
  177. return &p
  178. }
  179. // AllowAttrs takes a range of HTML attribute names and returns an
  180. // attribute policy builder that allows you to specify the pattern and scope of
  181. // the allowed attribute.
  182. //
  183. // The attribute policy is only added to the core policy when either Globally()
  184. // or OnElements(...) are called.
  185. func (p *Policy) AllowAttrs(attrNames ...string) *attrPolicyBuilder {
  186. p.init()
  187. abp := attrPolicyBuilder{
  188. p: p,
  189. allowEmpty: false,
  190. }
  191. for _, attrName := range attrNames {
  192. abp.attrNames = append(abp.attrNames, strings.ToLower(attrName))
  193. }
  194. return &abp
  195. }
  196. // AllowDataAttributes permits all data attributes. We can't specify the name
  197. // of each attribute exactly as they are customized.
  198. //
  199. // NOTE: These values are not sanitized and applications that evaluate or process
  200. // them without checking and verification of the input may be at risk if this option
  201. // is enabled. This is a 'caveat emptor' option and the person enabling this option
  202. // needs to fully understand the potential impact with regards to whatever application
  203. // will be consuming the sanitized HTML afterwards, i.e. if you know you put a link in a
  204. // data attribute and use that to automatically load some new window then you're giving
  205. // the author of a HTML fragment the means to open a malicious destination automatically.
  206. // Use with care!
  207. func (p *Policy) AllowDataAttributes() {
  208. p.allowDataAttributes = true
  209. }
  210. // AllowComments allows comments.
  211. //
  212. // Please note that only one type of comment will be allowed by this, this is the
  213. // the standard HTML comment <!-- --> which includes the use of that to permit
  214. // conditionals as per https://docs.microsoft.com/en-us/previous-versions/windows/internet-explorer/ie-developer/compatibility/ms537512(v=vs.85)?redirectedfrom=MSDN
  215. //
  216. // What is not permitted are CDATA XML comments, as the x/net/html package we depend
  217. // on does not handle this fully and we are not choosing to take on that work:
  218. // https://pkg.go.dev/golang.org/x/net/html#Tokenizer.AllowCDATA . If the x/net/html
  219. // package changes this then these will be considered, otherwise if you AllowComments
  220. // but provide a CDATA comment, then as per the documentation in x/net/html this will
  221. // be treated as a plain HTML comment.
  222. func (p *Policy) AllowComments() {
  223. p.allowComments = true
  224. }
  225. // AllowNoAttrs says that attributes on element are optional.
  226. //
  227. // The attribute policy is only added to the core policy when OnElements(...)
  228. // are called.
  229. func (p *Policy) AllowNoAttrs() *attrPolicyBuilder {
  230. p.init()
  231. abp := attrPolicyBuilder{
  232. p: p,
  233. allowEmpty: true,
  234. }
  235. return &abp
  236. }
  237. // AllowNoAttrs says that attributes on element are optional.
  238. //
  239. // The attribute policy is only added to the core policy when OnElements(...)
  240. // are called.
  241. func (abp *attrPolicyBuilder) AllowNoAttrs() *attrPolicyBuilder {
  242. abp.allowEmpty = true
  243. return abp
  244. }
  245. // Matching allows a regular expression to be applied to a nascent attribute
  246. // policy, and returns the attribute policy.
  247. func (abp *attrPolicyBuilder) Matching(regex *regexp.Regexp) *attrPolicyBuilder {
  248. abp.regexp = regex
  249. return abp
  250. }
  251. // OnElements will bind an attribute policy to a given range of HTML elements
  252. // and return the updated policy
  253. func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
  254. for _, element := range elements {
  255. element = strings.ToLower(element)
  256. for _, attr := range abp.attrNames {
  257. if _, ok := abp.p.elsAndAttrs[element]; !ok {
  258. abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
  259. }
  260. ap := attrPolicy{}
  261. if abp.regexp != nil {
  262. ap.regexp = abp.regexp
  263. }
  264. abp.p.elsAndAttrs[element][attr] = append(abp.p.elsAndAttrs[element][attr], ap)
  265. }
  266. if abp.allowEmpty {
  267. abp.p.setOfElementsAllowedWithoutAttrs[element] = struct{}{}
  268. if _, ok := abp.p.elsAndAttrs[element]; !ok {
  269. abp.p.elsAndAttrs[element] = make(map[string][]attrPolicy)
  270. }
  271. }
  272. }
  273. return abp.p
  274. }
  275. // OnElementsMatching will bind an attribute policy to all elements matching a given regex
  276. // and return the updated policy
  277. func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
  278. for _, attr := range abp.attrNames {
  279. if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
  280. abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
  281. }
  282. ap := attrPolicy{}
  283. if abp.regexp != nil {
  284. ap.regexp = abp.regexp
  285. }
  286. abp.p.elsMatchingAndAttrs[regex][attr] = append(abp.p.elsMatchingAndAttrs[regex][attr], ap)
  287. }
  288. if abp.allowEmpty {
  289. abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
  290. if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
  291. abp.p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
  292. }
  293. }
  294. return abp.p
  295. }
  296. // Globally will bind an attribute policy to all HTML elements and return the
  297. // updated policy
  298. func (abp *attrPolicyBuilder) Globally() *Policy {
  299. for _, attr := range abp.attrNames {
  300. if _, ok := abp.p.globalAttrs[attr]; !ok {
  301. abp.p.globalAttrs[attr] = []attrPolicy{}
  302. }
  303. ap := attrPolicy{}
  304. if abp.regexp != nil {
  305. ap.regexp = abp.regexp
  306. }
  307. abp.p.globalAttrs[attr] = append(abp.p.globalAttrs[attr], ap)
  308. }
  309. return abp.p
  310. }
  311. // AllowStyles takes a range of CSS property names and returns a
  312. // style policy builder that allows you to specify the pattern and scope of
  313. // the allowed property.
  314. //
  315. // The style policy is only added to the core policy when either Globally()
  316. // or OnElements(...) are called.
  317. func (p *Policy) AllowStyles(propertyNames ...string) *stylePolicyBuilder {
  318. p.init()
  319. abp := stylePolicyBuilder{
  320. p: p,
  321. }
  322. for _, propertyName := range propertyNames {
  323. abp.propertyNames = append(abp.propertyNames, strings.ToLower(propertyName))
  324. }
  325. return &abp
  326. }
  327. // Matching allows a regular expression to be applied to a nascent style
  328. // policy, and returns the style policy.
  329. func (spb *stylePolicyBuilder) Matching(regex *regexp.Regexp) *stylePolicyBuilder {
  330. spb.regexp = regex
  331. return spb
  332. }
  333. // MatchingEnum allows a list of allowed values to be applied to a nascent style
  334. // policy, and returns the style policy.
  335. func (spb *stylePolicyBuilder) MatchingEnum(enum ...string) *stylePolicyBuilder {
  336. spb.enum = enum
  337. return spb
  338. }
  339. // MatchingHandler allows a handler to be applied to a nascent style
  340. // policy, and returns the style policy.
  341. func (spb *stylePolicyBuilder) MatchingHandler(handler func(string) bool) *stylePolicyBuilder {
  342. spb.handler = handler
  343. return spb
  344. }
  345. // OnElements will bind a style policy to a given range of HTML elements
  346. // and return the updated policy
  347. func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
  348. for _, element := range elements {
  349. element = strings.ToLower(element)
  350. for _, attr := range spb.propertyNames {
  351. if _, ok := spb.p.elsAndStyles[element]; !ok {
  352. spb.p.elsAndStyles[element] = make(map[string][]stylePolicy)
  353. }
  354. sp := stylePolicy{}
  355. if spb.handler != nil {
  356. sp.handler = spb.handler
  357. } else if len(spb.enum) > 0 {
  358. sp.enum = spb.enum
  359. } else if spb.regexp != nil {
  360. sp.regexp = spb.regexp
  361. } else {
  362. sp.handler = css.GetDefaultHandler(attr)
  363. }
  364. spb.p.elsAndStyles[element][attr] = append(spb.p.elsAndStyles[element][attr], sp)
  365. }
  366. }
  367. return spb.p
  368. }
  369. // OnElementsMatching will bind a style policy to any HTML elements matching the pattern
  370. // and return the updated policy
  371. func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
  372. for _, attr := range spb.propertyNames {
  373. if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
  374. spb.p.elsMatchingAndStyles[regex] = make(map[string][]stylePolicy)
  375. }
  376. sp := stylePolicy{}
  377. if spb.handler != nil {
  378. sp.handler = spb.handler
  379. } else if len(spb.enum) > 0 {
  380. sp.enum = spb.enum
  381. } else if spb.regexp != nil {
  382. sp.regexp = spb.regexp
  383. } else {
  384. sp.handler = css.GetDefaultHandler(attr)
  385. }
  386. spb.p.elsMatchingAndStyles[regex][attr] = append(spb.p.elsMatchingAndStyles[regex][attr], sp)
  387. }
  388. return spb.p
  389. }
  390. // Globally will bind a style policy to all HTML elements and return the
  391. // updated policy
  392. func (spb *stylePolicyBuilder) Globally() *Policy {
  393. for _, attr := range spb.propertyNames {
  394. if _, ok := spb.p.globalStyles[attr]; !ok {
  395. spb.p.globalStyles[attr] = []stylePolicy{}
  396. }
  397. // Use only one strategy for validating styles, fallback to default
  398. sp := stylePolicy{}
  399. if spb.handler != nil {
  400. sp.handler = spb.handler
  401. } else if len(spb.enum) > 0 {
  402. sp.enum = spb.enum
  403. } else if spb.regexp != nil {
  404. sp.regexp = spb.regexp
  405. } else {
  406. sp.handler = css.GetDefaultHandler(attr)
  407. }
  408. spb.p.globalStyles[attr] = append(spb.p.globalStyles[attr], sp)
  409. }
  410. return spb.p
  411. }
  412. // AllowElements will append HTML elements to the allowlist without applying an
  413. // attribute policy to those elements (the elements are permitted
  414. // sans-attributes)
  415. func (p *Policy) AllowElements(names ...string) *Policy {
  416. p.init()
  417. for _, element := range names {
  418. element = strings.ToLower(element)
  419. if _, ok := p.elsAndAttrs[element]; !ok {
  420. p.elsAndAttrs[element] = make(map[string][]attrPolicy)
  421. }
  422. }
  423. return p
  424. }
  425. // AllowElementsMatching will append HTML elements to the allowlist if they
  426. // match a regexp.
  427. func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
  428. p.init()
  429. if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
  430. p.elsMatchingAndAttrs[regex] = make(map[string][]attrPolicy)
  431. }
  432. return p
  433. }
  434. // RequireNoFollowOnLinks will result in all a, area, link tags having a
  435. // rel="nofollow"added to them if one does not already exist
  436. //
  437. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  438. func (p *Policy) RequireNoFollowOnLinks(require bool) *Policy {
  439. p.requireNoFollow = require
  440. p.requireParseableURLs = true
  441. return p
  442. }
  443. // RequireNoFollowOnFullyQualifiedLinks will result in all a, area, and link
  444. // tags that point to a non-local destination (i.e. starts with a protocol and
  445. // has a host) having a rel="nofollow" added to them if one does not already
  446. // exist
  447. //
  448. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  449. func (p *Policy) RequireNoFollowOnFullyQualifiedLinks(require bool) *Policy {
  450. p.requireNoFollowFullyQualifiedLinks = require
  451. p.requireParseableURLs = true
  452. return p
  453. }
  454. // RequireNoReferrerOnLinks will result in all a, area, and link tags having a
  455. // rel="noreferrrer" added to them if one does not already exist
  456. //
  457. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  458. func (p *Policy) RequireNoReferrerOnLinks(require bool) *Policy {
  459. p.requireNoReferrer = require
  460. p.requireParseableURLs = true
  461. return p
  462. }
  463. // RequireNoReferrerOnFullyQualifiedLinks will result in all a, area, and link
  464. // tags that point to a non-local destination (i.e. starts with a protocol and
  465. // has a host) having a rel="noreferrer" added to them if one does not already
  466. // exist
  467. //
  468. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  469. func (p *Policy) RequireNoReferrerOnFullyQualifiedLinks(require bool) *Policy {
  470. p.requireNoReferrerFullyQualifiedLinks = require
  471. p.requireParseableURLs = true
  472. return p
  473. }
  474. // RequireCrossOriginAnonymous will result in all audio, img, link, script, and
  475. // video tags having a crossorigin="anonymous" added to them if one does not
  476. // already exist
  477. func (p *Policy) RequireCrossOriginAnonymous(require bool) *Policy {
  478. p.requireCrossOriginAnonymous = require
  479. return p
  480. }
  481. // AddTargetBlankToFullyQualifiedLinks will result in all a, area and link tags
  482. // that point to a non-local destination (i.e. starts with a protocol and has a
  483. // host) having a target="_blank" added to them if one does not already exist
  484. //
  485. // Note: This requires p.RequireParseableURLs(true) and will enable it.
  486. func (p *Policy) AddTargetBlankToFullyQualifiedLinks(require bool) *Policy {
  487. p.addTargetBlankToFullyQualifiedLinks = require
  488. p.requireParseableURLs = true
  489. return p
  490. }
  491. // RequireParseableURLs will result in all URLs requiring that they be parseable
  492. // by "net/url" url.Parse()
  493. // This applies to:
  494. // - a.href
  495. // - area.href
  496. // - blockquote.cite
  497. // - img.src
  498. // - link.href
  499. // - script.src
  500. func (p *Policy) RequireParseableURLs(require bool) *Policy {
  501. p.requireParseableURLs = require
  502. return p
  503. }
  504. // AllowRelativeURLs enables RequireParseableURLs and then permits URLs that
  505. // are parseable, have no schema information and url.IsAbs() returns false
  506. // This permits local URLs
  507. func (p *Policy) AllowRelativeURLs(require bool) *Policy {
  508. p.RequireParseableURLs(true)
  509. p.allowRelativeURLs = require
  510. return p
  511. }
  512. // AllowURLSchemes will append URL schemes to the allowlist
  513. // Example: p.AllowURLSchemes("mailto", "http", "https")
  514. func (p *Policy) AllowURLSchemes(schemes ...string) *Policy {
  515. p.init()
  516. p.RequireParseableURLs(true)
  517. for _, scheme := range schemes {
  518. scheme = strings.ToLower(scheme)
  519. // Allow all URLs with matching scheme.
  520. p.allowURLSchemes[scheme] = nil
  521. }
  522. return p
  523. }
  524. // AllowURLSchemeWithCustomPolicy will append URL schemes with
  525. // a custom URL policy to the allowlist.
  526. // Only the URLs with matching schema and urlPolicy(url)
  527. // returning true will be allowed.
  528. func (p *Policy) AllowURLSchemeWithCustomPolicy(
  529. scheme string,
  530. urlPolicy func(url *url.URL) (allowUrl bool),
  531. ) *Policy {
  532. p.init()
  533. p.RequireParseableURLs(true)
  534. scheme = strings.ToLower(scheme)
  535. p.allowURLSchemes[scheme] = append(p.allowURLSchemes[scheme], urlPolicy)
  536. return p
  537. }
  538. // AddSpaceWhenStrippingTag states whether to add a single space " " when
  539. // removing tags that are not allowed by the policy.
  540. //
  541. // This is useful if you expect to strip tags in dense markup and may lose the
  542. // value of whitespace.
  543. //
  544. // For example: "<p>Hello</p><p>World</p>"" would be sanitized to "HelloWorld"
  545. // with the default value of false, but you may wish to sanitize this to
  546. // " Hello World " by setting AddSpaceWhenStrippingTag to true as this would
  547. // retain the intent of the text.
  548. func (p *Policy) AddSpaceWhenStrippingTag(allow bool) *Policy {
  549. p.addSpaces = allow
  550. return p
  551. }
  552. // SkipElementsContent adds the HTML elements whose tags is needed to be removed
  553. // with its content.
  554. func (p *Policy) SkipElementsContent(names ...string) *Policy {
  555. p.init()
  556. for _, element := range names {
  557. element = strings.ToLower(element)
  558. if _, ok := p.setOfElementsToSkipContent[element]; !ok {
  559. p.setOfElementsToSkipContent[element] = struct{}{}
  560. }
  561. }
  562. return p
  563. }
  564. // AllowElementsContent marks the HTML elements whose content should be
  565. // retained after removing the tag.
  566. func (p *Policy) AllowElementsContent(names ...string) *Policy {
  567. p.init()
  568. for _, element := range names {
  569. delete(p.setOfElementsToSkipContent, strings.ToLower(element))
  570. }
  571. return p
  572. }
  573. // AllowUnsafe permits fundamentally unsafe elements.
  574. //
  575. // If false (default) then elements such as `style` and `script` will not be
  576. // permitted even if declared in a policy. These elements when combined with
  577. // untrusted input cannot be safely handled by bluemonday at this point in
  578. // time.
  579. //
  580. // If true then `style` and `script` would be permitted by bluemonday if a
  581. // policy declares them. However this is not recommended under any circumstance
  582. // and can lead to XSS being rendered thus defeating the purpose of using a
  583. // HTML sanitizer.
  584. func (p *Policy) AllowUnsafe(allowUnsafe bool) *Policy {
  585. p.init()
  586. p.allowUnsafe = allowUnsafe
  587. return p
  588. }
  589. // addDefaultElementsWithoutAttrs adds the HTML elements that we know are valid
  590. // without any attributes to an internal map.
  591. // i.e. we know that <table> is valid, but <bdo> isn't valid as the "dir" attr
  592. // is mandatory
  593. func (p *Policy) addDefaultElementsWithoutAttrs() {
  594. p.init()
  595. p.setOfElementsAllowedWithoutAttrs["abbr"] = struct{}{}
  596. p.setOfElementsAllowedWithoutAttrs["acronym"] = struct{}{}
  597. p.setOfElementsAllowedWithoutAttrs["address"] = struct{}{}
  598. p.setOfElementsAllowedWithoutAttrs["article"] = struct{}{}
  599. p.setOfElementsAllowedWithoutAttrs["aside"] = struct{}{}
  600. p.setOfElementsAllowedWithoutAttrs["audio"] = struct{}{}
  601. p.setOfElementsAllowedWithoutAttrs["b"] = struct{}{}
  602. p.setOfElementsAllowedWithoutAttrs["bdi"] = struct{}{}
  603. p.setOfElementsAllowedWithoutAttrs["blockquote"] = struct{}{}
  604. p.setOfElementsAllowedWithoutAttrs["body"] = struct{}{}
  605. p.setOfElementsAllowedWithoutAttrs["br"] = struct{}{}
  606. p.setOfElementsAllowedWithoutAttrs["button"] = struct{}{}
  607. p.setOfElementsAllowedWithoutAttrs["canvas"] = struct{}{}
  608. p.setOfElementsAllowedWithoutAttrs["caption"] = struct{}{}
  609. p.setOfElementsAllowedWithoutAttrs["center"] = struct{}{}
  610. p.setOfElementsAllowedWithoutAttrs["cite"] = struct{}{}
  611. p.setOfElementsAllowedWithoutAttrs["code"] = struct{}{}
  612. p.setOfElementsAllowedWithoutAttrs["col"] = struct{}{}
  613. p.setOfElementsAllowedWithoutAttrs["colgroup"] = struct{}{}
  614. p.setOfElementsAllowedWithoutAttrs["datalist"] = struct{}{}
  615. p.setOfElementsAllowedWithoutAttrs["dd"] = struct{}{}
  616. p.setOfElementsAllowedWithoutAttrs["del"] = struct{}{}
  617. p.setOfElementsAllowedWithoutAttrs["details"] = struct{}{}
  618. p.setOfElementsAllowedWithoutAttrs["dfn"] = struct{}{}
  619. p.setOfElementsAllowedWithoutAttrs["div"] = struct{}{}
  620. p.setOfElementsAllowedWithoutAttrs["dl"] = struct{}{}
  621. p.setOfElementsAllowedWithoutAttrs["dt"] = struct{}{}
  622. p.setOfElementsAllowedWithoutAttrs["em"] = struct{}{}
  623. p.setOfElementsAllowedWithoutAttrs["fieldset"] = struct{}{}
  624. p.setOfElementsAllowedWithoutAttrs["figcaption"] = struct{}{}
  625. p.setOfElementsAllowedWithoutAttrs["figure"] = struct{}{}
  626. p.setOfElementsAllowedWithoutAttrs["footer"] = struct{}{}
  627. p.setOfElementsAllowedWithoutAttrs["h1"] = struct{}{}
  628. p.setOfElementsAllowedWithoutAttrs["h2"] = struct{}{}
  629. p.setOfElementsAllowedWithoutAttrs["h3"] = struct{}{}
  630. p.setOfElementsAllowedWithoutAttrs["h4"] = struct{}{}
  631. p.setOfElementsAllowedWithoutAttrs["h5"] = struct{}{}
  632. p.setOfElementsAllowedWithoutAttrs["h6"] = struct{}{}
  633. p.setOfElementsAllowedWithoutAttrs["head"] = struct{}{}
  634. p.setOfElementsAllowedWithoutAttrs["header"] = struct{}{}
  635. p.setOfElementsAllowedWithoutAttrs["hgroup"] = struct{}{}
  636. p.setOfElementsAllowedWithoutAttrs["hr"] = struct{}{}
  637. p.setOfElementsAllowedWithoutAttrs["html"] = struct{}{}
  638. p.setOfElementsAllowedWithoutAttrs["i"] = struct{}{}
  639. p.setOfElementsAllowedWithoutAttrs["ins"] = struct{}{}
  640. p.setOfElementsAllowedWithoutAttrs["kbd"] = struct{}{}
  641. p.setOfElementsAllowedWithoutAttrs["li"] = struct{}{}
  642. p.setOfElementsAllowedWithoutAttrs["mark"] = struct{}{}
  643. p.setOfElementsAllowedWithoutAttrs["marquee"] = struct{}{}
  644. p.setOfElementsAllowedWithoutAttrs["nav"] = struct{}{}
  645. p.setOfElementsAllowedWithoutAttrs["ol"] = struct{}{}
  646. p.setOfElementsAllowedWithoutAttrs["optgroup"] = struct{}{}
  647. p.setOfElementsAllowedWithoutAttrs["option"] = struct{}{}
  648. p.setOfElementsAllowedWithoutAttrs["p"] = struct{}{}
  649. p.setOfElementsAllowedWithoutAttrs["pre"] = struct{}{}
  650. p.setOfElementsAllowedWithoutAttrs["q"] = struct{}{}
  651. p.setOfElementsAllowedWithoutAttrs["rp"] = struct{}{}
  652. p.setOfElementsAllowedWithoutAttrs["rt"] = struct{}{}
  653. p.setOfElementsAllowedWithoutAttrs["ruby"] = struct{}{}
  654. p.setOfElementsAllowedWithoutAttrs["s"] = struct{}{}
  655. p.setOfElementsAllowedWithoutAttrs["samp"] = struct{}{}
  656. p.setOfElementsAllowedWithoutAttrs["script"] = struct{}{}
  657. p.setOfElementsAllowedWithoutAttrs["section"] = struct{}{}
  658. p.setOfElementsAllowedWithoutAttrs["select"] = struct{}{}
  659. p.setOfElementsAllowedWithoutAttrs["small"] = struct{}{}
  660. p.setOfElementsAllowedWithoutAttrs["span"] = struct{}{}
  661. p.setOfElementsAllowedWithoutAttrs["strike"] = struct{}{}
  662. p.setOfElementsAllowedWithoutAttrs["strong"] = struct{}{}
  663. p.setOfElementsAllowedWithoutAttrs["style"] = struct{}{}
  664. p.setOfElementsAllowedWithoutAttrs["sub"] = struct{}{}
  665. p.setOfElementsAllowedWithoutAttrs["summary"] = struct{}{}
  666. p.setOfElementsAllowedWithoutAttrs["sup"] = struct{}{}
  667. p.setOfElementsAllowedWithoutAttrs["svg"] = struct{}{}
  668. p.setOfElementsAllowedWithoutAttrs["table"] = struct{}{}
  669. p.setOfElementsAllowedWithoutAttrs["tbody"] = struct{}{}
  670. p.setOfElementsAllowedWithoutAttrs["td"] = struct{}{}
  671. p.setOfElementsAllowedWithoutAttrs["textarea"] = struct{}{}
  672. p.setOfElementsAllowedWithoutAttrs["tfoot"] = struct{}{}
  673. p.setOfElementsAllowedWithoutAttrs["th"] = struct{}{}
  674. p.setOfElementsAllowedWithoutAttrs["thead"] = struct{}{}
  675. p.setOfElementsAllowedWithoutAttrs["title"] = struct{}{}
  676. p.setOfElementsAllowedWithoutAttrs["time"] = struct{}{}
  677. p.setOfElementsAllowedWithoutAttrs["tr"] = struct{}{}
  678. p.setOfElementsAllowedWithoutAttrs["tt"] = struct{}{}
  679. p.setOfElementsAllowedWithoutAttrs["u"] = struct{}{}
  680. p.setOfElementsAllowedWithoutAttrs["ul"] = struct{}{}
  681. p.setOfElementsAllowedWithoutAttrs["var"] = struct{}{}
  682. p.setOfElementsAllowedWithoutAttrs["video"] = struct{}{}
  683. p.setOfElementsAllowedWithoutAttrs["wbr"] = struct{}{}
  684. }
  685. // addDefaultSkipElementContent adds the HTML elements that we should skip
  686. // rendering the character content of, if the element itself is not allowed.
  687. // This is all character data that the end user would not normally see.
  688. // i.e. if we exclude a <script> tag then we shouldn't render the JavaScript or
  689. // anything else until we encounter the closing </script> tag.
  690. func (p *Policy) addDefaultSkipElementContent() {
  691. p.init()
  692. p.setOfElementsToSkipContent["frame"] = struct{}{}
  693. p.setOfElementsToSkipContent["frameset"] = struct{}{}
  694. p.setOfElementsToSkipContent["iframe"] = struct{}{}
  695. p.setOfElementsToSkipContent["noembed"] = struct{}{}
  696. p.setOfElementsToSkipContent["noframes"] = struct{}{}
  697. p.setOfElementsToSkipContent["noscript"] = struct{}{}
  698. p.setOfElementsToSkipContent["nostyle"] = struct{}{}
  699. p.setOfElementsToSkipContent["object"] = struct{}{}
  700. p.setOfElementsToSkipContent["script"] = struct{}{}
  701. p.setOfElementsToSkipContent["style"] = struct{}{}
  702. p.setOfElementsToSkipContent["title"] = struct{}{}
  703. }