From 3835ddee4c596fc0788b1256006c16b428100431 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Wed, 9 Oct 2019 22:11:22 +0100 Subject: [PATCH 1/6] WIP, adding support for regex matching elements --- .idea/.gitignore | 2 + .idea/bluemonday.iml | 9 +++++ .idea/misc.xml | 6 +++ .idea/modules.xml | 8 ++++ .idea/vcs.xml | 6 +++ go.mod | 2 +- policy.go | 46 +++++++++++++++++++++++ policy_test.go | 47 +++++++++++++++++++++++- sanitize.go | 87 ++++++++++++++++++++++++++++++++++++-------- 9 files changed, 195 insertions(+), 18 deletions(-) create mode 100644 .idea/.gitignore create mode 100644 .idea/bluemonday.iml create mode 100644 .idea/misc.xml create mode 100644 .idea/modules.xml create mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore new file mode 100644 index 0000000..5c98b42 --- /dev/null +++ b/.idea/.gitignore @@ -0,0 +1,2 @@ +# Default ignored files +/workspace.xml \ No newline at end of file diff --git a/.idea/bluemonday.iml b/.idea/bluemonday.iml new file mode 100644 index 0000000..5e764c4 --- /dev/null +++ b/.idea/bluemonday.iml @@ -0,0 +1,9 @@ + + + + + + + + + \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml new file mode 100644 index 0000000..28a804d --- /dev/null +++ b/.idea/misc.xml @@ -0,0 +1,6 @@ + + + + + \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml new file mode 100644 index 0000000..db9d351 --- /dev/null +++ b/.idea/modules.xml @@ -0,0 +1,8 @@ + + + + + + + + \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml new file mode 100644 index 0000000..94a25f7 --- /dev/null +++ b/.idea/vcs.xml @@ -0,0 +1,6 @@ + + + + + + \ No newline at end of file diff --git a/go.mod b/go.mod index 69940a1..47b521a 100644 --- a/go.mod +++ b/go.mod @@ -3,7 +3,7 @@ module github.com/microcosm-cc/bluemonday go 1.9 require ( - github.com/aymerick/douceur v0.2.0 + github.com/aymerick/douceur v0.2.0 // indirect github.com/chris-ramon/douceur v0.2.0 github.com/gorilla/css v1.0.0 // 
indirect golang.org/x/net v0.0.0-20181220203305-927f97764cc3 diff --git a/policy.go b/policy.go index 17d3974..cceeb2d 100644 --- a/policy.go +++ b/policy.go @@ -86,6 +86,9 @@ type Policy struct { // map[htmlElementName]map[htmlAttributeName]attrPolicy elsAndAttrs map[string]map[string]attrPolicy + // elsMatchingAndAttrs stores regex based element matches along with attributes + elsMatchingAndAttrs map[*regexp.Regexp]map[string]attrPolicy + // map[htmlAttributeName]attrPolicy globalAttrs map[string]attrPolicy @@ -109,6 +112,16 @@ type Policy struct { // be maintained in the output HTML. setOfElementsAllowedWithoutAttrs map[string]struct{} + // If an element has had all attributes removed as a result of a policy + // being applied, then the element would be removed from the output. + // + // However some elements are valid and have strong layout meaning without + // any attributes, i.e. <table>. + // + // In this case, any element matching a regular expression will be accepted without + // attributes added. 
+ setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp + setOfElementsToSkipContent map[string]struct{} } @@ -156,6 +169,7 @@ type urlPolicy func(url *url.URL) (allowUrl bool) func (p *Policy) init() { if !p.initialized { p.elsAndAttrs = make(map[string]map[string]attrPolicy) + p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string]attrPolicy) p.globalAttrs = make(map[string]attrPolicy) p.elsAndStyles = make(map[string]map[string]stylePolicy) p.globalStyles = make(map[string]stylePolicy) @@ -286,6 +300,30 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy { return abp.p } +// OnElementsMatching will bind an attribute policy to all elements matching a given regex +// and return the updated policy +func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy { + for _, attr := range abp.attrNames { + if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { + abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) + } + ap := attrPolicy{} + if abp.regexp != nil { + ap.regexp = abp.regexp + } + abp.p.elsMatchingAndAttrs[regex][attr] = ap + } + + if abp.allowEmpty { + abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs,regex) + if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { + abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) + } + } + + return abp.p +} + // Globally will bind an attribute policy to all HTML elements and return the // updated policy func (abp *attrPolicyBuilder) Globally() *Policy { @@ -430,6 +468,14 @@ func (p *Policy) AllowElements(names ...string) *Policy { return p } +func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy { + p.init() + if _, ok := p.elsMatchingAndAttrs[regex]; !ok { + p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) + } + return p +} + // RequireNoFollowOnLinks will result in all a, area, link tags having a // rel="nofollow"added to them if one does not already exist // diff --git 
a/policy_test.go b/policy_test.go index ee9e2be..681e958 100644 --- a/policy_test.go +++ b/policy_test.go @@ -29,7 +29,10 @@ package bluemonday -import "testing" +import ( + "regexp" + "testing" +) func TestAllowElementsContent(t *testing.T) { policy := NewPolicy().AllowElementsContent("iframe", "script") @@ -58,3 +61,45 @@ func TestAllowElementsContent(t *testing.T) { } } } + +func TestElementsMatching(t *testing.T) { + tests := map[string]struct{ + regexs []*regexp.Regexp + in string + expected string + }{ + "Self closing tags with regex prefix should strip any that do not match":{ + regexs: []*regexp.Regexp{ + regexp.MustCompile(`^my-element-`), + }, + in:`
`, + expected:`
`, + },"Standard elements regex prefix should strip any that do not match":{ + regexs: []*regexp.Regexp{ + regexp.MustCompile(`^my-element-`), + }, + in:`
`, + expected:`
`, + }, + } + + for name, test := range tests { + policy := NewPolicy().AllowElements("div") + policy.AllowNoAttrs().OnElementsMatching(test.regexs[0]) + policy.AllowDataAttributes() + for _, regex := range test.regexs{ + policy.AllowElementsMatching(regex) + } + out := policy.Sanitize(test.in) + if out != test.expected { + t.Errorf( + "test %s failed;\ninput : %s\noutput : %s\nexpected: %s", + name, + test.in, + out, + test.expected, + ) + } + } +} + diff --git a/sanitize.go b/sanitize.go index 0715499..6ab8c28 100644 --- a/sanitize.go +++ b/sanitize.go @@ -31,6 +31,7 @@ package bluemonday import ( "bytes" + "fmt" "io" "net/url" "regexp" @@ -233,14 +234,34 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { aps, ok := p.elsAndAttrs[token.Data] if !ok { - if _, ok := p.setOfElementsToSkipContent[token.Data]; ok { - skipElementContent = true - skippingElementsCount++ + fmt.Println("Not OKAY") + // check if we have any regex that match the element + if aps == nil{ + aps = make(map[string]attrPolicy,0) } - if p.addSpaces { - buff.WriteString(" ") + matched := false + for regex, attrs := range p.elsMatchingAndAttrs{ + fmt.Println("Start tag Iterating Regexps") + if regex.MatchString(token.Data){ + matched = true + fmt.Println("Start tag Matched and appending") + // append matching attrs on as could have multiple depending on match + for k, v := range attrs{ + aps[k] = v + } + } } - break + if !matched { + if _, ok := p.setOfElementsToSkipContent[token.Data]; ok { + skipElementContent = true + skippingElementsCount++ + } + if p.addSpaces { + buff.WriteString(" ") + } + break + } + } if len(token.Attr) != 0 { token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps) @@ -282,18 +303,28 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { } break } - +fmt.Println("End tag") if _, ok := p.elsAndAttrs[token.Data]; !ok { - if _, ok := p.setOfElementsToSkipContent[token.Data]; ok { + match := false + for regex, _ := range p.elsMatchingAndAttrs{ + if 
regex.MatchString(token.Data) { + skipElementContent = false + match = true + break + } + } + if _, ok := p.setOfElementsToSkipContent[token.Data]; ok && !match { skippingElementsCount-- if skippingElementsCount == 0 { skipElementContent = false } } - if p.addSpaces { - buff.WriteString(" ") + if !match{ + if p.addSpaces { + buff.WriteString(" ") + } + break } - break } if !skipElementContent { @@ -304,10 +335,27 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { aps, ok := p.elsAndAttrs[token.Data] if !ok { - if p.addSpaces { - buff.WriteString(" ") + if aps == nil{ + aps = make(map[string]attrPolicy,0) + } + matched := false + for regex, attrs := range p.elsMatchingAndAttrs{ + fmt.Println("Self Close Iterating Regexps") + if regex.MatchString(token.Data){ + matched = true + fmt.Println("Self Close Matched and appending") + // append matching attrs on as could have multiple depending on match + for k, v := range attrs{ + aps[k] = v + } + } + } + if !matched{ + if p.addSpaces && !matched { + buff.WriteString(" ") + } + break } - break } if len(token.Attr) != 0 { @@ -317,10 +365,9 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) { if p.addSpaces { buff.WriteString(" ") + break } - break } - if !skipElementContent { // do not escape multiple query parameters if linkable(token.Data) { @@ -710,6 +757,14 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At func (p *Policy) allowNoAttrs(elementName string) bool { _, ok := p.setOfElementsAllowedWithoutAttrs[elementName] + if !ok{ + for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs{ + if r.MatchString(elementName){ + ok = true + break + } + } + } return ok } From c2768850362436ae4d8c79fa56b97afe12a5aad6 Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Thu, 10 Oct 2019 09:49:18 +0100 Subject: [PATCH 2/6] adding tests and ToElementMatch func --- policy.go | 28 ++++++++++++++-------------- policy_test.go | 21 
++++++++++----------- sanitize.go | 36 ++++++++++++++++++------------------ 3 files changed, 42 insertions(+), 43 deletions(-) diff --git a/policy.go b/policy.go index cceeb2d..acf4166 100644 --- a/policy.go +++ b/policy.go @@ -303,23 +303,23 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy { // OnElementsMatching will bind an attribute policy to all elements matching a given regex // and return the updated policy func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy { - for _, attr := range abp.attrNames { - if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { - abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) - } - ap := attrPolicy{} - if abp.regexp != nil { - ap.regexp = abp.regexp - } - abp.p.elsMatchingAndAttrs[regex][attr] = ap + for _, attr := range abp.attrNames { + if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { + abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) } + ap := attrPolicy{} + if abp.regexp != nil { + ap.regexp = abp.regexp + } + abp.p.elsMatchingAndAttrs[regex][attr] = ap + } - if abp.allowEmpty { - abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs,regex) - if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { - abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) - } + if abp.allowEmpty { + abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex) + if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok { + abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy) } + } return abp.p } diff --git a/policy_test.go b/policy_test.go index 681e958..c9bb1bf 100644 --- a/policy_test.go +++ b/policy_test.go @@ -63,23 +63,23 @@ func TestAllowElementsContent(t *testing.T) { } func TestElementsMatching(t *testing.T) { - tests := map[string]struct{ - regexs []*regexp.Regexp - in string + tests := map[string]struct { + regexs []*regexp.Regexp + in string expected 
string }{ - "Self closing tags with regex prefix should strip any that do not match":{ + "Self closing tags with regex prefix should strip any that do not match": { regexs: []*regexp.Regexp{ regexp.MustCompile(`^my-element-`), }, - in:`
`, - expected:`
`, - },"Standard elements regex prefix should strip any that do not match":{ + in: `
`, + expected: `
`, + }, "Standard elements regex prefix should strip any that do not match": { regexs: []*regexp.Regexp{ regexp.MustCompile(`^my-element-`), }, - in:`
`, - expected:`
`, + in: `
`, + expected: `
`, }, } @@ -87,7 +87,7 @@ func TestElementsMatching(t *testing.T) { policy := NewPolicy().AllowElements("div") policy.AllowNoAttrs().OnElementsMatching(test.regexs[0]) policy.AllowDataAttributes() - for _, regex := range test.regexs{ + for _, regex := range test.regexs { policy.AllowElementsMatching(regex) } out := policy.Sanitize(test.in) @@ -102,4 +102,3 @@ func TestElementsMatching(t *testing.T) { } } } - diff --git a/sanitize.go b/sanitize.go index 6ab8c28..1c444e6 100644 --- a/sanitize.go +++ b/sanitize.go @@ -236,17 +236,17 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { if !ok { fmt.Println("Not OKAY") // check if we have any regex that match the element - if aps == nil{ - aps = make(map[string]attrPolicy,0) + if aps == nil { + aps = make(map[string]attrPolicy, 0) } matched := false - for regex, attrs := range p.elsMatchingAndAttrs{ + for regex, attrs := range p.elsMatchingAndAttrs { fmt.Println("Start tag Iterating Regexps") - if regex.MatchString(token.Data){ + if regex.MatchString(token.Data) { matched = true fmt.Println("Start tag Matched and appending") // append matching attrs on as could have multiple depending on match - for k, v := range attrs{ + for k, v := range attrs { aps[k] = v } } @@ -303,23 +303,23 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { } break } -fmt.Println("End tag") + fmt.Println("End tag") if _, ok := p.elsAndAttrs[token.Data]; !ok { match := false - for regex, _ := range p.elsMatchingAndAttrs{ + for regex := range p.elsMatchingAndAttrs { if regex.MatchString(token.Data) { skipElementContent = false match = true break - } } + } if _, ok := p.setOfElementsToSkipContent[token.Data]; ok && !match { skippingElementsCount-- if skippingElementsCount == 0 { skipElementContent = false } } - if !match{ + if !match { if p.addSpaces { buff.WriteString(" ") } @@ -335,22 +335,22 @@ fmt.Println("End tag") aps, ok := p.elsAndAttrs[token.Data] if !ok { - if aps == nil{ - aps = make(map[string]attrPolicy,0) + if aps == nil { 
+ aps = make(map[string]attrPolicy, 0) } matched := false - for regex, attrs := range p.elsMatchingAndAttrs{ + for regex, attrs := range p.elsMatchingAndAttrs { fmt.Println("Self Close Iterating Regexps") - if regex.MatchString(token.Data){ + if regex.MatchString(token.Data) { matched = true fmt.Println("Self Close Matched and appending") // append matching attrs on as could have multiple depending on match - for k, v := range attrs{ + for k, v := range attrs { aps[k] = v } } } - if !matched{ + if !matched { if p.addSpaces && !matched { buff.WriteString(" ") } @@ -757,9 +757,9 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At func (p *Policy) allowNoAttrs(elementName string) bool { _, ok := p.setOfElementsAllowedWithoutAttrs[elementName] - if !ok{ - for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs{ - if r.MatchString(elementName){ + if !ok { + for _, r := range p.setOfElementsMatchingAllowedWithoutAttrs { + if r.MatchString(elementName) { ok = true break } From 356b2f0a6c3bd374e2b817a5c3e1b99ff1cb232e Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Mon, 14 Oct 2019 09:45:06 +0100 Subject: [PATCH 3/6] adding ignore file and more tests --- .gitignore | 15 ++++++ policy_test.go | 127 ++++++++++++++++++++++++++++++++++++++++++++----- 2 files changed, 129 insertions(+), 13 deletions(-) create mode 100644 .gitignore diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..c3df40e --- /dev/null +++ b/.gitignore @@ -0,0 +1,15 @@ + # Binaries for programs and plugins +*.exe +*.exe~ +*.dll +*.so +*.dylib + +# Test binary, built with `go test -c` +*.test + +# Output of the go coverage tool, specifically when used with LiteIDE +*.out + +# goland idea folder +*.idea \ No newline at end of file diff --git a/policy_test.go b/policy_test.go index c9bb1bf..8912174 100644 --- a/policy_test.go +++ b/policy_test.go @@ -62,33 +62,63 @@ func TestAllowElementsContent(t *testing.T) { } } -func TestElementsMatching(t *testing.T) 
{ +func TestAllowElementsMatching(t *testing.T) { tests := map[string]struct { - regexs []*regexp.Regexp + policyFn func(policy *Policy) in string expected string }{ "Self closing tags with regex prefix should strip any that do not match": { - regexs: []*regexp.Regexp{ - regexp.MustCompile(`^my-element-`), + policyFn: func(policy *Policy) { + policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) }, - in: `
`, - expected: `
`, + in: `
+ + + +
`, + expected: `
+ + + +
`, }, "Standard elements regex prefix should strip any that do not match": { - regexs: []*regexp.Regexp{ - regexp.MustCompile(`^my-element-`), + policyFn: func(policy *Policy) { + policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) }, - in: `
`, - expected: `
`, + in: `
+ + + +
`, + expected: `
+ + + +
`, + },"Self closing tags with regex prefix and custom attr should strip any that do not match": { + policyFn: func(policy *Policy) { + policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowElements("not-my-element-demo-one") + }, + in: `
+ + + +
`, + expected: `
+ + + +
`, }, } for name, test := range tests { policy := NewPolicy().AllowElements("div") - policy.AllowNoAttrs().OnElementsMatching(test.regexs[0]) policy.AllowDataAttributes() - for _, regex := range test.regexs { - policy.AllowElementsMatching(regex) + if test.policyFn != nil{ + test.policyFn(policy) } out := policy.Sanitize(test.in) if out != test.expected { @@ -102,3 +132,74 @@ func TestElementsMatching(t *testing.T) { } } } + +func TestAttrToElementMatching(t *testing.T){ + tests := map[string]struct { + policyFn func(policy *Policy) + in string + expected string + }{ + "Self closing tags with regex prefix should strip any that do not match": { + policyFn: func(policy *Policy) { + policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) + }, + in: `
+ + + +
`, + expected: `
+ + + +
`, + }, "Standard elements regex prefix should strip any that do not match": { + policyFn: func(policy *Policy) { + policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) + }, + in: `
+ + + +
`, + expected: `
+ + + +
`, + },"Self closing tags with regex prefix and custom attr should strip any that do not match": { + policyFn: func(policy *Policy) { + policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowElements("not-my-element-demo-one") + }, + in: `
+ + + +
`, + expected: `
+ + + +
`, + }, + } + + for name, test := range tests { + policy := NewPolicy().AllowElements("div") + policy.AllowDataAttributes() + if test.policyFn != nil{ + test.policyFn(policy) + } + out := policy.Sanitize(test.in) + if out != test.expected { + t.Errorf( + "test %s failed;\ninput : %s\noutput : %s\nexpected: %s", + name, + test.in, + out, + test.expected, + ) + } + } +} \ No newline at end of file From 6074ff7ee9127a3adc6c8e0da84763046b12484d Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Mon, 14 Oct 2019 09:49:37 +0100 Subject: [PATCH 4/6] remove idea folder --- .idea/.gitignore | 2 -- .idea/bluemonday.iml | 9 --------- .idea/misc.xml | 6 ------ .idea/modules.xml | 8 -------- .idea/vcs.xml | 6 ------ 5 files changed, 31 deletions(-) delete mode 100644 .idea/.gitignore delete mode 100644 .idea/bluemonday.iml delete mode 100644 .idea/misc.xml delete mode 100644 .idea/modules.xml delete mode 100644 .idea/vcs.xml diff --git a/.idea/.gitignore b/.idea/.gitignore deleted file mode 100644 index 5c98b42..0000000 --- a/.idea/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -# Default ignored files -/workspace.xml \ No newline at end of file diff --git a/.idea/bluemonday.iml b/.idea/bluemonday.iml deleted file mode 100644 index 5e764c4..0000000 --- a/.idea/bluemonday.iml +++ /dev/null @@ -1,9 +0,0 @@ - - - - - - - - - \ No newline at end of file diff --git a/.idea/misc.xml b/.idea/misc.xml deleted file mode 100644 index 28a804d..0000000 --- a/.idea/misc.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - \ No newline at end of file diff --git a/.idea/modules.xml b/.idea/modules.xml deleted file mode 100644 index db9d351..0000000 --- a/.idea/modules.xml +++ /dev/null @@ -1,8 +0,0 @@ - - - - - - - - \ No newline at end of file diff --git a/.idea/vcs.xml b/.idea/vcs.xml deleted file mode 100644 index 94a25f7..0000000 --- a/.idea/vcs.xml +++ /dev/null @@ -1,6 +0,0 @@ - - - - - - \ No newline at end of file From 6d3f97351f5a961d45456571eb359047b615117e Mon Sep 17 00:00:00 2001 From: Mark Smith 
Date: Mon, 14 Oct 2019 14:25:38 +0100 Subject: [PATCH 5/6] added element regex matching to styles, tests added and readme updated --- README.md | 23 +++++++++ policy.go | 30 ++++++++++++ policy_test.go | 126 ++++++++++++++++++++++++++++++++++++++++++------- sanitize.go | 33 +++++++++---- 4 files changed, 187 insertions(+), 25 deletions(-) diff --git a/README.md b/README.md index ac17b4c..cd2766f 100644 --- a/README.md +++ b/README.md @@ -169,12 +169,26 @@ To add elements to a policy either add just the elements: p.AllowElements("b", "strong") ``` +Or using a regex: + +_Note: if an element is added by name as shown above, any matching regex will be ignored_ + +It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed. +```go +p.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) +``` + Or add elements as a virtue of adding an attribute: ```go // Not the recommended pattern, see the recommendation on using .Matching() below p.AllowAttrs("nowrap").OnElements("td", "th") ``` +Again, this also supports a regex pattern match alternative: +```go +p.AllowAttrs("nowrap").OnElementsMatching(regexp.MustCompile(`^my-element-`)) +``` + Attributes can either be added to all elements: ```go p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally() @@ -226,6 +240,15 @@ p.AllowAttrs("style").OnElements("span", "p") // on 'span' elements only p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span") ``` + +Or you can specify elements based on a regex pattern match: +```go +p.AllowAttrs("style").OnElementsMatching(regexp.MustCompile(`^my-element-`)) +// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none' +// on elements matching the pattern only +p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElementsMatching(regexp.MustCompile(`^my-element-`)) +``` + If you need more specific 
checking, you can create a handler that takes in a string and returns a bool to validate the values for a given property. The string parameter has been converted to lowercase and unicode code points have been converted. diff --git a/policy.go b/policy.go index acf4166..739d302 100644 --- a/policy.go +++ b/policy.go @@ -95,6 +95,9 @@ type Policy struct { // map[htmlElementName]map[cssPropertyName]stylePolicy elsAndStyles map[string]map[string]stylePolicy + // map[regex]map[cssPropertyName]stylePolicy + elsMatchingAndStyles map[*regexp.Regexp]map[string]stylePolicy + // map[cssPropertyName]stylePolicy globalStyles map[string]stylePolicy @@ -172,6 +175,7 @@ func (p *Policy) init() { p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string]attrPolicy) p.globalAttrs = make(map[string]attrPolicy) p.elsAndStyles = make(map[string]map[string]stylePolicy) + p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string]stylePolicy) p.globalStyles = make(map[string]stylePolicy) p.allowURLSchemes = make(map[string]urlPolicy) p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{}) @@ -425,6 +429,32 @@ func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy { return spb.p } +// OnElementsMatching will bind a style policy to any HTML elements matching the pattern +// and return the updated policy +func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy { + + for _, attr := range spb.propertyNames { + + if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok { + spb.p.elsMatchingAndStyles[regex] = make(map[string]stylePolicy) + } + + sp := stylePolicy{} + if spb.handler != nil { + sp.handler = spb.handler + } else if len(spb.enum) > 0 { + sp.enum = spb.enum + } else if spb.regexp != nil { + sp.regexp = spb.regexp + } else { + sp.handler = getDefaultHandler(attr) + } + spb.p.elsMatchingAndStyles[regex][attr] = sp + } + + return spb.p +} + // Globally will bind a style policy to all HTML elements and return the // updated policy func (spb 
*stylePolicyBuilder) Globally() *Policy { diff --git a/policy_test.go b/policy_test.go index 8912174..b352c30 100644 --- a/policy_test.go +++ b/policy_test.go @@ -133,55 +133,149 @@ func TestAllowElementsMatching(t *testing.T) { } } -func TestAttrToElementMatching(t *testing.T){ +func TestAttrOnElementMatching(t *testing.T){ tests := map[string]struct { policyFn func(policy *Policy) in string expected string }{ - "Self closing tags with regex prefix should strip any that do not match": { + "Self closing tags with regex prefix should strip any that do not match with custom attr": { policyFn: func(policy *Policy) { - policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowAttrs("my-attr").OnElementsMatching(regexp.MustCompile(`^my-element-`)) }, in: `
- +
`, expected: `
- +
`, }, "Standard elements regex prefix should strip any that do not match": { policyFn: func(policy *Policy) { - policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowAttrs("my-attr").OnElementsMatching(regexp.MustCompile(`^my-element-`)) }, in: `
- - - + + +
`, expected: `
- +
`, - },"Self closing tags with regex prefix and custom attr should strip any that do not match": { + },"Specific element rule defined should override matching rules": { policyFn: func(policy *Policy) { - policy.AllowElementsMatching(regexp.MustCompile(`^my-element-`)) - policy.AllowElements("not-my-element-demo-one") + // specific element rule + policy.AllowAttrs("my-other-attr").OnElements("my-element-demo-one") + // matched rule takes lower precedence + policy.AllowAttrs("my-attr").OnElementsMatching(regexp.MustCompile(`^my-element-`)) }, in: `
- - + +
`, expected: `
- + + + +
`, + }, + } + + for name, test := range tests { + policy := NewPolicy().AllowElements("div") + policy.AllowDataAttributes() + if test.policyFn != nil{ + test.policyFn(policy) + } + out := policy.Sanitize(test.in) + if out != test.expected { + t.Errorf( + "test %s failed;\ninput : %s\noutput : %s\nexpected: %s", + name, + test.in, + out, + test.expected, + ) + } + } +} + +func TestStyleOnElementMatching(t *testing.T){ + tests := map[string]struct { + policyFn func(policy *Policy) + in string + expected string + }{ + "Self closing tags with style policy matching prefix should strip any that do not match with custom attr": { + policyFn: func(policy *Policy) { + policy.AllowAttrs("style"). + OnElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowStyles("color","mystyle"). + MatchingHandler(func(s string) bool { + return true + }).OnElementsMatching(regexp.MustCompile(`^my-element-`)) + }, + in: `
+ + + +
`, + expected: `
+ + +
`, + }, "Standard elements with style policy and matching elements should strip any styles not allowed": { + policyFn: func(policy *Policy) { + policy.AllowAttrs("style"). + OnElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowStyles("color","mystyle"). + MatchingHandler(func(s string) bool { + return true + }).OnElementsMatching(regexp.MustCompile(`^my-element-`)) + }, + in: `
+ + + +
`, + expected: `
+ + + +
`, + },"Specific element rule defined should override matching rules": { + policyFn: func(policy *Policy) { + policy.AllowAttrs("style"). + OnElements("my-element-demo-one") + policy.AllowStyles("color","mystyle"). + MatchingHandler(func(s string) bool { + return true + }).OnElements("my-element-demo-one") + + policy.AllowAttrs("style"). + OnElementsMatching(regexp.MustCompile(`^my-element-`)) + policy.AllowStyles("color","customstyle"). + MatchingHandler(func(s string) bool { + return true + }).OnElementsMatching(regexp.MustCompile(`^my-element-`)) + }, + in: `
+ +
`, + expected: `
+ + + +
`, }, } diff --git a/sanitize.go b/sanitize.go index 1c444e6..830ec54 100644 --- a/sanitize.go +++ b/sanitize.go @@ -31,7 +31,6 @@ package bluemonday import ( "bytes" - "fmt" "io" "net/url" "regexp" @@ -234,17 +233,14 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { aps, ok := p.elsAndAttrs[token.Data] if !ok { - fmt.Println("Not OKAY") // check if we have any regex that match the element if aps == nil { aps = make(map[string]attrPolicy, 0) } matched := false for regex, attrs := range p.elsMatchingAndAttrs { - fmt.Println("Start tag Iterating Regexps") if regex.MatchString(token.Data) { matched = true - fmt.Println("Start tag Matched and appending") // append matching attrs on as could have multiple depending on match for k, v := range attrs { aps[k] = v @@ -303,7 +299,6 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { } break } - fmt.Println("End tag") if _, ok := p.elsAndAttrs[token.Data]; !ok { match := false for regex := range p.elsMatchingAndAttrs { @@ -340,10 +335,8 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { } matched := false for regex, attrs := range p.elsMatchingAndAttrs { - fmt.Println("Self Close Iterating Regexps") if regex.MatchString(token.Data) { matched = true - fmt.Println("Self Close Matched and appending") // append matching attrs on as could have multiple depending on match for k, v := range attrs { aps[k] = v @@ -420,6 +413,17 @@ func (p *Policy) sanitizeAttrs( if len(p.globalStyles) > 0 || (elementHasStylePolicies && len(sps) > 0) { hasStylePolicies = true } + // no specific element policy found, look for a pattern match + if !hasStylePolicies{ + for k, v := range p.elsMatchingAndStyles{ + if k.MatchString(elementName) { + if len(v) > 0{ + hasStylePolicies = true + break + } + } + } + } // Builds a new attribute slice based on the whether the attribute has been // whitelisted explicitly or globally. 
@@ -689,6 +693,19 @@ func (p *Policy) sanitizeAttrs( func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.Attribute { sps := p.elsAndStyles[elementName] + if len(sps) == 0{ + sps = map[string]stylePolicy{} + // check for any matching elements, if we don't already have a policy found + // if multiple matches are found they will be overwritten, it's best + // to not have overlapping matchers + for regex, policies :=range p.elsMatchingAndStyles{ + if regex.MatchString(elementName){ + for k, v := range policies{ + sps[k] = v + } + } + } + } //Add semi-colon to end to fix parsing issue if len(attr.Val) > 0 && attr.Val[len(attr.Val)-1] != ';' { @@ -699,7 +716,6 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At attr.Val = "" return attr } - clean := []string{} prefixes := []string{"-webkit-", "-moz-", "-ms-", "-o-", "mso-", "-xv-", "-atsc-", "-wap-", "-khtml-", "prince-", "-ah-", "-hp-", "-ro-", "-rim-", "-tc-"} @@ -729,7 +745,6 @@ func (p *Policy) sanitizeStyles(attr html.Attribute, elementName string) html.At continue } } - if sp, ok := p.globalStyles[tempProperty]; ok && !addedProperty { if sp.handler != nil { if sp.handler(tempValue) { From ae99c5d0cf0085cfcf3e545e5b588de76863cc9a Mon Sep 17 00:00:00 2001 From: Mark Smith Date: Mon, 14 Oct 2019 14:38:41 +0100 Subject: [PATCH 6/6] removing a bit of duplicateion of regex matching --- sanitize.go | 46 ++++++++++++++++++---------------------------- 1 file changed, 18 insertions(+), 28 deletions(-) diff --git a/sanitize.go b/sanitize.go index 830ec54..771fc2c 100644 --- a/sanitize.go +++ b/sanitize.go @@ -233,20 +233,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { aps, ok := p.elsAndAttrs[token.Data] if !ok { - // check if we have any regex that match the element - if aps == nil { - aps = make(map[string]attrPolicy, 0) - } - matched := false - for regex, attrs := range p.elsMatchingAndAttrs { - if regex.MatchString(token.Data) { - matched = true - // 
append matching attrs on as could have multiple depending on match - for k, v := range attrs { - aps[k] = v - } - } - } + aa, matched := p.matchRegex(token.Data) if !matched { if _, ok := p.setOfElementsToSkipContent[token.Data]; ok { skipElementContent = true @@ -257,7 +244,7 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { } break } - + aps = aa } if len(token.Attr) != 0 { token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps) @@ -330,25 +317,14 @@ func (p *Policy) sanitize(r io.Reader) *bytes.Buffer { aps, ok := p.elsAndAttrs[token.Data] if !ok { - if aps == nil { - aps = make(map[string]attrPolicy, 0) - } - matched := false - for regex, attrs := range p.elsMatchingAndAttrs { - if regex.MatchString(token.Data) { - matched = true - // append matching attrs on as could have multiple depending on match - for k, v := range attrs { - aps[k] = v - } - } - } + aa, matched := p.matchRegex(token.Data) if !matched { if p.addSpaces && !matched { buff.WriteString(" ") } break } + aps = aa } if len(token.Attr) != 0 { @@ -897,3 +873,17 @@ func removeUnicode(value string) string { } return substitutedValue } + +func (p *Policy) matchRegex(elementName string ) (map[string]attrPolicy, bool) { + aps := make(map[string]attrPolicy, 0) + matched := false + for regex, attrs := range p.elsMatchingAndAttrs { + if regex.MatchString(elementName) { + matched = true + for k, v := range attrs { + aps[k] = v + } + } + } + return aps, matched +}