Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

AllowElements regex support #92

Merged
merged 6 commits into from Nov 19, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
15 changes: 15 additions & 0 deletions .gitignore
@@ -0,0 +1,15 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# goland idea folder
*.idea
23 changes: 23 additions & 0 deletions README.md
Expand Up @@ -169,12 +169,26 @@ To add elements to a policy either add just the elements:
p.AllowElements("b", "strong")
```

Or using a regex:

_Note: if an element is added by name as shown above, any matching regex will be ignored_

It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed.
```go
p.AllowElementsMatching(regex.MustCompile(`^my-element-`))
```

Or add elements as a virtue of adding an attribute:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("nowrap").OnElements("td", "th")
```

Again, this also supports a regex pattern match alternative:
```go
p.AllowAttrs("nowrap").OnElementsMatching(regex.MustCompile(`^my-element-`))
```

Attributes can either be added to all elements:
```go
p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally()
Expand Down Expand Up @@ -226,6 +240,15 @@ p.AllowAttrs("style").OnElements("span", "p")
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span")
```

Or you can specify elements based on a regex patterm match:
```go
p.AllowAttrs("style").OnElementsMatching(regex.MustCompile(`^my-element-`))
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElementsMatching(regex.MustCompile(`^my-element-`))
```

If you need more specific checking, you can create a handler that takes in a string and returns a bool to
validate the values for a given property. The string parameter has been
converted to lowercase and unicode code points have been converted.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -3,7 +3,7 @@ module github.com/microcosm-cc/bluemonday
go 1.9

require (
github.com/aymerick/douceur v0.2.0
github.com/aymerick/douceur v0.2.0 // indirect
github.com/chris-ramon/douceur v0.2.0
github.com/gorilla/css v1.0.0 // indirect
golang.org/x/net v0.0.0-20181220203305-927f97764cc3
Expand Down
76 changes: 76 additions & 0 deletions policy.go
Expand Up @@ -86,12 +86,18 @@ type Policy struct {
// map[htmlElementName]map[htmlAttributeName]attrPolicy
elsAndAttrs map[string]map[string]attrPolicy

// elsMatchingAndAttrs stores regex based element matches along with attributes
elsMatchingAndAttrs map[*regexp.Regexp]map[string]attrPolicy

// map[htmlAttributeName]attrPolicy
globalAttrs map[string]attrPolicy

// map[htmlElementName]map[cssPropertyName]stylePolicy
elsAndStyles map[string]map[string]stylePolicy

// map[regex]map[cssPropertyName]stylePolicy
elsMatchingAndStyles map[*regexp.Regexp]map[string]stylePolicy

// map[cssPropertyName]stylePolicy
globalStyles map[string]stylePolicy

Expand All @@ -109,6 +115,16 @@ type Policy struct {
// be maintained in the output HTML.
setOfElementsAllowedWithoutAttrs map[string]struct{}

// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
// However some elements are valid and have strong layout meaning without
// any attributes, i.e. <table>.
//
// In this case, any element matching a regular expression will be accepted without
// attributes added.
setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp

setOfElementsToSkipContent map[string]struct{}
}

Expand Down Expand Up @@ -156,8 +172,10 @@ type urlPolicy func(url *url.URL) (allowUrl bool)
func (p *Policy) init() {
if !p.initialized {
p.elsAndAttrs = make(map[string]map[string]attrPolicy)
p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string]attrPolicy)
p.globalAttrs = make(map[string]attrPolicy)
p.elsAndStyles = make(map[string]map[string]stylePolicy)
p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string]stylePolicy)
p.globalStyles = make(map[string]stylePolicy)
p.allowURLSchemes = make(map[string]urlPolicy)
p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
Expand Down Expand Up @@ -286,6 +304,30 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
return abp.p
}

// OnElementsMatching will bind an attribute policy to all elements matching a given regex
// and return the updated policy
func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
for _, attr := range abp.attrNames {
if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
}
ap := attrPolicy{}
if abp.regexp != nil {
ap.regexp = abp.regexp
}
abp.p.elsMatchingAndAttrs[regex][attr] = ap
}

if abp.allowEmpty {
abp.p.setOfElementsMatchingAllowedWithoutAttrs = append(abp.p.setOfElementsMatchingAllowedWithoutAttrs, regex)
if _, ok := abp.p.elsMatchingAndAttrs[regex]; !ok {
abp.p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
}
}

return abp.p
}

// Globally will bind an attribute policy to all HTML elements and return the
// updated policy
func (abp *attrPolicyBuilder) Globally() *Policy {
Expand Down Expand Up @@ -387,6 +429,32 @@ func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
return spb.p
}

// OnElementsMatching will bind a style policy to any HTML elements matching the pattern
// and return the updated policy
func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {

for _, attr := range spb.propertyNames {

if _, ok := spb.p.elsMatchingAndStyles[regex]; !ok {
spb.p.elsMatchingAndStyles[regex] = make(map[string]stylePolicy)
}

sp := stylePolicy{}
if spb.handler != nil {
sp.handler = spb.handler
} else if len(spb.enum) > 0 {
sp.enum = spb.enum
} else if spb.regexp != nil {
sp.regexp = spb.regexp
} else {
sp.handler = getDefaultHandler(attr)
}
spb.p.elsMatchingAndStyles[regex][attr] = sp
}

return spb.p
}

// Globally will bind a style policy to all HTML elements and return the
// updated policy
func (spb *stylePolicyBuilder) Globally() *Policy {
Expand Down Expand Up @@ -430,6 +498,14 @@ func (p *Policy) AllowElements(names ...string) *Policy {
return p
}

func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
p.init()
if _, ok := p.elsMatchingAndAttrs[regex]; !ok {
p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
}
return p
}

// RequireNoFollowOnLinks will result in all a, area, link tags having a
// rel="nofollow"added to them if one does not already exist
//
Expand Down