Skip to content

Commit

Permalink
Merge pull request #92 from theflyingcodr/element_regex
Browse files Browse the repository at this point in the history
AllowElements regex support
  • Loading branch information
buro9 committed Nov 19, 2019
2 parents 1c05eea + ae99c5d commit 0a75d76
Show file tree
Hide file tree
Showing 6 changed files with 434 additions and 21 deletions.
15 changes: 15 additions & 0 deletions .gitignore
@@ -0,0 +1,15 @@
# Binaries for programs and plugins
*.exe
*.exe~
*.dll
*.so
*.dylib

# Test binary, built with `go test -c`
*.test

# Output of the go coverage tool, specifically when used with LiteIDE
*.out

# goland idea folder
*.idea
23 changes: 23 additions & 0 deletions README.md
Expand Up @@ -169,12 +169,26 @@ To add elements to a policy either add just the elements:
p.AllowElements("b", "strong")
```

Or using a regex:

_Note: if an element is added by name as shown above, any matching regex will be ignored_

It is also recommended to ensure multiple patterns don't overlap as order of execution is not guaranteed and can result in some rules being missed.
```go
p.AllowElementsMatching(regexp.MustCompile(`^my-element-`))
```

Or add elements by virtue of adding an attribute:
```go
// Not the recommended pattern, see the recommendation on using .Matching() below
p.AllowAttrs("nowrap").OnElements("td", "th")
```

Again, this also supports a regex pattern match alternative:
```go
p.AllowAttrs("nowrap").OnElementsMatching(regexp.MustCompile(`^my-element-`))
```

Attributes can either be added to all elements:
```go
p.AllowAttrs("dir").Matching(regexp.MustCompile("(?i)rtl|ltr")).Globally()
Expand Down Expand Up @@ -226,6 +240,15 @@ p.AllowAttrs("style").OnElements("span", "p")
// on 'span' elements only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElements("span")
```

Or you can specify elements based on a regex pattern match:
```go
p.AllowAttrs("style").OnElementsMatching(regexp.MustCompile(`^my-element-`))
// Allow the 'text-decoration' property to be set to 'underline', 'line-through' or 'none'
// on elements whose name matches the regex only
p.AllowStyles("text-decoration").MatchingEnum("underline", "line-through", "none").OnElementsMatching(regexp.MustCompile(`^my-element-`))
```

If you need more specific checking, you can create a handler that takes in a string and returns a bool to
validate the values for a given property. The string parameter has been
converted to lowercase and unicode code points have been converted.
Expand Down
2 changes: 1 addition & 1 deletion go.mod
Expand Up @@ -3,7 +3,7 @@ module github.com/microcosm-cc/bluemonday
go 1.9

require (
github.com/aymerick/douceur v0.2.0
github.com/aymerick/douceur v0.2.0 // indirect
github.com/chris-ramon/douceur v0.2.0
github.com/gorilla/css v1.0.0 // indirect
golang.org/x/net v0.0.0-20181220203305-927f97764cc3
Expand Down
76 changes: 76 additions & 0 deletions policy.go
Expand Up @@ -86,12 +86,18 @@ type Policy struct {
// map[htmlElementName]map[htmlAttributeName]attrPolicy
elsAndAttrs map[string]map[string]attrPolicy

// elsMatchingAndAttrs stores regex based element matches along with attributes
elsMatchingAndAttrs map[*regexp.Regexp]map[string]attrPolicy

// map[htmlAttributeName]attrPolicy
globalAttrs map[string]attrPolicy

// map[htmlElementName]map[cssPropertyName]stylePolicy
elsAndStyles map[string]map[string]stylePolicy

// map[regex]map[cssPropertyName]stylePolicy
elsMatchingAndStyles map[*regexp.Regexp]map[string]stylePolicy

// map[cssPropertyName]stylePolicy
globalStyles map[string]stylePolicy

Expand All @@ -109,6 +115,16 @@ type Policy struct {
// be maintained in the output HTML.
setOfElementsAllowedWithoutAttrs map[string]struct{}

// If an element has had all attributes removed as a result of a policy
// being applied, then the element would be removed from the output.
//
// However some elements are valid and have strong layout meaning without
// any attributes, i.e. <table>.
//
// In this case, any element matching a regular expression will be accepted without
// attributes added.
setOfElementsMatchingAllowedWithoutAttrs []*regexp.Regexp

setOfElementsToSkipContent map[string]struct{}
}

Expand Down Expand Up @@ -156,8 +172,10 @@ type urlPolicy func(url *url.URL) (allowUrl bool)
func (p *Policy) init() {
if !p.initialized {
p.elsAndAttrs = make(map[string]map[string]attrPolicy)
p.elsMatchingAndAttrs = make(map[*regexp.Regexp]map[string]attrPolicy)
p.globalAttrs = make(map[string]attrPolicy)
p.elsAndStyles = make(map[string]map[string]stylePolicy)
p.elsMatchingAndStyles = make(map[*regexp.Regexp]map[string]stylePolicy)
p.globalStyles = make(map[string]stylePolicy)
p.allowURLSchemes = make(map[string]urlPolicy)
p.setOfElementsAllowedWithoutAttrs = make(map[string]struct{})
Expand Down Expand Up @@ -286,6 +304,30 @@ func (abp *attrPolicyBuilder) OnElements(elements ...string) *Policy {
return abp.p
}

// OnElementsMatching binds the attribute policy under construction to every
// element whose name matches regex, and returns the updated policy.
//
// Each attribute name held by the builder is recorded in the policy's
// elsMatchingAndAttrs map under the given regex key; the attribute's value
// regexp is copied across when the builder has one. If the builder is flagged
// allowEmpty, regex is also appended to
// setOfElementsMatchingAllowedWithoutAttrs and an entry for regex is
// guaranteed to exist in elsMatchingAndAttrs even when no attribute names
// were supplied.
//
// The map is keyed by the *regexp.Regexp pointer, so two separately compiled
// expressions with the same pattern are stored as distinct entries.
func (abp *attrPolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
	policy := abp.p

	for _, name := range abp.attrNames {
		// Lazily create the per-regex attribute table on first use.
		attrs, found := policy.elsMatchingAndAttrs[regex]
		if !found {
			attrs = make(map[string]attrPolicy)
			policy.elsMatchingAndAttrs[regex] = attrs
		}

		var rule attrPolicy
		if abp.regexp != nil {
			rule.regexp = abp.regexp
		}
		attrs[name] = rule
	}

	if !abp.allowEmpty {
		return policy
	}

	// Elements matching regex may be kept even with no attributes left.
	policy.setOfElementsMatchingAllowedWithoutAttrs = append(
		policy.setOfElementsMatchingAllowedWithoutAttrs,
		regex,
	)
	if _, found := policy.elsMatchingAndAttrs[regex]; !found {
		policy.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
	}

	return policy
}

// Globally will bind an attribute policy to all HTML elements and return the
// updated policy
func (abp *attrPolicyBuilder) Globally() *Policy {
Expand Down Expand Up @@ -387,6 +429,32 @@ func (spb *stylePolicyBuilder) OnElements(elements ...string) *Policy {
return spb.p
}

// OnElementsMatching binds the style policy under construction to every HTML
// element whose name matches regex, and returns the updated policy.
//
// For each CSS property name held by the builder a stylePolicy is stored in
// the policy's elsMatchingAndStyles map under the given regex key. The
// validator for the property is chosen in priority order: an explicit handler,
// then a non-empty enum, then a regexp, and finally the handler returned by
// getDefaultHandler for that property.
func (spb *stylePolicyBuilder) OnElementsMatching(regex *regexp.Regexp) *Policy {
	policy := spb.p

	for _, property := range spb.propertyNames {
		// Lazily create the per-regex style table on first use.
		styles, found := policy.elsMatchingAndStyles[regex]
		if !found {
			styles = make(map[string]stylePolicy)
			policy.elsMatchingAndStyles[regex] = styles
		}

		var rule stylePolicy
		switch {
		case spb.handler != nil:
			rule.handler = spb.handler
		case len(spb.enum) > 0:
			rule.enum = spb.enum
		case spb.regexp != nil:
			rule.regexp = spb.regexp
		default:
			// Nothing was configured on the builder; fall back to the
			// default handler for this property.
			rule.handler = getDefaultHandler(property)
		}
		styles[property] = rule
	}

	return policy
}

// Globally will bind a style policy to all HTML elements and return the
// updated policy
func (spb *stylePolicyBuilder) Globally() *Policy {
Expand Down Expand Up @@ -430,6 +498,14 @@ func (p *Policy) AllowElements(names ...string) *Policy {
return p
}

// AllowElementsMatching registers regex in the policy's elsMatchingAndAttrs
// map with an empty attribute table, unless an entry for that regex already
// exists, and returns the policy. The policy is initialised first via init.
//
// The map is keyed by the *regexp.Regexp pointer, so passing a second,
// separately compiled expression with the same pattern adds a new entry.
func (p *Policy) AllowElementsMatching(regex *regexp.Regexp) *Policy {
	p.init()

	// Keep any attribute table already registered for this regex.
	if _, exists := p.elsMatchingAndAttrs[regex]; exists {
		return p
	}

	p.elsMatchingAndAttrs[regex] = make(map[string]attrPolicy)
	return p
}

// RequireNoFollowOnLinks will result in all a, area, link tags having a
// rel="nofollow"added to them if one does not already exist
//
Expand Down

0 comments on commit 0a75d76

Please sign in to comment.