Skip to content

Commit

Permalink
Closes #143 Rely on html and url packages for sanitization, don't rol…
Browse files Browse the repository at this point in the history
…l your own as produced double-escaping
  • Loading branch information
buro9 committed Jul 1, 2022
1 parent 4f006b3 commit cdefdb2
Show file tree
Hide file tree
Showing 2 changed files with 46 additions and 23 deletions.
23 changes: 2 additions & 21 deletions sanitize.go
Expand Up @@ -322,9 +322,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
aps = aa
}
if len(token.Attr) != 0 {
token.Attr = escapeAttributes(
p.sanitizeAttrs(token.Data, token.Attr, aps),
)
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
}

if len(token.Attr) == 0 {
Expand Down Expand Up @@ -434,7 +432,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error {
}

if len(token.Attr) != 0 {
token.Attr = escapeAttributes(p.sanitizeAttrs(token.Data, token.Attr, aps))
token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps)
}

if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) {
Expand Down Expand Up @@ -565,11 +563,9 @@ attrsLoop:
for _, ap := range apl {
if ap.regexp != nil {
if ap.regexp.MatchString(htmlAttr.Val) {
htmlAttr.Val = escapeAttribute(htmlAttr.Val)
cleanAttrs = append(cleanAttrs, htmlAttr)
}
} else {
htmlAttr.Val = escapeAttribute(htmlAttr.Val)
cleanAttrs = append(cleanAttrs, htmlAttr)
}
}
Expand Down Expand Up @@ -1112,18 +1108,3 @@ func normaliseElementName(str string) string {
`"`,
)
}

func escapeAttributes(attrs []html.Attribute) []html.Attribute {
escapedAttrs := []html.Attribute{}
for _, attr := range attrs {
attr.Val = escapeAttribute(attr.Val)
escapedAttrs = append(escapedAttrs, attr)
}
return escapedAttrs
}

func escapeAttribute(val string) string {
val = strings.Replace(val, string([]rune{'\u00A0'}), ` `, -1)
val = strings.Replace(val, `"`, `"`, -1)
return val
}
46 changes: 44 additions & 2 deletions sanitize_test.go
Expand Up @@ -3201,11 +3201,11 @@ func TestHrefSanitization(t *testing.T) {
tests := []test{
{
in: `abc<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">CLICK`,
expected: `abc<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">CLICK`,
expected: `abc<a href="https://abc&#34;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">CLICK`,
},
{
in: `<a href="https://abc&quot;&gt;<script&gt;alert(1)<&#x2f;script/">`,
expected: `<a href="https://abc&amp;quot;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">`,
expected: `<a href="https://abc&#34;&gt;&lt;script&gt;alert(1)&lt;/script/" rel="nofollow">`,
},
}

Expand Down Expand Up @@ -3711,6 +3711,48 @@ func TestIssue107(t *testing.T) {
wg.Wait()
}

func TestIssue143(t *testing.T) {
// HTML escaping of attribute values appears to occur twice
tests := []test{
{
in: `<p title='"'></p>`,
expected: `<p title="&#34;"></p>`,
},
{
in: `<p title="&quot;"></p>`,
expected: `<p title="&#34;"></p>`,
},
{
in: `<p title="&nbsp;"></p>`,
expected: `<p title=" "></p>`,
},
}

p := UGCPolicy()
p.AllowAttrs("title").OnElements("p")

// These tests are run concurrently to enable the race detector to pick up
// potential issues
wg := sync.WaitGroup{}
wg.Add(len(tests))
for ii, tt := range tests {
go func(ii int, tt test) {
out := p.Sanitize(tt.in)
if out != tt.expected {
t.Errorf(
"test %d failed;\ninput : %s\noutput : %s\nexpected: %s",
ii,
tt.in,
out,
tt.expected,
)
}
wg.Done()
}(ii, tt)
}
wg.Wait()
}

func TestIssue146(t *testing.T) {
// https://github.com/microcosm-cc/bluemonday/issues/146
//
Expand Down

0 comments on commit cdefdb2

Please sign in to comment.