From cdefdb21caacec5aa8b3a8452a007443d5f9502a Mon Sep 17 00:00:00 2001 From: David Kitchen Date: Fri, 1 Jul 2022 13:07:00 +0100 Subject: [PATCH] Closes #143 Rely on html and url packages for sanitization, don't roll your own as produced double-escaping --- sanitize.go | 23 ++--------------------- sanitize_test.go | 46 ++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 46 insertions(+), 23 deletions(-) diff --git a/sanitize.go b/sanitize.go index 9bd91ab..904ee82 100644 --- a/sanitize.go +++ b/sanitize.go @@ -322,9 +322,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error { aps = aa } if len(token.Attr) != 0 { - token.Attr = escapeAttributes( - p.sanitizeAttrs(token.Data, token.Attr, aps), - ) + token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps) } if len(token.Attr) == 0 { @@ -434,7 +432,7 @@ func (p *Policy) sanitize(r io.Reader, w io.Writer) error { } if len(token.Attr) != 0 { - token.Attr = escapeAttributes(p.sanitizeAttrs(token.Data, token.Attr, aps)) + token.Attr = p.sanitizeAttrs(token.Data, token.Attr, aps) } if len(token.Attr) == 0 && !p.allowNoAttrs(token.Data) { @@ -565,11 +563,9 @@ attrsLoop: for _, ap := range apl { if ap.regexp != nil { if ap.regexp.MatchString(htmlAttr.Val) { - htmlAttr.Val = escapeAttribute(htmlAttr.Val) cleanAttrs = append(cleanAttrs, htmlAttr) } } else { - htmlAttr.Val = escapeAttribute(htmlAttr.Val) cleanAttrs = append(cleanAttrs, htmlAttr) } } @@ -1112,18 +1108,3 @@ func normaliseElementName(str string) string { `"`, ) } - -func escapeAttributes(attrs []html.Attribute) []html.Attribute { - escapedAttrs := []html.Attribute{} - for _, attr := range attrs { - attr.Val = escapeAttribute(attr.Val) - escapedAttrs = append(escapedAttrs, attr) - } - return escapedAttrs -} - -func escapeAttribute(val string) string { - val = strings.Replace(val, string([]rune{'\u00A0'}), ` `, -1) - val = strings.Replace(val, `"`, `"`, -1) - return val -} diff --git a/sanitize_test.go b/sanitize_test.go index 4927afb..97b6186 100644 --- a/sanitize_test.go +++ b/sanitize_test.go @@ -3201,11 +3201,11 @@ func TestHrefSanitization(t *testing.T) { tests := []test{ { in: `abcCLICK`, - expected: `abcCLICK`, + expected: `abcCLICK`, }, { in: ``, - expected: ``, + expected: ``, }, } @@ -3711,6 +3711,48 @@ func TestIssue107(t *testing.T) { wg.Wait() } +func TestIssue143(t *testing.T) { + // HTML escaping of attribute values appears to occur twice + tests := []test{ + { + in: `

`, + expected: `

`, + }, + { + in: `

`, + expected: `

`, + }, + { + in: `

`, + expected: `

`, + }, + } + + p := UGCPolicy() + p.AllowAttrs("title").OnElements("p") + + // These tests are run concurrently to enable the race detector to pick up + // potential issues + wg := sync.WaitGroup{} + wg.Add(len(tests)) + for ii, tt := range tests { + go func(ii int, tt test) { + out := p.Sanitize(tt.in) + if out != tt.expected { + t.Errorf( + "test %d failed;\ninput : %s\noutput : %s\nexpected: %s", + ii, + tt.in, + out, + tt.expected, + ) + } + wg.Done() + }(ii, tt) + } + wg.Wait() +} + func TestIssue146(t *testing.T) { // https://github.com/microcosm-cc/bluemonday/issues/146 //