From 587177dc009969adec1a9cedbe6fb3606a96ab56 Mon Sep 17 00:00:00 2001
From: Mike Dalessio
Date: Fri, 29 Oct 2021 00:50:02 -0400
Subject: [PATCH] test: use CSS hex-encoded strings to test sanitization

This adds onto #205.

The original reported exploit in 2006 used CSS hex encoding (e.g., "\0075" for "u"), which was ...

- mistakenly put into a double-quoted Ruby string in the Instiki test suite in 2007,
- then copied into html5lib-ruby's test suite,
- then copied into html5lib-python's suite,
- then finally copied into the html5lib shared suite,
- which was imported into Loofah
---
 test/assets/testdata_sanitizer_tests1.dat | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/test/assets/testdata_sanitizer_tests1.dat b/test/assets/testdata_sanitizer_tests1.dat
index dd96171..0fc287e 100644
--- a/test/assets/testdata_sanitizer_tests1.dat
+++ b/test/assets/testdata_sanitizer_tests1.dat
@@ -61,6 +61,20 @@
     "output": "
foo
" }, + { + /* and put that version into a CSS hex-encoded string */ + "name": "div_background_image_unicode_encoded5", + "input": "
foo
", + "output": "
foo
" + }, + + { + /* and again without encoding the parens */ + "name": "div_background_image_unicode_encoded6", + "input": "
foo
", + "output": "
foo
" + }, + { "name": "div_expression", "input": "
foo
",