From bd498e5caff4939f9f87554427ac8f6212140744 Mon Sep 17 00:00:00 2001 From: Christian Bewernitz Date: Sat, 13 Mar 2021 01:33:26 +0100 Subject: [PATCH 1/2] fix: Escape `<` when serializing attribute values to produce well formed XML. > Well-formedness constraint: No `<` in Attribute Values > The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a `<`. https://www.w3.org/TR/xml/#CleanAttrVals https://www.w3.org/TR/xml/#NT-AttValue fixes #198 --- lib/dom.js | 8 +++++++- test/html/__snapshots__/normalize.test.js.snap | 6 +++--- test/parse/__snapshots__/locator.test.js.snap | 2 +- test/parse/__snapshots__/simple.test.js.snap | 4 ++-- test/xmltest/__snapshots__/not-wf.test.js.snap | 2 +- test/xmltest/__snapshots__/valid.test.js.snap | 2 +- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/lib/dom.js b/lib/dom.js index 31eea2a9f..28f71160a 100644 --- a/lib/dom.js +++ b/lib/dom.js @@ -1068,7 +1068,13 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ } return; case ATTRIBUTE_NODE: - return buf.push(' ',node.name,'="',node.value.replace(/[&"]/g,_xmlEncoder),'"'); + /** + * Well-formedness constraint: No < in Attribute Values + * The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a <. + * @see https://www.w3.org/TR/xml/#CleanAttrVals + * @see https://www.w3.org/TR/xml/#NT-AttValue + */ + return buf.push(' ',node.name,'="',node.value.replace(/[<&"]/g,_xmlEncoder),'"'); case TEXT_NODE: /** * The ampersand character (&) and the left angle bracket (<) must not appear in their literal form, diff --git a/test/html/__snapshots__/normalize.test.js.snap b/test/html/__snapshots__/normalize.test.js.snap index 68351824f..a612ecf4d 100644 --- a/test/html/__snapshots__/normalize.test.js.snap +++ b/test/html/__snapshots__/normalize.test.js.snap @@ -44,13 +44,13 @@ Object { exports[`html normalizer
1`] = ` Object { - "actual": "
d\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", + "actual": "
d\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", } `; exports[`html normalizer
1`] = ` Object { - "actual": "
')\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", + "actual": "
')\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
", } `; @@ -90,7 +90,7 @@ Object { exports[`html normalizer 1`] = ` Object { - "actual": "b && '&&&'\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">", + "actual": "b && '&&&'\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">", } `; diff --git a/test/parse/__snapshots__/locator.test.js.snap b/test/parse/__snapshots__/locator.test.js.snap index be4371f19..cc247f5bd 100644 --- a/test/parse/__snapshots__/locator.test.js.snap +++ b/test/parse/__snapshots__/locator.test.js.snap @@ -2,7 +2,7 @@ exports[`DOMLocator attribute position 1`] = ` Object { - "actual": "
<;test", + "actual": "
<;test", } `; diff --git a/test/parse/__snapshots__/simple.test.js.snap b/test/parse/__snapshots__/simple.test.js.snap index 28e5448b6..7478b6f71 100644 --- a/test/parse/__snapshots__/simple.test.js.snap +++ b/test/parse/__snapshots__/simple.test.js.snap @@ -12,7 +12,7 @@ Object { exports[`parse simple 1`] = ` Object { - "actual": "", + "actual": "", } `; @@ -49,6 +49,6 @@ Object { exports[`parse wrong closing tag 1`] = ` Object { - "actual": "
<;test", + "actual": "
<;test", } `; diff --git a/test/xmltest/__snapshots__/not-wf.test.js.snap b/test/xmltest/__snapshots__/not-wf.test.js.snap index 400dd6e40..34eba2916 100644 --- a/test/xmltest/__snapshots__/not-wf.test.js.snap +++ b/test/xmltest/__snapshots__/not-wf.test.js.snap @@ -112,7 +112,7 @@ Object { exports[`xmltest/not-wellformed standalone should match 014.xml with snapshot 1`] = ` Object { - "actual": "\\"/>", + "actual": "\\"/>", } `; diff --git a/test/xmltest/__snapshots__/valid.test.js.snap b/test/xmltest/__snapshots__/valid.test.js.snap index 085aa7531..fdf34ca9f 100644 --- a/test/xmltest/__snapshots__/valid.test.js.snap +++ b/test/xmltest/__snapshots__/valid.test.js.snap @@ -293,7 +293,7 @@ Object { exports[`xmltest/valid standalone should match 040.xml with snapshot 1`] = ` Object { - "actual": "'\\"/>", + "actual": "'\\"/>", "expected": "", } `; From 8a1711917635ee0052e823919de5e7ae44c88a5f Mon Sep 17 00:00:00 2001 From: Christian Bewernitz Date: Sun, 14 Mar 2021 09:37:14 +0100 Subject: [PATCH 2/2] Update lib/dom.js Co-authored-by: Chris Brody --- lib/dom.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/dom.js b/lib/dom.js index 28f71160a..12288c8aa 100644 --- a/lib/dom.js +++ b/lib/dom.js @@ -1074,7 +1074,7 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){ * @see https://www.w3.org/TR/xml/#CleanAttrVals * @see https://www.w3.org/TR/xml/#NT-AttValue */ - return buf.push(' ',node.name,'="',node.value.replace(/[<&"]/g,_xmlEncoder),'"'); + return buf.push(' ', node.name, '="', node.value.replace(/[<&"]/g,_xmlEncoder), '"'); case TEXT_NODE: /** * The ampersand character (&) and the left angle bracket (<) must not appear in their literal form,