From bd498e5caff4939f9f87554427ac8f6212140744 Mon Sep 17 00:00:00 2001
From: Christian Bewernitz
Date: Sat, 13 Mar 2021 01:33:26 +0100
Subject: [PATCH 1/2] fix: Escape `<` when serializing attribute values
to produce well-formed XML.
> Well-formedness constraint: No `<` in Attribute Values
> The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a `<`.
https://www.w3.org/TR/xml/#CleanAttrVals
https://www.w3.org/TR/xml/#NT-AttValue
fixes #198
---
lib/dom.js | 8 +++++++-
test/html/__snapshots__/normalize.test.js.snap | 6 +++---
test/parse/__snapshots__/locator.test.js.snap | 2 +-
test/parse/__snapshots__/simple.test.js.snap | 4 ++--
test/xmltest/__snapshots__/not-wf.test.js.snap | 2 +-
test/xmltest/__snapshots__/valid.test.js.snap | 2 +-
6 files changed, 15 insertions(+), 9 deletions(-)
diff --git a/lib/dom.js b/lib/dom.js
index 31eea2a9f..28f71160a 100644
--- a/lib/dom.js
+++ b/lib/dom.js
@@ -1068,7 +1068,13 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){
}
return;
case ATTRIBUTE_NODE:
- return buf.push(' ',node.name,'="',node.value.replace(/[&"]/g,_xmlEncoder),'"');
+ /**
+ * Well-formedness constraint: No < in Attribute Values
+ * The replacement text of any entity referred to directly or indirectly in an attribute value must not contain a <.
+ * @see https://www.w3.org/TR/xml/#CleanAttrVals
+ * @see https://www.w3.org/TR/xml/#NT-AttValue
+ */
+ return buf.push(' ',node.name,'="',node.value.replace(/[<&"]/g,_xmlEncoder),'"');
case TEXT_NODE:
/**
* The ampersand character (&) and the left angle bracket (<) must not appear in their literal form,
diff --git a/test/html/__snapshots__/normalize.test.js.snap b/test/html/__snapshots__/normalize.test.js.snap
index 68351824f..a612ecf4d 100644
--- a/test/html/__snapshots__/normalize.test.js.snap
+++ b/test/html/__snapshots__/normalize.test.js.snap
@@ -44,13 +44,13 @@ Object {
exports[`html normalizer 1`] = `
Object {
- "actual": "d\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
",
+ "actual": "d\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
",
}
`;
exports[`html normalizer 1`] = `
Object {
- "actual": "')\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
",
+ "actual": "')\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">
",
}
`;
@@ -90,7 +90,7 @@ Object {
exports[`html normalizer 1`] = `
Object {
- "actual": "b && '&&&'\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">",
+ "actual": "b && '&&&'\\" xmlns=\\"http://www.w3.org/1999/xhtml\\">",
}
`;
diff --git a/test/parse/__snapshots__/locator.test.js.snap b/test/parse/__snapshots__/locator.test.js.snap
index be4371f19..cc247f5bd 100644
--- a/test/parse/__snapshots__/locator.test.js.snap
+++ b/test/parse/__snapshots__/locator.test.js.snap
@@ -2,7 +2,7 @@
exports[`DOMLocator attribute position 1`] = `
Object {
- "actual": "
<;test",
+ "actual": "<;test",
}
`;
diff --git a/test/parse/__snapshots__/simple.test.js.snap b/test/parse/__snapshots__/simple.test.js.snap
index 28e5448b6..7478b6f71 100644
--- a/test/parse/__snapshots__/simple.test.js.snap
+++ b/test/parse/__snapshots__/simple.test.js.snap
@@ -12,7 +12,7 @@ Object {
exports[`parse simple 1`] = `
Object {
- "actual": "",
+ "actual": "",
}
`;
@@ -49,6 +49,6 @@ Object {
exports[`parse wrong closing tag 1`] = `
Object {
- "actual": "<;test",
+ "actual": "<;test",
}
`;
diff --git a/test/xmltest/__snapshots__/not-wf.test.js.snap b/test/xmltest/__snapshots__/not-wf.test.js.snap
index 400dd6e40..34eba2916 100644
--- a/test/xmltest/__snapshots__/not-wf.test.js.snap
+++ b/test/xmltest/__snapshots__/not-wf.test.js.snap
@@ -112,7 +112,7 @@ Object {
exports[`xmltest/not-wellformed standalone should match 014.xml with snapshot 1`] = `
Object {
- "actual": "\\"/>",
+ "actual": "\\"/>",
}
`;
diff --git a/test/xmltest/__snapshots__/valid.test.js.snap b/test/xmltest/__snapshots__/valid.test.js.snap
index 085aa7531..fdf34ca9f 100644
--- a/test/xmltest/__snapshots__/valid.test.js.snap
+++ b/test/xmltest/__snapshots__/valid.test.js.snap
@@ -293,7 +293,7 @@ Object {
exports[`xmltest/valid standalone should match 040.xml with snapshot 1`] = `
Object {
- "actual": "'\\"/>",
+ "actual": "'\\"/>",
"expected": "",
}
`;
From 8a1711917635ee0052e823919de5e7ae44c88a5f Mon Sep 17 00:00:00 2001
From: Christian Bewernitz
Date: Sun, 14 Mar 2021 09:37:14 +0100
Subject: [PATCH 2/2] Update lib/dom.js: add spaces between buf.push arguments
Co-authored-by: Chris Brody
---
lib/dom.js | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/lib/dom.js b/lib/dom.js
index 28f71160a..12288c8aa 100644
--- a/lib/dom.js
+++ b/lib/dom.js
@@ -1074,7 +1074,7 @@ function serializeToString(node,buf,isHTML,nodeFilter,visibleNamespaces){
* @see https://www.w3.org/TR/xml/#CleanAttrVals
* @see https://www.w3.org/TR/xml/#NT-AttValue
*/
- return buf.push(' ',node.name,'="',node.value.replace(/[<&"]/g,_xmlEncoder),'"');
+ return buf.push(' ', node.name, '="', node.value.replace(/[<&"]/g,_xmlEncoder), '"');
case TEXT_NODE:
/**
* The ampersand character (&) and the left angle bracket (<) must not appear in their literal form,