From ceff92705bdcd709d1a12cf6f50ae24b83f304a1 Mon Sep 17 00:00:00 2001 From: Christian Bewernitz Date: Sun, 6 Mar 2022 16:57:23 +0100 Subject: [PATCH] fix(sax): Handle raw text elements in HTML and drop warning for boolean attributes in HTML --- lib/conventions.js | 51 +++ lib/sax.js | 29 +- .../__snapshots__/html.test.js.snap | 295 ++++++++++++++++++ test/conventions/html.test.js | 58 ++++ .../reported-levels.test.js.snap | 30 -- test/error/reported.js | 2 + .../html/__snapshots__/normalize.test.js.snap | 58 +++- test/html/normalize.test.js | 4 + 8 files changed, 471 insertions(+), 56 deletions(-) create mode 100644 test/conventions/__snapshots__/html.test.js.snap diff --git a/lib/conventions.js b/lib/conventions.js index 7879a4e15..ce6bdb60a 100644 --- a/lib/conventions.js +++ b/lib/conventions.js @@ -144,6 +144,54 @@ function isHTMLVoidElement(tagName) { return HTML_VOID_ELEMENTS.hasOwnProperty(tagName.toLowerCase()) } +/** + * Tag names that are raw text elements according to HTML spec. + * The value denotes whether they are escapable or not. + * + * @see isHTMLEscapableRawTextElement + * @see isHTMLRawTextElement + * @see https://html.spec.whatwg.org/#raw-text-elements + * @see https://html.spec.whatwg.org/#escapable-raw-text-elements + */ +var HTML_RAW_TEXT_ELEMENTS = freeze({ + script: false, + style: false, + textarea: true, + title: true, +}) + +/** + * Check if `tagName` is matching one of the HTML raw text element names. + * It includes escapable and not escapable raw text elements. + * This method doesn't check if such tags are allowed + * in the context of the current document/parsing. + * + * @param {string} tagName + * @return {boolean} + * @see HTML_RAW_TEXT_ELEMENTS + * @see https://html.spec.whatwg.org/#raw-text-elements + * @see https://html.spec.whatwg.org/#escapable-raw-text-elements + */ +function isHTMLRawTextElement(tagName) { + return HTML_RAW_TEXT_ELEMENTS.hasOwnProperty(tagName.toLowerCase()) +} +/** + * Check if `tagName` is matching one of the HTML escapable raw text element names. + * This method doesn't check if such tags are allowed + * in the context of the current document/parsing. + * + * @param {string} tagName + * @return {boolean} + * @see isHTMLRawTextElement + * @see HTML_RAW_TEXT_ELEMENTS + * @see https://html.spec.whatwg.org/#raw-text-elements + * @see https://html.spec.whatwg.org/#escapable-raw-text-elements + */ +function isHTMLEscapableRawTextElement(tagName) { + const key = tagName.toLowerCase(); + return HTML_RAW_TEXT_ELEMENTS.hasOwnProperty(key) && HTML_RAW_TEXT_ELEMENTS[key]; +} + /** * All mime types that are allowed as input to `DOMParser.parseFromString` * @@ -282,8 +330,11 @@ var NAMESPACE = freeze({ exports.assign = assign exports.freeze = freeze exports.HTML_BOOLEAN_ATTRIBUTES = HTML_BOOLEAN_ATTRIBUTES +exports.HTML_RAW_TEXT_ELEMENTS = HTML_RAW_TEXT_ELEMENTS exports.HTML_VOID_ELEMENTS = HTML_VOID_ELEMENTS exports.isHTMLBooleanAttribute = isHTMLBooleanAttribute +exports.isHTMLRawTextElement = isHTMLRawTextElement +exports.isHTMLEscapableRawTextElement = isHTMLEscapableRawTextElement exports.isHTMLVoidElement = isHTMLVoidElement exports.MIME_TYPE = MIME_TYPE exports.NAMESPACE = NAMESPACE diff --git a/lib/sax.js b/lib/sax.js index 06aeba540..424c58861 100644 --- a/lib/sax.js +++ b/lib/sax.js @@ -1,6 +1,8 @@ 'use strict' var conventions = require("./conventions"); +var isHTMLRawTextElement = conventions.isHTMLRawTextElement; +var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement; var NAMESPACE = conventions.NAMESPACE; var MIME_TYPE = conventions.MIME_TYPE; @@ -181,7 +183,7 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){ if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){ el.closed = true; - if(!entityMap.nbsp){ + if(!isHTML){ errorHandler.warning('unclosed xml attribute'); } } @@ -352,7 +354,7 @@ function parseElementStartPart( errorHandler.warning('attribute "'+value+'" missed quot(")!'); addAttribute(attrName, value, start) }else{ - if(!isHTML || !value.match(/^(?:disabled|checked|selected)$/i)){ + if(!isHTML){ errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!') } addAttribute(value, value, start) @@ -400,7 +402,7 @@ function parseElementStartPart( //case S_ATTR_NOQUOT_VALUE:void();break; case S_ATTR_SPACE: var tagName = el.tagName; - if (!isHTML || !attrName.match(/^(?:disabled|checked|selected)$/i)) { + if (!isHTML) { errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!') } addAttribute(attrName, attrName, start); @@ -503,24 +505,19 @@ function appendElement(el,domBuilder,currentNSMap){ } } function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){ - if(/^(?:script|textarea)$/i.test(tagName)){ + // https://html.spec.whatwg.org/#raw-text-elements + // https://html.spec.whatwg.org/#escapable-raw-text-elements + // https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements + // TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions + if(isHTMLRawTextElement(tagName)){ var elEndStart = source.indexOf('',elStartEnd); var text = source.substring(elStartEnd+1,elEndStart); - if(/[&<]/.test(text)){ - if(/^script$/i.test(tagName)){ - //if(!/\]\]>/.test(text)){ - //lexHandler.startCDATA(); - domBuilder.characters(text,0,text.length); - //lexHandler.endCDATA(); - return elEndStart; - //} - }//}else{//text area + + if(isHTMLEscapableRawTextElement(tagName)){ text = text.replace(/&#?\w+;/g,entityReplacer); + } domBuilder.characters(text,0,text.length); return elEndStart; - //} - - } } return elStartEnd+1; } diff --git a/test/conventions/__snapshots__/html.test.js.snap b/test/conventions/__snapshots__/html.test.js.snap new file mode 100644 index 000000000..ff48bb910 --- /dev/null +++ b/test/conventions/__snapshots__/html.test.js.snap @@ -0,0 +1,295 @@ +// Jest Snapshot v1, https://goo.gl/fbAQLP + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable allowfullscreen with value 'true' 1`] = ` +Array [ + "allowfullscreen", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable async with value 'true' 1`] = ` +Array [ + "async", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable autofocus with value 'true' 1`] = ` +Array [ + "autofocus", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable autoplay with value 'true' 1`] = ` +Array [ + "autoplay", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable checked with value 'true' 1`] = ` +Array [ + "checked", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable controls with value 'true' 1`] = ` +Array [ + "controls", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable default with value 'true' 1`] = ` +Array [ + "default", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable defer with value 'true' 1`] = ` +Array [ + "defer", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable disabled with value 'true' 1`] = ` +Array [ + "disabled", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable formnovalidate with value 'true' 1`] = ` +Array [ + "formnovalidate", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable hidden with value 'true' 1`] = ` +Array [ + "hidden", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable ismap with value 'true' 1`] = ` +Array [ + "ismap", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable itemscope with value 'true' 1`] = ` +Array [ + "itemscope", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable loop with value 'true' 1`] = ` +Array [ + "loop", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable multiple with value 'true' 1`] = ` +Array [ + "multiple", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable muted with value 'true' 1`] = ` +Array [ + "muted", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable nomodule with value 'true' 1`] = ` +Array [ + "nomodule", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable novalidate with value 'true' 1`] = ` +Array [ + "novalidate", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable open with value 'true' 1`] = ` +Array [ + "open", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable playsinline with value 'true' 1`] = ` +Array [ + "playsinline", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable readonly with value 'true' 1`] = ` +Array [ + "readonly", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable required with value 'true' 1`] = ` +Array [ + "required", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable reversed with value 'true' 1`] = ` +Array [ + "reversed", + true, +] +`; + +exports[`HTML_BOOLEAN_ATTRIBUTES should contain immutable selected with value 'true' 1`] = ` +Array [ + "selected", + true, +] +`; + +exports[`HTML_RAW_TEXT_ELEMENTS should contain immutable script with value 'true' 1`] = ` +Array [ + "script", + false, +] +`; + +exports[`HTML_RAW_TEXT_ELEMENTS should contain immutable style with value 'true' 1`] = ` +Array [ + "style", + false, +] +`; + +exports[`HTML_RAW_TEXT_ELEMENTS should contain immutable textarea with value 'true' 1`] = ` +Array [ + "textarea", + true, +] +`; + +exports[`HTML_RAW_TEXT_ELEMENTS should contain immutable title with value 'true' 1`] = ` +Array [ + "title", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable area with value 'true' 1`] = ` +Array [ + "area", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable base with value 'true' 1`] = ` +Array [ + "base", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable br with value 'true' 1`] = ` +Array [ + "br", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable col with value 'true' 1`] = ` +Array [ + "col", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable embed with value 'true' 1`] = ` +Array [ + "embed", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable hr with value 'true' 1`] = ` +Array [ + "hr", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable img with value 'true' 1`] = ` +Array [ + "img", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable input with value 'true' 1`] = ` +Array [ + "input", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable link with value 'true' 1`] = ` +Array [ + "link", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable meta with value 'true' 1`] = ` +Array [ + "meta", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable param with value 'true' 1`] = ` +Array [ + "param", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable source with value 'true' 1`] = ` +Array [ + "source", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable track with value 'true' 1`] = ` +Array [ + "track", + true, +] +`; + +exports[`HTML_VOID_ELEMENTS should contain immutable wbr with value 'true' 1`] = ` +Array [ + "wbr", + true, +] +`; diff --git a/test/conventions/html.test.js b/test/conventions/html.test.js index 95c38e200..8e892719a 100644 --- a/test/conventions/html.test.js +++ b/test/conventions/html.test.js @@ -3,6 +3,9 @@ const { HTML_BOOLEAN_ATTRIBUTES, isHTMLBooleanAttribute, + HTML_RAW_TEXT_ELEMENTS, + isHTMLRawTextElement, + isHTMLEscapableRawTextElement, HTML_VOID_ELEMENTS, isHTMLVoidElement, } = require('../../lib/conventions') @@ -73,3 +76,58 @@ describe('isHTMLVoidElement', () => { expect(isHTMLVoidElement('__proto__')).toBe(false) }); }) +describe('HTML_RAW_TEXT_ELEMENTS', () => { + Object.keys(HTML_RAW_TEXT_ELEMENTS).forEach((key) => { + const value = HTML_RAW_TEXT_ELEMENTS[key] + it(`should contain immutable ${key} with value 'true'`, () => { + expect([key, value]).toMatchSnapshot() + try { + HTML_RAW_TEXT_ELEMENTS[key] = 'boo' + } catch {} + expect(HTML_RAW_TEXT_ELEMENTS[key]).toBe(value) + }) + }) +}) +describe('isHTMLRawTextElement', () => { + Object.keys(HTML_RAW_TEXT_ELEMENTS).forEach((key) => { + it(`should detect attribute '${key}'`, () => { + expect(isHTMLRawTextElement(key)).toBe(true) + }) + const upperKey = key.toUpperCase() + it(`should detect attribute '${upperKey}'`, () => { + expect(isHTMLRawTextElement(upperKey)).toBe(true) + }) + const mixedKey = key[0].toUpperCase() + key.substring(1) + it(`should detect attribute '${mixedKey}'`, () => { + expect(isHTMLRawTextElement(mixedKey)).toBe(true) + }) + }) + it("should not detect prototype properties", () => { + expect(isHTMLRawTextElement('hasOwnProperty')).toBe(false) + expect(isHTMLRawTextElement('constructor')).toBe(false) + expect(isHTMLRawTextElement('prototype')).toBe(false) + expect(isHTMLRawTextElement('__proto__')).toBe(false) + }); +}) +describe('isHTMLEscapableRawTextElement', () => { + Object.keys(HTML_RAW_TEXT_ELEMENTS).forEach((key) => { + const expected = HTML_RAW_TEXT_ELEMENTS[key] + it(`should detect attribute '${key}' as ${expected}`, () => { + expect(isHTMLEscapableRawTextElement(key)).toBe(expected) + }) + const upperKey = key.toUpperCase() + it(`should detect attribute '${upperKey}' as ${expected}`, () => { + expect(isHTMLEscapableRawTextElement(upperKey)).toBe(expected) + }) + const mixedKey = key[0].toUpperCase() + key.substring(1) + it(`should detect attribute '${mixedKey}' as ${expected}`, () => { + expect(isHTMLEscapableRawTextElement(mixedKey)).toBe(expected) + }) + }) + it("should not detect prototype properties", () => { + expect(isHTMLEscapableRawTextElement('hasOwnProperty')).toBe(false) + expect(isHTMLEscapableRawTextElement('constructor')).toBe(false) + expect(isHTMLEscapableRawTextElement('prototype')).toBe(false) + expect(isHTMLEscapableRawTextElement('__proto__')).toBe(false) + }); +}) diff --git a/test/error/__snapshots__/reported-levels.test.js.snap b/test/error/__snapshots__/reported-levels.test.js.snap index c32c545ab..8ee742c6f 100644 --- a/test/error/__snapshots__/reported-levels.test.js.snap +++ b/test/error/__snapshots__/reported-levels.test.js.snap @@ -352,20 +352,6 @@ Array [ ] `; -exports[`WF_AttributeMissingValue with mimeType text/html should be reported as warning 1`] = ` -Array [ - "[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead!! -@#[line:1,col:1]", -] -`; - -exports[`WF_AttributeMissingValue with mimeType text/html should escalate Error thrown in errorHandler.warning to errorHandler.error 1`] = ` -Array [ - "[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead!!||@#[line:1,col:1] - at parseElementStartPart (lib/sax.js:#15)", -] -`; - exports[`WF_AttributeMissingValue with mimeType text/xml should be reported as warning 1`] = ` Array [ "[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead!! @@ -380,22 +366,6 @@ Array [ ] `; -exports[`WF_AttributeMissingValue2 with mimeType text/html should be reported as warning 1`] = ` -Array [ - "[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead2!! -@#[line:1,col:1]", - "[xmldom warning] attribute \\"attr2\\" missed value!! \\"attr2\\" instead!! -@#[line:1,col:1]", -] -`; - -exports[`WF_AttributeMissingValue2 with mimeType text/html should escalate Error thrown in errorHandler.warning to errorHandler.error 1`] = ` -Array [ - "[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead2!!||@#[line:1,col:1] - at parseElementStartPart (lib/sax.js:#18)", -] -`; - exports[`WF_AttributeMissingValue2 with mimeType text/xml should be reported as warning 1`] = ` Array [ "[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead2!! diff --git a/test/error/reported.js b/test/error/reported.js index ec4f04814..1944ecf4f 100644 --- a/test/error/reported.js +++ b/test/error/reported.js @@ -223,6 +223,7 @@ const REPORTED = { source: '', level: 'warning', match: (msg) => /missed value/.test(msg) && /instead!!/.test(msg), + skippedInHtml: true, }, /** * Triggered by lib/sax.js:376 @@ -236,6 +237,7 @@ const REPORTED = { source: '', level: 'warning', match: (msg) => /missed value/.test(msg) && /instead2!!/.test(msg), + skippedInHtml: true, }, } diff --git a/test/html/__snapshots__/normalize.test.js.snap b/test/html/__snapshots__/normalize.test.js.snap index a612ecf4d..3c2ee4b3c 100644 --- a/test/html/__snapshots__/normalize.test.js.snap +++ b/test/html/__snapshots__/normalize.test.js.snap @@ -19,8 +19,6 @@ Object { ], "warning": Array [ "[xmldom warning] attribute \\"&\\" missed quot(\\")!! -@#[line:1,col:1]", - "[xmldom warning] attribute \\"b\\" missed value!! \\"b\\" instead!! @#[line:1,col:1]", ], } @@ -31,10 +29,6 @@ Object { "actual": "
", "warning": Array [ "[xmldom warning] attribute \\"&\\" missed quot(\\")!! -@#[line:1,col:1]", - "[xmldom warning] attribute \\"bb\\" missed value!! \\"bb\\" instead2!! -@#[line:1,col:1]", - "[xmldom warning] attribute \\"c\\" missed value!! \\"c\\" instead2!! @#[line:1,col:1]", "[xmldom warning] attribute \\"123&&456\\" missed quot(\\")! @#[line:1,col:1]", @@ -110,6 +104,24 @@ Object { } `; +exports[`html normalizer text/html: script 1`] = ` +Object { + "actual": "", +} +`; + +exports[`html normalizer text/html: script 1`] = ` +Object { + "actual": "", +} +`; + +exports[`html normalizer text/html: script 1`] = ` +Object { + "actual": "", +} +`; + exports[`html normalizer text/html: script ", @@ -134,6 +146,36 @@ Object { } `; +exports[`html normalizer text/xml: script 1`] = ` +Object { + "actual": "", + "warning": Array [ + "[xmldom warning] attribute \\"disabled\\" missed value!! \\"disabled\\" instead!! +@#[line:1,col:1]", + ], +} +`; + +exports[`html normalizer text/xml: script 1`] = ` +Object { + "actual": "", + "warning": Array [ + "[xmldom warning] attribute \\"checked\\" missed value!! \\"checked\\" instead!! +@#[line:1,col:1]", + ], +} +`; + +exports[`html normalizer text/xml: script 1`] = ` +Object { + "actual": "