Skip to content

Commit

Permalink
fix(sax): Handle raw text elements in HTML
Browse files Browse the repository at this point in the history
and drop warning for boolean attributes in HTML
  • Loading branch information
karfau committed Mar 6, 2022
1 parent 5fd4e1b commit 86cab47
Show file tree
Hide file tree
Showing 7 changed files with 176 additions and 56 deletions.
51 changes: 51 additions & 0 deletions lib/conventions.js
Expand Up @@ -144,6 +144,54 @@ function isHTMLVoidElement(tagName) {
return HTML_VOID_ELEMENTS.hasOwnProperty(tagName.toLowerCase())
}

/**
 * Lookup table of the tag names that are raw text elements according to the HTML spec.
 *
 * Each key is a lower case tag name. The value denotes whether the element is an
 * escapable raw text element (`true`) or a plain, not escapable one (`false`).
 * The object is passed through `freeze`, so it is meant to be used read-only.
 *
 * @see isHTMLEscapableRawTextElement
 * @see isHTMLRawTextElement
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 */
var HTML_RAW_TEXT_ELEMENTS = freeze({
// not escapable raw text elements
script: false,
style: false,
// escapable raw text elements
textarea: true,
title: true,
})

/**
 * Tell whether `tagName` names one of the HTML raw text elements,
 * regardless of it being an escapable or a not escapable one.
 * The lookup is done with the lower cased `tagName`, so the case of the input is ignored.
 * Whether such a tag is allowed in the context of the current document/parsing
 * is not checked here.
 *
 * @param {string} tagName
 * @return {boolean} `true` if `tagName` is a key of `HTML_RAW_TEXT_ELEMENTS`
 * @see HTML_RAW_TEXT_ELEMENTS
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 */
function isHTMLRawTextElement(tagName) {
	var lowerCaseName = tagName.toLowerCase()
	// only own keys count, inherited names like `constructor` must not match
	return Object.prototype.hasOwnProperty.call(HTML_RAW_TEXT_ELEMENTS, lowerCaseName)
}
/**
 * Check if `tagName` is matching one of the HTML escapable raw text element names
 * (the keys of `HTML_RAW_TEXT_ELEMENTS` that have the value `true`).
 * The lookup is done with the lower cased `tagName`, so the case of the input is ignored.
 * This method doesn't check if such tags are allowed
 * in the context of the current document/parsing.
 *
 * @param {string} tagName
 * @return {boolean} `false` for not escapable raw text elements and for any other name
 * @see isHTMLRawTextElement
 * @see HTML_RAW_TEXT_ELEMENTS
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 */
function isHTMLEscapableRawTextElement(tagName) {
	// `var` keeps this declaration in line with all the others in the lib code
	var key = tagName.toLowerCase();
	// the own property check prevents inherited names (e.g. `constructor`) from leaking a truthy value
	return HTML_RAW_TEXT_ELEMENTS.hasOwnProperty(key) && HTML_RAW_TEXT_ELEMENTS[key];
}

/**
* All mime types that are allowed as input to `DOMParser.parseFromString`
*
Expand Down Expand Up @@ -282,8 +330,11 @@ var NAMESPACE = freeze({
exports.assign = assign
exports.freeze = freeze
exports.HTML_BOOLEAN_ATTRIBUTES = HTML_BOOLEAN_ATTRIBUTES
exports.HTML_RAW_TEXT_ELEMENTS = HTML_RAW_TEXT_ELEMENTS
exports.HTML_VOID_ELEMENTS = HTML_VOID_ELEMENTS
exports.isHTMLBooleanAttribute = isHTMLBooleanAttribute
exports.isHTMLRawTextElement = isHTMLRawTextElement
exports.isHTMLEscapableRawTextElement = isHTMLEscapableRawTextElement
exports.isHTMLVoidElement = isHTMLVoidElement
exports.MIME_TYPE = MIME_TYPE
exports.NAMESPACE = NAMESPACE
29 changes: 13 additions & 16 deletions lib/sax.js
@@ -1,6 +1,8 @@
'use strict'

var conventions = require("./conventions");
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
var NAMESPACE = conventions.NAMESPACE;
var MIME_TYPE = conventions.MIME_TYPE;

Expand Down Expand Up @@ -181,7 +183,7 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){

if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
el.closed = true;
if(!entityMap.nbsp){
if(!isHTML){
errorHandler.warning('unclosed xml attribute');
}
}
Expand Down Expand Up @@ -352,7 +354,7 @@ function parseElementStartPart(
errorHandler.warning('attribute "'+value+'" missed quot(")!');
addAttribute(attrName, value, start)
}else{
if(!isHTML || !value.match(/^(?:disabled|checked|selected)$/i)){
if(!isHTML){
errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
}
addAttribute(value, value, start)
Expand Down Expand Up @@ -400,7 +402,7 @@ function parseElementStartPart(
//case S_ATTR_NOQUOT_VALUE:void();break;
case S_ATTR_SPACE:
var tagName = el.tagName;
if (!isHTML || !attrName.match(/^(?:disabled|checked|selected)$/i)) {
if (!isHTML) {
errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
}
addAttribute(attrName, attrName, start);
Expand Down Expand Up @@ -503,24 +505,19 @@ function appendElement(el,domBuilder,currentNSMap){
}
}
/**
 * Consumes the content of an HTML raw text element as plain text instead of markup.
 *
 * For tag names accepted by `isHTMLRawTextElement` everything between `elStartEnd`
 * and the next `'</' + tagName + '>'` is passed to `domBuilder.characters`.
 * Only for escapable raw text elements the entities in that text are replaced first.
 *
 * @param {string} source the markup that is being parsed
 * @param {number} elStartEnd the index right before the content of the element
 *                 (the text is taken starting at `elStartEnd + 1`)
 * @param {string} tagName the tag name as written in `source`,
 *                 the closing tag is searched with exactly that spelling
 * @param {function(string): string} entityReplacer replacer for `String.prototype.replace`,
 *                 called for every match of `/&#?\w+;/g`
 * @param {{characters: function(string, number, number)}} domBuilder
 * @return {number} the index of the `<` of the closing tag when the content was consumed,
 *                  `elStartEnd + 1` when nothing was consumed
 */
function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
	// https://html.spec.whatwg.org/#raw-text-elements
	// https://html.spec.whatwg.org/#escapable-raw-text-elements
	// https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
	// TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
	if(!isHTMLRawTextElement(tagName)){
		return elStartEnd+1;
	}
	var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
	if(elEndStart < 0){
		// Without a closing tag there is no raw text to extract:
		// `substring(elStartEnd+1,-1)` would return the part of `source` in front of the content
		// and `-1` would be handed back as the new position, so nothing is consumed instead.
		return elStartEnd+1;
	}
	var text = source.substring(elStartEnd+1,elEndStart);
	if(isHTMLEscapableRawTextElement(tagName)){
		text = text.replace(/&#?\w+;/g,entityReplacer);
	}
	domBuilder.characters(text,0,text.length);
	return elEndStart;
}
Expand Down
58 changes: 58 additions & 0 deletions test/conventions/html.test.js
Expand Up @@ -3,6 +3,9 @@
const {
HTML_BOOLEAN_ATTRIBUTES,
isHTMLBooleanAttribute,
HTML_RAW_TEXT_ELEMENTS,
isHTMLRawTextElement,
isHTMLEscapableRawTextElement,
HTML_VOID_ELEMENTS,
isHTMLVoidElement,
} = require('../../lib/conventions')
Expand Down Expand Up @@ -73,3 +76,58 @@ describe('isHTMLVoidElement', () => {
expect(isHTMLVoidElement('__proto__')).toBe(false)
});
})
// Pins every entry of HTML_RAW_TEXT_ELEMENTS in a snapshot and verifies that it can not be overwritten.
describe('HTML_RAW_TEXT_ELEMENTS', () => {
	for (const [key, value] of Object.entries(HTML_RAW_TEXT_ELEMENTS)) {
		// NOTE: the title is kept verbatim (even though the value can be `false`),
		// since jest stores the snapshot below under the name of the test.
		it(`should contain immutable ${key} with value 'true'`, () => {
			expect([key, value]).toMatchSnapshot()
			const overwrite = () => {
				HTML_RAW_TEXT_ELEMENTS[key] = 'boo'
			}
			try {
				overwrite()
			} catch {
				// writing to a frozen object throws in strict mode, which is fine here
			}
			expect(HTML_RAW_TEXT_ELEMENTS[key]).toBe(value)
		})
	}
})
// Verifies that every key of HTML_RAW_TEXT_ELEMENTS is detected in lower, upper and mixed case
// and that names inherited from Object.prototype are not detected.
describe('isHTMLRawTextElement', () => {
	Object.keys(HTML_RAW_TEXT_ELEMENTS).forEach((key) => {
		// the keys are tag names, so the titles talk about elements
		it(`should detect element '${key}'`, () => {
			expect(isHTMLRawTextElement(key)).toBe(true)
		})
		const upperKey = key.toUpperCase()
		it(`should detect element '${upperKey}'`, () => {
			expect(isHTMLRawTextElement(upperKey)).toBe(true)
		})
		const mixedKey = key[0].toUpperCase() + key.substring(1)
		it(`should detect element '${mixedKey}'`, () => {
			expect(isHTMLRawTextElement(mixedKey)).toBe(true)
		})
	})
	it('should not detect prototype properties', () => {
		expect(isHTMLRawTextElement('hasOwnProperty')).toBe(false)
		expect(isHTMLRawTextElement('constructor')).toBe(false)
		expect(isHTMLRawTextElement('prototype')).toBe(false)
		expect(isHTMLRawTextElement('__proto__')).toBe(false)
	})
})
// Verifies that the result for every key of HTML_RAW_TEXT_ELEMENTS equals the value stored for it,
// in lower, upper and mixed case, and that names inherited from Object.prototype yield `false`.
describe('isHTMLEscapableRawTextElement', () => {
	Object.keys(HTML_RAW_TEXT_ELEMENTS).forEach((key) => {
		const expected = HTML_RAW_TEXT_ELEMENTS[key]
		// the keys are tag names, so the titles talk about elements
		it(`should detect element '${key}' as ${expected}`, () => {
			expect(isHTMLEscapableRawTextElement(key)).toBe(expected)
		})
		const upperKey = key.toUpperCase()
		it(`should detect element '${upperKey}' as ${expected}`, () => {
			expect(isHTMLEscapableRawTextElement(upperKey)).toBe(expected)
		})
		const mixedKey = key[0].toUpperCase() + key.substring(1)
		it(`should detect element '${mixedKey}' as ${expected}`, () => {
			expect(isHTMLEscapableRawTextElement(mixedKey)).toBe(expected)
		})
	})
	it('should not detect prototype properties', () => {
		expect(isHTMLEscapableRawTextElement('hasOwnProperty')).toBe(false)
		expect(isHTMLEscapableRawTextElement('constructor')).toBe(false)
		expect(isHTMLEscapableRawTextElement('prototype')).toBe(false)
		expect(isHTMLEscapableRawTextElement('__proto__')).toBe(false)
	})
})
30 changes: 0 additions & 30 deletions test/error/__snapshots__/reported-levels.test.js.snap
Expand Up @@ -352,20 +352,6 @@ Array [
]
`;

exports[`WF_AttributeMissingValue with mimeType text/html should be reported as warning 1`] = `
Array [
"[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead!!
@#[line:1,col:1]",
]
`;

exports[`WF_AttributeMissingValue with mimeType text/html should escalate Error thrown in errorHandler.warning to errorHandler.error 1`] = `
Array [
"[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead!!||@#[line:1,col:1]
at parseElementStartPart (lib/sax.js:#15)",
]
`;

exports[`WF_AttributeMissingValue with mimeType text/xml should be reported as warning 1`] = `
Array [
"[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead!!
Expand All @@ -380,22 +366,6 @@ Array [
]
`;

exports[`WF_AttributeMissingValue2 with mimeType text/html should be reported as warning 1`] = `
Array [
"[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead2!!
@#[line:1,col:1]",
"[xmldom warning] attribute \\"attr2\\" missed value!! \\"attr2\\" instead!!
@#[line:1,col:1]",
]
`;

exports[`WF_AttributeMissingValue2 with mimeType text/html should escalate Error thrown in errorHandler.warning to errorHandler.error 1`] = `
Array [
"[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead2!!||@#[line:1,col:1]
at parseElementStartPart (lib/sax.js:#18)",
]
`;

exports[`WF_AttributeMissingValue2 with mimeType text/xml should be reported as warning 1`] = `
Array [
"[xmldom warning] attribute \\"attr\\" missed value!! \\"attr\\" instead2!!
Expand Down
2 changes: 2 additions & 0 deletions test/error/reported.js
Expand Up @@ -223,6 +223,7 @@ const REPORTED = {
source: '<xml attr ></xml>',
level: 'warning',
match: (msg) => /missed value/.test(msg) && /instead!!/.test(msg),
skippedInHtml: true,
},
/**
* Triggered by lib/sax.js:376
Expand All @@ -236,6 +237,7 @@ const REPORTED = {
source: '<xml attr attr2 ></xml>',
level: 'warning',
match: (msg) => /missed value/.test(msg) && /instead2!!/.test(msg),
skippedInHtml: true,
},
}

Expand Down
58 changes: 48 additions & 10 deletions test/html/__snapshots__/normalize.test.js.snap
Expand Up @@ -19,8 +19,6 @@ Object {
],
"warning": Array [
"[xmldom warning] attribute \\"&\\" missed quot(\\")!!
@#[line:1,col:1]",
"[xmldom warning] attribute \\"b\\" missed value!! \\"b\\" instead!!
@#[line:1,col:1]",
],
}
Expand All @@ -31,10 +29,6 @@ Object {
"actual": "<div a=\\"&amp;\\" bb=\\"bb\\" c=\\"c\\" d=\\"123&amp;&amp;456\\" xmlns=\\"http://www.w3.org/1999/xhtml\\"></div>",
"warning": Array [
"[xmldom warning] attribute \\"&\\" missed quot(\\")!!
@#[line:1,col:1]",
"[xmldom warning] attribute \\"bb\\" missed value!! \\"bb\\" instead2!!
@#[line:1,col:1]",
"[xmldom warning] attribute \\"c\\" missed value!! \\"c\\" instead2!!
@#[line:1,col:1]",
"[xmldom warning] attribute \\"123&&456\\" missed quot(\\")!
@#[line:1,col:1]",
Expand Down Expand Up @@ -110,6 +104,24 @@ Object {
}
`;
exports[`html normalizer text/html: script <input type="button" disabled></input> 1`] = `
Object {
"actual": "<input type=\\"button\\" disabled=\\"disabled\\" xmlns=\\"http://www.w3.org/1999/xhtml\\"/>",
}
`;
exports[`html normalizer text/html: script <input type="checkbox" checked></input> 1`] = `
Object {
"actual": "<input type=\\"checkbox\\" checked=\\"checked\\" xmlns=\\"http://www.w3.org/1999/xhtml\\"/>",
}
`;
exports[`html normalizer text/html: script <option selected></option> 1`] = `
Object {
"actual": "<option selected=\\"selected\\" xmlns=\\"http://www.w3.org/1999/xhtml\\"></option>",
}
`;
exports[`html normalizer text/html: script <script src="./test.js"/> 1`] = `
Object {
"actual": "<script src=\\"./test.js\\" xmlns=\\"http://www.w3.org/1999/xhtml\\"></script>",
Expand All @@ -134,6 +146,36 @@ Object {
}
`;
exports[`html normalizer text/xml: script <input type="button" disabled></input> 1`] = `
Object {
"actual": "<input type=\\"button\\" disabled=\\"disabled\\"/>",
"warning": Array [
"[xmldom warning] attribute \\"disabled\\" missed value!! \\"disabled\\" instead!!
@#[line:1,col:1]",
],
}
`;
exports[`html normalizer text/xml: script <input type="checkbox" checked></input> 1`] = `
Object {
"actual": "<input type=\\"checkbox\\" checked=\\"checked\\"/>",
"warning": Array [
"[xmldom warning] attribute \\"checked\\" missed value!! \\"checked\\" instead!!
@#[line:1,col:1]",
],
}
`;
exports[`html normalizer text/xml: script <option selected></option> 1`] = `
Object {
"actual": "<option selected=\\"selected\\"/>",
"warning": Array [
"[xmldom warning] attribute \\"selected\\" missed value!! \\"selected\\" instead!!
@#[line:1,col:1]",
],
}
`;
exports[`html normalizer text/xml: script <script src="./test.js"/> 1`] = `
Object {
"actual": "<script src=\\"./test.js\\"/>",
Expand Down Expand Up @@ -201,10 +243,6 @@ Object {
exports[`html normalizer unclosed html <html title/> 1`] = `
Object {
"actual": "<html title=\\"title\\" xmlns=\\"http://www.w3.org/1999/xhtml\\"></html>",
"warning": Array [
"[xmldom warning] attribute \\"title\\" missed value!! \\"title\\" instead!!
@#[line:1,col:1]",
],
}
`;
Expand Down
4 changes: 4 additions & 0 deletions test/html/normalize.test.js
Expand Up @@ -44,6 +44,10 @@ describe('html normalizer', () => {
'<script>alert(a<b&&c?"<br/>":">>");</script>',
'<script src="./test.js"/>',
'<textarea>alert(a<b&&c?"<br>":">>");</textarea>',
'<input type="button" disabled></input>',
'<input type="checkbox" checked></input>',
'<option selected></option>',
,
])(`${mimeType}: script %s`, (xml) => {
const { errors, parser } = getTestParser()

Expand Down

0 comments on commit 86cab47

Please sign in to comment.