Skip to content

Commit

Permalink
fix(sax): Handle raw text elements in HTML
Browse files Browse the repository at this point in the history
and drop warning for boolean attributes in HTML
  • Loading branch information
karfau committed Mar 6, 2022
1 parent 5fd4e1b commit ceff927
Show file tree
Hide file tree
Showing 8 changed files with 471 additions and 56 deletions.
51 changes: 51 additions & 0 deletions lib/conventions.js
Expand Up @@ -144,6 +144,54 @@ function isHTMLVoidElement(tagName) {
return HTML_VOID_ELEMENTS.hasOwnProperty(tagName.toLowerCase())
}

/**
 * Tag names that are raw text elements according to the HTML spec, keyed in lower case.
 * The value denotes whether the element is an *escapable* raw text element:
 * `false` for the raw text elements (`script`, `style`),
 * `true` for the escapable raw text elements (`textarea`, `title`).
 *
 * The lookup helpers referenced below lower-case the tag name before consulting this map.
 *
 * @see isHTMLEscapableRawTextElement
 * @see isHTMLRawTextElement
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 */
var HTML_RAW_TEXT_ELEMENTS = freeze({
script: false,
style: false,
textarea: true,
title: true,
})

/**
 * Tells whether `tagName` names an HTML raw text element,
 * regardless of whether it is an escapable one or not.
 * The comparison ignores the casing of `tagName`.
 * Whether such a tag is allowed in the context of the current
 * document/parsing is not checked here.
 *
 * @param {string} tagName the tag name to look up
 * @return {boolean} `true` if the lower-cased name is an own key of `HTML_RAW_TEXT_ELEMENTS`
 * @see HTML_RAW_TEXT_ELEMENTS
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 */
function isHTMLRawTextElement(tagName) {
	var lowerCasedName = tagName.toLowerCase()
	// own-property lookup, so inherited names like `toString` never match
	return Object.prototype.hasOwnProperty.call(HTML_RAW_TEXT_ELEMENTS, lowerCasedName)
}
/**
 * Check if `tagName` is matching one of the HTML escapable raw text element names
 * (the keys of `HTML_RAW_TEXT_ELEMENTS` whose value is `true`).
 * The comparison ignores the casing of `tagName`.
 * This method doesn't check if such tags are allowed
 * in the context of the current document/parsing.
 *
 * @param {string} tagName
 * @return {boolean}
 *         `true` for escapable raw text elements,
 *         `false` for non-escapable raw text elements and for any other name
 * @see isHTMLRawTextElement
 * @see HTML_RAW_TEXT_ELEMENTS
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 */
function isHTMLEscapableRawTextElement(tagName) {
	// `var` (not `const`) to stay consistent with the other declarations of this module,
	// which only use ES5 syntax.
	var key = tagName.toLowerCase();
	// own-property check first, so inherited names (e.g. `constructor`) yield `false`
	return HTML_RAW_TEXT_ELEMENTS.hasOwnProperty(key) && HTML_RAW_TEXT_ELEMENTS[key];
}

/**
* All mime types that are allowed as input to `DOMParser.parseFromString`
*
Expand Down Expand Up @@ -282,8 +330,11 @@ var NAMESPACE = freeze({
// Public API of this module: every name below is attached to `exports` under its own name.
exports.assign = assign
exports.freeze = freeze
exports.HTML_BOOLEAN_ATTRIBUTES = HTML_BOOLEAN_ATTRIBUTES
exports.HTML_RAW_TEXT_ELEMENTS = HTML_RAW_TEXT_ELEMENTS
exports.HTML_VOID_ELEMENTS = HTML_VOID_ELEMENTS
exports.isHTMLBooleanAttribute = isHTMLBooleanAttribute
exports.isHTMLRawTextElement = isHTMLRawTextElement
exports.isHTMLEscapableRawTextElement = isHTMLEscapableRawTextElement
exports.isHTMLVoidElement = isHTMLVoidElement
exports.MIME_TYPE = MIME_TYPE
exports.NAMESPACE = NAMESPACE
29 changes: 13 additions & 16 deletions lib/sax.js
@@ -1,6 +1,8 @@
'use strict'

var conventions = require("./conventions");
var isHTMLRawTextElement = conventions.isHTMLRawTextElement;
var isHTMLEscapableRawTextElement = conventions.isHTMLEscapableRawTextElement;
var NAMESPACE = conventions.NAMESPACE;
var MIME_TYPE = conventions.MIME_TYPE;

Expand Down Expand Up @@ -181,7 +183,7 @@ function parse(source,defaultNSMapCopy,entityMap,domBuilder,errorHandler){

if(!el.closed && fixSelfClosed(source,end,el.tagName,closeMap)){
el.closed = true;
if(!entityMap.nbsp){
if(!isHTML){
errorHandler.warning('unclosed xml attribute');
}
}
Expand Down Expand Up @@ -352,7 +354,7 @@ function parseElementStartPart(
errorHandler.warning('attribute "'+value+'" missed quot(")!');
addAttribute(attrName, value, start)
}else{
if(!isHTML || !value.match(/^(?:disabled|checked|selected)$/i)){
if(!isHTML){
errorHandler.warning('attribute "'+value+'" missed value!! "'+value+'" instead!!')
}
addAttribute(value, value, start)
Expand Down Expand Up @@ -400,7 +402,7 @@ function parseElementStartPart(
//case S_ATTR_NOQUOT_VALUE:void();break;
case S_ATTR_SPACE:
var tagName = el.tagName;
if (!isHTML || !attrName.match(/^(?:disabled|checked|selected)$/i)) {
if (!isHTML) {
errorHandler.warning('attribute "'+attrName+'" missed value!! "'+attrName+'" instead2!!')
}
addAttribute(attrName, attrName, start);
Expand Down Expand Up @@ -503,24 +505,19 @@ function appendElement(el,domBuilder,currentNSMap){
}
}
/**
 * Reports the content of an HTML raw text element (or escapable raw text element)
 * to `domBuilder` as a single run of character data instead of parsing it as markup.
 *
 * For tag names that are not raw text elements nothing is reported
 * and `elStartEnd + 1` is returned.
 *
 * The closing tag is searched as the exact string `'</' + tagName + '>'`
 * (case-sensitive, no whitespace tolerated).
 * If it is missing, the content is taken to run to the end of `source`.
 * Previously the `-1` from `indexOf` was passed on to `substring`,
 * which reported the text *preceding* the element as its content, and `-1` was returned.
 * NOTE(review): the caller is not visible from here; confirm it copes with
 * `source.length` as return value for unterminated elements.
 *
 * @param {string} source the complete markup being parsed
 * @param {number} elStartEnd
 *        index from which the closing tag is searched; the content starts at `elStartEnd + 1`
 *        (presumably the index of the `>` that ends the start tag, verify against caller)
 * @param {string} tagName the name of the element that has just been opened
 * @param {function} entityReplacer
 *        replacer passed to `String.prototype.replace` for every `&name;` / `&#name;` match,
 *        only applied to escapable raw text elements
 * @param {{characters: function(string, number, number)}} domBuilder
 *        receives the content via `characters(text, 0, text.length)`
 * @return {number}
 *         the index of the closing tag (or `source.length` if there is none)
 *         for raw text elements, `elStartEnd + 1` otherwise
 * @see https://html.spec.whatwg.org/#raw-text-elements
 * @see https://html.spec.whatwg.org/#escapable-raw-text-elements
 * @see https://html.spec.whatwg.org/#cdata-rcdata-restrictions:raw-text-elements
 */
function parseHtmlSpecialContent(source,elStartEnd,tagName,entityReplacer,domBuilder){
	// TODO: https://html.spec.whatwg.org/#cdata-rcdata-restrictions
	if(!isHTMLRawTextElement(tagName)){
		return elStartEnd+1;
	}
	var elEndStart = source.indexOf('</'+tagName+'>',elStartEnd);
	if(elEndStart < 0){
		// unterminated element: everything up to the end of the input is its content
		elEndStart = source.length;
	}
	var text = source.substring(elStartEnd+1,elEndStart);
	if(isHTMLEscapableRawTextElement(tagName)){
		// only escapable raw text (`textarea`, `title`) has its entities resolved,
		// plain raw text (`script`, `style`) is reported verbatim
		text = text.replace(/&#?\w+;/g,entityReplacer);
	}
	domBuilder.characters(text,0,text.length);
	return elEndStart;
}
Expand Down

0 comments on commit ceff927

Please sign in to comment.