Skip to content

Commit

Permalink
fix(html/parser): Fix parsing of cdata (#6534)
Browse files (browse the repository at this point in the history)
  • Loading branch information
alexander-akait committed Nov 29, 2022
1 parent 5a9aab2 commit e3cbe7e
Show file tree
Hide file tree
Showing 15 changed files with 4,594 additions and 41 deletions.
11 changes: 11 additions & 0 deletions crates/swc_html_codegen/tests/fixture/cdata/input.html
@@ -0,0 +1,11 @@
<!doctype html>
<html lang="en">
<head>
<title>Document</title>
</head>
<body>
<svg viewBox="0 0 100 100">
<text><![CDATA[content]]></text>
</svg>
</body>
</html>
11 changes: 11 additions & 0 deletions crates/swc_html_codegen/tests/fixture/cdata/output.html
@@ -0,0 +1,11 @@
<!DOCTYPE html>
<html lang="en">
<head>
<title>Document</title>
</head>
<body>
<svg viewBox="0 0 100 100">
<text>content</text>
</svg>

</body></html>
8 changes: 8 additions & 0 deletions crates/swc_html_codegen/tests/fixture/cdata/output.min.html
@@ -0,0 +1,8 @@
<!doctype html><html lang=en><head>
<title>Document</title>
</head>
<body>
<svg viewBox="0 0 100 100">
<text>content</text>
</svg>

64 changes: 30 additions & 34 deletions crates/swc_html_parser/src/lexer/mod.rs
Expand Up @@ -134,8 +134,6 @@ struct Comment {

pub(crate) type LexResult<T> = Result<T, ErrorKind>;

// TODO improve `raw` for all tokens (linting + better codegen)

pub struct Lexer<I>
where
I: Input,
Expand Down Expand Up @@ -2682,37 +2680,35 @@ where
// error. Create a comment token whose data is the "[CDATA[" string.
// Switch to the bogus comment state.
Some('[') => match self.consume_next_char() {
Some(c @ 'c' | c @ 'C') => match self.consume_next_char() {
Some(d @ 'd' | d @ 'D') => match self.consume_next_char() {
Some(a1 @ 'a' | a1 @ 'A') => match self.consume_next_char() {
Some(t @ 't' | t @ 'T') => match self.consume_next_char() {
Some(a2 @ 'a' | a2 @ 'A') => {
match self.consume_next_char() {
Some('[') => {
if let Some(false) = self.is_adjusted_current_node_is_element_in_html_namespace {
self.state = State::CdataSection;
} else {
self.emit_error(
ErrorKind::CdataInHtmlContent,
);
let mut data = String::with_capacity(7);

data.push('[');
data.push(c);
data.push(d);
data.push(a1);
data.push(t);
data.push(a2);
data.push('[');

self.create_comment_token(Some(data), "<!");
self.state = State::BogusComment;
}
}
_ => {
anything_else(self);
Some(c @ 'C') => match self.consume_next_char() {
Some(d @ 'D') => match self.consume_next_char() {
Some(a1 @ 'A') => match self.consume_next_char() {
Some(t @ 'T') => match self.consume_next_char() {
Some(a2 @ 'A') => match self.consume_next_char() {
Some('[') => {
if let Some(false) = self.is_adjusted_current_node_is_element_in_html_namespace {
self.state = State::CdataSection;
} else {
self.emit_error(
ErrorKind::CdataInHtmlContent,
);
let mut data = String::with_capacity(7);

data.push('[');
data.push(c);
data.push(d);
data.push(a1);
data.push(t);
data.push(a2);
data.push('[');

self.create_comment_token(Some(data), "<!");
self.state = State::BogusComment;
}
}
_ => {
anything_else(self);
}
}
_ => {
anything_else(self);
Expand All @@ -2725,15 +2721,15 @@ where
_ => {
anything_else(self);
}
},
}
_ => {
anything_else(self);
}
},
}
_ => {
anything_else(self);
}
},
}
// Anything else
// This is an incorrectly-opened-comment parse error. Create a comment token
// whose data is the empty string. Switch to the bogus comment state (don't
Expand Down
12 changes: 7 additions & 5 deletions crates/swc_html_parser/src/parser/mod.rs
Expand Up @@ -508,7 +508,7 @@ where
}
Data::Text { data, raw } => {
let span = if let Some(end_span) = node.end_span.take() {
swc_common::Span::new(start_span.lo(), end_span.hi(), Default::default())
Span::new(start_span.lo(), end_span.hi(), Default::default())
} else {
start_span
};
Expand All @@ -533,7 +533,12 @@ where
fn run(&mut self) -> PResult<()> {
while !self.stopped {
let adjusted_current_node = self.get_adjusted_current_node();
let is_element_in_html_namespace = is_element_in_html_namespace(adjusted_current_node);
let is_element_in_html_namespace =
if is_element_in_html_namespace(adjusted_current_node) {
true
} else {
is_html_integration_point(adjusted_current_node)
};

self.input
.set_adjusted_current_node_to_html_namespace(is_element_in_html_namespace);
Expand Down Expand Up @@ -625,9 +630,6 @@ where
let is_mathml_annotation_xml = is_mathml_annotation_xml(adjusted_current_node);
let is_html_integration_point = is_html_integration_point(adjusted_current_node);

self.input
.set_adjusted_current_node_to_html_namespace(is_element_in_html_namespace);

if self.open_elements_stack.items.is_empty()
|| is_element_in_html_namespace
|| (is_mathml_text_integration_point
Expand Down
112 changes: 112 additions & 0 deletions crates/swc_html_parser/tests/fixture/text/cdata-svg/dom.rust-debug
@@ -0,0 +1,112 @@
| <!DOCTYPE html>
| <html>
| lang="en-US"
| <head>
| "
"
| <meta>
| charset="utf-8"
| "
"
| <title>
| "SVG Demo"
| "
"
| <meta>
| content="width=device-width"
| name="viewport"
| "
"
| "
"
| <body>
| "
"
| <svg svg>
| viewBox="0 0 100 100"
| "
"
| <svg title>
| "A gradient"
| "
"
| <svg linearGradient>
| id="gradient"
| "
"
| <svg stop>
| class="begin"
| offset="0%"
| "
"
| <svg stop>
| class="end"
| offset="100%"
| "
"
| "
"
| <svg rect>
| height="100"
| style="fill:url(#gradient)"
| width="100"
| x="0"
| y="0"
| "
"
| <svg circle>
| cx="50"
| cy="50"
| r="30"
| style="fill:url(#gradient)"
| "
"
| <svg text>
| class="empty"
| "
"
| <svg text>
| "content"
| "
"
| <svg text>
| "&amping"
| "
"
| <svg text>
| "&amping ]"
| "
"
| <svg text>
| "&amping]] "
| "
"
| <svg text>
| "<message>text</message>"
| "
"
| <svg text>
| "</this is malformed!</malformed</malformed & worse>"
| "
"
| <svg text>
| "12"
| "
"
| <svg text>
| "
data
"
| "
"
| <svg text>
| "bracket ]after"
| "
"
| <svg text>
| "abracket ]afterb"
| "
"
| "

"
32 changes: 32 additions & 0 deletions crates/swc_html_parser/tests/fixture/text/cdata-svg/input.html
@@ -0,0 +1,32 @@
<!DOCTYPE html>
<html lang="en-US">
<head>
<meta charset="utf-8" />
<title>SVG Demo</title>
<meta name="viewport" content="width=device-width" />
</head>
<body>
<svg viewBox="0 0 100 100">
<title>A gradient</title>
<linearGradient id="gradient">
<stop class="begin" offset="0%" />
<stop class="end" offset="100%" />
</linearGradient>
<rect x="0" y="0" width="100" height="100" style="fill:url(#gradient)" />
<circle cx="50" cy="50" r="30" style="fill:url(#gradient)" />
<text class="empty"><![CDATA[]]></text>
<text><![CDATA[content]]></text>
<text><![CDATA[&amping]]></text>
<text><![CDATA[&amping ]]]></text>
<text><![CDATA[&amping]] ]]></text>
<text><![CDATA[<message>text</message>]]></text>
<text><![CDATA[</this is malformed!</malformed</malformed & worse>]]></text>
<text><![CDATA[1]]><![CDATA[2]]></text>
<text>
<![CDATA[data]]>
</text>
<text><![CDATA[bracket ]after]]></text>
<text>a<![CDATA[bracket ]after]]>b</text>
</svg>
</body>
</html>

1 comment on commit e3cbe7e

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

| Benchmark suite | Current: e3cbe7e | Previous: b8fe04f | Ratio |
| --- | --- | --- | --- |
| es/full/bugs-1 | 376952 ns/iter (± 63545) | 399120 ns/iter (± 52162) | 0.94 |
| es/full/minify/libraries/antd | 2206985732 ns/iter (± 42259483) | 1993023368 ns/iter (± 44891391) | 1.11 |
| es/full/minify/libraries/d3 | 484973260 ns/iter (± 39467533) | 486241169 ns/iter (± 30508753) | 1.00 |
| es/full/minify/libraries/echarts | 1857187478 ns/iter (± 510194701) | 1738037462 ns/iter (± 52651405) | 1.07 |
| es/full/minify/libraries/jquery | 130091506 ns/iter (± 20044770) | 120839543 ns/iter (± 6184030) | 1.08 |
| es/full/minify/libraries/lodash | 146377125 ns/iter (± 26383586) | 138878875 ns/iter (± 5627895) | 1.05 |
| es/full/minify/libraries/moment | 71872937 ns/iter (± 1502540) | 69705182 ns/iter (± 3568091) | 1.03 |
| es/full/minify/libraries/react | 23211015 ns/iter (± 684772) | 22283327 ns/iter (± 470802) | 1.04 |
| es/full/minify/libraries/terser | 383136017 ns/iter (± 18860054) | 392040037 ns/iter (± 26457884) | 0.98 |
| es/full/minify/libraries/three | 711727581 ns/iter (± 62140383) | 638024730 ns/iter (± 48645090) | 1.12 |
| es/full/minify/libraries/typescript | 4053621880 ns/iter (± 71180315) | 3665441927 ns/iter (± 44857092) | 1.11 |
| es/full/minify/libraries/victory | 999038812 ns/iter (± 62498851) | 887616584 ns/iter (± 30196126) | 1.13 |
| es/full/minify/libraries/vue | 192076773 ns/iter (± 10135522) | 182198013 ns/iter (± 12943376) | 1.05 |
| es/full/codegen/es3 | 34210 ns/iter (± 5190) | 32911 ns/iter (± 990) | 1.04 |
| es/full/codegen/es5 | 33777 ns/iter (± 4969) | 33126 ns/iter (± 716) | 1.02 |
| es/full/codegen/es2015 | 33849 ns/iter (± 4475) | 33183 ns/iter (± 824) | 1.02 |
| es/full/codegen/es2016 | 34213 ns/iter (± 5357) | 33153 ns/iter (± 965) | 1.03 |
| es/full/codegen/es2017 | 33764 ns/iter (± 4339) | 33123 ns/iter (± 1507) | 1.02 |
| es/full/codegen/es2018 | 33728 ns/iter (± 3952) | 33038 ns/iter (± 537) | 1.02 |
| es/full/codegen/es2019 | 33797 ns/iter (± 1742) | 33088 ns/iter (± 276) | 1.02 |
| es/full/codegen/es2020 | 34018 ns/iter (± 1077) | 34985 ns/iter (± 7037) | 0.97 |
| es/full/all/es3 | 213102648 ns/iter (± 20481603) | 192441456 ns/iter (± 21713845) | 1.11 |
| es/full/all/es5 | 195372280 ns/iter (± 18757534) | 185760146 ns/iter (± 10959542) | 1.05 |
| es/full/all/es2015 | 155940559 ns/iter (± 14707035) | 148849281 ns/iter (± 15487614) | 1.05 |
| es/full/all/es2016 | 150608362 ns/iter (± 16151921) | 151247700 ns/iter (± 12194811) | 1.00 |
| es/full/all/es2017 | 149342192 ns/iter (± 12163642) | 136116902 ns/iter (± 5166314) | 1.10 |
| es/full/all/es2018 | 148412554 ns/iter (± 17358285) | 135576049 ns/iter (± 14456838) | 1.09 |
| es/full/all/es2019 | 147093097 ns/iter (± 16962553) | 137973891 ns/iter (± 6703669) | 1.07 |
| es/full/all/es2020 | 141669916 ns/iter (± 9577567) | 134284640 ns/iter (± 10302669) | 1.05 |
| es/full/parser | 735105 ns/iter (± 43330) | 723394 ns/iter (± 58722) | 1.02 |
| es/full/base/fixer | 26692 ns/iter (± 2253) | 26359 ns/iter (± 1384) | 1.01 |
| es/full/base/resolver_and_hygiene | 93471 ns/iter (± 12030) | 91198 ns/iter (± 7217) | 1.02 |
| serialization of ast node | 217 ns/iter (± 11) | 218 ns/iter (± 6) | 1.00 |
| serialization of serde | 223 ns/iter (± 32) | 223 ns/iter (± 11) | 1 |

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.