Skip to content

Commit

Permalink
fix(xml/parser): Fix parsing of comments (#6449)
Browse files Browse the repository at this point in the history
  • Loading branch information
alexander-akait committed Nov 18, 2022
1 parent 84c0215 commit 3a0d98b
Show file tree
Hide file tree
Showing 206 changed files with 7,712 additions and 0 deletions.
2 changes: 2 additions & 0 deletions crates/swc_xml_parser/src/error.rs
Expand Up @@ -81,6 +81,7 @@ impl Error {
"Missing whitespace between doctype public and system identifiers".into()
}
ErrorKind::NestedComment => "Nested comment".into(),
ErrorKind::DoubleHyphenWithInComment => "Double hyphen within comment".into(),
ErrorKind::NoncharacterInInputStream => "Noncharacter in input stream".into(),
ErrorKind::SurrogateInInputStream => "Surrogate in input stream".into(),
ErrorKind::SurrogateCharacterReference => "Surrogate character reference".into(),
Expand Down Expand Up @@ -150,6 +151,7 @@ pub enum ErrorKind {
MissingWhitespaceBeforeDoctypeName,
MissingWhitespaceBetweenDoctypePublicAndSystemIdentifiers,
NestedComment,
DoubleHyphenWithInComment,
NoncharacterInInputStream,
SurrogateInInputStream,
SurrogateCharacterReference,
Expand Down
1 change: 1 addition & 0 deletions crates/swc_xml_parser/src/lexer/mod.rs
Expand Up @@ -1656,6 +1656,7 @@ where
// Append a U+002D HYPHEN-MINUS character (-) to the comment token's data.
Some(c @ '-') => {
self.append_to_comment_token(c, c);
self.emit_error(ErrorKind::DoubleHyphenWithInComment);
}
// EOF
// This is an eof-in-comment parse error. Emit the current comment token.
Expand Down
43 changes: 43 additions & 0 deletions crates/swc_xml_parser/tests/fixture/comments/dom.rust-debug
@@ -0,0 +1,43 @@
<?xml version="1.0" encoding="UTF-8">
| <root>
| "
"
| <!-- comment -->
| "
"
| <!-- <head> -->
| "
"
| <!-- <!-x -->
| "
"
| <!-- <!x -->
| "
"
| <!-- <<!x -->
| "
"
| <!-- <<!-x -->
| "
"
| <!-- <x -->
| "
"
| <!-- <> -->
| "
"
| <!-- < -->
| "
"
| <!-- <! -->
| "
"
| <!-- -->
| "
"
| <!-- -a->-a -->
| "
"
| <!-- -<!-test-> -->
| "
"
17 changes: 17 additions & 0 deletions crates/swc_xml_parser/tests/fixture/comments/input.xml
@@ -0,0 +1,17 @@
<?xml version="1.0" encoding="UTF-8"?>
<root>
<!--comment-->
<!--<head>-->
<!--<!-x-->
<!--<!x-->
<!--<<!x-->
<!--<<!-x-->
<!--<x-->
<!--<>-->
<!--<-->
<!--<!-->
<!---->
<!---a->-a-->
<!---<!-test->-->
</root>

302 changes: 302 additions & 0 deletions crates/swc_xml_parser/tests/fixture/comments/output.json
@@ -0,0 +1,302 @@
{
"type": "Document",
"span": {
"start": 1,
"end": 264,
"ctxt": 0
},
"children": [
{
"type": "ProcessingInstruction",
"span": {
"start": 1,
"end": 39,
"ctxt": 0
},
"target": "xml",
"data": "version=\"1.0\" encoding=\"UTF-8\""
},
{
"type": "Element",
"span": {
"start": 40,
"end": 262,
"ctxt": 0
},
"tagName": "root",
"attributes": [],
"children": [
{
"type": "Text",
"span": {
"start": 46,
"end": 51,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 51,
"end": 65,
"ctxt": 0
},
"data": "comment",
"raw": "<!--comment-->"
},
{
"type": "Text",
"span": {
"start": 65,
"end": 70,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 70,
"end": 83,
"ctxt": 0
},
"data": "<head>",
"raw": "<!--<head>-->"
},
{
"type": "Text",
"span": {
"start": 83,
"end": 88,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 88,
"end": 99,
"ctxt": 0
},
"data": "<!-x",
"raw": "<!--<!-x-->"
},
{
"type": "Text",
"span": {
"start": 99,
"end": 104,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 104,
"end": 114,
"ctxt": 0
},
"data": "<!x",
"raw": "<!--<!x-->"
},
{
"type": "Text",
"span": {
"start": 114,
"end": 119,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 119,
"end": 130,
"ctxt": 0
},
"data": "<<!x",
"raw": "<!--<<!x-->"
},
{
"type": "Text",
"span": {
"start": 130,
"end": 135,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 135,
"end": 147,
"ctxt": 0
},
"data": "<<!-x",
"raw": "<!--<<!-x-->"
},
{
"type": "Text",
"span": {
"start": 147,
"end": 152,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 152,
"end": 161,
"ctxt": 0
},
"data": "<x",
"raw": "<!--<x-->"
},
{
"type": "Text",
"span": {
"start": 161,
"end": 166,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 166,
"end": 175,
"ctxt": 0
},
"data": "<>",
"raw": "<!--<>-->"
},
{
"type": "Text",
"span": {
"start": 175,
"end": 180,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 180,
"end": 188,
"ctxt": 0
},
"data": "<",
"raw": "<!--<-->"
},
{
"type": "Text",
"span": {
"start": 188,
"end": 193,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 193,
"end": 202,
"ctxt": 0
},
"data": "<!",
"raw": "<!--<!-->"
},
{
"type": "Text",
"span": {
"start": 202,
"end": 207,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 207,
"end": 214,
"ctxt": 0
},
"data": "",
"raw": "<!---->"
},
{
"type": "Text",
"span": {
"start": 214,
"end": 219,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 219,
"end": 232,
"ctxt": 0
},
"data": "-a->-a",
"raw": "<!---a->-a-->"
},
{
"type": "Text",
"span": {
"start": 232,
"end": 237,
"ctxt": 0
},
"data": "\n ",
"raw": "\n "
},
{
"type": "Comment",
"span": {
"start": 237,
"end": 254,
"ctxt": 0
},
"data": "-<!-test->",
"raw": "<!---<!-test->-->"
},
{
"type": "Text",
"span": {
"start": 254,
"end": 255,
"ctxt": 0
},
"data": "\n",
"raw": "\n"
}
]
}
]
}

1 comment on commit 3a0d98b

@github-actions
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Benchmark

Benchmark suite Current: 3a0d98b Previous: 7f6bf59 Ratio
es/full/bugs-1 349469 ns/iter (± 26789) 348397 ns/iter (± 15399) 1.00
es/full/minify/libraries/antd 1876656822 ns/iter (± 56033831) 1863807999 ns/iter (± 162149761) 1.01
es/full/minify/libraries/d3 404384040 ns/iter (± 21025219) 413098744 ns/iter (± 16603723) 0.98
es/full/minify/libraries/echarts 1608705814 ns/iter (± 84598762) 1565493732 ns/iter (± 14645178) 1.03
es/full/minify/libraries/jquery 106647129 ns/iter (± 4016188) 102335223 ns/iter (± 6117583) 1.04
es/full/minify/libraries/lodash 117891899 ns/iter (± 7177772) 121707896 ns/iter (± 4575117) 0.97
es/full/minify/libraries/moment 59007063 ns/iter (± 2215825) 61469172 ns/iter (± 1971469) 0.96
es/full/minify/libraries/react 21631361 ns/iter (± 1280464) 20343176 ns/iter (± 643850) 1.06
es/full/minify/libraries/terser 315809725 ns/iter (± 20152727) 315187300 ns/iter (± 15549644) 1.00
es/full/minify/libraries/three 560806557 ns/iter (± 39506166) 561959490 ns/iter (± 13042263) 1.00
es/full/minify/libraries/typescript 3427217507 ns/iter (± 62591104) 3355215596 ns/iter (± 23508635) 1.02
es/full/minify/libraries/victory 835004104 ns/iter (± 25292111) 839499341 ns/iter (± 15145850) 0.99
es/full/minify/libraries/vue 156789506 ns/iter (± 33803224) 151729042 ns/iter (± 10764046) 1.03
es/full/codegen/es3 35363 ns/iter (± 2463) 33263 ns/iter (± 402) 1.06
es/full/codegen/es5 34630 ns/iter (± 1842) 33369 ns/iter (± 299) 1.04
es/full/codegen/es2015 34506 ns/iter (± 4392) 33415 ns/iter (± 277) 1.03
es/full/codegen/es2016 34684 ns/iter (± 1140) 33426 ns/iter (± 919) 1.04
es/full/codegen/es2017 34941 ns/iter (± 2274) 33668 ns/iter (± 998) 1.04
es/full/codegen/es2018 34982 ns/iter (± 2754) 33349 ns/iter (± 802) 1.05
es/full/codegen/es2019 34067 ns/iter (± 1119) 33560 ns/iter (± 2483) 1.02
es/full/codegen/es2020 34735 ns/iter (± 3145) 33540 ns/iter (± 273) 1.04
es/full/all/es3 203149043 ns/iter (± 17675770) 194342651 ns/iter (± 6657917) 1.05
es/full/all/es5 194456614 ns/iter (± 15624268) 180355266 ns/iter (± 5650393) 1.08
es/full/all/es2015 154106465 ns/iter (± 14656696) 145237646 ns/iter (± 4606421) 1.06
es/full/all/es2016 154841236 ns/iter (± 10726170) 144008478 ns/iter (± 5280613) 1.08
es/full/all/es2017 152833698 ns/iter (± 10249986) 143484000 ns/iter (± 5366385) 1.07
es/full/all/es2018 152184171 ns/iter (± 9003410) 142492911 ns/iter (± 3674110) 1.07
es/full/all/es2019 152409785 ns/iter (± 62782543) 141674187 ns/iter (± 4524276) 1.08
es/full/all/es2020 147463031 ns/iter (± 13557910) 135341954 ns/iter (± 3317944) 1.09
es/full/parser 774906 ns/iter (± 51842) 712237 ns/iter (± 20203) 1.09
es/full/base/fixer 28208 ns/iter (± 1812) 26786 ns/iter (± 979) 1.05
es/full/base/resolver_and_hygiene 94926 ns/iter (± 10953) 91038 ns/iter (± 1991) 1.04
serialization of ast node 224 ns/iter (± 10) 216 ns/iter (± 2) 1.04
serialization of serde 228 ns/iter (± 29) 217 ns/iter (± 3) 1.05

This comment was automatically generated by workflow using github-action-benchmark.

Please sign in to comment.