From 07e37153f66d27ca18ae349fdf119dda9a609906 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 03:43:56 +0300 Subject: [PATCH 1/9] fix(ecma/parser): regexp parsing --- crates/swc_ecma_parser/src/lexer/mod.rs | 15 +++-- crates/swc_ecma_parser/src/lexer/state.rs | 13 ++++ crates/swc_ecma_parser/src/parser/expr.rs | 70 +++++++++++++++------- crates/swc_ecma_parser/src/parser/input.rs | 13 ++++ crates/swc_ecma_parser/src/parser/stmt.rs | 14 +++++ 5 files changed, 94 insertions(+), 31 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 26b1a3f46b52..235adc36fe1f 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -695,12 +695,6 @@ impl<'a, I: Input> Lexer<'a, I> { #[inline(never)] fn read_slash(&mut self) -> LexResult> { debug_assert_eq!(self.cur(), Some('/')); - // let start = self.cur_pos(); - - // Regex - if self.state.is_expr_allowed { - return self.read_regexp().map(Some); - } // Divide operator self.bump(); @@ -1121,12 +1115,12 @@ impl<'a, I: Input> Lexer<'a, I> { /// Expects current char to be '/' fn read_regexp(&mut self) -> LexResult { - debug_assert_eq!(self.cur(), Some('/')); let start = self.cur_pos(); + self.bump(); let (mut escaped, mut in_class) = (false, false); - // let content_start = self.cur_pos(); + let content = self.with_buf(|l, buf| { while let Some(c) = l.cur() { // This is ported from babel. @@ -1287,6 +1281,11 @@ impl<'a, I: Input> Lexer<'a, I> { pub fn set_expr_allowed(&mut self, allow: bool) { self.state.is_expr_allowed = allow; } + + #[inline] + pub fn set_regexp_allowed(&mut self, allow: bool) { + self.state.is_regexp_allowed = allow; + } } fn pos_span(p: BytePos) -> Span { diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 0147297f9998..1b1a203915e8 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -22,6 +22,7 @@ use crate::{ #[derive(Clone)] pub(super) struct State { pub is_expr_allowed: bool, + pub is_regexp_allowed: bool, /// if line break exists between previous token and new token? pub had_line_break: bool, /// TODO: Remove this field. @@ -152,6 +153,11 @@ impl Tokens for Lexer<'_, I> { self.set_expr_allowed(allow) } + #[inline] + fn set_regexp_allowed(&mut self, allow: bool) { + self.state.is_regexp_allowed = allow; + } + #[inline] fn token_context(&self) -> &TokenContexts { &self.state.context @@ -191,6 +197,12 @@ impl<'a, I: Input> Iterator for Lexer<'a, I> { let mut start = self.cur_pos(); let res = (|| -> Result, _> { + if self.state.is_regexp_allowed { + if let Ok(regexp) = self.read_regexp() { + return Ok(Some(regexp)); + } + } + if self.state.is_first { if let Some(shebang) = self.read_shebang()? { return Ok(Some(Token::Shebang(shebang))); @@ -363,6 +375,7 @@ impl State { State { is_expr_allowed: true, + is_regexp_allowed: false, is_first: true, had_line_break: false, prev_hi: start_pos, diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs index 227c6b4ec39b..88a51fb5a85f 100644 --- a/crates/swc_ecma_parser/src/parser/expr.rs +++ b/crates/swc_ecma_parser/src/parser/expr.rs @@ -332,31 +332,55 @@ impl Parser { } // Regexp - Token::Regex(..) 
=> match bump!(self) { - Token::Regex(exp, flags) => { - let span = span!(self, start); - - let mut flags_count = flags.chars().fold( - AHashMap::::default(), - |mut map, flag| { - let key = match flag { - 'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd' => flag, - _ => '\u{0000}', // special marker for unknown flags - }; - map.entry(key).and_modify(|count| *count += 1).or_insert(1); - map - }, - ); - if flags_count.remove(&'\u{0000}').is_some() { - self.emit_err(span, SyntaxError::UnknownRegExpFlags); - } - if let Some((flag, _)) = flags_count.iter().find(|(_, count)| **count > 1) { - self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag)); + tok!('/') | tok!("/=") => { + bump!(self); + + self.input.set_regexp_allowed(true); + + let token = self.input.cur(); + + self.input.set_regexp_allowed(false); + + match token { + Some(Token::Regex(..)) => match bump!(self) { + Token::Regex(exp, flags) => { + let span = span!(self, start); + + let mut flags_count = flags.chars().fold( + AHashMap::::default(), + |mut map, flag| { + let key = match flag { + 'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd' => flag, + _ => '\u{0000}', // special marker for unknown flags + }; + map.entry(key).and_modify(|count| *count += 1).or_insert(1); + map + }, + ); + + if flags_count.remove(&'\u{0000}').is_some() { + self.emit_err(span, SyntaxError::UnknownRegExpFlags); + } + + if let Some((flag, _)) = + flags_count.iter().find(|(_, count)| **count > 1) + { + self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag)); + } + + return Ok(Box::new(Expr::Lit(Lit::Regex(Regex { + span, + exp, + flags, + })))); + } + _ => unreachable!(), + }, + _ => { + unreachable!(); } - return Ok(Box::new(Expr::Lit(Lit::Regex(Regex { span, exp, flags })))); } - _ => unreachable!(), - }, + } tok!('`') => { // parse template literal diff --git a/crates/swc_ecma_parser/src/parser/input.rs b/crates/swc_ecma_parser/src/parser/input.rs index 2931705f8371..0cee7d9939b3 100644 --- a/crates/swc_ecma_parser/src/parser/input.rs +++ b/crates/swc_ecma_parser/src/parser/input.rs @@ -24,6 +24,8 @@ pub trait Tokens: Clone + Iterator { } fn set_expr_allowed(&mut self, allow: bool); + fn set_regexp_allowed(&mut self, allow: bool); + fn token_context(&self) -> &lexer::TokenContexts; fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; fn set_token_context(&mut self, _c: lexer::TokenContexts); @@ -110,6 +112,8 @@ impl Tokens for TokensInput { fn set_expr_allowed(&mut self, _: bool) {} + fn set_regexp_allowed(&mut self, _: bool) {} + fn token_context(&self) -> &TokenContexts { &self.token_ctx } @@ -222,6 +226,10 @@ impl Tokens for Capturing { self.inner.set_expr_allowed(allow) } + fn set_regexp_allowed(&mut self, allow: bool) { + self.inner.set_regexp_allowed(allow); + } + fn token_context(&self) -> &TokenContexts { self.inner.token_context() } @@ -467,6 +475,11 @@ impl Buffer { self.iter.set_expr_allowed(allow) } + #[inline] + pub fn set_regexp_allowed(&mut self, allow: bool) { + self.iter.set_regexp_allowed(allow); + } + #[inline] pub(crate) fn token_context(&self) -> &lexer::TokenContexts { self.iter.token_context() diff --git a/crates/swc_ecma_parser/src/parser/stmt.rs b/crates/swc_ecma_parser/src/parser/stmt.rs index 3ad1bfbf285f..e73824a2701a 100644 --- a/crates/swc_ecma_parser/src/parser/stmt.rs +++ b/crates/swc_ecma_parser/src/parser/stmt.rs @@ -2527,4 +2527,18 @@ const foo;"#; test_parser(src, Default::default(), |p| p.parse_script()); } + + #[test] + fn issue_6322() { + let src = "for ( ; { } / 1 ; ) ;"; + + test_parser(src, Default::default(), 
|p| p.parse_script()); + } + + #[test] + fn issue_6323() { + let src = "let x = 0 < { } / 0 ;"; + + test_parser(src, Default::default(), |p| p.parse_script()); + } } From c70e7a6b8468811e00f03c70e94f497fb9a00120 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 03:54:30 +0300 Subject: [PATCH 2/9] refactor: better naming --- crates/swc_ecma_parser/src/lexer/mod.rs | 4 ++-- crates/swc_ecma_parser/src/lexer/state.rs | 10 +++++----- crates/swc_ecma_parser/src/parser/expr.rs | 6 +++--- crates/swc_ecma_parser/src/parser/input.rs | 12 ++++++------ 4 files changed, 16 insertions(+), 16 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 235adc36fe1f..32cf845901ca 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -1283,8 +1283,8 @@ impl<'a, I: Input> Lexer<'a, I> { } #[inline] - pub fn set_regexp_allowed(&mut self, allow: bool) { - self.state.is_regexp_allowed = allow; + pub fn set_next_regexp(&mut self, value: bool) { + self.state.next_regexp = value; } } diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 1b1a203915e8..3ef6edb0fd0f 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -22,7 +22,7 @@ use crate::{ #[derive(Clone)] pub(super) struct State { pub is_expr_allowed: bool, - pub is_regexp_allowed: bool, + pub next_regexp: bool, /// if line break exists between previous token and new token? pub had_line_break: bool, /// TODO: Remove this field. @@ -154,8 +154,8 @@ impl Tokens for Lexer<'_, I> { } #[inline] - fn set_regexp_allowed(&mut self, allow: bool) { - self.state.is_regexp_allowed = allow; + fn set_next_regexp(&mut self, value: bool) { + self.state.next_regexp = value; } #[inline] @@ -197,7 +197,7 @@ impl<'a, I: Input> Iterator for Lexer<'a, I> { let mut start = self.cur_pos(); let res = (|| -> Result, _> { - if self.state.is_regexp_allowed { + if self.state.next_regexp { if let Ok(regexp) = self.read_regexp() { return Ok(Some(regexp)); } @@ -375,7 +375,7 @@ impl State { State { is_expr_allowed: true, - is_regexp_allowed: false, + next_regexp: false, is_first: true, had_line_break: false, prev_hi: start_pos, diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs index 88a51fb5a85f..65297712c7c2 100644 --- a/crates/swc_ecma_parser/src/parser/expr.rs +++ b/crates/swc_ecma_parser/src/parser/expr.rs @@ -335,15 +335,15 @@ impl Parser { tok!('/') | tok!("/=") => { bump!(self); - self.input.set_regexp_allowed(true); + self.input.set_next_regexp(true); let token = self.input.cur(); - self.input.set_regexp_allowed(false); - match token { Some(Token::Regex(..)) => match bump!(self) { Token::Regex(exp, flags) => { + self.input.set_next_regexp(false); + let span = span!(self, start); let mut flags_count = flags.chars().fold( diff --git a/crates/swc_ecma_parser/src/parser/input.rs b/crates/swc_ecma_parser/src/parser/input.rs index 0cee7d9939b3..bfcd20e99877 100644 --- a/crates/swc_ecma_parser/src/parser/input.rs +++ b/crates/swc_ecma_parser/src/parser/input.rs @@ -24,7 +24,7 @@ pub trait Tokens: Clone + Iterator { } fn set_expr_allowed(&mut self, allow: bool); - fn set_regexp_allowed(&mut self, allow: bool); + fn set_next_regexp(&mut self, value: bool); fn token_context(&self) -> &lexer::TokenContexts; fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; @@ -112,7 +112,7 @@ impl Tokens for TokensInput { fn 
set_expr_allowed(&mut self, _: bool) {} - fn set_regexp_allowed(&mut self, _: bool) {} + fn set_next_regexp(&mut self, _: bool) {} fn token_context(&self) -> &TokenContexts { &self.token_ctx @@ -226,8 +226,8 @@ impl Tokens for Capturing { self.inner.set_expr_allowed(allow) } - fn set_regexp_allowed(&mut self, allow: bool) { - self.inner.set_regexp_allowed(allow); + fn set_next_regexp(&mut self, value: bool) { + self.inner.set_next_regexp(value); } fn token_context(&self) -> &TokenContexts { @@ -476,8 +476,8 @@ impl Buffer { } #[inline] - pub fn set_regexp_allowed(&mut self, allow: bool) { - self.iter.set_regexp_allowed(allow); + pub fn set_next_regexp(&mut self, value: bool) { + self.iter.set_next_regexp(value); } #[inline] From ead943f01d7bb97c1d696206c5b0cdaf56ad547a Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 04:23:30 +0300 Subject: [PATCH 3/9] fix: logic --- crates/swc_ecma_parser/src/lexer/mod.rs | 10 +++++++--- crates/swc_ecma_parser/src/lexer/state.rs | 12 ++++++------ crates/swc_ecma_parser/src/parser/expr.rs | 4 ++-- crates/swc_ecma_parser/src/parser/input.rs | 12 ++++++------ 4 files changed, 21 insertions(+), 17 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index 32cf845901ca..a1d364d26d84 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -1114,7 +1114,11 @@ impl<'a, I: Input> Lexer<'a, I> { } /// Expects current char to be '/' - fn read_regexp(&mut self) -> LexResult { + fn read_regexp(&mut self, start: BytePos) -> LexResult { + self.input.reset_to(start); + + debug_assert_eq!(self.cur(), Some('/')); + let start = self.cur_pos(); self.bump(); @@ -1283,8 +1287,8 @@ impl<'a, I: Input> Lexer<'a, I> { } #[inline] - pub fn set_next_regexp(&mut self, value: bool) { - self.state.next_regexp = value; + pub fn set_next_regexp(&mut self, start: Option) { + self.state.next_regexp = start; } } diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index 3ef6edb0fd0f..ed386f2c3e18 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -22,7 +22,7 @@ use crate::{ #[derive(Clone)] pub(super) struct State { pub is_expr_allowed: bool, - pub next_regexp: bool, + pub next_regexp: Option, /// if line break exists between previous token and new token? pub had_line_break: bool, /// TODO: Remove this field. 
@@ -154,8 +154,8 @@ impl Tokens for Lexer<'_, I> { } #[inline] - fn set_next_regexp(&mut self, value: bool) { - self.state.next_regexp = value; + fn set_next_regexp(&mut self, start: Option) { + self.state.next_regexp = start; } #[inline] @@ -197,8 +197,8 @@ impl<'a, I: Input> Iterator for Lexer<'a, I> { let mut start = self.cur_pos(); let res = (|| -> Result, _> { - if self.state.next_regexp { - if let Ok(regexp) = self.read_regexp() { + if let Some(start) = self.state.next_regexp { + if let Ok(regexp) = self.read_regexp(start) { return Ok(Some(regexp)); } } @@ -375,7 +375,7 @@ impl State { State { is_expr_allowed: true, - next_regexp: false, + next_regexp: None, is_first: true, had_line_break: false, prev_hi: start_pos, diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs index 65297712c7c2..5a517b0401e6 100644 --- a/crates/swc_ecma_parser/src/parser/expr.rs +++ b/crates/swc_ecma_parser/src/parser/expr.rs @@ -335,14 +335,14 @@ impl Parser { tok!('/') | tok!("/=") => { bump!(self); - self.input.set_next_regexp(true); + self.input.set_next_regexp(Some(start)); let token = self.input.cur(); match token { Some(Token::Regex(..)) => match bump!(self) { Token::Regex(exp, flags) => { - self.input.set_next_regexp(false); + self.input.set_next_regexp(None); let span = span!(self, start); diff --git a/crates/swc_ecma_parser/src/parser/input.rs b/crates/swc_ecma_parser/src/parser/input.rs index bfcd20e99877..e0c46b735feb 100644 --- a/crates/swc_ecma_parser/src/parser/input.rs +++ b/crates/swc_ecma_parser/src/parser/input.rs @@ -24,7 +24,7 @@ pub trait Tokens: Clone + Iterator { } fn set_expr_allowed(&mut self, allow: bool); - fn set_next_regexp(&mut self, value: bool); + fn set_next_regexp(&mut self, start: Option); fn token_context(&self) -> &lexer::TokenContexts; fn token_context_mut(&mut self) -> &mut lexer::TokenContexts; @@ -112,7 +112,7 @@ impl Tokens for TokensInput { fn set_expr_allowed(&mut self, _: bool) {} - fn set_next_regexp(&mut self, _: bool) {} + fn set_next_regexp(&mut self, _: Option) {} fn token_context(&self) -> &TokenContexts { &self.token_ctx @@ -226,8 +226,8 @@ impl Tokens for Capturing { self.inner.set_expr_allowed(allow) } - fn set_next_regexp(&mut self, value: bool) { - self.inner.set_next_regexp(value); + fn set_next_regexp(&mut self, start: Option) { + self.inner.set_next_regexp(start); } fn token_context(&self) -> &TokenContexts { @@ -476,8 +476,8 @@ impl Buffer { } #[inline] - pub fn set_next_regexp(&mut self, value: bool) { - self.iter.set_next_regexp(value); + pub fn set_next_regexp(&mut self, start: Option) { + self.iter.set_next_regexp(start); } #[inline] From d99734f6f01568315050de1f20d363977f2af2f3 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 05:10:17 +0300 Subject: [PATCH 4/9] fix: logic --- crates/swc_ecma_parser/src/parser/expr.rs | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs index 5a517b0401e6..83fa30c70ef3 100644 --- a/crates/swc_ecma_parser/src/parser/expr.rs +++ b/crates/swc_ecma_parser/src/parser/expr.rs @@ -376,9 +376,7 @@ impl Parser { } _ => unreachable!(), }, - _ => { - unreachable!(); - } + _ => {} } } From 6b2e495c7a0ed74584e6e2ac82e338b1e6da421d Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 06:30:23 +0300 Subject: [PATCH 5/9] test: fix --- crates/swc_ecma_parser/src/lexer/tests.rs | 62 +++++++++++++++++++---- 1 file changed, 53 insertions(+), 9 
deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/tests.rs b/crates/swc_ecma_parser/src/lexer/tests.rs index d665b2c6f1b1..940c8abb281d 100644 --- a/crates/swc_ecma_parser/src/lexer/tests.rs +++ b/crates/swc_ecma_parser/src/lexer/tests.rs @@ -416,7 +416,9 @@ fn regexp_unary_void() { lex(Syntax::default(), "void /test/"), vec![ Void.span(0..4).lb(), - Regex("test".into(), "".into()).span(5..11), + BinOp(Div).span(5), + Word(Word::Ident("test".into())).span(6..10), + BinOp(Div).span(10), ] ); assert_eq!( @@ -424,7 +426,9 @@ fn regexp_unary_void() { vec![ Void.span(0..4).lb(), LParen.span(5..6), - Regex("test".into(), "".into()).span(6..12), + BinOp(Div).span(6), + Word(Word::Ident("test".into())).span(7..11), + BinOp(Div).span(11), RParen.span(12..13), ] ); @@ -483,13 +487,28 @@ fn simple_regex() { vec![ "x".span(0).lb(), Assign.span(2), - Regex("42".into(), "i".into(),).span(4..9), + BinOp(Div).span(4), + 42.span(5..7), + BinOp(Div).span(7), + Word(Word::Ident("i".into())).span(8), ], ); assert_eq!( lex(Syntax::default(), "/42/"), - vec![Regex("42".into(), "".into()).span(0..4).lb(),] + vec![ + TokenAndSpan { + token: Token::BinOp(BinOpToken::Div), + had_line_break: true, + span: Span { + lo: BytePos(1), + hi: BytePos(2), + ctxt: Default::default(), + }, + }, + 42.span(1..3), + BinOp(Div).span(3) + ] ); } @@ -508,7 +527,13 @@ fn complex_regex() { RParen, LBrace, RBrace, - Regex("42".into(), "i".into(),), + BinOp(Div), + Num { + value: 42.0, + raw: Atom::new("42") + }, + BinOp(Div), + Word(Word::Ident("i".into())), ] ) } @@ -595,7 +620,9 @@ fn after_if() { RParen.span(4), LBrace.span(5), RBrace.span(6), - Regex("y".into(), "".into()).span(8..11), + Div.span(8), + "y".span(9), + Div.span(10), Dot.span(11), "test".span(12..16), LParen.span(16), @@ -639,7 +666,9 @@ fn migrated_0002() { vec![ "tokenize".span(0..8).lb(), LParen.span(8), - Regex("42".into(), "".into()).span(9..13), + BinOp(Div).span(9), + 42.span(10..12), + BinOp(Div).span(12), RParen.span(13), ], ) @@ -671,7 +700,9 @@ fn migrated_0004() { RParen.span(11), LBrace.span(12), RBrace.span(13), - Regex("42".into(), "".into()).span(15..19), + BinOp(Div).span(15), + 42.span(16..18), + BinOp(Div).span(18), ] ); } @@ -707,7 +738,20 @@ fn migrated_0006() { vec![ LBrace.span(0).lb(), RBrace.span(1), - Regex("42".into(), "".into()).span(3..7), + BinOp(Div).span(3), + TokenAndSpan { + token: Num { + value: 42.0, + raw: "42".into(), + }, + had_line_break: false, + span: Span { + lo: BytePos(5), + hi: BytePos(7), + ctxt: Default::default(), + } + }, + BinOp(Div).span(6), ], ) } From 433fc94e61b950d21f0c3b32d64da64ca19c031e Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 06:33:53 +0300 Subject: [PATCH 6/9] fix: clippy --- crates/swc_ecma_parser/src/parser/expr.rs | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs index 83fa30c70ef3..d45ec0ed6d8d 100644 --- a/crates/swc_ecma_parser/src/parser/expr.rs +++ b/crates/swc_ecma_parser/src/parser/expr.rs @@ -337,13 +337,11 @@ impl Parser { self.input.set_next_regexp(Some(start)); - let token = self.input.cur(); + if let Some(Token::Regex(..)) = self.input.cur() { + self.input.set_next_regexp(None); - match token { - Some(Token::Regex(..)) => match bump!(self) { + match bump!(self) { Token::Regex(exp, flags) => { - self.input.set_next_regexp(None); - let span = span!(self, start); let mut flags_count = flags.chars().fold( @@ -375,8 +373,7 @@ impl Parser { })))); 
} _ => unreachable!(), - }, - _ => {} + } } } From 8417c446db57ab90eb17fd0aa3fa195108c36bed Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 06:42:30 +0300 Subject: [PATCH 7/9] fix: yield logic --- crates/swc_ecma_parser/src/parser/expr.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/crates/swc_ecma_parser/src/parser/expr.rs b/crates/swc_ecma_parser/src/parser/expr.rs index d45ec0ed6d8d..3f73a9a70a66 100644 --- a/crates/swc_ecma_parser/src/parser/expr.rs +++ b/crates/swc_ecma_parser/src/parser/expr.rs @@ -1899,7 +1899,10 @@ impl Parser { } if is!(self, ';') - || (!is!(self, '*') && !cur!(self, false).map(Token::starts_expr).unwrap_or(true)) + || (!is!(self, '*') + && !is!(self, '/') + && !is!(self, "/=") + && !cur!(self, false).map(Token::starts_expr).unwrap_or(true)) { Ok(Box::new(Expr::Yield(YieldExpr { span: span!(self, start), From 9921ec01df95c6c991cba5bdb88c58859facfe5d Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 07:00:58 +0300 Subject: [PATCH 8/9] test: update --- .../tests/jsx/errors/adjacent-tags/input.js.stderr | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr b/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr index 1d24f370b5cf..10cb2e779884 100644 --- a/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr +++ b/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr @@ -1,6 +1,7 @@ - x Unterminated regexp literal + x Unexpected token ``. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, + | ` for template literal, (, or an identifier ,-[$DIR/tests/jsx/errors/adjacent-tags/input.js:1:1] 1 | var x =
<div>one</div><div>two</div>
; - : ^^^^^^ + : ^ `---- From 49aa674c2194e51d81f49b251ef79bd0cfc77729 Mon Sep 17 00:00:00 2001 From: "alexander.akait" Date: Fri, 18 Nov 2022 07:48:19 +0300 Subject: [PATCH 9/9] test: update --- crates/swc_ecma_parser/src/lexer/mod.rs | 12 ++++++++---- crates/swc_ecma_parser/src/lexer/state.rs | 4 +--- .../tests/jsx/errors/adjacent-tags/input.js.stderr | 5 ++--- .../fail/095bea002b10b8e1.js.stderr | 4 ++-- .../fail/97fc32bf01227e39.js.stderr | 4 ++-- 5 files changed, 15 insertions(+), 14 deletions(-) diff --git a/crates/swc_ecma_parser/src/lexer/mod.rs b/crates/swc_ecma_parser/src/lexer/mod.rs index a1d364d26d84..4d54641ec7bf 100644 --- a/crates/swc_ecma_parser/src/lexer/mod.rs +++ b/crates/swc_ecma_parser/src/lexer/mod.rs @@ -1130,7 +1130,9 @@ impl<'a, I: Input> Lexer<'a, I> { // This is ported from babel. // Seems like regexp literal cannot contain linebreak. if c.is_line_terminator() { - l.error(start, SyntaxError::UnterminatedRegExp)?; + let span = l.span(start); + + return Err(Error::new(span, SyntaxError::UnterminatedRegExp)); } if escaped { @@ -1143,20 +1145,22 @@ impl<'a, I: Input> Lexer<'a, I> { '/' if !in_class => break, _ => {} } + escaped = c == '\\'; } + l.bump(); buf.push(c); } Ok(Atom::new(&**buf)) })?; - // let content_span = Span::new(content_start, self.cur_pos(), - // Default::default()); // input is terminated without following `/` if !self.is(b'/') { - self.error(start, SyntaxError::UnterminatedRegExp)?; + let span = self.span(start); + + return Err(Error::new(span, SyntaxError::UnterminatedRegExp)); } self.bump(); // '/' diff --git a/crates/swc_ecma_parser/src/lexer/state.rs b/crates/swc_ecma_parser/src/lexer/state.rs index ed386f2c3e18..58cebcf42e49 100644 --- a/crates/swc_ecma_parser/src/lexer/state.rs +++ b/crates/swc_ecma_parser/src/lexer/state.rs @@ -198,9 +198,7 @@ impl<'a, I: Input> Iterator for Lexer<'a, I> { let res = (|| -> Result, _> { if let Some(start) = self.state.next_regexp { - if let Ok(regexp) = self.read_regexp(start) { - return Ok(Some(regexp)); - } + return Ok(Some(self.read_regexp(start)?)); } if self.state.is_first { diff --git a/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr b/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr index 10cb2e779884..1d24f370b5cf 100644 --- a/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr +++ b/crates/swc_ecma_parser/tests/jsx/errors/adjacent-tags/input.js.stderr @@ -1,7 +1,6 @@ - x Unexpected token ``. Expected this, import, async, function, [ for array literal, { for object literal, @ for decorator, function, class, null, true, false, number, bigint, string, regexp, - | ` for template literal, (, or an identifier + x Unterminated regexp literal ,-[$DIR/tests/jsx/errors/adjacent-tags/input.js:1:1] 1 | var x =
<div>one</div><div>two</div>
; - : ^ + : ^^^^^^ `---- diff --git a/crates/swc_ecma_parser/tests/test262-error-references/fail/095bea002b10b8e1.js.stderr b/crates/swc_ecma_parser/tests/test262-error-references/fail/095bea002b10b8e1.js.stderr index 42808f21783e..16104415f098 100644 --- a/crates/swc_ecma_parser/tests/test262-error-references/fail/095bea002b10b8e1.js.stderr +++ b/crates/swc_ecma_parser/tests/test262-error-references/fail/095bea002b10b8e1.js.stderr @@ -1,6 +1,6 @@ - x Unexpected eof + x Unterminated regexp literal ,-[$DIR/tests/test262-parser/fail/095bea002b10b8e1.js:1:1] 1 | foo[/42 - : ^ + : ^^^ `---- diff --git a/crates/swc_ecma_parser/tests/test262-error-references/fail/97fc32bf01227e39.js.stderr b/crates/swc_ecma_parser/tests/test262-error-references/fail/97fc32bf01227e39.js.stderr index 4c1545894580..2c28f7c8cfc8 100644 --- a/crates/swc_ecma_parser/tests/test262-error-references/fail/97fc32bf01227e39.js.stderr +++ b/crates/swc_ecma_parser/tests/test262-error-references/fail/97fc32bf01227e39.js.stderr @@ -1,6 +1,6 @@ - x Unexpected eof + x Unterminated regexp literal ,-[$DIR/tests/test262-parser/fail/97fc32bf01227e39.js:1:1] 1 | [/[/] - : ^ + : ^^^^ `----
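
Taken together, the series moves the regexp-vs-division decision from the lexer's is_expr_allowed heuristic into the parser: when primary-expression parsing sees `/` or `/=` it calls set_next_regexp(Some(start)), the lexer rewinds to that position and re-lexes the token with read_regexp, and the flag is cleared as soon as a Token::Regex comes back. The lexer on its own now emits plain BinOp(Div) tokens for a leading slash (see the updated lexer tests in patch 5), so an expression such as `{ } / 1` parses as division. The sketch below is a small, hypothetical harness (not part of the patches) that runs the two snippets from the new issue_6322 and issue_6323 tests through the public parser API; it assumes the usual swc_common plus swc_ecma_parser setup from the crate documentation of this period, and the helper name `parses` and the file name repro.js are invented for illustration.

    // Hypothetical repro harness, not part of this patch series.
    // Assumes swc_common / swc_ecma_parser versions contemporary with this change.
    use swc_common::{sync::Lrc, FileName, SourceMap};
    use swc_ecma_parser::{lexer::Lexer, Parser, StringInput, Syntax};

    fn parses(src: &str) -> bool {
        let cm: Lrc<SourceMap> = Default::default();
        // Throwaway in-memory file; the name is arbitrary.
        let fm = cm.new_source_file(FileName::Custom("repro.js".into()), src.into());
        let lexer = Lexer::new(
            Syntax::Es(Default::default()),
            Default::default(), // EsVersion
            StringInput::from(&*fm),
            None, // no comment collection
        );
        let mut parser = Parser::new_from(lexer);
        let ok = parser.parse_script().is_ok();
        // Recovered (non-fatal) errors would also indicate a regression.
        ok && parser.take_errors().is_empty()
    }

    fn main() {
        // Both snippets used to be rejected because the `/` after `}` was
        // lexed as the start of a regular expression literal.
        for src in ["for ( ; { } / 1 ; ) ;", "let x = 0 < { } / 0 ;"] {
            println!("{:?} parses: {}", src, parses(src));
        }
    }

Deleting the is_expr_allowed branch from read_slash is what makes this workable: the choice between a Regex token and a division operator is now made where the grammar context is actually known (the primary-expression path in parser/expr.rs) instead of being guessed from lexer state.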