Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix(ecma/parser): regexp parsing #6469

Merged
merged 10 commits on Nov 18, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
31 changes: 19 additions & 12 deletions crates/swc_ecma_parser/src/lexer/mod.rs
Expand Up @@ -695,12 +695,6 @@ impl<'a, I: Input> Lexer<'a, I> {
#[inline(never)]
fn read_slash(&mut self) -> LexResult<Option<Token>> {
debug_assert_eq!(self.cur(), Some('/'));
// let start = self.cur_pos();

// Regex
if self.state.is_expr_allowed {
return self.read_regexp().map(Some);
}

// Divide operator
self.bump();
Expand Down Expand Up @@ -1120,19 +1114,25 @@ impl<'a, I: Input> Lexer<'a, I> {
}

/// Expects current char to be '/'
fn read_regexp(&mut self) -> LexResult<Token> {
fn read_regexp(&mut self, start: BytePos) -> LexResult<Token> {
self.input.reset_to(start);

debug_assert_eq!(self.cur(), Some('/'));

let start = self.cur_pos();

self.bump();

let (mut escaped, mut in_class) = (false, false);
// let content_start = self.cur_pos();

let content = self.with_buf(|l, buf| {
while let Some(c) = l.cur() {
// This is ported from babel.
// Seems like regexp literal cannot contain linebreak.
if c.is_line_terminator() {
l.error(start, SyntaxError::UnterminatedRegExp)?;
let span = l.span(start);

return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
}

if escaped {
Expand All @@ -1145,20 +1145,22 @@ impl<'a, I: Input> Lexer<'a, I> {
'/' if !in_class => break,
_ => {}
}

escaped = c == '\\';
}

l.bump();
buf.push(c);
}

Ok(Atom::new(&**buf))
})?;
// let content_span = Span::new(content_start, self.cur_pos(),
// Default::default());

// input is terminated without following `/`
if !self.is(b'/') {
self.error(start, SyntaxError::UnterminatedRegExp)?;
let span = self.span(start);

return Err(Error::new(span, SyntaxError::UnterminatedRegExp));
}

self.bump(); // '/'
Expand Down Expand Up @@ -1287,6 +1289,11 @@ impl<'a, I: Input> Lexer<'a, I> {
/// Sets whether the lexer treats the current position as one where an
/// expression may start. NOTE(review): in the pre-PR code this flag gated
/// whether `/` was lexed as a regexp or as the divide operator — confirm
/// remaining call sites still rely on it after this change.
pub fn set_expr_allowed(&mut self, allow: bool) {
self.state.is_expr_allowed = allow;
}

/// Records the position at which the next token must be re-lexed as a
/// regular expression literal; `None` clears any pending request.
/// The lexer's `next()` checks this before normal tokenization and calls
/// `read_regexp(start)` when it is set.
#[inline]
pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.state.next_regexp = start;
}
}

fn pos_span(p: BytePos) -> Span {
Expand Down
11 changes: 11 additions & 0 deletions crates/swc_ecma_parser/src/lexer/state.rs
Expand Up @@ -22,6 +22,7 @@ use crate::{
#[derive(Clone)]
pub(super) struct State {
pub is_expr_allowed: bool,
kdy1 marked this conversation as resolved.
Show resolved Hide resolved
pub next_regexp: Option<BytePos>,
/// if line break exists between previous token and new token?
pub had_line_break: bool,
/// TODO: Remove this field.
Expand Down Expand Up @@ -152,6 +153,11 @@ impl<I: Input> Tokens for Lexer<'_, I> {
self.set_expr_allowed(allow)
}

// `Tokens` trait hook: stash the byte position from which the next token
// should be re-read as a regexp literal (consumed in the lexer's `next()`).
#[inline]
fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.state.next_regexp = start;
}

#[inline]
fn token_context(&self) -> &TokenContexts {
&self.state.context
Expand Down Expand Up @@ -191,6 +197,10 @@ impl<'a, I: Input> Iterator for Lexer<'a, I> {
let mut start = self.cur_pos();

let res = (|| -> Result<Option<_>, _> {
if let Some(start) = self.state.next_regexp {
return Ok(Some(self.read_regexp(start)?));
}

if self.state.is_first {
if let Some(shebang) = self.read_shebang()? {
return Ok(Some(Token::Shebang(shebang)));
Expand Down Expand Up @@ -363,6 +373,7 @@ impl State {

State {
is_expr_allowed: true,
next_regexp: None,
is_first: true,
had_line_break: false,
prev_hi: start_pos,
Expand Down
62 changes: 53 additions & 9 deletions crates/swc_ecma_parser/src/lexer/tests.rs
Expand Up @@ -416,15 +416,19 @@ fn regexp_unary_void() {
lex(Syntax::default(), "void /test/"),
vec![
Void.span(0..4).lb(),
Regex("test".into(), "".into()).span(5..11),
BinOp(Div).span(5),
Word(Word::Ident("test".into())).span(6..10),
BinOp(Div).span(10),
]
);
assert_eq!(
lex(Syntax::default(), "void (/test/)"),
vec![
Void.span(0..4).lb(),
LParen.span(5..6),
Regex("test".into(), "".into()).span(6..12),
BinOp(Div).span(6),
Word(Word::Ident("test".into())).span(7..11),
BinOp(Div).span(11),
RParen.span(12..13),
]
);
Expand Down Expand Up @@ -483,13 +487,28 @@ fn simple_regex() {
vec![
"x".span(0).lb(),
Assign.span(2),
Regex("42".into(), "i".into(),).span(4..9),
BinOp(Div).span(4),
42.span(5..7),
BinOp(Div).span(7),
Word(Word::Ident("i".into())).span(8),
],
);

assert_eq!(
lex(Syntax::default(), "/42/"),
vec![Regex("42".into(), "".into()).span(0..4).lb(),]
vec![
TokenAndSpan {
token: Token::BinOp(BinOpToken::Div),
had_line_break: true,
span: Span {
lo: BytePos(1),
hi: BytePos(2),
ctxt: Default::default(),
},
},
42.span(1..3),
BinOp(Div).span(3)
]
);
}

Expand All @@ -508,7 +527,13 @@ fn complex_regex() {
RParen,
LBrace,
RBrace,
Regex("42".into(), "i".into(),),
BinOp(Div),
Num {
value: 42.0,
raw: Atom::new("42")
},
BinOp(Div),
Word(Word::Ident("i".into())),
]
)
}
Expand Down Expand Up @@ -595,7 +620,9 @@ fn after_if() {
RParen.span(4),
LBrace.span(5),
RBrace.span(6),
Regex("y".into(), "".into()).span(8..11),
Div.span(8),
"y".span(9),
Div.span(10),
Dot.span(11),
"test".span(12..16),
LParen.span(16),
Expand Down Expand Up @@ -639,7 +666,9 @@ fn migrated_0002() {
vec![
"tokenize".span(0..8).lb(),
LParen.span(8),
Regex("42".into(), "".into()).span(9..13),
BinOp(Div).span(9),
42.span(10..12),
BinOp(Div).span(12),
RParen.span(13),
],
)
Expand Down Expand Up @@ -671,7 +700,9 @@ fn migrated_0004() {
RParen.span(11),
LBrace.span(12),
RBrace.span(13),
Regex("42".into(), "".into()).span(15..19),
BinOp(Div).span(15),
42.span(16..18),
BinOp(Div).span(18),
]
);
}
Expand Down Expand Up @@ -707,7 +738,20 @@ fn migrated_0006() {
vec![
LBrace.span(0).lb(),
RBrace.span(1),
Regex("42".into(), "".into()).span(3..7),
BinOp(Div).span(3),
TokenAndSpan {
token: Num {
value: 42.0,
raw: "42".into(),
},
had_line_break: false,
span: Span {
lo: BytePos(5),
hi: BytePos(7),
ctxt: Default::default(),
}
},
BinOp(Div).span(6),
],
)
}
Expand Down
70 changes: 46 additions & 24 deletions crates/swc_ecma_parser/src/parser/expr.rs
Expand Up @@ -332,31 +332,50 @@ impl<I: Tokens> Parser<I> {
}

// Regexp
Token::Regex(..) => match bump!(self) {
Token::Regex(exp, flags) => {
let span = span!(self, start);

let mut flags_count = flags.chars().fold(
AHashMap::<char, usize>::default(),
|mut map, flag| {
let key = match flag {
'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd' => flag,
_ => '\u{0000}', // special marker for unknown flags
};
map.entry(key).and_modify(|count| *count += 1).or_insert(1);
map
},
);
if flags_count.remove(&'\u{0000}').is_some() {
self.emit_err(span, SyntaxError::UnknownRegExpFlags);
}
if let Some((flag, _)) = flags_count.iter().find(|(_, count)| **count > 1) {
self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag));
tok!('/') | tok!("/=") => {
bump!(self);

self.input.set_next_regexp(Some(start));

if let Some(Token::Regex(..)) = self.input.cur() {
self.input.set_next_regexp(None);

match bump!(self) {
Token::Regex(exp, flags) => {
let span = span!(self, start);

let mut flags_count = flags.chars().fold(
AHashMap::<char, usize>::default(),
|mut map, flag| {
let key = match flag {
'g' | 'i' | 'm' | 's' | 'u' | 'y' | 'd' => flag,
_ => '\u{0000}', // special marker for unknown flags
};
map.entry(key).and_modify(|count| *count += 1).or_insert(1);
map
},
);

if flags_count.remove(&'\u{0000}').is_some() {
self.emit_err(span, SyntaxError::UnknownRegExpFlags);
}

if let Some((flag, _)) =
flags_count.iter().find(|(_, count)| **count > 1)
{
self.emit_err(span, SyntaxError::DuplicatedRegExpFlags(*flag));
}

return Ok(Box::new(Expr::Lit(Lit::Regex(Regex {
span,
exp,
flags,
}))));
}
_ => unreachable!(),
}
return Ok(Box::new(Expr::Lit(Lit::Regex(Regex { span, exp, flags }))));
}
_ => unreachable!(),
},
}

tok!('`') => {
// parse template literal
Expand Down Expand Up @@ -1880,7 +1899,10 @@ impl<I: Tokens> Parser<I> {
}

if is!(self, ';')
|| (!is!(self, '*') && !cur!(self, false).map(Token::starts_expr).unwrap_or(true))
|| (!is!(self, '*')
&& !is!(self, '/')
&& !is!(self, "/=")
&& !cur!(self, false).map(Token::starts_expr).unwrap_or(true))
kdy1 marked this conversation as resolved.
Show resolved Hide resolved
{
Ok(Box::new(Expr::Yield(YieldExpr {
span: span!(self, start),
Expand Down
13 changes: 13 additions & 0 deletions crates/swc_ecma_parser/src/parser/input.rs
Expand Up @@ -24,6 +24,8 @@ pub trait Tokens: Clone + Iterator<Item = TokenAndSpan> {
}

fn set_expr_allowed(&mut self, allow: bool);
fn set_next_regexp(&mut self, start: Option<BytePos>);

fn token_context(&self) -> &lexer::TokenContexts;
fn token_context_mut(&mut self) -> &mut lexer::TokenContexts;
fn set_token_context(&mut self, _c: lexer::TokenContexts);
Expand Down Expand Up @@ -110,6 +112,8 @@ impl Tokens for TokensInput {

fn set_expr_allowed(&mut self, _: bool) {}

fn set_next_regexp(&mut self, _: Option<BytePos>) {}

fn token_context(&self) -> &TokenContexts {
&self.token_ctx
}
Expand Down Expand Up @@ -222,6 +226,10 @@ impl<I: Tokens> Tokens for Capturing<I> {
self.inner.set_expr_allowed(allow)
}

// Pure delegation: `Capturing` wraps another token source and forwards
// the pending-regexp position to it unchanged.
fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.inner.set_next_regexp(start);
}

fn token_context(&self) -> &TokenContexts {
self.inner.token_context()
}
Expand Down Expand Up @@ -467,6 +475,11 @@ impl<I: Tokens> Buffer<I> {
self.iter.set_expr_allowed(allow)
}

/// Forwards the pending-regexp start position to the underlying token
/// iterator so the lexer can re-scan from `start` as a regexp literal.
#[inline]
pub fn set_next_regexp(&mut self, start: Option<BytePos>) {
self.iter.set_next_regexp(start);
}

#[inline]
pub(crate) fn token_context(&self) -> &lexer::TokenContexts {
self.iter.token_context()
Expand Down
14 changes: 14 additions & 0 deletions crates/swc_ecma_parser/src/parser/stmt.rs
Expand Up @@ -2527,4 +2527,18 @@ const foo;"#;

test_parser(src, Default::default(), |p| p.parse_script());
}

/// Regression test for issue #6322: `{ } / 1` inside a `for` header.
/// Assumes `test_parser` fails the test on a parse error — verify against
/// the helper's definition.
#[test]
fn issue_6322() {
    test_parser(
        "for ( ; { } / 1 ; ) ;",
        Default::default(),
        |p| p.parse_script(),
    );
}

/// Regression test for issue #6323: `0 < { } / 0` in an initializer.
/// Assumes `test_parser` fails the test on a parse error — verify against
/// the helper's definition.
#[test]
fn issue_6323() {
    test_parser(
        "let x = 0 < { } / 0 ;",
        Default::default(),
        |p| p.parse_script(),
    );
}
}
@@ -1,6 +1,6 @@

x Unexpected eof
x Unterminated regexp literal
,-[$DIR/tests/test262-parser/fail/095bea002b10b8e1.js:1:1]
1 | foo[/42
Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Better error

: ^
: ^^^
`----