From dec6808d6735217bad0d071b1211cec0d69e6f25 Mon Sep 17 00:00:00 2001
From: Han YANG
Date: Tue, 22 Nov 2022 18:35:58 +0800
Subject: [PATCH] fix: handle nested comments

---
Review note: the previous revision of this patch compared each character with
the one before it, so a single character could be shared by two delimiters:
`/*/` was counted as an opener *and* a closer, and `*/*` as a closer *and* an
opener. The tokenizer now peeks ahead and consumes every two-character
delimiter as a unit. The nested-comment test used input that only balanced
under the old double counting; it now uses well-nested input, and a regression
test for `/*/` is added. The post-image blob id on the `index` line below
predates this revision.

 src/tokenizer.rs | 69 +++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 54 insertions(+), 15 deletions(-)

diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 5a9afdbef..6759f3cd5 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -832,22 +832,28 @@ impl<'a> Tokenizer<'a> {
         chars: &mut Peekable<Chars<'_>>,
     ) -> Result<Option<Token>, TokenizerError> {
         let mut s = String::new();
-        let mut maybe_closing_comment = false;
-        // TODO: deal with nested comments
+        // Number of `/*` openers not yet closed. Starts at 1: the opener of this
+        // comment is expected to have been consumed by the caller already.
+        let mut nested = 1;
+
         loop {
             match chars.next() {
-                Some(ch) => {
-                    if maybe_closing_comment {
-                        if ch == '/' {
-                            break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
-                        } else {
-                            s.push('*');
-                        }
-                    }
-                    maybe_closing_comment = ch == '*';
-                    if !maybe_closing_comment {
-                        s.push(ch);
-                    }
-                }
+                // Look ahead instead of remembering the previous character, so a
+                // delimiter always consumes both of its characters: in `/*/` the
+                // `*` belongs to the opener and must not also close the comment.
+                Some('/') if chars.peek() == Some(&'*') => {
+                    chars.next(); // consume the '*'
+                    s.push_str("/*");
+                    nested += 1;
+                }
+                Some('*') if chars.peek() == Some(&'/') => {
+                    chars.next(); // consume the '/'
+                    nested -= 1;
+                    if nested == 0 {
+                        break Ok(Some(Token::Whitespace(Whitespace::MultiLineComment(s))));
+                    }
+                    s.push_str("*/");
+                }
+                Some(ch) => s.push(ch),
                 None => break self.tokenizer_error("Unexpected EOF while in a multi-line comment"),
             }
@@ -1355,6 +1361,39 @@ mod tests {
         compare(expected, tokens);
     }
 
+    #[test]
+    fn tokenize_nested_multiline_comment() {
+        let sql = String::from("0/*multi-line\n* \n/* comment \n /*comment*/*/ /comment*/1");
+
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::Number("0".to_string(), false),
+            Token::Whitespace(Whitespace::MultiLineComment(
+                "multi-line\n* \n/* comment \n /*comment*/*/ /comment".to_string(),
+            )),
+            Token::Number("1".to_string(), false),
+        ];
+        compare(expected, tokens);
+    }
+
+    #[test]
+    fn tokenize_nested_multiline_comment_overlapping_delimiters() {
+        // In `/*/` the `*` belongs to the opener and must not also act as a closer.
+        let sql = String::from("0/* a /*/ b */ c */1");
+
+        let dialect = GenericDialect {};
+        let mut tokenizer = Tokenizer::new(&dialect, &sql);
+        let tokens = tokenizer.tokenize().unwrap();
+        let expected = vec![
+            Token::Number("0".to_string(), false),
+            Token::Whitespace(Whitespace::MultiLineComment(" a /*/ b */ c ".to_string())),
+            Token::Number("1".to_string(), false),
+        ];
+        compare(expected, tokens);
+    }
+
     #[test]
     fn tokenize_multiline_comment_with_even_asterisks() {
         let sql = String::from("\n/** Comment **/\n");