From 124a1cb8ae0bc56ae807472454fde4e08306924f Mon Sep 17 00:00:00 2001 From: Yoshiyuki Komazaki Date: Tue, 28 Jun 2022 12:19:35 +0900 Subject: [PATCH] feat: Support double quoted string --- src/parser.rs | 3 +++ src/tokenizer.rs | 26 +++++++++++++++++++------- tests/sqlparser_bigquery.rs | 15 +++++++++++++++ tests/sqlparser_mysql.rs | 15 +++++++++++++++ 4 files changed, 52 insertions(+), 7 deletions(-) diff --git a/src/parser.rs b/src/parser.rs index 43daf797b..6f0096dea 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -504,6 +504,7 @@ impl<'a> Parser<'a> { } Token::Number(_, _) | Token::SingleQuotedString(_) + | Token::DoubleQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); @@ -2773,6 +2774,7 @@ impl<'a> Parser<'a> { Err(e) => parser_err!(format!("Could not parse '{}' as number: {}", n, e)), }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), + Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), @@ -2807,6 +2809,7 @@ impl<'a> Parser<'a> { match self.next_token() { Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value), Token::SingleQuotedString(s) => Ok(s), + Token::DoubleQuotedString(s) => Ok(s), Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => { Ok(s) } diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 91cb16a80..5a9a09613 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -49,6 +49,8 @@ pub enum Token { Char(char), /// Single quoted string: i.e: 'string' SingleQuotedString(String), + /// Double quoted string: i.e: "string" + DoubleQuotedString(String), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' @@ -161,6 +163,7 @@ impl fmt::Display for Token { Token::Number(ref n, l) => write!(f, "{}{long}", n, long = if *l { "L" } else { "" }), Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), + Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s), Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), @@ -385,7 +388,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // N'...' - a - let s = self.tokenize_single_quoted_string(chars)?; + let s = self.tokenize_quoted_string(chars, '\'')?; Ok(Some(Token::NationalStringLiteral(s))) } _ => { @@ -417,7 +420,7 @@ impl<'a> Tokenizer<'a> { match chars.peek() { Some('\'') => { // X'...' - a - let s = self.tokenize_single_quoted_string(chars)?; + let s = self.tokenize_quoted_string(chars, '\'')?; Ok(Some(Token::HexStringLiteral(s))) } _ => { @@ -442,12 +445,20 @@ impl<'a> Tokenizer<'a> { } Ok(Some(Token::make_word(&s, None))) } - // string + // single quoted string '\'' => { - let s = self.tokenize_single_quoted_string(chars)?; + let s = self.tokenize_quoted_string(chars, '\'')?; Ok(Some(Token::SingleQuotedString(s))) } + // double quoted string + '\"' if !self.dialect.is_delimited_identifier_start(ch) + && !self.dialect.is_identifier_start(ch) => + { + let s = self.tokenize_quoted_string(chars, '"')?; + + Ok(Some(Token::DoubleQuotedString(s))) + } // delimited (quoted) identifier quote_start if self.dialect.is_delimited_identifier_start(ch) @@ -769,9 +780,10 @@ impl<'a> Tokenizer<'a> { } /// Read a single quoted string, starting with the opening quote. - fn tokenize_single_quoted_string( + fn tokenize_quoted_string( &self, chars: &mut Peekable>, + quote_style: char, ) -> Result { let mut s = String::new(); chars.next(); // consume the opening quote @@ -780,12 +792,12 @@ impl<'a> Tokenizer<'a> { let mut is_escaped = false; while let Some(&ch) = chars.peek() { match ch { - '\'' => { + char if char == quote_style => { chars.next(); // consume if is_escaped { s.push(ch); is_escaped = false; - } else if chars.peek().map(|c| *c == '\'').unwrap_or(false) { + } else if chars.peek().map(|c| *c == quote_style).unwrap_or(false) { s.push(ch); chars.next(); } else { diff --git a/tests/sqlparser_bigquery.rs b/tests/sqlparser_bigquery.rs index 2b49abf6f..17240d22d 100644 --- a/tests/sqlparser_bigquery.rs +++ b/tests/sqlparser_bigquery.rs @@ -18,6 +18,21 @@ use test_utils::*; use sqlparser::ast::*; use sqlparser::dialect::BigQueryDialect; +#[test] +fn parse_literal_string() { + let sql = r#"SELECT 'single', "double""#; + let select = bigquery().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Value(Value::SingleQuotedString("single".to_string())), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value(Value::DoubleQuotedString("double".to_string())), + expr_from_projection(&select.projection[1]) + ); +} + #[test] fn parse_table_identifiers() { fn test_table_ident(ident: &str, expected: Vec) { diff --git a/tests/sqlparser_mysql.rs b/tests/sqlparser_mysql.rs index 715ce7e8d..51ade0515 100644 --- a/tests/sqlparser_mysql.rs +++ b/tests/sqlparser_mysql.rs @@ -30,6 +30,21 @@ fn parse_identifiers() { mysql().verified_stmt("SELECT $a$, àà"); } +#[test] +fn parse_literal_string() { + let sql = r#"SELECT 'single', "double""#; + let select = mysql().verified_only_select(sql); + assert_eq!(2, select.projection.len()); + assert_eq!( + &Expr::Value(Value::SingleQuotedString("single".to_string())), + expr_from_projection(&select.projection[0]) + ); + assert_eq!( + &Expr::Value(Value::DoubleQuotedString("double".to_string())), + expr_from_projection(&select.projection[1]) + ); +} + #[test] fn parse_show_columns() { let table_name = ObjectName(vec![Ident::new("mytable")]);