From 51de4b40837d4414c1edf45b181885c0c6505b91 Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Wed, 28 Dec 2022 02:03:21 +0530 Subject: [PATCH 1/8] feat: support dollar-quoted strings --- src/ast/mod.rs | 4 +- src/ast/value.rs | 23 +++++++++++ src/lib.rs | 1 + src/parser.rs | 23 +++-------- src/tokenizer.rs | 99 +++++++++++++++++++++++++++++++++++++++++------- 5 files changed, 118 insertions(+), 32 deletions(-) diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 80dff8504..a731dc85b 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -39,7 +39,9 @@ pub use self::query::{ SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor, TableWithJoins, Top, Values, WildcardAdditionalOptions, With, }; -pub use self::value::{escape_quoted_string, DateTimeField, TrimWhereField, Value}; +pub use self::value::{ + escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value, +}; #[cfg(feature = "visitor")] pub use visitor::*; diff --git a/src/ast/value.rs b/src/ast/value.rs index e17f464cf..7544262f9 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -35,6 +35,8 @@ pub enum Value { Number(BigDecimal, bool), /// 'string value' SingleQuotedString(String), + // $$string value$$ (postgres syntax) + DollarQuotedString(DollarQuotedString), /// e'string value' (postgres extension) /// write!(f, "{}{long}", v, long = if *l { "L" } else { "" }), Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v), Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)), + Value::DollarQuotedString(v) => write!(f, "{}", v), Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)), Value::NationalStringLiteral(v) => write!(f, "N'{}'", v), Value::HexStringLiteral(v) => write!(f, "X'{}'", v), @@ -71,6 +74,26 @@ impl fmt::Display for Value { } } +#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct DollarQuotedString { + pub value: String, + pub tag: Option, +} + +impl fmt::Display for DollarQuotedString { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match &self.tag { + Some(tag) => { + write!(f, "${}${}${}$", tag, self.value, tag) + } + None => { + write!(f, "$${}$$", self.value) + } + } + } +} + #[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit))] diff --git a/src/lib.rs b/src/lib.rs index 75209b054..c937fbcba 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,6 +51,7 @@ extern crate alloc; #[macro_use] #[cfg(test)] extern crate pretty_assertions; +// extern crate core; pub mod ast; #[macro_use] diff --git a/src/parser.rs b/src/parser.rs index ba62ff9b7..3387d5aea 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -778,6 +778,7 @@ impl<'a> Parser<'a> { Token::Number(_, _) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) + | Token::DollarQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); @@ -4104,6 +4105,7 @@ impl<'a> Parser<'a> { }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), + Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())), Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), @@ -4148,24 +4150,9 @@ impl<'a> Parser<'a> { pub fn parse_function_definition(&mut self) -> Result { let peek_token = self.peek_token(); match peek_token.token { - Token::DoubleDollarQuoting if dialect_of!(self is PostgreSqlDialect) => { + Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => { self.next_token(); - let mut func_desc = String::new(); - loop { - if let Some(next_token) = self.next_token_no_skip() { - match &next_token.token { - Token::DoubleDollarQuoting => break, - Token::EOF => { - return self.expected( - "literal string", - TokenWithLocation::wrap(Token::EOF), - ); - } - token => func_desc.push_str(token.to_string().as_str()), - } - } - } - Ok(FunctionDefinition::DoubleDollarDef(func_desc)) + Ok(FunctionDefinition::DoubleDollarDef(value.value)) } _ => Ok(FunctionDefinition::SingleQuotedDef( self.parse_literal_string()?, @@ -4712,7 +4699,7 @@ impl<'a> Parser<'a> { } /// Parse a query expression, i.e. a `SELECT` statement optionally - /// preceeded with some `WITH` CTE declarations and optionally followed + /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't /// expect the initial keyword to be already consumed pub fn parse_query(&mut self) -> Result { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d74032657..a6da0d66f 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -34,6 +34,10 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::Visit; +#[cfg(feature = "visitor")] +use sqlparser_derive::Visit; + +use crate::ast::DollarQuotedString; use crate::dialect::SnowflakeDialect; use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; @@ -55,6 +59,8 @@ pub enum Token { SingleQuotedString(String), /// Double quoted string: i.e: "string" DoubleQuotedString(String), + /// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$ + DollarQuotedString(DollarQuotedString), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' @@ -149,8 +155,9 @@ pub enum Token { PGCubeRoot, /// `?` or `$` , a prepared statement arg placeholder Placeholder(String), + // todo: remove /// `$$`, used for PostgreSQL create function definition - DoubleDollarQuoting, + // DoubleDollarQuoting, /// ->, used as a operator to extract json field in PostgreSQL Arrow, /// ->>, used as a operator to extract json field as text in PostgreSQL @@ -184,6 +191,7 @@ impl fmt::Display for Token { Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s), + Token::DollarQuotedString(ref s) => write!(f, "{}", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s), Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s), @@ -236,7 +244,7 @@ impl fmt::Display for Token { Token::HashArrow => write!(f, "#>"), Token::HashLongArrow => write!(f, "#>>"), Token::AtArrow => write!(f, "@>"), - Token::DoubleDollarQuoting => write!(f, "$$"), + // Token::DoubleDollarQuoting => write!(f, "$$"), Token::ArrowAt => write!(f, "<@"), Token::HashMinus => write!(f, "#-"), Token::AtQuestion => write!(f, "@?"), @@ -466,6 +474,7 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = self.next_token(&mut state)? { + println!("{:?}", token); tokens.push(TokenWithLocation { token, location: location.clone(), @@ -837,17 +846,8 @@ impl<'a> Tokenizer<'a> { let s = peeking_take_while(chars, |ch| ch.is_numeric()); Ok(Some(Token::Placeholder(String::from("?") + &s))) } - '$' => { - chars.next(); - match chars.peek() { - Some('$') => self.consume_and_return(chars, Token::DoubleDollarQuoting), - _ => { - let s = - peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_'); - Ok(Some(Token::Placeholder(String::from("$") + &s))) - } - } - } + '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), + //whitespace check (including unicode chars) should be last as it covers some of the chars above ch if ch.is_whitespace() => { self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)) @@ -858,6 +858,79 @@ impl<'a> Tokenizer<'a> { } } + /// Tokenize dollar preceded value (i.e: a string/placeholder) + fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { + let mut s = String::new(); + let mut value = String::new(); + + chars.next(); + + if let Some('$') = chars.peek() { + chars.next(); + s.push_str(&peeking_take_while(chars, |ch| ch != '$')); + chars.next(); + + return if let Some('$') = chars.peek() { + chars.next(); + Ok(Token::DollarQuotedString(DollarQuotedString { + value: s, + tag: None, + })) + } else { + self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string") + }; + } else { + value.push_str(&peeking_take_while(chars, |ch| { + ch.is_alphanumeric() || ch == '_' + })); + + if let Some('$') = chars.peek() { + chars.next(); + s.push_str(&peeking_take_while(chars, |ch| ch != '$')); + + match chars.peek() { + Some('$') => { + chars.next(); + for (_, c) in value.chars().enumerate() { + let next_char = chars.next(); + if Some(c) != next_char { + return self.tokenizer_error( + chars.location(), + format!( + "Unterminated dollar-quoted string, expected ${}$", + value + ), + ); + } + } + + if let Some('$') = chars.peek() { + chars.next(); + } else { + return self.tokenizer_error( + chars.location(), + "Unterminated dollar-quoted string, expected $", + ); + } + } + _ => { + return self.tokenizer_error( + chars.location(), + "Unterminated dollar-quoted, expected $", + ); + } + } + } else { + return Ok(Token::Placeholder(String::from("$") + &value)); + } + } + + Ok(Token::DollarQuotedString(DollarQuotedString { + value: s, + tag: if value.is_empty() { None } else { Some(value) }, + })) + } + fn tokenizer_error( &self, loc: Location, From 840fe4b1f23ec99dd9e681d0c505e9e41d24b2b5 Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Wed, 28 Dec 2022 02:04:36 +0530 Subject: [PATCH 2/8] remove comment --- src/lib.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/lib.rs b/src/lib.rs index c937fbcba..75209b054 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -51,7 +51,6 @@ extern crate alloc; #[macro_use] #[cfg(test)] extern crate pretty_assertions; -// extern crate core; pub mod ast; #[macro_use] From dae1287ecfd39b07340fd3bce9259f2e7bb689f2 Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Wed, 28 Dec 2022 02:05:24 +0530 Subject: [PATCH 3/8] unused code --- src/tokenizer.rs | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index a6da0d66f..575f0c063 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -155,8 +155,6 @@ pub enum Token { PGCubeRoot, /// `?` or `$` , a prepared statement arg placeholder Placeholder(String), - // todo: remove - /// `$$`, used for PostgreSQL create function definition // DoubleDollarQuoting, /// ->, used as a operator to extract json field in PostgreSQL Arrow, @@ -244,7 +242,6 @@ impl fmt::Display for Token { Token::HashArrow => write!(f, "#>"), Token::HashLongArrow => write!(f, "#>>"), Token::AtArrow => write!(f, "@>"), - // Token::DoubleDollarQuoting => write!(f, "$$"), Token::ArrowAt => write!(f, "<@"), Token::HashMinus => write!(f, "#-"), Token::AtQuestion => write!(f, "@?"), From d70d95dc30712b19a7153c7e7d048adefb36fd96 Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Wed, 28 Dec 2022 02:22:44 +0530 Subject: [PATCH 4/8] removed debugging --- src/tokenizer.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 575f0c063..3b6113d06 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -471,7 +471,6 @@ impl<'a> Tokenizer<'a> { let mut location = state.location(); while let Some(token) = self.next_token(&mut state)? { - println!("{:?}", token); tokens.push(TokenWithLocation { token, location: location.clone(), From 5dd6e3ede0cd78378dddf835a5c971480482e76e Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Thu, 29 Dec 2022 02:00:08 +0530 Subject: [PATCH 5/8] added tests --- tests/sqlparser_postgres.rs | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6e190a01b..d0460b105 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2507,3 +2507,34 @@ fn parse_drop_function() { } ); } + +#[test] +fn parse_dollar_quoted_string() { + let sql = "SELECT $$hello$$, $tag_name$world$tag_name$"; + let select = pg().verified_only_select(sql); + + assert_eq!( + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: None, + value: "hello".into() + })), + expr_from_projection(&select.projection[0]) + ); + + assert_eq!( + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: Some("tag_name".into()), + value: "world".into() + })), + expr_from_projection(&select.projection[1]) + ); +} + +#[test] +fn parse_incorrect_dollar_quoted_string() { + let sql = "SELECT $x$hello$$"; + assert!(pg().parse_sql_statements(sql).is_err()); + + let sql = "SELECT $hello$$"; + assert!(pg().parse_sql_statements(sql).is_err()); +} From 1f9b9847d1bf563851c23e4d268c5927fc9d67d1 Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Thu, 29 Dec 2022 02:10:56 +0530 Subject: [PATCH 6/8] fmt --- tests/sqlparser_postgres.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index d0460b105..8c61e0d31 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2534,7 +2534,7 @@ fn parse_dollar_quoted_string() { fn parse_incorrect_dollar_quoted_string() { let sql = "SELECT $x$hello$$"; assert!(pg().parse_sql_statements(sql).is_err()); - + let sql = "SELECT $hello$$"; assert!(pg().parse_sql_statements(sql).is_err()); } From 7180f2f06e6745ad829e98f191b03c17b3657f25 Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Thu, 29 Dec 2022 02:16:44 +0530 Subject: [PATCH 7/8] clippy --- src/ast/value.rs | 1 + src/tokenizer.rs | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/src/ast/value.rs b/src/ast/value.rs index 7544262f9..022251cdb 100644 --- a/src/ast/value.rs +++ b/src/ast/value.rs @@ -76,6 +76,7 @@ impl fmt::Display for Value { #[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)] #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +#[cfg_attr(feature = "visitor", derive(Visit))] pub struct DollarQuotedString { pub value: String, pub tag: Option, diff --git a/src/tokenizer.rs b/src/tokenizer.rs index 3b6113d06..fe0d72230 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -34,9 +34,6 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::Visit; -#[cfg(feature = "visitor")] -use sqlparser_derive::Visit; - use crate::ast::DollarQuotedString; use crate::dialect::SnowflakeDialect; use crate::dialect::{Dialect, MySqlDialect}; From 7a1fa9c1d57c89e604f367ae95aeb8c2f2e422df Mon Sep 17 00:00:00 2001 From: Alex Vasilev Date: Thu, 29 Dec 2022 15:39:41 +0530 Subject: [PATCH 8/8] updated tests --- src/tokenizer.rs | 31 +++++++++++++++----- tests/sqlparser_postgres.rs | 58 ++++++++++++++++++++++++++++++++++--- 2 files changed, 78 insertions(+), 11 deletions(-) diff --git a/src/tokenizer.rs b/src/tokenizer.rs index fe0d72230..c92012a5a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -152,7 +152,6 @@ pub enum Token { PGCubeRoot, /// `?` or `$` , a prepared statement arg placeholder Placeholder(String), - // DoubleDollarQuoting, /// ->, used as a operator to extract json field in PostgreSQL Arrow, /// ->>, used as a operator to extract json field as text in PostgreSQL @@ -860,17 +859,35 @@ impl<'a> Tokenizer<'a> { if let Some('$') = chars.peek() { chars.next(); - s.push_str(&peeking_take_while(chars, |ch| ch != '$')); - chars.next(); - return if let Some('$') = chars.peek() { + let mut is_terminated = false; + let mut prev: Option = None; + + while let Some(&ch) = chars.peek() { + if prev == Some('$') { + if ch == '$' { + chars.next(); + is_terminated = true; + break; + } else { + s.push('$'); + s.push(ch); + } + } else if ch != '$' { + s.push(ch); + } + + prev = Some(ch); chars.next(); + } + + return if chars.peek().is_none() && !is_terminated { + self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string") + } else { Ok(Token::DollarQuotedString(DollarQuotedString { value: s, tag: None, })) - } else { - self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string") }; } else { value.push_str(&peeking_take_while(chars, |ch| { @@ -890,7 +907,7 @@ impl<'a> Tokenizer<'a> { return self.tokenizer_error( chars.location(), format!( - "Unterminated dollar-quoted string, expected ${}$", + "Unterminated dollar-quoted string at or near \"{}\"", value ), ); diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 8c61e0d31..496a61843 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2510,15 +2510,24 @@ fn parse_drop_function() { #[test] fn parse_dollar_quoted_string() { - let sql = "SELECT $$hello$$, $tag_name$world$tag_name$"; - let select = pg().verified_only_select(sql); + let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$"; + + let stmt = pg().parse_sql_statements(sql).unwrap(); + + let projection = match stmt.get(0).unwrap() { + Statement::Query(query) => match &*query.body { + SetExpr::Select(select) => &select.projection, + _ => unreachable!(), + }, + _ => unreachable!(), + }; assert_eq!( &Expr::Value(Value::DollarQuotedString(DollarQuotedString { tag: None, value: "hello".into() })), - expr_from_projection(&select.projection[0]) + expr_from_projection(&projection[0]) ); assert_eq!( @@ -2526,7 +2535,45 @@ fn parse_dollar_quoted_string() { tag: Some("tag_name".into()), value: "world".into() })), - expr_from_projection(&select.projection[1]) + expr_from_projection(&projection[1]) + ); + + assert_eq!( + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: None, + value: "Foo$Bar".into() + })), + expr_from_projection(&projection[2]) + ); + + assert_eq!( + projection[3], + SelectItem::ExprWithAlias { + expr: Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: None, + value: "Foo$Bar".into(), + })), + alias: Ident { + value: "col_name".into(), + quote_style: None, + }, + } + ); + + assert_eq!( + expr_from_projection(&projection[4]), + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: None, + value: "".into() + })), + ); + + assert_eq!( + expr_from_projection(&projection[5]), + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: Some("tag_name".into()), + value: "".into() + })), ); } @@ -2537,4 +2584,7 @@ fn parse_incorrect_dollar_quoted_string() { let sql = "SELECT $hello$$"; assert!(pg().parse_sql_statements(sql).is_err()); + + let sql = "SELECT $$$"; + assert!(pg().parse_sql_statements(sql).is_err()); }