/// A PostgreSQL dollar-quoted string literal, written either as `$$value$$`
/// or, with a tag, as `$tag$value$tag$`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit))]
pub struct DollarQuotedString {
    /// The text between the opening and closing delimiters.
    pub value: String,
    /// The delimiter tag, or `None` for the untagged `$$` form.
    pub tag: Option<String>,
}

impl fmt::Display for DollarQuotedString {
    /// Writes the literal back out with its delimiters. `value` is emitted
    /// verbatim: no escaping is applied.
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        match &self.tag {
            // Tagged form: the same tag opens and closes the literal.
            Some(tag) => write!(f, "${}${}${}$", tag, self.value, tag),
            // Untagged form.
            None => write!(f, "$${}$$", self.value),
        }
    }
}
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] #[cfg_attr(feature = "visitor", derive(Visit))] diff --git a/src/parser.rs b/src/parser.rs index ba62ff9b7..3387d5aea 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -778,6 +778,7 @@ impl<'a> Parser<'a> { Token::Number(_, _) | Token::SingleQuotedString(_) | Token::DoubleQuotedString(_) + | Token::DollarQuotedString(_) | Token::NationalStringLiteral(_) | Token::HexStringLiteral(_) => { self.prev_token(); @@ -4104,6 +4105,7 @@ impl<'a> Parser<'a> { }, Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())), Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())), + Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())), Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())), Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())), Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())), @@ -4148,24 +4150,9 @@ impl<'a> Parser<'a> { pub fn parse_function_definition(&mut self) -> Result { let peek_token = self.peek_token(); match peek_token.token { - Token::DoubleDollarQuoting if dialect_of!(self is PostgreSqlDialect) => { + Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => { self.next_token(); - let mut func_desc = String::new(); - loop { - if let Some(next_token) = self.next_token_no_skip() { - match &next_token.token { - Token::DoubleDollarQuoting => break, - Token::EOF => { - return self.expected( - "literal string", - TokenWithLocation::wrap(Token::EOF), - ); - } - token => func_desc.push_str(token.to_string().as_str()), - } - } - } - Ok(FunctionDefinition::DoubleDollarDef(func_desc)) + Ok(FunctionDefinition::DoubleDollarDef(value.value)) } _ => Ok(FunctionDefinition::SingleQuotedDef( self.parse_literal_string()?, @@ -4712,7 +4699,7 @@ impl<'a> Parser<'a> { } /// Parse a query expression, i.e. 
a `SELECT` statement optionally - /// preceeded with some `WITH` CTE declarations and optionally followed + /// preceded with some `WITH` CTE declarations and optionally followed /// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't /// expect the initial keyword to be already consumed pub fn parse_query(&mut self) -> Result { diff --git a/src/tokenizer.rs b/src/tokenizer.rs index d74032657..c92012a5a 100644 --- a/src/tokenizer.rs +++ b/src/tokenizer.rs @@ -34,6 +34,7 @@ use serde::{Deserialize, Serialize}; #[cfg(feature = "visitor")] use sqlparser_derive::Visit; +use crate::ast::DollarQuotedString; use crate::dialect::SnowflakeDialect; use crate::dialect::{Dialect, MySqlDialect}; use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX}; @@ -55,6 +56,8 @@ pub enum Token { SingleQuotedString(String), /// Double quoted string: i.e: "string" DoubleQuotedString(String), + /// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$ + DollarQuotedString(DollarQuotedString), /// "National" string literal: i.e: N'string' NationalStringLiteral(String), /// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second' @@ -149,8 +152,6 @@ pub enum Token { PGCubeRoot, /// `?` or `$` , a prepared statement arg placeholder Placeholder(String), - /// `$$`, used for PostgreSQL create function definition - DoubleDollarQuoting, /// ->, used as a operator to extract json field in PostgreSQL Arrow, /// ->>, used as a operator to extract json field as text in PostgreSQL @@ -184,6 +185,7 @@ impl fmt::Display for Token { Token::Char(ref c) => write!(f, "{}", c), Token::SingleQuotedString(ref s) => write!(f, "'{}'", s), Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s), + Token::DollarQuotedString(ref s) => write!(f, "{}", s), Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s), Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s), Token::HexStringLiteral(ref s) => 
write!(f, "X'{}'", s), @@ -236,7 +238,6 @@ impl fmt::Display for Token { Token::HashArrow => write!(f, "#>"), Token::HashLongArrow => write!(f, "#>>"), Token::AtArrow => write!(f, "@>"), - Token::DoubleDollarQuoting => write!(f, "$$"), Token::ArrowAt => write!(f, "<@"), Token::HashMinus => write!(f, "#-"), Token::AtQuestion => write!(f, "@?"), @@ -837,17 +838,8 @@ impl<'a> Tokenizer<'a> { let s = peeking_take_while(chars, |ch| ch.is_numeric()); Ok(Some(Token::Placeholder(String::from("?") + &s))) } - '$' => { - chars.next(); - match chars.peek() { - Some('$') => self.consume_and_return(chars, Token::DoubleDollarQuoting), - _ => { - let s = - peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_'); - Ok(Some(Token::Placeholder(String::from("$") + &s))) - } - } - } + '$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)), + //whitespace check (including unicode chars) should be last as it covers some of the chars above ch if ch.is_whitespace() => { self.consume_and_return(chars, Token::Whitespace(Whitespace::Space)) @@ -858,6 +850,97 @@ impl<'a> Tokenizer<'a> { } } + /// Tokenize dollar preceded value (i.e: a string/placeholder) + fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result { + let mut s = String::new(); + let mut value = String::new(); + + chars.next(); + + if let Some('$') = chars.peek() { + chars.next(); + + let mut is_terminated = false; + let mut prev: Option = None; + + while let Some(&ch) = chars.peek() { + if prev == Some('$') { + if ch == '$' { + chars.next(); + is_terminated = true; + break; + } else { + s.push('$'); + s.push(ch); + } + } else if ch != '$' { + s.push(ch); + } + + prev = Some(ch); + chars.next(); + } + + return if chars.peek().is_none() && !is_terminated { + self.tokenizer_error(chars.location(), "Unterminated dollar-quoted string") + } else { + Ok(Token::DollarQuotedString(DollarQuotedString { + value: s, + tag: None, + })) + }; + } else { + value.push_str(&peeking_take_while(chars, |ch| { + 
ch.is_alphanumeric() || ch == '_' + })); + + if let Some('$') = chars.peek() { + chars.next(); + s.push_str(&peeking_take_while(chars, |ch| ch != '$')); + + match chars.peek() { + Some('$') => { + chars.next(); + for (_, c) in value.chars().enumerate() { + let next_char = chars.next(); + if Some(c) != next_char { + return self.tokenizer_error( + chars.location(), + format!( + "Unterminated dollar-quoted string at or near \"{}\"", + value + ), + ); + } + } + + if let Some('$') = chars.peek() { + chars.next(); + } else { + return self.tokenizer_error( + chars.location(), + "Unterminated dollar-quoted string, expected $", + ); + } + } + _ => { + return self.tokenizer_error( + chars.location(), + "Unterminated dollar-quoted, expected $", + ); + } + } + } else { + return Ok(Token::Placeholder(String::from("$") + &value)); + } + } + + Ok(Token::DollarQuotedString(DollarQuotedString { + value: s, + tag: if value.is_empty() { None } else { Some(value) }, + })) + } + fn tokenizer_error( &self, loc: Location, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index 6e190a01b..496a61843 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -2507,3 +2507,84 @@ fn parse_drop_function() { } ); } + +#[test] +fn parse_dollar_quoted_string() { + let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$"; + + let stmt = pg().parse_sql_statements(sql).unwrap(); + + let projection = match stmt.get(0).unwrap() { + Statement::Query(query) => match &*query.body { + SetExpr::Select(select) => &select.projection, + _ => unreachable!(), + }, + _ => unreachable!(), + }; + + assert_eq!( + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: None, + value: "hello".into() + })), + expr_from_projection(&projection[0]) + ); + + assert_eq!( + &Expr::Value(Value::DollarQuotedString(DollarQuotedString { + tag: Some("tag_name".into()), + value: "world".into() + })), + 
/// Malformed dollar-quoted strings must be rejected rather than parsed.
#[test]
fn parse_incorrect_dollar_quoted_string() {
    let malformed = [
        // Opened with tag `x` but followed by `$$` instead of `$x$`.
        "SELECT $x$hello$$",
        // Opened with tag `hello`, never closed.
        "SELECT $hello$$",
        // Untagged string opened by `$$`, then a lone `$` and end of input.
        "SELECT $$$",
    ];

    for sql in malformed.iter() {
        assert!(pg().parse_sql_statements(sql).is_err());
    }
}