Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: dollar-quoted strings support #772

Merged
merged 8 commits into from Dec 29, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion src/ast/mod.rs
Expand Up @@ -39,7 +39,9 @@ pub use self::query::{
SelectInto, SelectItem, SetExpr, SetOperator, SetQuantifier, Table, TableAlias, TableFactor,
TableWithJoins, Top, Values, WildcardAdditionalOptions, With,
};
pub use self::value::{escape_quoted_string, DateTimeField, TrimWhereField, Value};
pub use self::value::{
escape_quoted_string, DateTimeField, DollarQuotedString, TrimWhereField, Value,
};

#[cfg(feature = "visitor")]
pub use visitor::*;
Expand Down
24 changes: 24 additions & 0 deletions src/ast/value.rs
Expand Up @@ -35,6 +35,8 @@ pub enum Value {
Number(BigDecimal, bool),
/// 'string value'
SingleQuotedString(String),
/// $<tag_name>$string value$<tag_name>$ (postgres syntax)
DollarQuotedString(DollarQuotedString),
/// e'string value' (postgres extension)
/// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
EscapedStringLiteral(String),
Expand All @@ -60,6 +62,7 @@ impl fmt::Display for Value {
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
Value::DollarQuotedString(v) => write!(f, "{}", v),
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
Expand All @@ -71,6 +74,27 @@ impl fmt::Display for Value {
}
}

/// A dollar-quoted string literal: `$$value$$` or `$tag$value$tag$`.
#[derive(Debug, Clone, PartialEq, PartialOrd, Eq, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit))]
pub struct DollarQuotedString {
    /// The text enclosed by the opening and closing delimiters.
    pub value: String,
    /// The tag placed between the `$` signs of each delimiter, or `None`
    /// for the bare `$$` form.
    pub tag: Option<String>,
}

impl fmt::Display for DollarQuotedString {
    /// Renders the literal back to its source form, wrapping `value` in
    /// `$tag$ ... $tag$` (or `$$ ... $$` when there is no tag).
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // A missing tag is rendered exactly like an empty one: `$` + "" + `$`.
        let tag = self.tag.as_deref().unwrap_or("");
        write!(f, "${}${}${}$", tag, self.value, tag)
    }
}

#[derive(Debug, Copy, Clone, PartialEq, Eq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
#[cfg_attr(feature = "visitor", derive(Visit))]
Expand Down
23 changes: 5 additions & 18 deletions src/parser.rs
Expand Up @@ -778,6 +778,7 @@ impl<'a> Parser<'a> {
Token::Number(_, _)
| Token::SingleQuotedString(_)
| Token::DoubleQuotedString(_)
| Token::DollarQuotedString(_)
| Token::NationalStringLiteral(_)
| Token::HexStringLiteral(_) => {
self.prev_token();
Expand Down Expand Up @@ -4104,6 +4105,7 @@ impl<'a> Parser<'a> {
},
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::DoubleQuotedString(ref s) => Ok(Value::DoubleQuotedString(s.to_string())),
Token::DollarQuotedString(ref s) => Ok(Value::DollarQuotedString(s.clone())),
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
Expand Down Expand Up @@ -4148,24 +4150,9 @@ impl<'a> Parser<'a> {
pub fn parse_function_definition(&mut self) -> Result<FunctionDefinition, ParserError> {
let peek_token = self.peek_token();
match peek_token.token {
Token::DoubleDollarQuoting if dialect_of!(self is PostgreSqlDialect) => {
Token::DollarQuotedString(value) if dialect_of!(self is PostgreSqlDialect) => {
self.next_token();
let mut func_desc = String::new();
loop {
if let Some(next_token) = self.next_token_no_skip() {
match &next_token.token {
Token::DoubleDollarQuoting => break,
Token::EOF => {
return self.expected(
"literal string",
TokenWithLocation::wrap(Token::EOF),
);
}
token => func_desc.push_str(token.to_string().as_str()),
}
}
}
Ok(FunctionDefinition::DoubleDollarDef(func_desc))
Ok(FunctionDefinition::DoubleDollarDef(value.value))
}
_ => Ok(FunctionDefinition::SingleQuotedDef(
self.parse_literal_string()?,
Expand Down Expand Up @@ -4712,7 +4699,7 @@ impl<'a> Parser<'a> {
}

/// Parse a query expression, i.e. a `SELECT` statement optionally
/// preceeded with some `WITH` CTE declarations and optionally followed
/// preceded with some `WITH` CTE declarations and optionally followed
/// by `ORDER BY`. Unlike some other parse_... methods, this one doesn't
/// expect the initial keyword to be already consumed
pub fn parse_query(&mut self) -> Result<Query, ParserError> {
Expand Down
111 changes: 97 additions & 14 deletions src/tokenizer.rs
Expand Up @@ -34,6 +34,7 @@ use serde::{Deserialize, Serialize};
#[cfg(feature = "visitor")]
use sqlparser_derive::Visit;

use crate::ast::DollarQuotedString;
use crate::dialect::SnowflakeDialect;
use crate::dialect::{Dialect, MySqlDialect};
use crate::keywords::{Keyword, ALL_KEYWORDS, ALL_KEYWORDS_INDEX};
Expand All @@ -55,6 +56,8 @@ pub enum Token {
SingleQuotedString(String),
/// Double quoted string: i.e: "string"
DoubleQuotedString(String),
/// Dollar quoted string: i.e: $$string$$ or $tag_name$string$tag_name$
DollarQuotedString(DollarQuotedString),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// "escaped" string literal, which are an extension to the SQL standard: i.e: e'first \n second' or E 'first \n second'
Expand Down Expand Up @@ -149,8 +152,6 @@ pub enum Token {
PGCubeRoot,
/// `?` or `$` , a prepared statement arg placeholder
Placeholder(String),
/// `$$`, used for PostgreSQL create function definition
DoubleDollarQuoting,
/// ->, used as an operator to extract json field in PostgreSQL
Arrow,
/// ->>, used as an operator to extract json field as text in PostgreSQL
Expand Down Expand Up @@ -184,6 +185,7 @@ impl fmt::Display for Token {
Token::Char(ref c) => write!(f, "{}", c),
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
Token::DoubleQuotedString(ref s) => write!(f, "\"{}\"", s),
Token::DollarQuotedString(ref s) => write!(f, "{}", s),
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
Expand Down Expand Up @@ -236,7 +238,6 @@ impl fmt::Display for Token {
Token::HashArrow => write!(f, "#>"),
Token::HashLongArrow => write!(f, "#>>"),
Token::AtArrow => write!(f, "@>"),
Token::DoubleDollarQuoting => write!(f, "$$"),
Token::ArrowAt => write!(f, "<@"),
Token::HashMinus => write!(f, "#-"),
Token::AtQuestion => write!(f, "@?"),
Expand Down Expand Up @@ -837,17 +838,8 @@ impl<'a> Tokenizer<'a> {
let s = peeking_take_while(chars, |ch| ch.is_numeric());
Ok(Some(Token::Placeholder(String::from("?") + &s)))
}
'$' => {
chars.next();
match chars.peek() {
Some('$') => self.consume_and_return(chars, Token::DoubleDollarQuoting),
_ => {
let s =
peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_');
Ok(Some(Token::Placeholder(String::from("$") + &s)))
}
}
}
'$' => Ok(Some(self.tokenize_dollar_preceded_value(chars)?)),

//whitespace check (including unicode chars) should be last as it covers some of the chars above
ch if ch.is_whitespace() => {
self.consume_and_return(chars, Token::Whitespace(Whitespace::Space))
Expand All @@ -858,6 +850,97 @@ impl<'a> Tokenizer<'a> {
}
}

/// Tokenize a value introduced by `$`: either a dollar-quoted string
/// (`$$body$$` or `$tag$body$tag$`) or a placeholder such as `$1` / `$name`.
///
/// The stream must be positioned on the leading `$`, which is consumed here.
///
/// Returns
/// * `Token::Placeholder` when the run of alphanumeric / `_` characters that
///   follows the `$` is not itself followed by another `$`;
/// * `Token::DollarQuotedString` otherwise. The body extends up to the first
///   occurrence of the closing delimiter (`$$`, or `$tag$` with the same tag
///   that opened the string), so a tagged body may contain `$` characters,
///   e.g. `$fn$SELECT $1$fn$`.
///
/// # Errors
/// Returns a tokenizer error when the input ends before the closing delimiter
/// has been seen.
fn tokenize_dollar_preceded_value(&self, chars: &mut State) -> Result<Token, TokenizerError> {
    // Consume the leading `$`.
    chars.next();

    // Empty for the `$$` form; otherwise the tag (or the placeholder name).
    let tag = peeking_take_while(chars, |ch| ch.is_alphanumeric() || ch == '_');

    // No second `$` means this was never a quoting delimiter.
    if !matches!(chars.peek(), Some('$')) {
        return Ok(Token::Placeholder(String::from("$") + &tag));
    }
    // Consume the `$` that completes the opening delimiter.
    chars.next();

    let end_delimiter = format!("${}$", tag);
    let unterminated = if tag.is_empty() {
        "Unterminated dollar-quoted string"
    } else {
        "Unterminated dollar-quoted, expected $"
    };

    // Accumulate characters until the text read so far ends with the closing
    // delimiter. Matching the whole delimiter, rather than stopping at the
    // first `$`, is what allows `$` inside the body.
    let mut body = String::new();
    while !body.ends_with(&end_delimiter) {
        match chars.next() {
            Some(ch) => body.push(ch),
            None => return self.tokenizer_error(chars.location(), unterminated),
        }
    }
    // Drop the closing delimiter that was accumulated along with the body.
    body.truncate(body.len() - end_delimiter.len());

    Ok(Token::DollarQuotedString(DollarQuotedString {
        value: body,
        tag: if tag.is_empty() { None } else { Some(tag) },
    }))
}

fn tokenizer_error<R>(
&self,
loc: Location,
Expand Down
81 changes: 81 additions & 0 deletions tests/sqlparser_postgres.rs
Expand Up @@ -2507,3 +2507,84 @@ fn parse_drop_function() {
}
);
}

#[test]
fn parse_dollar_quoted_string() {
    // Builds the expression expected for a dollar-quoted literal with the
    // given tag and body.
    fn dollar_quoted(tag: Option<&str>, value: &str) -> Expr {
        Expr::Value(Value::DollarQuotedString(DollarQuotedString {
            tag: tag.map(str::to_string),
            value: value.to_string(),
        }))
    }

    let sql = "SELECT $$hello$$, $tag_name$world$tag_name$, $$Foo$Bar$$, $$Foo$Bar$$col_name, $$$$, $tag_name$$tag_name$";

    let statements = pg().parse_sql_statements(sql).unwrap();

    // The input is a single SELECT; anything else is a test bug.
    let projection = match statements.first().unwrap() {
        Statement::Query(query) => match &*query.body {
            SetExpr::Select(select) => &select.projection,
            _ => unreachable!(),
        },
        _ => unreachable!(),
    };

    // Untagged literal.
    assert_eq!(
        &dollar_quoted(None, "hello"),
        expr_from_projection(&projection[0])
    );

    // Tagged literal.
    assert_eq!(
        &dollar_quoted(Some("tag_name"), "world"),
        expr_from_projection(&projection[1])
    );

    // A lone `$` inside an untagged body is kept as part of the value.
    assert_eq!(
        &dollar_quoted(None, "Foo$Bar"),
        expr_from_projection(&projection[2])
    );

    // An identifier directly after the closing `$$` is parsed as an alias.
    assert_eq!(
        projection[3],
        SelectItem::ExprWithAlias {
            expr: dollar_quoted(None, "Foo$Bar"),
            alias: Ident {
                value: "col_name".to_string(),
                quote_style: None,
            },
        }
    );

    // Empty bodies, untagged and tagged.
    assert_eq!(
        expr_from_projection(&projection[4]),
        &dollar_quoted(None, ""),
    );

    assert_eq!(
        expr_from_projection(&projection[5]),
        &dollar_quoted(Some("tag_name"), ""),
    );
}

#[test]
fn parse_incorrect_dollar_quoted_string() {
    // Each statement opens a dollar-quoted string that is never closed by a
    // matching delimiter, so parsing must fail.
    let malformed = ["SELECT $x$hello$$", "SELECT $hello$$", "SELECT $$$"];

    for sql in malformed.iter() {
        assert!(pg().parse_sql_statements(sql).is_err());
    }
}