Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Support escaped string literals (PostgreSQL) #502

Merged
merged 9 commits into from May 25, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
38 changes: 38 additions & 0 deletions src/ast/value.rs
Expand Up @@ -30,6 +30,9 @@ pub enum Value {
Number(BigDecimal, bool),
/// 'string value'
SingleQuotedString(String),
/// e'string value' (postgres extension)
/// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
EscapedStringLiteral(String),
/// N'string value'
NationalStringLiteral(String),
/// X'hex value'
Expand Down Expand Up @@ -69,6 +72,7 @@ impl fmt::Display for Value {
Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
Value::Boolean(v) => write!(f, "{}", v),
Expand Down Expand Up @@ -193,6 +197,40 @@ pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> {
EscapeSingleQuoteString(s)
}

/// Display adapter that renders a string as the body of an escaped string
/// literal (the part between the quotes of `E'...'`).
///
/// Single quotes and backslashes are prefixed with a backslash; newline, tab
/// and carriage return are written as `\n`, `\t` and `\r`. Every other
/// character is written unchanged.
pub struct EscapeEscapedStringLiteral<'a>(&'a str);

impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {
    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
        // Stop at the first formatter error, exactly like `?` inside a loop.
        self.0.chars().try_for_each(|ch| match ch {
            '\'' => f.write_str(r#"\'"#),
            '\\' => f.write_str(r#"\\"#),
            '\n' => f.write_str(r#"\n"#),
            '\t' => f.write_str(r#"\t"#),
            '\r' => f.write_str(r#"\r"#),
            other => write!(f, "{}", other),
        })
    }
}

/// Wraps `s` in an [`EscapeEscapedStringLiteral`], whose `fmt::Display` impl
/// writes the string with quotes, backslashes, newlines, tabs and carriage
/// returns backslash-escaped. No allocation happens here; the escaping is
/// performed when the returned value is formatted.
pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> {
EscapeEscapedStringLiteral(s)
}

#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum TrimWhereField {
Expand Down
10 changes: 10 additions & 0 deletions src/parser.rs
Expand Up @@ -496,6 +496,11 @@ impl<'a> Parser<'a> {
expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
})
}
Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
{
self.prev_token();
Ok(Expr::Value(self.parse_value()?))
}
Token::Number(_, _)
| Token::SingleQuotedString(_)
| Token::NationalStringLiteral(_)
Expand Down Expand Up @@ -901,6 +906,7 @@ impl<'a> Parser<'a> {
None
}
Token::SingleQuotedString(_)
| Token::EscapedStringLiteral(_)
| Token::NationalStringLiteral(_)
| Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
unexpected => {
Expand Down Expand Up @@ -2565,6 +2571,7 @@ impl<'a> Parser<'a> {
},
Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())),
unexpected => self.expected("a value", unexpected),
Expand Down Expand Up @@ -2596,6 +2603,9 @@ impl<'a> Parser<'a> {
match self.next_token() {
Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value),
Token::SingleQuotedString(s) => Ok(s),
Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
Ok(s)
}
unexpected => self.expected("literal string", unexpected),
}
}
Expand Down
78 changes: 78 additions & 0 deletions src/tokenizer.rs
Expand Up @@ -51,6 +51,8 @@ pub enum Token {
SingleQuotedString(String),
/// "National" string literal: i.e: N'string'
NationalStringLiteral(String),
/// "Escaped" string literal, which is an extension to the SQL standard: e.g. e'first \n second' or E'first \n second'
EscapedStringLiteral(String),
/// Hexadecimal string literal: i.e.: X'deadbeef'
HexStringLiteral(String),
/// Comma
Expand Down Expand Up @@ -160,6 +162,7 @@ impl fmt::Display for Token {
Token::Char(ref c) => write!(f, "{}", c),
Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
Token::Comma => f.write_str(","),
Token::Whitespace(ws) => write!(f, "{}", ws),
Expand Down Expand Up @@ -392,6 +395,21 @@ impl<'a> Tokenizer<'a> {
}
}
}
// PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
x @ 'e' | x @ 'E' => {
chars.next(); // consume, to check the next char
match chars.peek() {
Some('\'') => {
let s = self.tokenize_escaped_single_quoted_string(chars)?;
Ok(Some(Token::EscapedStringLiteral(s)))
}
_ => {
// regular identifier starting with an "E" or "e"
let s = self.tokenize_word(x, chars);
Ok(Some(Token::make_word(&s, None)))
}
}
}
// The spec only allows an uppercase 'X' to introduce a hex
// string, but PostgreSQL, at least, allows a lowercase 'x' too.
x @ 'x' | x @ 'X' => {
Expand Down Expand Up @@ -690,6 +708,66 @@ impl<'a> Tokenizer<'a> {
s
}

/// Read the body of an escaped string literal (`E'...'` / `e'...'`),
/// starting with the opening quote, and return its unescaped contents.
///
/// `chars` must be positioned on the opening `'`; it is consumed here.
/// The following sequences are interpreted:
///
/// * `\\` becomes a single backslash
/// * `\'` and `''` become a single quote
/// * `\n`, `\r`, `\t` become newline, carriage return and tab
/// * a backslash followed by any other character yields just that
///   character (the backslash is dropped)
///
/// Reading stops at the first quote that is neither backslash-escaped nor
/// doubled. If the input ends before such a quote is found, the result of
/// `self.tokenizer_error("Unterminated encoded string literal")` is returned.
fn tokenize_escaped_single_quoted_string(
&self,
chars: &mut Peekable<Chars<'_>>,
) -> Result<String, TokenizerError> {
let mut s = String::new();
chars.next(); // consume the opening quote

// slash escaping: `is_escaped` is true while the previous character was an unescaped backslash
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

FYIW this example from stack overflow looks like it might be a nice way to avoid macro overhead (and thus code bloat): https://stackoverflow.com/questions/58551211/how-do-i-interpret-escaped-characters-in-a-string

Copy link
Contributor Author

@ovr ovr May 24, 2022

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's not the same because this function tries to find & escapes the string from the query. It tries to find a single quote that can be escaped or not escaped (end of the string).

in our case string are wrapped in single quotes, i.e e'str'

let mut is_escaped = false;
while let Some(&ch) = chars.peek() {
// Shared handling for the letters r, n and t: after a backslash push the
// control character $ESCAPED, otherwise push the letter itself; the
// letter is consumed in both cases.
macro_rules! escape_control_character {
($ESCAPED:expr) => {{
if is_escaped {
s.push($ESCAPED);
is_escaped = false;
} else {
s.push(ch);
}

chars.next();
}};
}

match ch {
'\'' => {
chars.next(); // consume
if is_escaped {
// backslash-escaped quote: part of the string
s.push(ch);
is_escaped = false;
} else if chars.peek().map(|c| *c == '\'').unwrap_or(false) {
// doubled quote: emit one quote and skip the second
s.push(ch);
chars.next();
} else {
// plain quote: end of the literal
return Ok(s);
}
}
'\\' => {
if is_escaped {
s.push('\\');
is_escaped = false;
} else {
// remember the backslash; nothing is pushed until the next character is seen
is_escaped = true;
}

chars.next();
}
'r' => escape_control_character!('\r'),
'n' => escape_control_character!('\n'),
't' => escape_control_character!('\t'),
_ => {
// any other character is kept as-is; a pending backslash is discarded
is_escaped = false;
chars.next(); // consume
s.push(ch);
}
}
}
self.tokenizer_error("Unterminated encoded string literal")
}

/// Read a single quoted string, starting with the opening quote.
fn tokenize_single_quoted_string(
&self,
Expand Down
41 changes: 41 additions & 0 deletions tests/sqlparser_postgres.rs
Expand Up @@ -1467,3 +1467,44 @@ fn pg_and_generic() -> TestedDialects {
dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})],
}
}

/// Checks that `E'...'` literals are unescaped into
/// `Value::EscapedStringLiteral` for the PostgreSQL and generic dialects, and
/// that a literal whose closing quote is backslash-escaped reports an
/// unterminated-string error.
#[test]
fn parse_escaped_literal_string() {
    let query =
        r#"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"#;
    let parsed = pg_and_generic().verified_only_select(query);

    // Expected unescaped contents, in projection order.
    let expected = [
        "s1 \n s1",
        "s2 \\n s2",
        "s3 \\\n s3",
        "s4 \\\\n s4",
        "'",
        "foo \\",
    ];
    assert_eq!(expected.len(), parsed.projection.len());
    for (item, text) in parsed.projection.iter().zip(expected.iter()) {
        assert_eq!(
            &Expr::Value(Value::EscapedStringLiteral(text.to_string())),
            expr_from_projection(item)
        );
    }

    // The backslash escapes the only remaining quote, so the literal never ends.
    let unterminated = r#"SELECT E'\'"#;
    let error = pg_and_generic()
        .parse_sql_statements(unterminated)
        .unwrap_err();
    assert_eq!(
        error.to_string(),
        "sql parser error: Unterminated encoded string literal at Line: 1, Column 8"
    );
}