diff --git a/src/ast/value.rs b/src/ast/value.rs
index 1401855f1..8337fbaa8 100644
--- a/src/ast/value.rs
+++ b/src/ast/value.rs
@@ -30,6 +30,9 @@ pub enum Value {
     Number(BigDecimal, bool),
     /// 'string value'
     SingleQuotedString(String),
+    /// e'string value' (postgres extension)
+    /// <https://www.postgresql.org/docs/8.3/sql-syntax-lexical.html#SQL-SYNTAX-STRINGS>
+    EscapedStringLiteral(String),
@@ … @@ impl fmt::Display for Value {
             Value::Number(v, l) => write!(f, "{}{long}", v, long = if *l { "L" } else { "" }),
             Value::DoubleQuotedString(v) => write!(f, "\"{}\"", v),
             Value::SingleQuotedString(v) => write!(f, "'{}'", escape_single_quote_string(v)),
+            Value::EscapedStringLiteral(v) => write!(f, "E'{}'", escape_escaped_string(v)),
             Value::NationalStringLiteral(v) => write!(f, "N'{}'", v),
             Value::HexStringLiteral(v) => write!(f, "X'{}'", v),
             Value::Boolean(v) => write!(f, "{}", v),
@@ -193,6 +197,40 @@ pub fn escape_single_quote_string(s: &str) -> EscapeSingleQuoteString<'_> {
     EscapeSingleQuoteString(s)
 }
 
+pub struct EscapeEscapedStringLiteral<'a>(&'a str);
+
+impl<'a> fmt::Display for EscapeEscapedStringLiteral<'a> {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        for c in self.0.chars() {
+            match c {
+                '\'' => {
+                    write!(f, r#"\'"#)?;
+                }
+                '\\' => {
+                    write!(f, r#"\\"#)?;
+                }
+                '\n' => {
+                    write!(f, r#"\n"#)?;
+                }
+                '\t' => {
+                    write!(f, r#"\t"#)?;
+                }
+                '\r' => {
+                    write!(f, r#"\r"#)?;
+                }
+                _ => {
+                    write!(f, "{}", c)?;
+                }
+            }
+        }
+        Ok(())
+    }
+}
+
+pub fn escape_escaped_string(s: &str) -> EscapeEscapedStringLiteral<'_> {
+    EscapeEscapedStringLiteral(s)
+}
+
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
 pub enum TrimWhereField {
diff --git a/src/parser.rs b/src/parser.rs
index 5ee3d5cb6..2c3f4875d 100644
--- a/src/parser.rs
+++ b/src/parser.rs
@@ -496,6 +496,11 @@ impl<'a> Parser<'a> {
                     expr: Box::new(self.parse_subexpr(Self::PLUS_MINUS_PREC)?),
                 })
             }
+            Token::EscapedStringLiteral(_) if dialect_of!(self is PostgreSqlDialect | GenericDialect) =>
+            {
+                self.prev_token();
+                Ok(Expr::Value(self.parse_value()?))
+            }
             Token::Number(_, _)
             | Token::SingleQuotedString(_)
             | Token::NationalStringLiteral(_)
@@ -901,6 +906,7 @@ impl<'a> Parser<'a> {
                     None
                 }
                 Token::SingleQuotedString(_)
+                | Token::EscapedStringLiteral(_)
                 | Token::NationalStringLiteral(_)
                 | Token::HexStringLiteral(_) => Some(Box::new(self.parse_expr()?)),
                 unexpected => {
@@ -2565,6 +2571,7 @@ impl<'a> Parser<'a> {
             },
             Token::SingleQuotedString(ref s) => Ok(Value::SingleQuotedString(s.to_string())),
             Token::NationalStringLiteral(ref s) => Ok(Value::NationalStringLiteral(s.to_string())),
+            Token::EscapedStringLiteral(ref s) => Ok(Value::EscapedStringLiteral(s.to_string())),
             Token::HexStringLiteral(ref s) => Ok(Value::HexStringLiteral(s.to_string())),
             Token::Placeholder(ref s) => Ok(Value::Placeholder(s.to_string())),
             unexpected => self.expected("a value", unexpected),
@@ -2596,6 +2603,9 @@ impl<'a> Parser<'a> {
         match self.next_token() {
             Token::Word(Word { value, keyword, .. }) if keyword == Keyword::NoKeyword => Ok(value),
             Token::SingleQuotedString(s) => Ok(s),
+            Token::EscapedStringLiteral(s) if dialect_of!(self is PostgreSqlDialect | GenericDialect) => {
+                Ok(s)
+            }
             unexpected => self.expected("literal string", unexpected),
         }
     }
diff --git a/src/tokenizer.rs b/src/tokenizer.rs
index 3fb0f66b2..91cb16a80 100644
--- a/src/tokenizer.rs
+++ b/src/tokenizer.rs
@@ -51,6 +51,8 @@ pub enum Token {
     SingleQuotedString(String),
     /// "National" string literal: i.e: N'string'
     NationalStringLiteral(String),
+    /// "Escaped" string literal, which is an extension to the SQL standard: i.e: e'first \n second' or E'first \n second'
+    EscapedStringLiteral(String),
     /// Hexadecimal string literal: i.e.: X'deadbeef'
     HexStringLiteral(String),
     /// Comma
@@ -160,6 +162,7 @@ impl fmt::Display for Token {
             Token::Char(ref c) => write!(f, "{}", c),
             Token::SingleQuotedString(ref s) => write!(f, "'{}'", s),
             Token::NationalStringLiteral(ref s) => write!(f, "N'{}'", s),
+            Token::EscapedStringLiteral(ref s) => write!(f, "E'{}'", s),
             Token::HexStringLiteral(ref s) => write!(f, "X'{}'", s),
             Token::Comma => f.write_str(","),
             Token::Whitespace(ws) => write!(f, "{}", ws),
@@ -392,6 +395,21 @@ impl<'a> Tokenizer<'a> {
                     }
                 }
             }
+            // PostgreSQL accepts "escape" string constants, which are an extension to the SQL standard.
+            x @ 'e' | x @ 'E' => {
+                chars.next(); // consume, to check the next char
+                match chars.peek() {
+                    Some('\'') => {
+                        let s = self.tokenize_escaped_single_quoted_string(chars)?;
+                        Ok(Some(Token::EscapedStringLiteral(s)))
+                    }
+                    _ => {
+                        // regular identifier starting with an "E" or "e"
+                        let s = self.tokenize_word(x, chars);
+                        Ok(Some(Token::make_word(&s, None)))
+                    }
+                }
+            }
             // The spec only allows an uppercase 'X' to introduce a hex
             // string, but PostgreSQL, at least, allows a lowercase 'x' too.
             x @ 'x' | x @ 'X' => {
@@ -690,6 +708,66 @@ impl<'a> Tokenizer<'a> {
     }
 
+    /// Read an escaped single quoted string, starting with the opening quote.
+    fn tokenize_escaped_single_quoted_string(
+        &self,
+        chars: &mut Peekable<Chars<'_>>,
+    ) -> Result<String, TokenizerError> {
+        let mut s = String::new();
+        chars.next(); // consume the opening quote
+
+        // backslash escaping state
+        let mut is_escaped = false;
+        while let Some(&ch) = chars.peek() {
+            macro_rules! escape_control_character {
+                ($ESCAPED:expr) => {{
+                    if is_escaped {
+                        s.push($ESCAPED);
+                        is_escaped = false;
+                    } else {
+                        s.push(ch);
+                    }
+
+                    chars.next();
+                }};
+            }
+
+            match ch {
+                '\'' => {
+                    chars.next(); // consume
+                    if is_escaped {
+                        s.push(ch);
+                        is_escaped = false;
+                    } else if chars.peek().map(|c| *c == '\'').unwrap_or(false) {
+                        s.push(ch);
+                        chars.next();
+                    } else {
+                        return Ok(s);
+                    }
+                }
+                '\\' => {
+                    if is_escaped {
+                        s.push('\\');
+                        is_escaped = false;
+                    } else {
+                        is_escaped = true;
+                    }
+
+                    chars.next();
+                }
+                'r' => escape_control_character!('\r'),
+                'n' => escape_control_character!('\n'),
+                't' => escape_control_character!('\t'),
+                _ => {
+                    is_escaped = false;
+                    chars.next(); // consume
+                    s.push(ch);
+                }
+            }
+        }
+        self.tokenizer_error("Unterminated encoded string literal")
+    }
+
     /// Read a single quoted string, starting with the opening quote.
     fn tokenize_single_quoted_string(
         &self,
diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs
index 69b7fcfa4..00594ddb4 100644
--- a/tests/sqlparser_postgres.rs
+++ b/tests/sqlparser_postgres.rs
@@ -1467,3 +1467,44 @@ fn pg_and_generic() -> TestedDialects {
         dialects: vec![Box::new(PostgreSqlDialect {}), Box::new(GenericDialect {})],
     }
 }
+
+#[test]
+fn parse_escaped_literal_string() {
+    let sql =
+        r#"SELECT E's1 \n s1', E's2 \\n s2', E's3 \\\n s3', E's4 \\\\n s4', E'\'', E'foo \\'"#;
+    let select = pg_and_generic().verified_only_select(sql);
+    assert_eq!(6, select.projection.len());
+    assert_eq!(
+        &Expr::Value(Value::EscapedStringLiteral("s1 \n s1".to_string())),
+        expr_from_projection(&select.projection[0])
+    );
+    assert_eq!(
+        &Expr::Value(Value::EscapedStringLiteral("s2 \\n s2".to_string())),
+        expr_from_projection(&select.projection[1])
+    );
+    assert_eq!(
+        &Expr::Value(Value::EscapedStringLiteral("s3 \\\n s3".to_string())),
+        expr_from_projection(&select.projection[2])
+    );
+    assert_eq!(
+        &Expr::Value(Value::EscapedStringLiteral("s4 \\\\n s4".to_string())),
+        expr_from_projection(&select.projection[3])
+    );
+    assert_eq!(
+        &Expr::Value(Value::EscapedStringLiteral("'".to_string())),
+        expr_from_projection(&select.projection[4])
+    );
+    assert_eq!(
+        &Expr::Value(Value::EscapedStringLiteral("foo \\".to_string())),
+        expr_from_projection(&select.projection[5])
+    );
+
+    let sql = r#"SELECT E'\'"#;
+    assert_eq!(
+        pg_and_generic()
+            .parse_sql_statements(sql)
+            .unwrap_err()
+            .to_string(),
+        "sql parser error: Unterminated encoded string literal at Line: 1, Column 8"
+    );
+}
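For reviewers: a minimal usage sketch of what this patch enables end to end. It is not part of the diff; it only assumes the crate's existing public entry points (`Parser::parse_sql` and `PostgreSqlDialect`), and the `main` wrapper is illustrative.

```rust
use sqlparser::dialect::PostgreSqlDialect;
use sqlparser::parser::Parser;

fn main() {
    // E'...' is tokenized as Token::EscapedStringLiteral; the parser accepts it
    // only for the PostgreSQL and generic dialects, unescaping \n, \t, \r, \\ and \'.
    let sql = r"SELECT E'tab:\t quote:\' backslash:\\'";

    let dialect = PostgreSqlDialect {};
    let ast = Parser::parse_sql(&dialect, sql).expect("E'...' should parse with this patch");

    // Display re-escapes the value (see escape_escaped_string above), so the
    // statement round-trips to the original SQL text.
    assert_eq!(ast[0].to_string(), sql);
    println!("{:#?}", ast);
}
```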