From 3d1b5fc611c88bb9dd9d8f9dd0c47d82a5bb0d23 Mon Sep 17 00:00:00 2001 From: AugustoFKL Date: Sun, 9 Oct 2022 13:54:43 -0300 Subject: [PATCH] Add support for unit on char length units for small character string types. This results in complete support for ANSI CHARACTER, CHAR, CHARACTER VARYING, CHAR VARYING, and VARCHAR. --- src/ast/data_type.rs | 84 +++++++++++++++---- src/ast/mod.rs | 6 +- src/keywords.rs | 2 + src/parser.rs | 156 +++++++++++++++++++++++++++++++++--- tests/sqlparser_common.rs | 15 +++- tests/sqlparser_postgres.rs | 15 +++- 6 files changed, 244 insertions(+), 34 deletions(-) diff --git a/src/ast/data_type.rs b/src/ast/data_type.rs index 0e3d4552d..baa23acf2 100644 --- a/src/ast/data_type.rs +++ b/src/ast/data_type.rs @@ -26,15 +26,15 @@ use super::value::escape_single_quote_string; #[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] pub enum DataType { /// Fixed-length character type e.g. CHARACTER(10) - Character(Option<u64>), + Character(Option<CharacterLength>), /// Fixed-length char type e.g. CHAR(10) - Char(Option<u64>), + Char(Option<CharacterLength>), /// Character varying type e.g. CHARACTER VARYING(10) - CharacterVarying(Option<u64>), + CharacterVarying(Option<CharacterLength>), /// Char varying type e.g. CHAR VARYING(10) - CharVarying(Option<u64>), + CharVarying(Option<CharacterLength>), /// Variable-length character type e.g. VARCHAR(10) - Varchar(Option<u64>), + Varchar(Option<CharacterLength>), /// Variable-length character type e.g. 
NVARCHAR(10) Nvarchar(Option<u64>), /// Uuid type @@ -133,17 +133,14 @@ pub enum DataType { impl fmt::Display for DataType { fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { match self { - DataType::Character(size) => { - format_type_with_optional_length(f, "CHARACTER", size, false) - } - DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false), + DataType::Character(size) => format_character_string_type(f, "CHARACTER", size), + DataType::Char(size) => format_character_string_type(f, "CHAR", size), DataType::CharacterVarying(size) => { - format_type_with_optional_length(f, "CHARACTER VARYING", size, false) - } - DataType::CharVarying(size) => { - format_type_with_optional_length(f, "CHAR VARYING", size, false) + format_character_string_type(f, "CHARACTER VARYING", size) } - DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false), + + DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size), + DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size), DataType::Nvarchar(size) => { format_type_with_optional_length(f, "NVARCHAR", size, false) } @@ -247,6 +244,18 @@ fn format_type_with_optional_length( Ok(()) } +fn format_character_string_type( + f: &mut fmt::Formatter, + sql_type: &str, + size: &Option<CharacterLength>, +) -> fmt::Result { + write!(f, "{}", sql_type)?; + if let Some(size) = size { + write!(f, "({})", size)?; + } + Ok(()) +} + /// Timestamp and Time data types information about TimeZone formatting. /// /// This is more related to a display information than real differences between each variant. To @@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo { } } } + +/// Information about [character length][1], including length and possibly unit. 
+/// +/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub struct CharacterLength { + /// Fixed length (if not VARYING) or maximum length (if VARYING) + pub length: u64, + /// Optional unit. If not specified, ANSI treats it as CHARACTERS implicitly + pub unit: Option<CharLengthUnits>, +} + +impl fmt::Display for CharacterLength { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}", self.length)?; + if let Some(unit) = &self.unit { + write!(f, " {}", unit)?; + } + Ok(()) + } +} + +/// Possible units for characters, initially based on 2016 ANSI [standard][1]. +/// +/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))] +pub enum CharLengthUnits { + /// CHARACTERS unit + Characters, + /// OCTETS unit + Octets, +} + +impl fmt::Display for CharLengthUnits { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Characters => { + write!(f, "CHARACTERS") + } + Self::Octets => { + write!(f, "OCTETS") + } + } + } +} diff --git a/src/ast/mod.rs b/src/ast/mod.rs index 7f9d42f05..82e9eff53 100644 --- a/src/ast/mod.rs +++ b/src/ast/mod.rs @@ -29,9 +29,9 @@ use core::fmt; #[cfg(feature = "serde")] use serde::{Deserialize, Serialize}; -pub use self::data_type::DataType; -pub use self::data_type::ExactNumberInfo; -pub use self::data_type::TimezoneInfo; +pub use self::data_type::{ + CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, +}; pub use self::ddl::{ AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef, ReferentialAction, TableConstraint, diff --git a/src/keywords.rs b/src/keywords.rs index b84b4cf9d..0b6b06b33 100644 --- a/src/keywords.rs +++ b/src/keywords.rs @@ -123,6 
+123,7 @@ define_keywords!( CHANGE, CHAR, CHARACTER, + CHARACTERS, CHARACTER_LENGTH, CHARSET, CHAR_LENGTH, @@ -372,6 +373,7 @@ define_keywords!( NVARCHAR, OBJECT, OCCURRENCES_REGEX, + OCTETS, OCTET_LENGTH, OF, OFFSET, diff --git a/src/parser.rs b/src/parser.rs index cb261e183..b065f42b6 100644 --- a/src/parser.rs +++ b/src/parser.rs @@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> { Ok(DataType::BigInt(optional_precision?)) } } - Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)), + Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)), Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)), Keyword::CHARACTER => { if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::CharacterVarying(self.parse_optional_precision()?)) + Ok(DataType::CharacterVarying( + self.parse_optional_character_length()?, + )) } else { - Ok(DataType::Character(self.parse_optional_precision()?)) + Ok(DataType::Character(self.parse_optional_character_length()?)) } } Keyword::CHAR => { if self.parse_keyword(Keyword::VARYING) { - Ok(DataType::CharVarying(self.parse_optional_precision()?)) + Ok(DataType::CharVarying( + self.parse_optional_character_length()?, + )) } else { - Ok(DataType::Char(self.parse_optional_precision()?)) + Ok(DataType::Char(self.parse_optional_character_length()?)) } } Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)), @@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> { } } + pub fn parse_optional_character_length( + &mut self, + ) -> Result<Option<CharacterLength>, ParserError> { + if self.consume_token(&Token::LParen) { + let character_length = self.parse_character_length()?; + self.expect_token(&Token::RParen)?; + Ok(Some(character_length)) + } else { + Ok(None) + } + } + + pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> { + let length = self.parse_literal_uint()?; + let unit = if self.parse_keyword(Keyword::CHARACTERS) { + Some(CharLengthUnits::Characters) + } else if self.parse_keyword(Keyword::OCTETS) { 
+ Some(CharLengthUnits::Octets) + } else { + None + }; + + Ok(CharacterLength { length, unit }) + } + pub fn parse_optional_precision_scale( &mut self, ) -> Result<(Option<u64>, Option<u64>), ParserError> { @@ -5332,7 +5361,9 @@ mod tests { #[cfg(test)] mod test_parse_data_type { - use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo}; + use crate::ast::{ + CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo, + }; use crate::dialect::{AnsiDialect, GenericDialect}; use crate::test_utils::TestedDialects; @@ -5355,21 +5386,124 @@ mod tests { test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None)); - test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20))); + test_parse_data_type!( + dialect, + "CHARACTER(20)", + DataType::Character(Some(CharacterLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER(20 CHARACTERS)", + DataType::Character(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER(20 OCTETS)", + DataType::Character(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); test_parse_data_type!(dialect, "CHAR", DataType::Char(None)); - test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20))); + test_parse_data_type!( + dialect, + "CHAR(20)", + DataType::Char(Some(CharacterLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHAR(20 CHARACTERS)", + DataType::Char(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHAR(20 OCTETS)", + DataType::Char(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); test_parse_data_type!( dialect, "CHARACTER VARYING(20)", - DataType::CharacterVarying(Some(20)) + DataType::CharacterVarying(Some(CharacterLength { + length: 20, + unit: None + })) + ); + + 
test_parse_data_type!( + dialect, + "CHARACTER VARYING(20 CHARACTERS)", + DataType::CharacterVarying(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); + + test_parse_data_type!( + dialect, + "CHARACTER VARYING(20 OCTETS)", + DataType::CharacterVarying(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) ); - test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20))); + test_parse_data_type!( + dialect, + "CHAR VARYING(20)", + DataType::CharVarying(Some(CharacterLength { + length: 20, + unit: None + })) + ); + + test_parse_data_type!( + dialect, + "CHAR VARYING(20 CHARACTERS)", + DataType::CharVarying(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Characters) + })) + ); - test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20))); + test_parse_data_type!( + dialect, + "CHAR VARYING(20 OCTETS)", + DataType::CharVarying(Some(CharacterLength { + length: 20, + unit: Some(CharLengthUnits::Octets) + })) + ); + + test_parse_data_type!( + dialect, + "VARCHAR(20)", + DataType::Varchar(Some(CharacterLength { + length: 20, + unit: None + })) + ); } #[test] diff --git a/tests/sqlparser_common.rs b/tests/sqlparser_common.rs index 7654d677e..aa4013394 100644 --- a/tests/sqlparser_common.rs +++ b/tests/sqlparser_common.rs @@ -1945,7 +1945,10 @@ fn parse_create_table() { vec![ ColumnDef { name: "name".into(), - data_type: DataType::Varchar(Some(100)), + data_type: DataType::Varchar(Some(CharacterLength { + length: 100, + unit: None + })), collation: None, options: vec![ColumnOptionDef { name: None, @@ -2401,7 +2404,10 @@ fn parse_create_external_table() { vec![ ColumnDef { name: "name".into(), - data_type: DataType::Varchar(Some(100)), + data_type: DataType::Varchar(Some(CharacterLength { + length: 100, + unit: None + })), collation: None, options: vec![ColumnOptionDef { name: None, @@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() { 
columns, vec![ColumnDef { name: "name".into(), - data_type: DataType::Varchar(Some(100)), + data_type: DataType::Varchar(Some(CharacterLength { + length: 100, + unit: None + })), collation: None, options: vec![ColumnOptionDef { name: None, diff --git a/tests/sqlparser_postgres.rs b/tests/sqlparser_postgres.rs index c589feec5..74079ae38 100644 --- a/tests/sqlparser_postgres.rs +++ b/tests/sqlparser_postgres.rs @@ -74,7 +74,10 @@ fn parse_create_table_with_defaults() { }, ColumnDef { name: "first_name".into(), - data_type: DataType::CharacterVarying(Some(45)), + data_type: DataType::CharacterVarying(Some(CharacterLength { + length: 45, + unit: None + })), collation: None, options: vec![ColumnOptionDef { name: None, @@ -83,7 +86,10 @@ fn parse_create_table_with_defaults() { }, ColumnDef { name: "last_name".into(), - data_type: DataType::CharacterVarying(Some(45)), + data_type: DataType::CharacterVarying(Some(CharacterLength { + length: 45, + unit: None + })), collation: Some(ObjectName(vec![Ident::with_quote('"', "es_ES")])), options: vec![ColumnOptionDef { name: None, @@ -92,7 +98,10 @@ fn parse_create_table_with_defaults() { }, ColumnDef { name: "email".into(), - data_type: DataType::CharacterVarying(Some(50)), + data_type: DataType::CharacterVarying(Some(CharacterLength { + length: 50, + unit: None + })), collation: None, options: vec![], },