Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Support optional unit (CHARACTERS or OCTETS) for character string types such as VARCHAR #663

Merged
merged 1 commit into from Oct 11, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
84 changes: 70 additions & 14 deletions src/ast/data_type.rs
Expand Up @@ -26,15 +26,15 @@ use super::value::escape_single_quote_string;
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum DataType {
/// Fixed-length character type e.g. CHARACTER(10)
Character(Option<u64>),
Character(Option<CharacterLength>),
/// Fixed-length char type e.g. CHAR(10)
Char(Option<u64>),
Char(Option<CharacterLength>),
/// Character varying type e.g. CHARACTER VARYING(10)
CharacterVarying(Option<u64>),
CharacterVarying(Option<CharacterLength>),
/// Char varying type e.g. CHAR VARYING(10)
CharVarying(Option<u64>),
CharVarying(Option<CharacterLength>),
/// Variable-length character type e.g. VARCHAR(10)
Varchar(Option<u64>),
Varchar(Option<CharacterLength>),
/// Variable-length character type e.g. NVARCHAR(10)
Nvarchar(Option<u64>),
/// Uuid type
Expand Down Expand Up @@ -133,17 +133,14 @@ pub enum DataType {
impl fmt::Display for DataType {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
match self {
DataType::Character(size) => {
format_type_with_optional_length(f, "CHARACTER", size, false)
}
DataType::Char(size) => format_type_with_optional_length(f, "CHAR", size, false),
DataType::Character(size) => format_character_string_type(f, "CHARACTER", size),
DataType::Char(size) => format_character_string_type(f, "CHAR", size),
DataType::CharacterVarying(size) => {
format_type_with_optional_length(f, "CHARACTER VARYING", size, false)
}
DataType::CharVarying(size) => {
format_type_with_optional_length(f, "CHAR VARYING", size, false)
format_character_string_type(f, "CHARACTER VARYING", size)
}
DataType::Varchar(size) => format_type_with_optional_length(f, "VARCHAR", size, false),

DataType::CharVarying(size) => format_character_string_type(f, "CHAR VARYING", size),
DataType::Varchar(size) => format_character_string_type(f, "VARCHAR", size),
DataType::Nvarchar(size) => {
format_type_with_optional_length(f, "NVARCHAR", size, false)
}
Expand Down Expand Up @@ -247,6 +244,18 @@ fn format_type_with_optional_length(
Ok(())
}

/// Writes a character string type name, followed by its parenthesized length when present.
///
/// With `size` set to `None` only `sql_type` is written; otherwise the output is
/// `sql_type(<size>)`, where `<size>` is rendered through the `Display` impl of
/// `CharacterLength`.
fn format_character_string_type(
    f: &mut fmt::Formatter,
    sql_type: &str,
    size: &Option<CharacterLength>,
) -> fmt::Result {
    match size {
        Some(length) => write!(f, "{}({})", sql_type, length),
        None => write!(f, "{}", sql_type),
    }
}

/// Timestamp and Time data types information about TimeZone formatting.
///
/// This is more related to a display information than real differences between each variant. To
Expand Down Expand Up @@ -324,3 +333,50 @@ impl fmt::Display for ExactNumberInfo {
}
}
}

/// The [character length][1] of a character string type: a number plus an optional unit.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#character-length
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub struct CharacterLength {
    /// Length as written inside the parentheses, e.g. the `20` in `CHARACTER(20)`
    pub length: u64,
    /// Optional unit. When absent, ANSI implicitly treats the length as CHARACTERS
    pub unit: Option<CharLengthUnits>,
}

impl fmt::Display for CharacterLength {
    /// Renders the length, followed by a space and the unit when a unit is set.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match &self.unit {
            Some(unit) => write!(f, "{} {}", self.length, unit),
            None => write!(f, "{}", self.length),
        }
    }
}

/// Units in which a character length can be expressed, initially based on the 2016 ANSI
/// [standard][1].
///
/// The enum is fieldless, so it also derives `Copy`: values can be passed around and
/// compared without moving or cloning them.
///
/// [1]: https://jakewheat.github.io/sql-overview/sql-2016-foundation-grammar.html#char-length-units
#[derive(Debug, Copy, Clone, PartialEq, Eq, Hash)]
#[cfg_attr(feature = "serde", derive(Serialize, Deserialize))]
pub enum CharLengthUnits {
    /// CHARACTERS unit
    Characters,
    /// OCTETS unit
    Octets,
}

impl fmt::Display for CharLengthUnits {
    /// Writes the SQL keyword of the unit: `CHARACTERS` or `OCTETS`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // The keywords are constant strings, so no formatting machinery is needed.
        f.write_str(match self {
            Self::Characters => "CHARACTERS",
            Self::Octets => "OCTETS",
        })
    }
}
6 changes: 3 additions & 3 deletions src/ast/mod.rs
Expand Up @@ -29,9 +29,9 @@ use core::fmt;
#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};

pub use self::data_type::DataType;
pub use self::data_type::ExactNumberInfo;
pub use self::data_type::TimezoneInfo;
pub use self::data_type::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
pub use self::ddl::{
AlterColumnOperation, AlterTableOperation, ColumnDef, ColumnOption, ColumnOptionDef,
ReferentialAction, TableConstraint,
Expand Down
2 changes: 2 additions & 0 deletions src/keywords.rs
Expand Up @@ -123,6 +123,7 @@ define_keywords!(
CHANGE,
CHAR,
CHARACTER,
CHARACTERS,
CHARACTER_LENGTH,
CHARSET,
CHAR_LENGTH,
Expand Down Expand Up @@ -372,6 +373,7 @@ define_keywords!(
NVARCHAR,
OBJECT,
OCCURRENCES_REGEX,
OCTETS,
OCTET_LENGTH,
OF,
OFFSET,
Expand Down
156 changes: 145 additions & 11 deletions src/parser.rs
Expand Up @@ -3426,20 +3426,24 @@ impl<'a> Parser<'a> {
Ok(DataType::BigInt(optional_precision?))
}
}
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_precision()?)),
Keyword::VARCHAR => Ok(DataType::Varchar(self.parse_optional_character_length()?)),
Keyword::NVARCHAR => Ok(DataType::Nvarchar(self.parse_optional_precision()?)),
Keyword::CHARACTER => {
if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharacterVarying(self.parse_optional_precision()?))
Ok(DataType::CharacterVarying(
self.parse_optional_character_length()?,
))
} else {
Ok(DataType::Character(self.parse_optional_precision()?))
Ok(DataType::Character(self.parse_optional_character_length()?))
}
}
Keyword::CHAR => {
if self.parse_keyword(Keyword::VARYING) {
Ok(DataType::CharVarying(self.parse_optional_precision()?))
Ok(DataType::CharVarying(
self.parse_optional_character_length()?,
))
} else {
Ok(DataType::Char(self.parse_optional_precision()?))
Ok(DataType::Char(self.parse_optional_character_length()?))
}
}
Keyword::CLOB => Ok(DataType::Clob(self.parse_optional_precision()?)),
Expand Down Expand Up @@ -3680,6 +3684,31 @@ impl<'a> Parser<'a> {
}
}

/// Parses an optional parenthesized character length, e.g. `(20)` or `(20 OCTETS)`.
///
/// Returns `Ok(None)` when the next token is not `(`. Once `(` has been consumed, a
/// character length followed by `)` is required, and any error from parsing them is
/// propagated.
pub fn parse_optional_character_length(
    &mut self,
) -> Result<Option<CharacterLength>, ParserError> {
    // No opening parenthesis means no length was given.
    if !self.consume_token(&Token::LParen) {
        return Ok(None);
    }
    let length = self.parse_character_length()?;
    self.expect_token(&Token::RParen)?;
    Ok(Some(length))
}

/// Parses a character length: an unsigned integer literal optionally followed by the
/// `CHARACTERS` or `OCTETS` keyword.
///
/// The surrounding parentheses are not handled here. When neither keyword follows the
/// number, the returned `unit` is `None`.
pub fn parse_character_length(&mut self) -> Result<CharacterLength, ParserError> {
    let length = self.parse_literal_uint()?;

    // `CHARACTERS` is tried first; `OCTETS` is only attempted if it did not match.
    if self.parse_keyword(Keyword::CHARACTERS) {
        return Ok(CharacterLength {
            length,
            unit: Some(CharLengthUnits::Characters),
        });
    }
    if self.parse_keyword(Keyword::OCTETS) {
        return Ok(CharacterLength {
            length,
            unit: Some(CharLengthUnits::Octets),
        });
    }

    Ok(CharacterLength { length, unit: None })
}

pub fn parse_optional_precision_scale(
&mut self,
) -> Result<(Option<u64>, Option<u64>), ParserError> {
Expand Down Expand Up @@ -5332,7 +5361,9 @@ mod tests {

#[cfg(test)]
mod test_parse_data_type {
use crate::ast::{DataType, ExactNumberInfo, TimezoneInfo};
use crate::ast::{
CharLengthUnits, CharacterLength, DataType, ExactNumberInfo, TimezoneInfo,
};
use crate::dialect::{AnsiDialect, GenericDialect};
use crate::test_utils::TestedDialects;

Expand All @@ -5355,21 +5386,124 @@ mod tests {

test_parse_data_type!(dialect, "CHARACTER", DataType::Character(None));

test_parse_data_type!(dialect, "CHARACTER(20)", DataType::Character(Some(20)));
test_parse_data_type!(
dialect,
"CHARACTER(20)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHARACTER(20 CHARACTERS)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(
dialect,
"CHARACTER(20 OCTETS)",
DataType::Character(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(dialect, "CHAR", DataType::Char(None));

test_parse_data_type!(dialect, "CHAR(20)", DataType::Char(Some(20)));
test_parse_data_type!(
dialect,
"CHAR(20)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHAR(20 CHARACTERS)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(
dialect,
"CHAR(20 OCTETS)",
DataType::Char(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20)",
DataType::CharacterVarying(Some(20))
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20 CHARACTERS)",
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(
dialect,
"CHARACTER VARYING(20 OCTETS)",
DataType::CharacterVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(dialect, "CHAR VARYING(20)", DataType::CharVarying(Some(20)));
test_parse_data_type!(
dialect,
"CHAR VARYING(20)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: None
}))
);

test_parse_data_type!(
dialect,
"CHAR VARYING(20 CHARACTERS)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Characters)
}))
);

test_parse_data_type!(dialect, "VARCHAR(20)", DataType::Varchar(Some(20)));
test_parse_data_type!(
dialect,
"CHAR VARYING(20 OCTETS)",
DataType::CharVarying(Some(CharacterLength {
length: 20,
unit: Some(CharLengthUnits::Octets)
}))
);

test_parse_data_type!(
dialect,
"VARCHAR(20)",
DataType::Varchar(Some(CharacterLength {
length: 20,
unit: None
}))
);
}

#[test]
Expand Down
15 changes: 12 additions & 3 deletions tests/sqlparser_common.rs
Expand Up @@ -1945,7 +1945,10 @@ fn parse_create_table() {
vec![
ColumnDef {
name: "name".into(),
data_type: DataType::Varchar(Some(100)),
data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand Down Expand Up @@ -2401,7 +2404,10 @@ fn parse_create_external_table() {
vec![
ColumnDef {
name: "name".into(),
data_type: DataType::Varchar(Some(100)),
data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand Down Expand Up @@ -2469,7 +2475,10 @@ fn parse_create_or_replace_external_table() {
columns,
vec![ColumnDef {
name: "name".into(),
data_type: DataType::Varchar(Some(100)),
data_type: DataType::Varchar(Some(CharacterLength {
length: 100,
unit: None
})),
collation: None,
options: vec![ColumnOptionDef {
name: None,
Expand Down