Skip to content

Commit

Permalink
feat: add sql grammar with pratt parser and tests (#983)
Browse files Browse the repository at this point in the history
  • Loading branch information
EmirVildanov committed Mar 1, 2024
1 parent a0a92fb commit 57daac4
Show file tree
Hide file tree
Showing 4 changed files with 482 additions and 1 deletion.
1 change: 1 addition & 0 deletions grammars/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ pest_derive = { path = "../derive", version = "2.7.7" }
[dev-dependencies]
criterion = "0.5"
pretty_assertions = "1.3.0"
lazy_static = "1.4.0"

[[bench]]
name = "json"
Expand Down
190 changes: 190 additions & 0 deletions grammars/src/grammars/sql.pest
Original file line number Diff line number Diff line change
@@ -0,0 +1,190 @@
// Entry point of the grammar: exactly one statement, anchored at the start of input (SOI)
// and terminated by EOF (end of input or `;`, see the EOF rule).
// Silent rule (`_`): it produces no pair of its own, only the pairs of what it matched.
Command = _{ SOI ~ (Query | ExplainQuery | DDL | ACL) ~ EOF }

// Access-control statements: users, roles and privileges.
// Silent rule: only the matched alternative appears in the parse tree.
// All keywords below use `^"..."`, i.e. they are matched case-insensitively.
ACL = _{ DropRole | DropUser | CreateRole | CreateUser | AlterUser | GrantPrivilege | RevokePrivilege }
// create user <name> [with] password '<password>' [using <auth method>]
CreateUser = {
^"create" ~ ^"user" ~ Identifier ~ (^"with")? ~ ^"password" ~ SingleQuotedString ~
AuthMethod?
}
// alter user <name> [with] <alter option>
AlterUser = {
^"alter" ~ ^"user" ~ Identifier ~ (^"with")? ~ AlterOption
}
// What can be altered on a user: login flag or password.
AlterOption = _{ AlterLogin | AlterNoLogin | AlterPassword }
AlterLogin = { ^"login" }
AlterNoLogin = { ^"nologin" }
AlterPassword = { ^"password" ~ SingleQuotedString ~ AuthMethod? }
// Authentication method introduced by the `using` keyword.
AuthMethod = { ^"using" ~ (ChapSha1 | Md5 | Ldap) }
ChapSha1 = { ^"chap-sha1" }
Md5 = { ^"md5" }
Ldap = { ^"ldap" }
DropUser = { ^"drop" ~ ^"user" ~ Identifier }
CreateRole = { ^"create" ~ ^"role" ~ Identifier }
DropRole = { ^"drop" ~ ^"role" ~ Identifier }
// grant <priv block> to <name> / revoke <priv block> from <name>
GrantPrivilege = { ^"grant" ~ PrivBlock ~ ^"to" ~ Identifier }
RevokePrivilege = { ^"revoke" ~ PrivBlock ~ ^"from" ~ Identifier }
// Either a privilege applied to an object, or a bare identifier (PrivBlockRolePass).
// NOTE(review): the bare identifier is presumably a role name being granted or revoked;
// confirm against the consumer of this grammar.
PrivBlock = _{ PrivBlockPrivilege | PrivBlockRolePass }
// A privilege followed by its target: an object class (`user`, `role`, `table`)
// or one specific object (`on user <name>`, `on role <name>`, `on table <name>`).
PrivBlockPrivilege = {Privilege ~ (PrivBlockUser | PrivBlockSpecificUser | PrivBlockRole
| PrivBlockSpecificRole | PrivBlockTable | PrivBlockSpecificTable)}
PrivBlockUser = { ^"user" }
PrivBlockSpecificUser = { ^"on" ~ ^"user" ~ Identifier }
PrivBlockRole = { ^"role" }
PrivBlockSpecificRole = { ^"on" ~ ^"role" ~ Identifier }
PrivBlockTable = { ^"table" }
PrivBlockSpecificTable = { ^"on" ~ ^"table" ~ Identifier }
PrivBlockRolePass = { Identifier }
// The set of privilege keywords; silent, so the concrete Privilege* rule is what gets reported.
Privilege = _{ PrivilegeRead | PrivilegeWrite | PrivilegeExecute |
PrivilegeCreate | PrivilegeAlter | PrivilegeDrop |
PrivilegeSession | PrivilegeUsage }
PrivilegeAlter = { ^"alter" }
PrivilegeCreate = { ^"create" }
PrivilegeDrop = { ^"drop" }
PrivilegeExecute = { ^"execute" }
PrivilegeRead = { ^"read" }
PrivilegeSession = { ^"session" }
PrivilegeUsage = { ^"usage" }
PrivilegeWrite = { ^"write" }

// Data-definition statements. Silent rule: only the matched alternative is reported.
DDL = _{ CreateTable | DropTable | CreateProc }
// create table <name> (<column defs>, primary key (<columns>)) distributed ...
// Both the primary key clause and the distribution clause are mandatory.
CreateTable = {
^"create" ~ ^"table" ~ Identifier ~
"(" ~ Columns ~ "," ~ PrimaryKey ~ ")" ~
Distribution
}
// One or more comma-separated column definitions.
Columns = { ColumnDef ~ ("," ~ ColumnDef)* }
// <name> <type> [[not] null]
ColumnDef = { Identifier ~ ColumnDefType ~ ColumnDefIsNull? }
ColumnDefIsNull = { NotFlag? ~ ^"null" }
// primary key (<column> [, <column>]*)
PrimaryKey = {
^"primary" ~ ^"key" ~
"(" ~ Identifier ~ ("," ~ Identifier)* ~ ")"
}
// distributed globally | distributed by (<column> [, <column>]*)
Distribution = { ^"distributed" ~ (Global | Sharding) }
Global = { ^"globally" }
Sharding = { ^"by" ~ "(" ~ Identifier ~ ("," ~ Identifier)* ~ ")"}
DropTable = { ^"drop" ~ ^"table" ~ Identifier }

// create procedure <name> ([<param types>]) [language <lang>] <body>
// The body is written either as `as $$ <statement> $$` or as `begin atomic <statement> end`.
// `atomic` and `end` are matched case-insensitively (`^"..."`), like every other keyword
// in this grammar.
CreateProc = {
^"create" ~ ^"procedure" ~ Identifier ~
"(" ~ ProcParams? ~ ")" ~ (^"language" ~ ProcLanguage)? ~
((^"as" ~ "$$" ~ ProcBody ~ "$$") | (^"begin" ~ ^"atomic" ~ ProcBody ~ ^"end"))
}
// One or more comma-separated procedure parameters; each parameter is just a type (no name).
ProcParams = { ProcParamDef ~ ("," ~ ProcParamDef)* }
ProcParamDef = { ColumnDefType }
// The only procedure language accepted by the grammar is `sql`.
ProcLanguage = { SQL }
SQL = { ^"sql" }
// A procedure body is a single DML statement.
ProcBody = { (Insert | Update | Delete) }

// explain <query>. ExplainQuery is a silent wrapper, so the reported pair is Explain.
ExplainQuery = _{ Explain }
Explain = { ^"explain" ~ Query }

// Any data query or DML statement. Not silent: the matched statement is wrapped in a Query pair.
Query = { (SelectWithOptionalContinuation | Values | Insert | Update | Delete) }
// A select optionally followed by exactly one `except [distinct]` or `union all` select.
SelectWithOptionalContinuation = { Select ~ (ExceptContinuation | UnionAllContinuation)? }
ExceptContinuation = { ((^"except" ~ ^"distinct") | ^"except") ~ Select }
UnionAllContinuation = { ^"union" ~ ^"all" ~ Select }
// select <projection> from <scan> [<join>] [where ...] [group by ...] [having ...]
// At most one Join clause is accepted.
Select = {
^"select" ~ Projection ~ ^"from" ~ Scan ~
Join? ~ WhereClause? ~
(^"group" ~ ^"by" ~ GroupBy)? ~
(^"having" ~ Having)?
}
// [distinct] followed by one or more comma-separated projection elements.
Projection = { Distinct? ~ ProjectionElement ~ ("," ~ ProjectionElement)* }
ProjectionElement = _{ Asterisk | Column }
// An expression with an optional alias; the `as` keyword itself is optional.
Column = { Expr ~ ((^"as")? ~ Identifier)? }
Asterisk = { "*" }
// Silent: a `where` clause is reported as its Selection pair.
WhereClause = _{ ^"where" ~ Selection }
Selection = { Expr }
// A table name or a parenthesized subquery, with an optional alias.
Scan = { (Identifier | SubQuery) ~ ((^"as")? ~ Identifier)? }
// [inner | left [outer]] join <scan> on <condition>
Join = { JoinKind? ~ ^"join" ~ Scan ~ ^"on" ~ Expr }
JoinKind = _{ ( InnerJoinKind | LeftJoinKind ) }
InnerJoinKind = { ^"inner" }
LeftJoinKind = { ^"left" ~ (^"outer")? }
// One or more comma-separated grouping expressions.
GroupBy = { Expr ~ ("," ~ Expr)* }
Having = { Expr }
// A parenthesized select (with optional continuation) or values list.
SubQuery = { "(" ~ (SelectWithOptionalContinuation | Values) ~ ")" }
// insert into <table> [(<target columns>)] (<values> | <select>) [on conflict ...]
Insert = { ^"insert" ~ ^"into" ~ Identifier ~ ("(" ~ TargetColumns ~ ")")? ~ (Values | Select) ~ OnConflict? }
TargetColumns = { Identifier ~ ("," ~ Identifier)* }
// on conflict do (nothing | replace | fail)
// `on` and `conflict` are separate tokens so that any implicit WHITESPACE (several spaces,
// tabs, newlines) may separate them, exactly like every other multi-word keyword sequence
// in this grammar. Silent rule: only the chosen Do* action is reported.
OnConflict = _{ ^"on" ~ ^"conflict" ~ ^"do" ~ (DoNothing | DoReplace | DoFail) }
// Conflict-resolution actions that may follow `on conflict do`.
DoReplace = { ^"replace" }
DoNothing = { ^"nothing" }
DoFail = { ^"fail" }
// update <table> set <assignments> [from <scan> [where <expr>] | where <expr>]
Update = { ^"update" ~ Identifier ~ ^"set" ~ UpdateList ~ (UpdateFrom | WhereClause)? }
// One or more comma-separated `<column> = <expr>` assignments.
UpdateList = { UpdateItem ~ ("," ~ UpdateItem)* }
UpdateItem = { Identifier ~ "=" ~ Expr }
// Silent: the Scan (and the optional filter Expr) are reported directly under Update.
UpdateFrom = _{ ^"from" ~ Scan ~ (^"where" ~ Expr)? }
// values <row> [, <row>]*
Values = { ^"values" ~ Row ~ ("," ~ Row)* }
// delete from <table> [where <filter>]
Delete = { ^"delete" ~ ^"from" ~ Identifier ~ (^"where" ~ DeleteFilter)? }
DeleteFilter = { Expr }

// Identifiers are atomic (`@`): no implicit whitespace is allowed inside them and the rules
// they call do not produce pairs of their own.
Identifier = @{ DoubleQuotedIdentifier | IdentifierInner }
// The same identifier syntax wrapped in double quotes; the keyword restriction of
// IdentifierInner still applies inside the quotes.
DoubleQuotedIdentifier = @{ ("\"" ~ IdentifierInner ~ "\"") }
// The negative lookahead rejects a Keyword that is immediately followed by `(`, whitespace,
// `,` or EOF. After that: one non-digit character followed by any number of non-digit
// characters or ASCII digits.
IdentifierInner = @{ !(Keyword ~ ("(" | WHITESPACE | "," | EOF)) ~ (IdentifierNonDigit ~ (IdentifierNonDigit | ASCII_DIGIT)*) }
// ASCII letters, the Cyrillic ranges А-Я and а-я, `-` and `_`.
// NOTE: because `-` is an identifier character, `a-b` is consumed as a single identifier.
IdentifierNonDigit = _{ ('a'..'z' | 'A' .. 'Z' | 'А' .. 'Я' | 'а' .. 'я' | "-" | "_") }
// Words that may not be used as identifiers (see the lookahead in IdentifierInner).
Keyword = { ^"left" | ^"having" | ^"not" | ^"inner" | ^"group"
| ^"on" | ^"join" | ^"from" | ^"exists" | ^"except"
| ^"union" | ^"where" | ^"distinct" | ^"between" | ^"option"
| ^"values"}

// A flat sequence `atom (infix-op atom)*`. The grammar itself encodes no operator precedence
// or associativity; the flat pair sequence is meant to be restructured afterwards
// (e.g. with pest's PrattParser).
Expr = { ExprAtomValue ~ (ExprInfixOp ~ ExprAtomValue)* }
// All infix operators. Silent, so the concrete operator rule is what gets reported.
ExprInfixOp = _{ Between | ArithInfixOp | CmpInfixOp | ConcatInfixOp | And | Or }
// [not] between — treated as an infix operator between the tested value and the bounds.
Between = { NotFlag? ~ ^"between" }
And = { ^"and" }
Or = { ^"or" }
ConcatInfixOp = { "||" }
ArithInfixOp = _{ Add | Subtract | Multiply | Divide }
Add = { "+" }
Subtract = { "-" }
Multiply = { "*" }
Divide = { "/" }
// Comparison operators. pest's choice is ordered (first match wins), so the two-character
// operators (`<>`, `>=`, `<=`) are listed before the one-character operators (`>`, `<`)
// that are their prefixes. Silent: the concrete operator rule is what gets reported.
CmpInfixOp = _{ NotEq | GtEq | Gt | LtEq | Lt | Eq | In }
// Individual comparison operator tokens (combined by CmpInfixOp).
Eq = { "=" }
Gt = { ">" }
GtEq = { ">=" }
Lt = { "<" }
LtEq = { "<=" }
// Both spellings of inequality are accepted.
NotEq = { "<>" | "!=" }
// [not] in
In = { NotFlag? ~ ^"in" }
// An operand of Expr: any number of prefix `not`s, the atom itself, and an optional
// `is [not] null` postfix. Silent, so these parts appear directly inside the Expr pair.
ExprAtomValue = _{ UnaryNot* ~ AtomicExpr ~ IsNullPostfix? }
UnaryNot = @{ NotFlag }
IsNullPostfix = { ^"is" ~ NotFlag? ~ ^"null" }
// Everything that can stand where a value is expected. The choice is ordered: literals are
// tried first, a plain Row last.
AtomicExpr = _{ Literal | Parameter | Cast | IdentifierWithOptionalContinuation | ExpressionInParentheses | UnaryOperator | SubQuery | Row }
// Ordered choice: Double and Decimal come before Unsigned and Integer, otherwise the integer
// prefix of a fractional or exponent number would be matched on its own.
Literal = _{ True | False | Null | Double | Decimal | Unsigned | Integer | SingleQuotedString }
True = { ^"true" }
False = { ^"false" }
Null = { ^"null" }
// An integer part followed by `.` and zero or more digits (so `1.` is accepted).
Decimal = @{ Integer ~ ("." ~ ASCII_DIGIT*) }
// An integer part, an optional fraction and a mandatory exponent (`e` followed by an Integer).
Double = @{ Integer ~ ("." ~ ASCII_DIGIT*)? ~ (^"e" ~ Integer) }
// Optionally signed run of ASCII digits.
Integer = @{ ("+" | "-")? ~ ASCII_DIGIT+ }
Unsigned = @{ ASCII_DIGIT+ }
// Either a run of quote pairs (`''`, `''''`, ...) or a quote, any non-quote characters and
// a closing quote.
SingleQuotedString = @{ OnlyQuotesSequence | AnythingButQuotesSequence }
OnlyQuotesSequence = @{ ("'" ~ "'")+ }
AnythingButQuotesSequence = @{ "'" ~ (!("'") ~ ANY)* ~ "'" }
// Query parameter placeholders: `$<number>` or `?`.
Parameter = { PgParameter | QuestionParameter }
QuestionParameter = @{ "?" }
PgParameter = { "$" ~ Unsigned }
// An identifier on its own, a qualified reference `<identifier>.<identifier>`,
// or a function call `<identifier>(...)`.
IdentifierWithOptionalContinuation = { Identifier ~ (ReferenceContinuation | FunctionInvocationContinuation)? }
ReferenceContinuation = { "." ~ Identifier }
// Call arguments: empty, a lone `*`, or `[distinct] <expr> [, <expr>]*`.
FunctionInvocationContinuation = { "(" ~ (CountAsterisk | FunctionArgs)? ~ ")" }
FunctionArgs = { Distinct? ~ (Expr ~ ("," ~ Expr)*)? }
CountAsterisk = { "*" }
ExpressionInParentheses = { "(" ~ Expr ~ ")" }
// cast(<expr> as <type>)
Cast = { ^"cast" ~ "(" ~ Expr ~ ^"as" ~ TypeCast ~ ")" }
// Types accepted by `cast`: `any` in addition to every column type.
TypeCast = _{ TypeAny | ColumnDefType }
// Types accepted in column definitions and procedure parameters.
ColumnDefType = { TypeBool | TypeDecimal | TypeDouble | TypeInt | TypeNumber
| TypeScalar | TypeString | TypeText | TypeUnsigned | TypeVarchar }
TypeAny = { ^"any" }
// The longer spelling is tried first so that `boolean` is not cut short at `bool`.
TypeBool = { (^"boolean" | ^"bool") }
TypeDecimal = { ^"decimal" }
TypeDouble = { ^"double" }
// The longer spelling is tried first so that `integer` is not cut short at `int`.
TypeInt = { (^"integer" | ^"int") }
TypeNumber = { ^"number" }
TypeScalar = { ^"scalar" }
TypeString = { ^"string" }
TypeText = { ^"text" }
TypeUnsigned = { ^"unsigned" }
// varchar requires an explicit length: varchar(<unsigned>)
TypeVarchar = { ^"varchar" ~ "(" ~ Unsigned ~ ")" }
UnaryOperator = _{ Exists }
// [not] exists (<subquery>)
Exists = { NotFlag? ~ ^"exists" ~ SubQuery }
// A parenthesized, comma-separated list of one or more expressions.
Row = { "(" ~ Expr ~ ("," ~ Expr)* ~ ")" }

// Small shared helpers.
Distinct = { ^"distinct" }
NotFlag = { ^"not" }
// A statement ends at the end of input (EOI) or at a semicolon.
EOF = { EOI | ";" }
// pest's special rule for implicit whitespace: it is skipped between the tokens of every
// non-atomic rule. Recognized here: space, tab, LF and CRLF.
WHITESPACE = _{ " " | "\t" | "\n" | "\r\n" }
174 changes: 173 additions & 1 deletion grammars/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,29 @@ pub mod toml {
pub struct TomlParser;
}

/// Grammar rules of an SQL parser.
#[allow(missing_docs)]
pub mod sql {
/// SQL parser.
///
/// The grammar is an adapted version of the one used by the distributed SQL executor module
/// [sbroad](https://git.picodata.io/picodata/picodata/sbroad/-/blob/main/sbroad-core/src/frontend/sql/query.pest).
/// sbroad is a submodule of [Picodata](https://git.picodata.io/picodata/picodata/picodata),
/// which works with the Tarantool database, so the grammar tries to follow the SQLite
/// flavour of SQL (Tarantool uses SQLite to execute SQL queries).
#[derive(Parser)]
#[grammar = "grammars/sql.pest"]
pub struct SqlParser;
}

#[cfg(test)]
mod tests {
use pest::iterators::Pairs;
use std::convert::TryInto;

use pest::pratt_parser::PrattParser;
use pest::Parser;

use crate::{json, toml};
use crate::{json, sql, toml};

fn test_toml_deep_nesting(input: &str) {
const ERROR: &str = "call limit reached";
Expand Down Expand Up @@ -104,4 +120,160 @@ mod tests {
assert!(s2.is_err());
assert_eq!(s2.unwrap_err().variant.message(), ERROR);
}

/// Checks that the flat `Expr` pair sequence produced by the SQL grammar can be folded into
/// a correctly nested tree by a [`PrattParser`] configured with SQL operator priorities.
#[test]
fn sql_check_expressions_priorities() {
    lazy_static::lazy_static! {
        static ref PRATT_PARSER: PrattParser<sql::Rule> = {
            use pest::pratt_parser::{Assoc::{Left, Right}, Op};
            use sql::Rule::{
                Add, And, Between, ConcatInfixOp, Divide, Eq, Gt, GtEq, In, IsNullPostfix, Lt,
                LtEq, Multiply, NotEq, Or, Subtract, UnaryNot,
            };

            // Precedence is defined lowest to highest.
            PrattParser::new()
                .op(Op::infix(Or, Left))
                .op(Op::infix(Between, Left))
                .op(Op::infix(And, Left))
                .op(Op::prefix(UnaryNot))
                // Each comparison operator is registered exactly once.
                .op(Op::infix(Eq, Right)
                    | Op::infix(NotEq, Right)
                    | Op::infix(Gt, Right)
                    | Op::infix(GtEq, Right)
                    | Op::infix(Lt, Right)
                    | Op::infix(LtEq, Right)
                    | Op::infix(In, Right))
                .op(Op::infix(Add, Left) | Op::infix(Subtract, Left))
                .op(Op::infix(Multiply, Left)
                    | Op::infix(Divide, Left)
                    | Op::infix(ConcatInfixOp, Left))
                .op(Op::postfix(IsNullPostfix))
        };
    }

    #[derive(Debug, PartialEq, Eq)]
    enum ArithOp {
        Add,
        Mult,
    }

    #[derive(Debug, PartialEq, Eq)]
    enum BoolOp {
        And,
        Or,
        Eq,
        In,
    }

    #[derive(Debug, PartialEq, Eq)]
    enum InfixOp {
        ArithInfix(ArithOp),
        BoolInfix(BoolOp),
    }

    /// Minimal expression tree: just enough node kinds to represent the test input.
    #[derive(Debug, PartialEq, Eq)]
    enum Expr {
        SubQuery,
        Infix {
            left: Box<Expr>,
            op: InfixOp,
            right: Box<Expr>,
        },
        ArithValue(u64),
        BoolConst(bool),
        Not {
            child: Box<Expr>,
        },
        IsNull {
            child: Box<Expr>,
        },
    }

    /// Folds the pairs found inside an `Expr` rule into an [`Expr`] tree.
    ///
    /// Panics (via `unreachable!`) on any rule that the test input is not expected to produce.
    fn parse_expr(expression_pairs: Pairs<'_, sql::Rule>) -> Expr {
        PRATT_PARSER
            .map_primary(|primary| match primary.as_rule() {
                sql::Rule::Expr => parse_expr(primary.into_inner()),
                // The contents of the subquery are irrelevant for the priority check.
                sql::Rule::SubQuery => Expr::SubQuery,
                sql::Rule::Unsigned => {
                    let u64_value = primary.as_str().parse::<u64>().unwrap();
                    Expr::ArithValue(u64_value)
                }
                // Decode booleans from the rule kind instead of the matched text: the grammar
                // matches `true`/`false` case-insensitively, while `str::parse::<bool>` only
                // accepts the lowercase spelling.
                sql::Rule::True => Expr::BoolConst(true),
                sql::Rule::False => Expr::BoolConst(false),
                rule => unreachable!("Expr::parse expected atomic rule, found {:?}", rule),
            })
            .map_infix(|lhs, op, rhs| {
                let op = match op.as_rule() {
                    sql::Rule::And => InfixOp::BoolInfix(BoolOp::And),
                    sql::Rule::Or => InfixOp::BoolInfix(BoolOp::Or),
                    sql::Rule::Eq => InfixOp::BoolInfix(BoolOp::Eq),
                    sql::Rule::In => InfixOp::BoolInfix(BoolOp::In),
                    sql::Rule::Multiply => InfixOp::ArithInfix(ArithOp::Mult),
                    sql::Rule::Add => InfixOp::ArithInfix(ArithOp::Add),
                    rule => {
                        unreachable!("Expr::parse expected infix operation, found {:?}", rule)
                    }
                };
                Expr::Infix {
                    left: Box::new(lhs),
                    op,
                    right: Box::new(rhs),
                }
            })
            .map_prefix(|op, child| match op.as_rule() {
                sql::Rule::UnaryNot => Expr::Not {
                    child: Box::new(child),
                },
                rule => unreachable!("Expr::parse expected prefix operator, found {:?}", rule),
            })
            .map_postfix(|child, op| match op.as_rule() {
                sql::Rule::IsNullPostfix => Expr::IsNull {
                    child: Box::new(child),
                },
                rule => unreachable!("Expr::parse expected postfix operator, found {:?}", rule),
            })
            .parse(expression_pairs)
    }

    // Example of SQL expression containing many operators with different priorities.
    // Should be interpreted as
    // `(not ((1 + (1 * 2)) = 3)) or ((false is null) and (1 in (<subquery>)))`
    // where `<subquery>` is the parenthesized `select` below.
    let input = r#"not 1 + 1 * 2 = 3
or false is null
and 1 in (
select "name", avg("grade") from students
where "age" > 14
group by "class"
)"#;

    let res_pairs = sql::SqlParser::parse(sql::Rule::Expr, input).unwrap();
    let actual_expr = parse_expr(res_pairs);

    let expected_expr = Expr::Infix {
        op: InfixOp::BoolInfix(BoolOp::Or),
        left: Box::new(Expr::Not {
            child: Box::new(Expr::Infix {
                left: Box::new(Expr::Infix {
                    left: Box::new(Expr::ArithValue(1)),
                    op: InfixOp::ArithInfix(ArithOp::Add),
                    right: Box::new(Expr::Infix {
                        left: Box::new(Expr::ArithValue(1)),
                        op: InfixOp::ArithInfix(ArithOp::Mult),
                        right: Box::new(Expr::ArithValue(2)),
                    }),
                }),
                op: InfixOp::BoolInfix(BoolOp::Eq),
                right: Box::new(Expr::ArithValue(3)),
            }),
        }),
        right: Box::new(Expr::Infix {
            left: Box::new(Expr::IsNull {
                child: Box::new(Expr::BoolConst(false)),
            }),
            op: InfixOp::BoolInfix(BoolOp::And),
            right: Box::new(Expr::Infix {
                left: Box::new(Expr::ArithValue(1)),
                op: InfixOp::BoolInfix(BoolOp::In),
                right: Box::new(Expr::SubQuery),
            }),
        }),
    };
    assert_eq!(expected_expr, actual_expr);
}
}

0 comments on commit 57daac4

Please sign in to comment.