From d5fb7cb787bcc90e763cd23b242a1aa7c1e155ec Mon Sep 17 00:00:00 2001 From: David Symonds Date: Thu, 24 Sep 2020 09:44:04 +1000 Subject: [PATCH] feat(spanner/spansql): parse joins (#2896) This parses almost all of the JOIN clause syntax, with the exception of HASH and hints. It doesn't support the full range of common joins due to a lack of support for path expressions, but that will be addressed separately. Updates #2850. --- spanner/spansql/parser.go | 89 +++++++++++++++++++++++++++++++++- spanner/spansql/parser_test.go | 35 +++++++++++++ spanner/spansql/sql.go | 25 +++++++++- spanner/spansql/types.go | 28 ++++++++++- 4 files changed, 172 insertions(+), 5 deletions(-) diff --git a/spanner/spansql/parser.go b/spanner/spansql/parser.go index e50273d76c6..ca109bf3059 100644 --- a/spanner/spansql/parser.go +++ b/spanner/spansql/parser.go @@ -1779,7 +1779,31 @@ func (p *parser) parseSelectList() ([]Expr, []ID, *parseError) { } func (p *parser) parseSelectFrom() (SelectFrom, *parseError) { - // TODO: support more than a single table name. + debugf("parseSelectFrom: %v", p) + + /* + from_item: { + table_name [ table_hint_expr ] [ [ AS ] alias ] | + join | + ( query_expr ) [ table_hint_expr ] [ [ AS ] alias ] | + field_path | + { UNNEST( array_expression ) | UNNEST( array_path ) | array_path } + [ table_hint_expr ] [ [ AS ] alias ] [ WITH OFFSET [ [ AS ] alias ] ] | + with_query_name [ table_hint_expr ] [ [ AS ] alias ] + } + + join: + from_item [ join_type ] [ join_method ] JOIN [ join_hint_expr ] from_item + [ ON bool_expression | USING ( join_column [, ...] ) ] + + join_type: + { INNER | CROSS | FULL [OUTER] | LEFT [OUTER] | RIGHT [OUTER] } + */ + + // A join starts with a from_item, so that can't be detected in advance. + // TODO: Support more than table name or join. + // TODO: Verify associativity of multiple joins. 
+ tname, err := p.parseTableOrIndexOrColumnName() if err != nil { return nil, err @@ -1795,7 +1819,68 @@ func (p *parser) parseSelectFrom() (SelectFrom, *parseError) { sf.Alias = alias } - return sf, nil + // Look ahead to see if this is a join. + tok := p.next() + if tok.err != nil { + p.back() + return sf, nil + } + var jt JoinType + if tok.value == "JOIN" { + // This is implicitly an inner join. + jt = InnerJoin + } else if j, ok := joinKeywords[tok.value]; ok { + jt = j + switch jt { + case FullJoin, LeftJoin, RightJoin: + // These join types are implicitly "outer" joins, + // so the "OUTER" keyword is optional. + p.eat("OUTER") + } + if err := p.expect("JOIN"); err != nil { + return nil, err + } + } else { + p.back() + return sf, nil + } + + // TODO: consume "HASH" + + sfj := SelectFromJoin{ + Type: jt, + LHS: sf, + } + sfj.RHS, err = p.parseSelectFrom() + if err != nil { + return nil, err + } + + if p.eat("ON") { + sfj.On, err = p.parseBoolExpr() + if err != nil { + return nil, err + } + } + if p.eat("USING") { + if sfj.On != nil { + return nil, p.errorf("join may not have both ON and USING clauses") + } + sfj.Using, err = p.parseColumnNameList() + if err != nil { + return nil, err + } + } + + return sfj, nil +} + +var joinKeywords = map[string]JoinType{ + "INNER": InnerJoin, + "CROSS": CrossJoin, + "FULL": FullJoin, + "LEFT": LeftJoin, + "RIGHT": RightJoin, } func (p *parser) parseTableSample() (TableSample, *parseError) { diff --git a/spanner/spansql/parser_test.go b/spanner/spansql/parser_test.go index 8b6b4bfaf9b..95cc5b79287 100644 --- a/spanner/spansql/parser_test.go +++ b/spanner/spansql/parser_test.go @@ -113,6 +113,41 @@ func TestParseQuery(t *testing.T) { }, }, }, + // TODO: `SELECT * FROM A INNER JOIN B ON A.w = B.y` + {`SELECT * FROM A INNER JOIN B USING (x)`, + Query{ + Select: Select{ + List: []Expr{Star}, + From: []SelectFrom{SelectFromJoin{ + Type: InnerJoin, + LHS: SelectFromTable{Table: "A"}, + RHS: SelectFromTable{Table: "B"}, + Using: 
[]ID{"x"}, + }}, + }, + }, + }, + // TODO: This should be `SELECT Roster.LastName, TeamMascot.Mascot FROM Roster JOIN TeamMascot ON Roster.SchoolID = TeamMascot.SchoolID` + {`SELECT RosterLastName, TeamMascotMascot FROM Roster JOIN TeamMascot ON RosterSchoolID = TeamMascotSchoolID`, + Query{ + Select: Select{ + List: []Expr{ + ID("RosterLastName"), + ID("TeamMascotMascot"), + }, + From: []SelectFrom{SelectFromJoin{ + Type: InnerJoin, + LHS: SelectFromTable{Table: "Roster"}, + RHS: SelectFromTable{Table: "TeamMascot"}, + On: ComparisonOp{ + Op: Eq, + LHS: ID("RosterSchoolID"), + RHS: ID("TeamMascotSchoolID"), + }, + }}, + }, + }, + }, } for _, test := range tests { got, err := ParseQuery(test.in) diff --git a/spanner/spansql/sql.go b/spanner/spansql/sql.go index deae3135cbb..239e35ce5fd 100644 --- a/spanner/spansql/sql.go +++ b/spanner/spansql/sql.go @@ -281,13 +281,34 @@ func (sel Select) SQL() string { } func (sft SelectFromTable) SQL() string { - str := ID(sft.Table).SQL() + str := sft.Table.SQL() if sft.Alias != "" { - str += " AS " + ID(sft.Alias).SQL() + str += " AS " + sft.Alias.SQL() } return str } +func (sfj SelectFromJoin) SQL() string { + // TODO: The grammar permits arbitrary nesting. Does this need to add parens? 
+ str := sfj.LHS.SQL() + " " + joinTypes[sfj.Type] + " JOIN " + // TODO: hints go here + str += sfj.RHS.SQL() + if sfj.On != nil { + str += " " + sfj.On.SQL() + } else if len(sfj.Using) > 0 { + str += " USING (" + idList(sfj.Using) + ")" + } + return str +} + +var joinTypes = map[JoinType]string{ + InnerJoin: "INNER", + CrossJoin: "CROSS", + FullJoin: "FULL", + LeftJoin: "LEFT", + RightJoin: "RIGHT", +} + func (o Order) SQL() string { str := o.Expr.SQL() if o.Desc { diff --git a/spanner/spansql/types.go b/spanner/spansql/types.go index f1222d505c0..2c2853e2b89 100644 --- a/spanner/spansql/types.go +++ b/spanner/spansql/types.go @@ -316,7 +316,33 @@ type SelectFromTable struct { func (SelectFromTable) isSelectFrom() {} -// TODO: SelectFromJoin, SelectFromSubquery, etc. +// SelectFromJoin is a SelectFrom that joins two other SelectFroms. +// https://cloud.google.com/spanner/docs/query-syntax#join_types +type SelectFromJoin struct { + Type JoinType + LHS, RHS SelectFrom + + // Join condition. + // At most one of {On,Using} may be set. + On BoolExpr + Using []ID + + // TODO: hint keys (this will cover `X HASH JOIN Y` too). +} + +func (SelectFromJoin) isSelectFrom() {} + +type JoinType int + +const ( + InnerJoin JoinType = iota + CrossJoin + FullJoin + LeftJoin + RightJoin +) + +// TODO: SelectFromSubquery, etc. type Order struct { Expr Expr