Skip to content

Commit

Permalink
feat(spanner/spansql): parse joins (#2896)
Browse files Browse the repository at this point in the history
This parses almost all of the JOIN clause syntax, with the exception of
HASH and hints.

It doesn't support the full range of common joins due to a lack of
support for path expressions, but that will be addressed separately.

Updates #2850.
  • Loading branch information
dsymonds committed Sep 23, 2020
1 parent 411f8bc commit d5fb7cb
Show file tree
Hide file tree
Showing 4 changed files with 172 additions and 5 deletions.
89 changes: 87 additions & 2 deletions spanner/spansql/parser.go
Expand Up @@ -1779,7 +1779,31 @@ func (p *parser) parseSelectList() ([]Expr, []ID, *parseError) {
}

func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
// TODO: support more than a single table name.
debugf("parseSelectFrom: %v", p)

/*
from_item: {
table_name [ table_hint_expr ] [ [ AS ] alias ] |
join |
( query_expr ) [ table_hint_expr ] [ [ AS ] alias ] |
field_path |
{ UNNEST( array_expression ) | UNNEST( array_path ) | array_path }
[ table_hint_expr ] [ [ AS ] alias ] [ WITH OFFSET [ [ AS ] alias ] ] |
with_query_name [ table_hint_expr ] [ [ AS ] alias ]
}
join:
from_item [ join_type ] [ join_method ] JOIN [ join_hint_expr ] from_item
[ ON bool_expression | USING ( join_column [, ...] ) ]
join_type:
{ INNER | CROSS | FULL [OUTER] | LEFT [OUTER] | RIGHT [OUTER] }
*/

// A join starts with a from_item, so that can't be detected in advance.
// TODO: Support more than table name or join.
// TODO: Verify associativity of multiple joins.

tname, err := p.parseTableOrIndexOrColumnName()
if err != nil {
return nil, err
Expand All @@ -1795,7 +1819,68 @@ func (p *parser) parseSelectFrom() (SelectFrom, *parseError) {
sf.Alias = alias
}

return sf, nil
// Look ahead to see if this is a join.
tok := p.next()
if tok.err != nil {
p.back()
return sf, nil
}
var jt JoinType
if tok.value == "JOIN" {
// This is implicitly an inner join.
jt = InnerJoin
} else if j, ok := joinKeywords[tok.value]; ok {
jt = j
switch jt {
case FullJoin, LeftJoin, RightJoin:
// These join types are implicitly "outer" joins,
// so the "OUTER" keyword is optional.
p.eat("OUTER")
}
if err := p.expect("JOIN"); err != nil {
return nil, err
}
} else {
p.back()
return sf, nil
}

// TODO: consume "HASH"

sfj := SelectFromJoin{
Type: jt,
LHS: sf,
}
sfj.RHS, err = p.parseSelectFrom()
if err != nil {
return nil, err
}

if p.eat("ON") {
sfj.On, err = p.parseBoolExpr()
if err != nil {
return nil, err
}
}
if p.eat("USING") {
if sfj.On != nil {
return nil, p.errorf("join may not have both ON and USING clauses")
}
sfj.Using, err = p.parseColumnNameList()
if err != nil {
return nil, err
}
}

return sfj, nil
}

// joinKeywords maps the keyword that may precede JOIN in a from_item
// to the JoinType it selects. parseSelectFrom consults it after checking
// for a bare JOIN, which is handled separately as an implicit inner join.
var joinKeywords = map[string]JoinType{
"INNER": InnerJoin,
"CROSS": CrossJoin,
"FULL": FullJoin,
"LEFT": LeftJoin,
"RIGHT": RightJoin,
}

func (p *parser) parseTableSample() (TableSample, *parseError) {
Expand Down
35 changes: 35 additions & 0 deletions spanner/spansql/parser_test.go
Expand Up @@ -113,6 +113,41 @@ func TestParseQuery(t *testing.T) {
},
},
},
// TODO: `SELECT * FROM A INNER JOIN B ON A.w = B.y`
{`SELECT * FROM A INNER JOIN B USING (x)`,
Query{
Select: Select{
List: []Expr{Star},
From: []SelectFrom{SelectFromJoin{
Type: InnerJoin,
LHS: SelectFromTable{Table: "A"},
RHS: SelectFromTable{Table: "B"},
Using: []ID{"x"},
}},
},
},
},
// TODO: This should be `SELECT Roster.LastName, TeamMascot.Mascot FROM Roster JOIN TeamMascot ON Roster.SchoolID = TeamMascot.SchoolID`
{`SELECT RosterLastName, TeamMascotMascot FROM Roster JOIN TeamMascot ON RosterSchoolID = TeamMascotSchoolID`,
Query{
Select: Select{
List: []Expr{
ID("RosterLastName"),
ID("TeamMascotMascot"),
},
From: []SelectFrom{SelectFromJoin{
Type: InnerJoin,
LHS: SelectFromTable{Table: "Roster"},
RHS: SelectFromTable{Table: "TeamMascot"},
On: ComparisonOp{
Op: Eq,
LHS: ID("RosterSchoolID"),
RHS: ID("TeamMascotSchoolID"),
},
}},
},
},
},
}
for _, test := range tests {
got, err := ParseQuery(test.in)
Expand Down
25 changes: 23 additions & 2 deletions spanner/spansql/sql.go
Expand Up @@ -281,13 +281,34 @@ func (sel Select) SQL() string {
}

// SQL renders the table reference as SQL: the table name, followed by
// " AS <alias>" when an alias is set.
func (sft SelectFromTable) SQL() string {
	str := sft.Table.SQL()
	if sft.Alias != "" {
		// The alias is optional; an empty alias emits nothing.
		str += " AS " + sft.Alias.SQL()
	}
	return str
}

// SQL renders the join as "<LHS> <TYPE> JOIN <RHS>", followed by the join
// condition if there is one: " ON <expr>" when On is set, otherwise
// " USING (<columns>)" when Using is non-empty.
func (sfj SelectFromJoin) SQL() string {
	// TODO: The grammar permits arbitrary nesting. Does this need to add parens?
	str := sfj.LHS.SQL() + " " + joinTypes[sfj.Type] + " JOIN "
	// TODO: hints go here
	str += sfj.RHS.SQL()
	if sfj.On != nil {
		// The ON keyword must be emitted explicitly; without it the
		// condition would run straight into the RHS and the output
		// would not be valid SQL.
		str += " ON " + sfj.On.SQL()
	} else if len(sfj.Using) > 0 {
		str += " USING (" + idList(sfj.Using) + ")"
	}
	return str
}

// joinTypes maps each JoinType to the SQL keyword that SelectFromJoin.SQL
// emits immediately before JOIN.
var joinTypes = map[JoinType]string{
InnerJoin: "INNER",
CrossJoin: "CROSS",
FullJoin: "FULL",
LeftJoin: "LEFT",
RightJoin: "RIGHT",
}

func (o Order) SQL() string {
str := o.Expr.SQL()
if o.Desc {
Expand Down
28 changes: 27 additions & 1 deletion spanner/spansql/types.go
Expand Up @@ -316,7 +316,33 @@ type SelectFromTable struct {

func (SelectFromTable) isSelectFrom() {}

// TODO: SelectFromJoin, SelectFromSubquery, etc.
// SelectFromJoin is a SelectFrom that joins two other SelectFroms.
// https://cloud.google.com/spanner/docs/query-syntax#join_types
type SelectFromJoin struct {
// Type is the kind of join (inner, cross, full, left or right).
Type JoinType
// LHS and RHS are the two from_items being joined.
LHS, RHS SelectFrom

// Join condition.
// At most one of {On,Using} may be set.
// The parser rejects a join that carries both an ON and a USING clause.
On BoolExpr
Using []ID

// TODO: hint keys (this will cover `X HASH JOIN Y` too).
}

// isSelectFrom is a no-op marker method; presumably it is what makes
// SelectFromJoin satisfy the SelectFrom interface (interface definition
// not visible here — confirm).
func (SelectFromJoin) isSelectFrom() {}

// JoinType identifies the kind of join performed by a SelectFromJoin.
type JoinType int

// The supported join types. InnerJoin is the zero value, and is also what
// the parser assigns to a bare JOIN with no preceding keyword.
const (
// InnerJoin is an INNER JOIN.
InnerJoin JoinType = iota
// CrossJoin is a CROSS JOIN.
CrossJoin
// FullJoin is a FULL [OUTER] JOIN.
FullJoin
// LeftJoin is a LEFT [OUTER] JOIN.
LeftJoin
// RightJoin is a RIGHT [OUTER] JOIN.
RightJoin
)

// TODO: SelectFromSubquery, etc.

type Order struct {
Expr Expr
Expand Down

0 comments on commit d5fb7cb

Please sign in to comment.