Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Allow custom & union productions (#233)
Implements #229
- Loading branch information
Showing
14 changed files
with
738 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,134 @@ | ||
package main | ||
|
||
import ( | ||
"strings" | ||
|
||
"github.com/alecthomas/kong" | ||
"github.com/alecthomas/participle/v2" | ||
"github.com/alecthomas/repr" | ||
) | ||
|
||
// Grammar nodes for the union-based expression parser. The struct tags are | ||
// participle grammar fragments; the interfaces at the end of the group are the | ||
// union types that those fragments refer to. | ||
type ( | ||
// ExprString captures a single String token. | ||
ExprString struct { | ||
Value string `@String` | ||
} | ||
|
||
// ExprNumber captures an Int or Float token into a float64. | ||
ExprNumber struct { | ||
Value float64 `@Int | @Float` | ||
} | ||
|
||
// ExprIdent captures a single Ident token. | ||
ExprIdent struct { | ||
Name string `@Ident` | ||
} | ||
|
||
// ExprParens is a parenthesised expression; the inner expression may be any | ||
// member of ExprPrecAll. | ||
ExprParens struct { | ||
Inner ExprPrecAll `"(" @@ ")"` | ||
} | ||
|
||
// ExprUnary is a prefix "-" or "!" applied to a single operand. | ||
ExprUnary struct { | ||
Op string `@("-" | "!")` | ||
Expr ExprOperand `@@` | ||
} | ||
|
||
// ExprAddSub is a head operand followed by one or more "+"/"-" extensions. | ||
// The tail uses `@@+`, so a lone operand never matches this node. | ||
ExprAddSub struct { | ||
Head ExprPrec2 `@@` | ||
Tail []ExprAddSubExt `@@+` | ||
} | ||
|
||
// ExprAddSubExt is one "+" or "-" operator together with its right operand. | ||
ExprAddSubExt struct { | ||
Op string `@("+" | "-")` | ||
Expr ExprPrec2 `@@` | ||
} | ||
|
||
// ExprMulDiv is a head operand followed by one or more "*"/"/" extensions. | ||
ExprMulDiv struct { | ||
Head ExprPrec3 `@@` | ||
Tail []ExprMulDivExt `@@+` | ||
} | ||
|
||
// ExprMulDivExt is one "*" or "/" operator together with its right operand. | ||
ExprMulDivExt struct { | ||
Op string `@("*" | "/")` | ||
Expr ExprPrec3 `@@` | ||
} | ||
|
||
// ExprRem is a head operand followed by one or more "%" extensions. | ||
ExprRem struct { | ||
Head ExprOperand `@@` | ||
Tail []ExprRemExt `@@+` | ||
} | ||
|
||
// ExprRemExt is one "%" operator together with its right operand. | ||
ExprRemExt struct { | ||
Op string `@"%"` | ||
Expr ExprOperand `@@` | ||
} | ||
|
||
// Marker interfaces naming the set of node types accepted at each level. | ||
// Each has a single unexported method, so a type joins a set only by | ||
// declaring that method. | ||
ExprPrecAll interface{ exprPrecAll() } | ||
ExprPrec2 interface{ exprPrec2() } | ||
ExprPrec3 interface{ exprPrec3() } | ||
ExprOperand interface{ exprOperand() } | ||
) | ||
|
||
// These expression types can be matched as individual operands | ||
func (ExprIdent) exprOperand() {} | ||
func (ExprNumber) exprOperand() {} | ||
func (ExprString) exprOperand() {} | ||
func (ExprParens) exprOperand() {} | ||
func (ExprUnary) exprOperand() {} | ||
|
||
// These expression types can be matched at precedence level 3 | ||
func (ExprIdent) exprPrec3() {} | ||
func (ExprNumber) exprPrec3() {} | ||
func (ExprString) exprPrec3() {} | ||
func (ExprParens) exprPrec3() {} | ||
func (ExprUnary) exprPrec3() {} | ||
func (ExprRem) exprPrec3() {} | ||
|
||
// These expression types can be matched at precedence level 2 | ||
func (ExprIdent) exprPrec2() {} | ||
func (ExprNumber) exprPrec2() {} | ||
func (ExprString) exprPrec2() {} | ||
func (ExprParens) exprPrec2() {} | ||
func (ExprUnary) exprPrec2() {} | ||
func (ExprRem) exprPrec2() {} | ||
func (ExprMulDiv) exprPrec2() {} | ||
|
||
// These expression types can be matched at the minimum precedence level | ||
func (ExprIdent) exprPrecAll() {} | ||
func (ExprNumber) exprPrecAll() {} | ||
func (ExprString) exprPrecAll() {} | ||
func (ExprParens) exprPrecAll() {} | ||
func (ExprUnary) exprPrecAll() {} | ||
func (ExprRem) exprPrecAll() {} | ||
func (ExprMulDiv) exprPrecAll() {} | ||
func (ExprAddSub) exprPrecAll() {} | ||
|
||
// Expression is the root grammar node handed to participle.MustBuild; X holds | ||
// the parsed expression as a member of the ExprPrecAll union. | ||
type Expression struct { | ||
X ExprPrecAll `@@` | ||
} | ||
|
||
// parser is the package-level participle parser for Expression. It is built | ||
// with a very large lookahead and one ParseUnion registration per marker | ||
// interface. Each registration lists the composite node for that level first. | ||
// NOTE(review): member order presumably decides which alternative is tried | ||
// first - confirm against the participle documentation. | ||
var parser = participle.MustBuild(&Expression{}, | ||
// This grammar requires enough lookahead to see the entire expression before | ||
// it can select the proper binary expression type - in other words, we only | ||
// know that `1 * 2 * 3 * 4` isn't the left-hand side of an addition or subtraction | ||
// expression until we know for sure that no `+` or `-` operator follows it | ||
participle.UseLookahead(99999), | ||
// Register the ExprOperand union so we can parse individual operands | ||
participle.ParseUnion[ExprOperand](ExprUnary{}, ExprIdent{}, ExprNumber{}, ExprString{}, ExprParens{}), | ||
// Register the ExprPrec3 union so we can parse expressions at precedence level 3 | ||
participle.ParseUnion[ExprPrec3](ExprRem{}, ExprUnary{}, ExprIdent{}, ExprNumber{}, ExprString{}, ExprParens{}), | ||
// Register the ExprPrec2 union so we can parse expressions at precedence level 2 | ||
participle.ParseUnion[ExprPrec2](ExprMulDiv{}, ExprRem{}, ExprUnary{}, ExprIdent{}, ExprNumber{}, ExprString{}, ExprParens{}), | ||
// Register the ExprPrecAll union so we can parse expressions at the minimum precedence level | ||
participle.ParseUnion[ExprPrecAll](ExprAddSub{}, ExprMulDiv{}, ExprRem{}, ExprUnary{}, ExprIdent{}, ExprNumber{}, ExprString{}, ExprParens{}), | ||
) | ||
|
||
// main parses the command-line arguments as a single expression and prints the | ||
// resulting tree with repr. | ||
func main() { | ||
var cli struct { | ||
Expr []string `arg required help:"Expression to parse."` | ||
} | ||
ctx := kong.Parse(&cli) | ||
|
||
// The positional arguments are joined with single spaces so that an | ||
// expression split into several arguments is parsed as one input. | ||
expr := &Expression{} | ||
err := parser.ParseString("", strings.Join(cli.Expr, " "), expr) | ||
// NOTE(review): FatalIfErrorf presumably reports the error and exits when | ||
// err is non-nil - confirm against the kong documentation. | ||
ctx.FatalIfErrorf(err) | ||
|
||
repr.Println(expr) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,49 @@ | ||
package main | ||
|
||
import ( | ||
"testing" | ||
|
||
require "github.com/alecthomas/assert/v2" | ||
) | ||
|
||
// TestExpressionParser is a table-driven test for the union-based grammar: each | ||
// source string is parsed with the package-level parser and the resulting | ||
// Expression.X is compared with the expected node tree. All cases run inside | ||
// this one test function; no subtests are created. | ||
func TestExpressionParser(t *testing.T) { | ||
type testCase struct { | ||
src string | ||
expected ExprPrecAll | ||
} | ||
|
||
for _, c := range []testCase{ | ||
{`1`, ExprNumber{1}}, | ||
{`1.5`, ExprNumber{1.5}}, | ||
// The expected string value keeps its surrounding double quotes. | ||
{`"a"`, ExprString{`"a"`}}, | ||
{`(1)`, ExprParens{ExprNumber{1}}}, | ||
{`1 + 1`, ExprAddSub{ExprNumber{1}, []ExprAddSubExt{{"+", ExprNumber{1}}}}}, | ||
{`1 - 1`, ExprAddSub{ExprNumber{1}, []ExprAddSubExt{{"-", ExprNumber{1}}}}}, | ||
{`1 * 1`, ExprMulDiv{ExprNumber{1}, []ExprMulDivExt{{"*", ExprNumber{1}}}}}, | ||
{`1 / 1`, ExprMulDiv{ExprNumber{1}, []ExprMulDivExt{{"/", ExprNumber{1}}}}}, | ||
{`1 % 1`, ExprRem{ExprNumber{1}, []ExprRemExt{{"%", ExprNumber{1}}}}}, | ||
// Mixed operators: "+" and "-" share one flat tail, "*" and "/" share a | ||
// nested tail, and "%" binds tightest. | ||
{ | ||
`a + b - c * d / e % f`, | ||
ExprAddSub{ | ||
ExprIdent{"a"}, | ||
[]ExprAddSubExt{ | ||
{"+", ExprIdent{"b"}}, | ||
{"-", ExprMulDiv{ | ||
ExprIdent{"c"}, | ||
[]ExprMulDivExt{ | ||
{"*", ExprIdent{Name: "d"}}, | ||
{"/", ExprRem{ | ||
ExprIdent{"e"}, | ||
[]ExprRemExt{{"%", ExprIdent{"f"}}}, | ||
}}, | ||
}, | ||
}}, | ||
}, | ||
}, | ||
}, | ||
} { | ||
var actual Expression | ||
require.NoError(t, parser.ParseString("<test>", c.src, &actual)) | ||
require.Equal(t, c.expected, actual.X) | ||
} | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,136 @@ | ||
package main | ||
|
||
import ( | ||
"fmt" | ||
"strconv" | ||
"strings" | ||
"text/scanner" | ||
|
||
"github.com/alecthomas/kong" | ||
"github.com/alecthomas/participle/v2" | ||
"github.com/alecthomas/participle/v2/lexer" | ||
"github.com/alecthomas/repr" | ||
) | ||
|
||
// operatorPrec holds the two binding powers of a binary operator as used by | ||
// parseExprPrec: Left is compared with the caller's minimum precedence to | ||
// decide whether the operator is consumed, and Right is passed as the minimum | ||
// precedence when parsing the operator's right operand. | ||
type operatorPrec struct{ Left, Right int } | ||
|
||
// operatorPrecs maps each binary operator token to its binding powers. | ||
// No entry has Right greater than Left, so parseExprPrec's right-operand | ||
// recursion keeps consuming operators of the same level and chains nest to | ||
// the right (for example `a + b - c` becomes a + (b - c)). "*", "/" and "%" | ||
// each sit on their own, increasingly tight, level. | ||
// NOTE(review): right-nesting "-" and "/" differs from conventional | ||
// left-associative arithmetic; TestCustomExprParser expects the current | ||
// shape, so confirm the intent before changing these values. | ||
var operatorPrecs = map[string]operatorPrec{ | ||
"+": {1, 1}, | ||
"-": {1, 1}, | ||
"*": {3, 2}, | ||
"/": {5, 4}, | ||
"%": {7, 6}, | ||
} | ||
|
||
// AST node types produced by the hand-written expression parser. None of them | ||
// carry grammar tags; they are built directly by parseExprAtom and | ||
// parseExprPrec. | ||
type ( | ||
// Expr is the marker interface implemented by every node type below. | ||
Expr interface{ expr() } | ||
|
||
// Leaf nodes and the parenthesised wrapper. | ||
ExprIdent struct{ Name string } | ||
ExprString struct{ Value string } | ||
ExprNumber struct{ Value float64 } | ||
ExprParens struct{ Sub Expr } | ||
|
||
// ExprUnary is a prefix operator applied to a sub-expression. | ||
ExprUnary struct { | ||
Op string | ||
Sub Expr | ||
} | ||
|
||
// ExprBinary is an infix operator with its left and right operands. | ||
ExprBinary struct { | ||
Lhs Expr | ||
Op string | ||
Rhs Expr | ||
} | ||
) | ||
|
||
func (ExprIdent) expr() {} | ||
func (ExprString) expr() {} | ||
func (ExprNumber) expr() {} | ||
func (ExprParens) expr() {} | ||
func (ExprUnary) expr() {} | ||
func (ExprBinary) expr() {} | ||
|
||
// parseExprAny parses a complete expression by calling parseExprPrec with a | ||
// minimum precedence of 0, so every operator in operatorPrecs is accepted. It | ||
// is the function registered with the parser through participle.ParseTypeWith. | ||
func parseExprAny(lex *lexer.PeekingLexer) (Expr, error) { return parseExprPrec(lex, 0) } | ||
|
||
// parseExprAtom parses one atom from lex: an identifier, a string literal, a | ||
// number, or a parenthesised expression. It dispatches on the peeked token and | ||
// consumes tokens only in the branch that matches. | ||
// | ||
// It returns participle.NextMatch when the peeked token starts none of these | ||
// forms, the error from strconv when a literal cannot be converted, and a | ||
// plain error when a parenthesised expression is not followed by ")". | ||
// NOTE(review): token types are compared with text/scanner constants, which | ||
// assumes the lexer in use emits those types - confirm. | ||
func parseExprAtom(lex *lexer.PeekingLexer) (Expr, error) { | ||
switch peek := lex.Peek(); { | ||
case peek.Type == scanner.Ident: | ||
return ExprIdent{lex.Next().Value}, nil | ||
case peek.Type == scanner.String: | ||
// The token value is unquoted, so ExprString holds the literal's content. | ||
val, err := strconv.Unquote(lex.Next().Value) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return ExprString{val}, nil | ||
case peek.Type == scanner.Int || peek.Type == scanner.Float: | ||
// Int and Float tokens are both stored as float64. | ||
val, err := strconv.ParseFloat(lex.Next().Value, 64) | ||
if err != nil { | ||
return nil, err | ||
} | ||
return ExprNumber{val}, nil | ||
case peek.Value == "(": | ||
// Consume "(", parse a full expression, then require and consume ")". | ||
_ = lex.Next() | ||
inner, err := parseExprAny(lex) | ||
if err != nil { | ||
return nil, err | ||
} | ||
if lex.Peek().Value != ")" { | ||
return nil, fmt.Errorf("expected closing ')'") | ||
} | ||
_ = lex.Next() | ||
return ExprParens{inner}, nil | ||
default: | ||
// NOTE(review): NextMatch presumably tells participle that this custom | ||
// parser does not apply at the current position - confirm. | ||
return nil, participle.NextMatch | ||
} | ||
} | ||
|
||
// parseExprPrec parses an expression whose binary operators all have a Left | ||
// binding power of at least minPrec. | ||
// | ||
// It first parses the left operand: an atom, optionally preceded by one unary | ||
// "-" or "!" that applies to that atom only. It then loops: while the next | ||
// token is an operator in operatorPrecs with Left >= minPrec, the operator is | ||
// consumed, the right operand is parsed recursively with the operator's Right | ||
// value as the new minimum, and the result is folded into an ExprBinary. | ||
// | ||
// Any error from parseExprAtom or from the recursive call is returned as is. | ||
func parseExprPrec(lex *lexer.PeekingLexer, minPrec int) (Expr, error) { | ||
var lhs Expr | ||
if peeked := lex.Peek(); peeked.Value == "-" || peeked.Value == "!" { | ||
// The unary operator wraps a single atom, so it binds tighter than any | ||
// binary operator and cannot be stacked (no second prefix is parsed). | ||
// NOTE(review): the operator token is already consumed if parseExprAtom | ||
// then returns participle.NextMatch - confirm this is acceptable. | ||
op := lex.Next().Value | ||
atom, err := parseExprAtom(lex) | ||
if err != nil { | ||
return nil, err | ||
} | ||
lhs = ExprUnary{op, atom} | ||
} else { | ||
atom, err := parseExprAtom(lex) | ||
if err != nil { | ||
return nil, err | ||
} | ||
lhs = atom | ||
} | ||
|
||
for { | ||
peek := lex.Peek() | ||
prec, isOp := operatorPrecs[peek.Value] | ||
// Stop at a non-operator token, or at an operator that binds more loosely | ||
// than this call is allowed to consume; the caller handles it instead. | ||
if !isOp || prec.Left < minPrec { | ||
break | ||
} | ||
op := lex.Next().Value | ||
// An operator whose Left equals prec.Right is still accepted by the | ||
// recursive call, which is why same-level chains nest to the right. | ||
rhs, err := parseExprPrec(lex, prec.Right) | ||
if err != nil { | ||
return nil, err | ||
} | ||
lhs = ExprBinary{lhs, op, rhs} | ||
} | ||
return lhs, nil | ||
} | ||
|
||
// Expression is the root grammar node handed to participle.MustBuild; X holds | ||
// the parsed expression as an Expr value. | ||
type Expression struct { | ||
X Expr `@@` | ||
} | ||
|
||
// parser is the package-level participle parser for Expression, with | ||
// parseExprAny registered as a custom parsing function. | ||
// NOTE(review): ParseTypeWith presumably binds the function to the Expr type | ||
// through its return type - confirm against the participle documentation. | ||
var parser = participle.MustBuild(&Expression{}, participle.ParseTypeWith(parseExprAny)) | ||
|
||
// main parses the command-line arguments as a single expression and prints the | ||
// resulting tree with repr. | ||
func main() { | ||
var cli struct { | ||
Expr []string `arg required help:"Expression to parse."` | ||
} | ||
ctx := kong.Parse(&cli) | ||
|
||
// The positional arguments are joined with single spaces so that an | ||
// expression split into several arguments is parsed as one input. | ||
expr := &Expression{} | ||
err := parser.ParseString("", strings.Join(cli.Expr, " "), expr) | ||
// NOTE(review): FatalIfErrorf presumably reports the error and exits when | ||
// err is non-nil - confirm against the kong documentation. | ||
ctx.FatalIfErrorf(err) | ||
|
||
repr.Println(expr) | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,61 @@ | ||
package main | ||
|
||
import ( | ||
"testing" | ||
|
||
require "github.com/alecthomas/assert/v2" | ||
) | ||
|
||
// TestCustomExprParser is a table-driven test for the hand-written parser: each | ||
// source string is parsed with the package-level parser and the resulting | ||
// Expression.X is compared with the expected node tree. All cases run inside | ||
// this one test function; no subtests are created. | ||
func TestCustomExprParser(t *testing.T) { | ||
type testCase struct { | ||
src string | ||
expected Expr | ||
} | ||
|
||
for _, c := range []testCase{ | ||
{`1`, ExprNumber{1}}, | ||
{`1.5`, ExprNumber{1.5}}, | ||
// The expected string value has its surrounding quotes removed. | ||
{`"a"`, ExprString{"a"}}, | ||
{`(1)`, ExprParens{ExprNumber{1}}}, | ||
{`1+1`, ExprBinary{ExprNumber{1}, "+", ExprNumber{1}}}, | ||
{`1-1`, ExprBinary{ExprNumber{1}, "-", ExprNumber{1}}}, | ||
{`1*1`, ExprBinary{ExprNumber{1}, "*", ExprNumber{1}}}, | ||
{`1/1`, ExprBinary{ExprNumber{1}, "/", ExprNumber{1}}}, | ||
{`1%1`, ExprBinary{ExprNumber{1}, "%", ExprNumber{1}}}, | ||
// A binary "-" followed by a unary "-" on the right operand. | ||
{`a - -b`, ExprBinary{ExprIdent{"a"}, "-", ExprUnary{"-", ExprIdent{"b"}}}}, | ||
// The expected tree nests entirely to the right, including "+" followed | ||
// by "-" at the same level. | ||
{ | ||
`a + b - c * d / e % f`, | ||
ExprBinary{ | ||
ExprIdent{"a"}, "+", ExprBinary{ | ||
ExprIdent{"b"}, "-", ExprBinary{ | ||
ExprIdent{"c"}, "*", ExprBinary{ | ||
ExprIdent{"d"}, "/", ExprBinary{ | ||
ExprIdent{"e"}, "%", ExprIdent{"f"}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
}, | ||
// "*" binds tighter than "+", so both products are grouped first. | ||
{ | ||
`a * b + c * d`, | ||
ExprBinary{ | ||
ExprBinary{ExprIdent{"a"}, "*", ExprIdent{"b"}}, | ||
"+", | ||
ExprBinary{ExprIdent{"c"}, "*", ExprIdent{"d"}}, | ||
}, | ||
}, | ||
// Parentheses are kept as ExprParens nodes around the inner sums. | ||
{ | ||
`(a + b) * (c + d)`, | ||
ExprBinary{ | ||
ExprParens{ExprBinary{ExprIdent{"a"}, "+", ExprIdent{"b"}}}, | ||
"*", | ||
ExprParens{ExprBinary{ExprIdent{"c"}, "+", ExprIdent{"d"}}}, | ||
}, | ||
}, | ||
} { | ||
var actual Expression | ||
require.NoError(t, parser.ParseString("", c.src, &actual)) | ||
require.Equal(t, c.expected, actual.X) | ||
} | ||
} |
Oops, something went wrong.