Skip to content

GO API To Help You Create Hand-Written Parsers - See My 'go_lexer' Project for the Lexer API

License

Notifications You must be signed in to change notification settings

iNamik/go_parser

Folders and files

NameName
Last commit message
Last commit date

Latest commit

 

History

10 Commits
 
 
 
 
 
 
 
 
 
 
 
 
 
 

Repository files navigation

go_parser

Parser API in Go

ABOUT

The 'go_parser' package is an API to help you create hand-written lexers and parsers.

The package was inspired by Rob Pikes' video Lexical Scanning In Go and golang's 'template' package.

PARSER INTERFACE

Below is the interface for the main Parser type:

// Parser helps you process lexer tokens
type Parser interface {

	// PeekTokenType allows you to look ahead at tokens without consuming them
	PeekTokenType(int) lexer.TokenType

	// PeekToken allows you to look ahead at tokens without consuming them
	PeekToken(int) *lexer.Token

	// NextToken consumes and returns the next token
	NextToken() *lexer.Token

	// SkipToken consumes the next token without returning it
	SkipToken()

	// SkipTokens consumes the next n tokens without returning them
	SkipTokens(int)

	// BackupToken un-consumes the last token
	BackupToken()

	// BackupTokens un-consumes the last n tokens
	BackupTokens(int)

	// ClearTokens clears all consumed tokens
	ClearTokens()

	// Emit emits an object, consuming matched tokens
	Emit(interface{})

	EOF() bool

	// Next retrieves the next emitted item
	Next() interface{}

	// Marker returns a marker that you can use to reset the parser state later
	Marker() *Marker

	// Reset resets the lexer state to the specified marker
	Reset(*Marker)
}

EXAMPLE

Below is a sample calculator program that uses the parser (and lexer) API:

//
//	calc.go implements a simple calculator using the iNamik lexer and parser api.
//
//	Input is read from STDIN
//
//	The input expression is matched against the following pattern:
//
//	input_exp:
//	( id '=' )? general_exp
//	general_exp:
//		operand ( operator operand )?
//	operand:
//		number | id | '(' general_exp ')'
//	operator:
//		'+' | '-' | '*' | '/'
//	number:
//		digit+ ( '.' digit+ )?
//	digit:
//		['0'..'9']
//	id:
//		alpha ( alpha | digit )*
//	alpha:
//		['a'..'z'] | ['A'..'Z']
//
//	Precedence is as expected, with '*' and '/' have higher precedence
//	than '+' and '-', as follows:
//
//	1 + 2 * 3 - 4 / 5  ==  1 + (2 * 3) - (4 / 5)
//

package main

import (
	"bufio"
	"bytes"
	"fmt"
	"os"
	"strconv"
	"strings"
)
import (
	"github.com/iNamik/go_lexer"
	"github.com/iNamik/go_lexer/rangeutil"
	"github.com/iNamik/go_parser"
)

// We define our lexer tokens starting from the pre-defined EOF token
const (
	T_EOF lexer.TokenType = lexer.TokenTypeEOF
	T_NIL                 = lexer.TokenTypeEOF + iota
	T_ID
	T_NUMBER
	T_PLUS
	T_MINUS
	T_MULTIPLY
	T_DIVIDE
	T_EQUALS
	T_OPEN_PAREN
	T_CLOSE_PAREN
)

// To store variables
var vars = map[string]float64{}

// Single-character tokens
var singleChars = []byte{'+', '-', '*', '/', '=', '(', ')'}

var singleTokens = []lexer.TokenType{T_PLUS, T_MINUS, T_MULTIPLY, T_DIVIDE, T_EQUALS, T_OPEN_PAREN, T_CLOSE_PAREN}

// Multi-character tokens
var bytesWhitespace = []byte{' ', '\t'}

var bytesDigits = rangeutil.RangeToBytes("0-9")

var bytesAlpha = rangeutil.RangeToBytes("a-zA-Z")

var bytesAlphaNum = rangeutil.RangeToBytes("0-9a-zA-Z")

// main
func main() {
	// Create a buffered reader from STDIN
	stdin := bufio.NewReader(os.Stdin)

	for {
		// Read a line of input
		input, _, err := stdin.ReadLine()

		// Error? we're done
		if nil != err {
			break
		}

		// Anything to process?
		if len(input) > 0 {
			// Create a new lexer to turn the input text into tokens
			l := lexer.New(lex, strings.NewReader(string(input)), len(input), 2)

			// Create a new parser that feeds off the lexer and generates expression values
			p := parser.New(parse, l, 2)

			// Loop over parser emits
			for i := p.Next(); nil != i; i = p.Next() {
				fmt.Printf("%v\n", i)
			}
		}
	}
}

// lex is the starting (and only) StateFn for lexing the input into tokens
func lex(l lexer.Lexer) lexer.StateFn {

	// EOF
	if l.MatchEOF() {
		l.EmitEOF()
		return nil // We're done here
	}

	// Single-char token?
	if i := bytes.IndexRune(singleChars, l.PeekRune(0)); i >= 0 {
		l.NextRune()
		l.EmitToken(singleTokens[i])
		return lex
	}

	switch {

	// Skip whitespace
	case l.MatchOneOrMoreBytes(bytesWhitespace):
		l.IgnoreToken()

	// Number
	case l.MatchOneOrMoreBytes(bytesDigits):
		if l.PeekRune(0) == '.' {
			l.NextRune() // skip '.'
			if !l.MatchOneOrMoreBytes(bytesDigits) {
				printError(l.Column(), "Illegal number format - Missing digits after '.'")
				l.IgnoreToken()
				break
			}
		}
		l.EmitTokenWithBytes(T_NUMBER)

	// ID
	case l.MatchOneBytes(bytesAlpha) && l.MatchZeroOrMoreBytes(bytesAlphaNum):
		l.EmitTokenWithBytes(T_ID)

	// Unknown
	default:
		l.NextRune()
		printError(l.Column(), "Unknown Character")
		l.IgnoreToken()
	}

	// See you again soon!
	return lex
}

// parse tries to execute a general expression from the lexed tokens.
// Returns nil - We only take one pass at the input string
func parse(p parser.Parser) parser.StateFn {

	if p.PeekTokenType(0) != T_EOF {
		// Assignment ( id = general_expression )
		if p.PeekTokenType(0) == T_ID && p.PeekTokenType(1) == T_EQUALS {
			tId := p.NextToken()

			p.SkipToken() // skip '='

			val, ok := pGeneralExpression(p)

			if ok {
				t := p.NextToken()
				if t.Type() != T_EOF {
					printError(t.Column(), "Expecting operator")
				} else {
					id := string(tId.Bytes())
					vars[id] = val
				}
			}
			// General expression
		} else {
			val, ok := pGeneralExpression(p)

			if ok {
				t := p.NextToken()
				if t.Type() != T_EOF {
					printError(t.Column(), "Expecting operator")
				} else {
					p.Emit(val)
				}
			}
		}
	}

	p.ClearTokens()
	p.Emit(nil) // We're done - One pass only

	return nil
}

// pGeneralExpression is the starting point for parsing a General Expression.
// It is basically a pass-through to pAdditiveExpression, but it feels cleaner
func pGeneralExpression(p parser.Parser) (f float64, ok bool) {
	return pAdditiveExpression(p)
}

// pAdditiveExpression parses [ expression ( ( '+' | '-' ) expression )? ]
func pAdditiveExpression(p parser.Parser) (f float64, ok bool) {

	f, ok = pMultiplicitiveExpression(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

		// Add (+)
		case T_PLUS:
			r, ok := pAdditiveExpression(p)
			if ok {
				f += r
			}

		// Subtract (-)
		case T_MINUS:
			r, ok := pAdditiveExpression(p)
			if ok {
				f -= r
			}

		// Unknown - Send it back upstream
		default:
			p.BackupToken()
			ok = true
		}
	}

	return
}

// pMultiplicitiveExpression parses [ expression ( ( '*' | '/' ) expression )? ]
func pMultiplicitiveExpression(p parser.Parser) (f float64, ok bool) {
	f, ok = pOperand(p)

	if ok {
		t := p.NextToken()
		switch t.Type() {

		// Multiply (*)
		case T_MULTIPLY:
			r, ok := pMultiplicitiveExpression(p)
			if ok {
				f *= r
			}

		// Divide (/)
		case T_DIVIDE:
			r, ok := pMultiplicitiveExpression(p)
			if ok {
				f /= r
			}

		// Unknown - Send it back upstream
		default:
			p.BackupToken()
			ok = true
		}
	}

	return
}

// pOperand parses [ id | number | '(' expression ')' ]
func pOperand(p parser.Parser) (f float64, ok bool) {
	var err error

	m := p.Marker()
	t := p.NextToken()

	switch t.Type() {

	// ID
	case T_ID:
		var id = string(t.Bytes())
		f, ok = vars[id]
		if !ok {
			printError(t.Column(), fmt.Sprint("id '", id, "' not defined"))
			f = 0.0
		}

	// Number
	case T_NUMBER:
		f, err = strconv.ParseFloat(string(t.Bytes()), 64)
		ok = nil == err
		if !ok {
			printError(t.Column(), fmt.Sprint("Error reading number: ", err.Error()))
			f = 0.0
		}

	// '(' Expresson ')'
	case T_OPEN_PAREN:
		f, ok = pGeneralExpression(p)
		if ok {
			t2 := p.NextToken()
			if t2.Type() != T_CLOSE_PAREN {
				printError(t.Column(), "Unbalanced Paren")
				ok = false
				f = 0.0
			}
		}

	// EOF
	case T_EOF:
		printError(t.Column(), "Unexpected EOF - Expecting operand")
		ok = false
		f = 0.0

	// Unknown
	default:
		printError(t.Column(), "Expecting operand")
		ok = false
		f = 0.0
	}

	if !ok {
		p.Reset(m)
	}

	return
}

// printError prints an error msg pointing to the specified column of the input.
func printError(col int, msg string) {
	fmt.Print(strings.Repeat(" ", col-1), "^ ", msg, "\n")
}

INSTALL

The package is built using the Go tool. Assuming you have correctly set the $GOPATH variable, you can run the folloing command:

go get github.com/iNamik/go_parser

DEPENDENCIES

go_parser depends on the iNamik go_container queue package and the iNamik go_lexer package:

AUTHORS

  • David Farrell

About

GO API To Help You Create Hand-Written Parsers - See My 'go_lexer' Project for the Lexer API

Resources

License

Stars

Watchers

Forks

Releases

No releases published

Packages

No packages published

Languages