Skip to content

Commit

Permalink
Implement syntax highlighting for p4
Browse files Browse the repository at this point in the history
Add "p4" syntax language (https://p4.org/)
  • Loading branch information
wedaly committed Jan 9, 2024
1 parent 06f072b commit 94672f5
Show file tree
Hide file tree
Showing 6 changed files with 426 additions and 85 deletions.
25 changes: 13 additions & 12 deletions docs/config-reference.md
Original file line number Diff line number Diff line change
Expand Up @@ -23,23 +23,24 @@ Syntax Languages

| Value | Description |
|--------------|------------------------------------------------------------------------------------------|
| plaintext | Do not apply any syntax highlighting. |
| json | [JSON](https://www.json.org/json-en.html) |
| yaml | [YAML](https://yaml.org/spec/) |
| go | [Go](https://golang.org/ref/spec) |
| python | [Python](https://docs.python.org/3/reference/) |
| rust | [Rust](https://doc.rust-lang.org/stable/reference/) |
| c | [C](http://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html) |
| bash | [bash](https://www.gnu.org/software/bash/manual/bash.html) |
| makefile | [Makefile](https://www.gnu.org/software/make/manual/make.html) |
| xml | [xml](https://www.w3.org/TR/2006/REC-xml11-20060816/) |
| markdown | [Markdown](https://commonmark.org/) |
| c | [C](http://www.gnu.org/software/gnu-c-manual/gnu-c-manual.html) |
| criticmarkup | [CriticMarkup](https://github.com/CriticMarkup/CriticMarkup-toolkit) |
| protobuf | [Protocol Buffers Version 3](https://developers.google.com/protocol-buffers/docs/proto3) |
| todotxt | [todo.txt](https://github.com/todotxt/todo.txt) |
| gitcommit | Format for editing a git commit |
| gitrebase | Format for git interactive rebase |
| go | [Go](https://golang.org/ref/spec) |
| gotemplate | [Go template](https://pkg.go.dev/text/template) |
| json | [JSON](https://www.json.org/json-en.html) |
| makefile | [Makefile](https://www.gnu.org/software/make/manual/make.html) |
| markdown | [Markdown](https://commonmark.org/) |
| p4 | [p4](https://p4.org) |
| plaintext | Do not apply any syntax highlighting. |
| protobuf | [Protocol Buffers Version 3](https://developers.google.com/protocol-buffers/docs/proto3) |
| python | [Python](https://docs.python.org/3/reference/) |
| rust | [Rust](https://doc.rust-lang.org/stable/reference/) |
| todotxt | [todo.txt](https://github.com/todotxt/todo.txt) |
| xml | [xml](https://www.w3.org/TR/2006/REC-xml11-20060816/) |
| yaml | [YAML](https://yaml.org/spec/) |

Menu Command Object
-------------------
Expand Down
77 changes: 4 additions & 73 deletions syntax/languages/c.go
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
package languages

import (
"io"
"unicode"

"github.com/aretext/aretext/syntax/parser"
Expand Down Expand Up @@ -37,79 +36,11 @@ func cCommentParseFunc() parser.Func {
}

func cPreprocessorDirective() parser.Func {
// Consume leading '#' with optional whitespace after.
consumeStartOfDirective := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
var numConsumed uint64
var sawHashmark bool
for {
r, err := iter.NextRune()
if err == io.EOF {
break
} else if err != nil {
return parser.FailedResult
}

if r == '#' && !sawHashmark {
sawHashmark = true
numConsumed++
} else if sawHashmark && (r == ' ' || r == '\t') {
numConsumed++
} else {
break
}
}

if !sawHashmark {
return parser.FailedResult
}

return parser.Result{
NumConsumed: numConsumed,
NextState: state,
}
directives := []string{
"include", "pragma", "ifndef", "define", "error", "undef",
"endif", "ifdef", "elif", "else", "if",
}

// Consume to the end of line or EOF, unless the line ends with a backslash.
consumeToEndOfDirective := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
var numConsumed uint64
var lastWasBackslash bool
for {
r, err := iter.NextRune()
if err == io.EOF {
break
} else if err != nil {
return parser.FailedResult
}

numConsumed++

if r == '\n' && !lastWasBackslash {
break
}
lastWasBackslash = (r == '\\')
}
return parser.Result{
NumConsumed: numConsumed,
NextState: state,
}
}

return parser.Func(consumeStartOfDirective).
Then(consumeString("include").
Or(consumeString("pragma")).
Or(consumeString("ifndef")).
Or(consumeString("define")).
Or(consumeString("error")).
Or(consumeString("undef")).
Or(consumeString("endif")).
Or(consumeString("ifdef")).
Or(consumeString("elif")).
Or(consumeString("else")).
Or(consumeString("if"))).
ThenNot(consumeSingleRuneLike(func(r rune) bool {
return !unicode.IsSpace(r) // must be followed by space, newline, or EOF
})).
ThenMaybe(consumeToEndOfDirective).
return consumeCStylePreprocessorDirective(directives).
Map(recognizeToken(cTokenRolePreprocessorDirective))
}

Expand Down
69 changes: 69 additions & 0 deletions syntax/languages/helpers.go
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import (
"io"
"sort"
"strings"
"unicode"
"unicode/utf8"

"github.com/aretext/aretext/syntax/parser"
Expand Down Expand Up @@ -367,3 +368,71 @@ func parseCStyleString(quoteRune rune, allowLineBreaks bool) parser.Func {
return consumeCStyleString(quoteRune, allowLineBreaks).
Map(recognizeToken(parser.TokenRoleString))
}

// consumeCStylePreprocessorDirective parses a preprocessor directive (like "#include")
func consumeCStylePreprocessorDirective(directives []string) parser.Func {
// Consume leading '#' with optional whitespace after.
consumeStartOfDirective := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
var numConsumed uint64
var sawHashmark bool
for {
r, err := iter.NextRune()
if err == io.EOF {
break
} else if err != nil {
return parser.FailedResult
}

if r == '#' && !sawHashmark {
sawHashmark = true
numConsumed++
} else if sawHashmark && (r == ' ' || r == '\t') {
numConsumed++
} else {
break
}
}

if !sawHashmark {
return parser.FailedResult
}

return parser.Result{
NumConsumed: numConsumed,
NextState: state,
}
}

// Consume to the end of line or EOF, unless the line ends with a backslash.
consumeToEndOfDirective := func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
var numConsumed uint64
var lastWasBackslash bool
for {
r, err := iter.NextRune()
if err == io.EOF {
break
} else if err != nil {
return parser.FailedResult
}

numConsumed++

if r == '\n' && !lastWasBackslash {
break
}
lastWasBackslash = (r == '\\')
}
return parser.Result{
NumConsumed: numConsumed,
NextState: state,
}
}

return parser.Func(consumeStartOfDirective).
Then(consumeLongestMatchingOption(directives)).
ThenNot(consumeSingleRuneLike(func(r rune) bool {
return !unicode.IsSpace(r) // must be followed by space, newline, or EOF
})).
ThenMaybe(consumeToEndOfDirective).
Map(recognizeToken(cTokenRolePreprocessorDirective))
}
163 changes: 163 additions & 0 deletions syntax/languages/p4.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,163 @@
package languages

import "github.com/aretext/aretext/syntax/parser"

const (
p4TokenRolePreprocessorDirective = parser.TokenRoleCustom1
p4TokenRoleAnnotation = parser.TokenRoleCustom2
)

// P4ParseFunc returns a parse func for P4-16.
// See https://p4.org/p4-spec/docs/P4-16-v1.0.0-spec.html for the spec.
// See also p4.json for syntax highlighting rules:
// https://github.com/p4lang/p4-spec/blob/c84896fcd87f940983648b185ef9acf2b6f14838/p4-16/spec/p4.json
func P4ParseFunc() parser.Func {
return p4CommentParseFunc().
Or(p4PreprocessorDirectiveParseFunc()).
Or(p4AnnotationParseFunc()).
Or(p4IdentifierOrKeywordParseFunc()).
Or(p4OperatorParseFunc()).
Or(p4StringParseFunc()).
Or(p4NumberParseFunc())
}

func p4CommentParseFunc() parser.Func {
consumeLineComment := consumeString("//").
ThenMaybe(consumeToNextLineFeed)

consumeBlockComment := consumeString("/*").
Then(consumeToString("*/"))

return consumeLineComment.
Or(consumeBlockComment).
Map(recognizeToken(parser.TokenRoleComment))
}

func p4PreprocessorDirectiveParseFunc() parser.Func {
directives := []string{
"include", "if", "endif", "ifdef",
"define", "ifndef", "undef", "line",
}
return consumeCStylePreprocessorDirective(directives).
Map(recognizeToken(p4TokenRolePreprocessorDirective))
}

func p4AnnotationParseFunc() parser.Func {
annotations := []string{
"atomic", "defaultonly", "deprecated", "name", "noSideEffects", "noWarn",
"optional", "priority", "pure", "tableonly", "hidden", "globalname",
}
return consumeString("@").
Then(consumeLongestMatchingOption(annotations)).
Map(recognizeToken(p4TokenRoleAnnotation))
}

func p4IdentifierOrKeywordParseFunc() parser.Func {
isIdStart := func(r rune) bool {
return (r >= 'a' && r <= 'z') || (r >= 'A' && r <= 'Z') || r == '_' || r == '$'
}

isIdContinue := func(r rune) bool {
return isIdStart(r) || (r >= '0' && r <= '9')
}

keywords := []string{
"abstract", "action", "apply", "control", "default", "else",
"extern", "exit", "false", "if",
"package", "parser", "return", "select", "state", "switch",
"table", "this", "transition", "true", "type", "typedef", "value_set", "verify",
"bool", "bit", "const", "enum", "entries", "error", "header", "header_union",
"in", "inout", "int", "list", "match_kind", "out", "string", "tuple", "struct", "varbit", "void",
}

return consumeSingleRuneLike(isIdStart).
ThenMaybe(consumeRunesLike(isIdContinue)).
MapWithInput(recognizeKeywordOrConsume(keywords))
}

func p4OperatorParseFunc() parser.Func {
return consumeLongestMatchingOption([]string{
"=", ">", "<", "!", "~", "?", ":",
"==", "<=", ">=", "!=", "&&", "||", "++",
"+", "-", "*", "/", "&", "|", "^", "%", "<<",
">>", "&&&", "..",
}).Map(recognizeToken(parser.TokenRoleOperator))
}

func p4StringParseFunc() parser.Func {
return parseCStyleString('"', false)
}

func p4NumberParseFunc() parser.Func {
// NOTE: the number regex patterns in the spec's syntax highlighting definition (p4.json)
// differs from the spec itself (P4-16-spec.mdk). Follow the latter here.
consumeDigitsWithUnderscores := func(isDigit func(r rune) bool) parser.Func {
return func(iter parser.TrackingRuneIter, state parser.State) parser.Result {
var numUnderscores, numDigits uint64
for {
r, err := iter.NextRune()
if err != nil {
break
} else if r == '_' {
numUnderscores++
} else if isDigit(r) {
numDigits++
} else {
break
}
}

if numDigits == 0 {
return parser.FailedResult
}

return parser.Result{
NumConsumed: numUnderscores + numDigits,
NextState: state,
}
}
}

isDecimalDigit := func(r rune) bool { return r >= '0' && r <= '9' }
isHexDigit := func(r rune) bool {
return (r >= '0' && r <= '9') || (r >= 'a' && r <= 'f') || (r >= 'A' && r <= 'F')
}
isOctalDigit := func(r rune) bool {
return r >= '0' && r <= '7'
}
isBinaryDigit := func(r rune) bool {
return r == '0' || r == '1'
}

consumeWidthPrefix := consumeRunesLike(isDecimalDigit).
Then(consumeSingleRuneLike(func(r rune) bool { return r == 'w' || r == 's' }))

consumeHex := consumeString("0").
Then(consumeSingleRuneLike(func(r rune) bool { return r == 'x' || r == 'X' })).
Then(consumeDigitsWithUnderscores(isHexDigit))

consumeOctal := consumeString("0").
Then(consumeSingleRuneLike(func(r rune) bool { return r == 'o' || r == 'O' })).
Then(consumeDigitsWithUnderscores(isOctalDigit))

consumeBinary := consumeString("0").
Then(consumeSingleRuneLike(func(r rune) bool { return r == 'b' || r == 'B' })).
Then(consumeDigitsWithUnderscores(isBinaryDigit))

consumeDecimalWithPrefix := consumeString("0").
Then(consumeSingleRuneLike(func(r rune) bool { return r == 'd' || r == 'D' })).
Then(consumeDigitsWithUnderscores(isDecimalDigit))

// Ensure first digit is not an underscore.
consumeDecimalWithoutPrefix := consumeSingleRuneLike(isDecimalDigit).
ThenMaybe(consumeDigitsWithUnderscores(isDecimalDigit))

return consumeWidthPrefix.
MaybeBefore(
consumeHex.
Or(consumeOctal).
Or(consumeBinary).
Or(consumeDecimalWithPrefix).
Or(consumeDecimalWithoutPrefix)).
Map(recognizeToken(parser.TokenRoleNumber))
}

0 comments on commit 94672f5

Please sign in to comment.