Skip to content

Commit

Permalink
add gherkin lexer (alecthomas#354)
Browse files Browse the repository at this point in the history
  • Loading branch information
micbar committed May 1, 2020
1 parent de4a06f commit 3bd9228
Show file tree
Hide file tree
Showing 3 changed files with 452 additions and 0 deletions.
118 changes: 118 additions & 0 deletions lexers/g/gherkin.go
@@ -0,0 +1,118 @@
package g

import (
. "github.com/alecthomas/chroma" // nolint
"github.com/alecthomas/chroma/lexers/internal"
)

// stepKeywords is the pattern for a step keyword at the start of a line.
//
// Group 1 captures the leading whitespace and group 2 the keyword itself: the
// step keywords of the many languages listed below (for example "Given ",
// "Wenn ", "Quand ") plus the "* " bullet. Most keywords carry their trailing
// space inside the group, so the keyword match includes that space; the CJK
// and Korean keywords are listed without one.
//
// Alternation is tried left to right, so a multi-word keyword is listed before
// the shorter keyword it starts with (e.g. "A také " before "A ").
//
// Declared as a constant because the pattern is fixed and package g is shared
// with other lexers; nothing should be able to reassign it.
const stepKeywords = `^(\s*)(하지만|조건|먼저|만일|만약|단|그리고|그러면|那麼|那么|而且|當|当|前提|假設|假设|假如|假定|但是|但し|並且|并且|同時|同时|もし|ならば|ただし|しかし|かつ|و |متى |لكن |عندما |ثم |بفرض |اذاً |כאשר |וגם |בהינתן |אזי |אז |אבל |Якщо |Унда |То |Припустимо, що |Припустимо |Онда |Но |Нехай |Лекин |Когато |Када |Кад |К тому же |И |Задато |Задати |Задате |Если |Допустим |Дадено |Ва |Бирок |Аммо |Али |Але |Агар |А |І |Și |És |Zatati |Zakładając |Zadato |Zadate |Zadano |Zadani |Zadan |Youse know when youse got |Youse know like when |Yna |Ya know how |Ya gotta |Y |Wun |Wtedy |When y'all |When |Wenn |WEN |Và |Ve |Und |Un |Thì |Then y'all |Then |Tapi |Tak |Tada |Tad |Så |Stel |Soit |Siis |Si |Sed |Se |Quando |Quand |Quan |Pryd |Pokud |Pokiaľ |Però |Pero |Pak |Oraz |Onda |Ond |Oletetaan |Og |Och |O zaman |Når |När |Niin |Nhưng |N |Mutta |Men |Mas |Maka |Majd |Mais |Maar |Ma |Lorsque |Lorsqu'|Kun |Kuid |Kui |Khi |Keď |Ketika |Když |Kaj |Kai |Kada |Kad |Jeżeli |Ja |Ir |I CAN HAZ |I |Ha |Givun |Givet |Given y'all |Given |Gitt |Gegeven |Gegeben sei |Fakat |Eğer ki |Etant donné |Et |Então |Entonces |Entao |En |Eeldades |E |Duota |Dun |Donitaĵo |Donat |Donada |Do |Diyelim ki |Dengan |Den youse gotta |De |Dato |Dar |Dann |Dan |Dado |Dacă |Daca |DEN |Când |Cuando |Cho |Cept |Cand |Cal |But y'all |But |Buh |Biết |Bet |BUT |Atès |Atunci |Atesa |Anrhegedig a |Angenommen |And y'all |And |An |Ama |Als |Alors |Allora |Ali |Aleshores |Ale |Akkor |Aber |AN |A také |A |\* )`

// featureKeywords is the pattern for a feature header line.
//
// Group 1 captures the feature keyword in one of the listed languages (for
// example "Feature" or "Funktionalität"), group 2 the colon and group 3 the
// rest of the line. The pattern has no leading-whitespace group, so the
// keyword must begin at the very start of the line; an indented header does
// not match.
//
// Declared as a constant because the pattern is fixed and package g is shared
// with other lexers; nothing should be able to reassign it.
const featureKeywords = `^(기능|機能|功能|フィーチャ|خاصية|תכונה|Функціонал|Функционалност|Функционал|Фича|Особина|Могућност|Özellik|Właściwość|Tính năng|Trajto|Savybė|Požiadavka|Požadavek|Osobina|Ominaisuus|Omadus|OH HAI|Mogućnost|Mogucnost|Jellemző|Fīča|Funzionalità|Funktionalität|Funkcionalnost|Funkcionalitāte|Funcționalitate|Functionaliteit|Functionalitate|Funcionalitat|Funcionalidade|Fonctionnalité|Fitur|Feature|Egenskap|Egenskab|Crikey|Característica|Arwedd)(:)(.*)$`

// featureElementKeywords is the pattern for the header line of a feature
// element such as a background, scenario or scenario outline.
//
// Group 1 captures the leading whitespace, group 2 the keyword in one of the
// listed languages (for example "Background", "Scenario Outline" or
// "Szenariogrundriss"), group 3 the colon and group 4 the rest of the line.
//
// Declared as a constant because the pattern is fixed and package g is shared
// with other lexers; nothing should be able to reassign it.
const featureElementKeywords = `^(\s*)(시나리오 개요|시나리오|배경|背景|場景大綱|場景|场景大纲|场景|劇本大綱|劇本|剧本大纲|剧本|テンプレ|シナリオテンプレート|シナリオテンプレ|シナリオアウトライン|シナリオ|سيناريو مخطط|سيناريو|الخلفية|תרחיש|תבנית תרחיש|רקע|Тарих|Сценарій|Сценарио|Сценарий структураси|Сценарий|Структура сценарію|Структура сценарија|Структура сценария|Скица|Рамка на сценарий|Пример|Предыстория|Предистория|Позадина|Передумова|Основа|Концепт|Контекст|Założenia|Wharrimean is|Tình huống|The thing of it is|Tausta|Taust|Tapausaihio|Tapaus|Szenariogrundriss|Szenario|Szablon scenariusza|Stsenaarium|Struktura scenarija|Skica|Skenario konsep|Skenario|Situācija|Senaryo taslağı|Senaryo|Scénář|Scénario|Schema dello scenario|Scenārijs pēc parauga|Scenārijs|Scenár|Scenaro|Scenariusz|Scenariul de şablon|Scenariul de sablon|Scenariu|Scenario Outline|Scenario Amlinellol|Scenario|Scenarijus|Scenarijaus šablonas|Scenarij|Scenarie|Rerefons|Raamstsenaarium|Primer|Pozadí|Pozadina|Pozadie|Plan du scénario|Plan du Scénario|Osnova scénáře|Osnova|Náčrt Scénáře|Náčrt Scenáru|Mate|MISHUN SRSLY|MISHUN|Kịch bản|Konturo de la scenaro|Kontext|Konteksts|Kontekstas|Kontekst|Koncept|Khung tình huống|Khung kịch bản|Háttér|Grundlage|Geçmiş|Forgatókönyv vázlat|Forgatókönyv|Fono|Esquema do Cenário|Esquema do Cenario|Esquema del escenario|Esquema de l'escenari|Escenario|Escenari|Dis is what went down|Dasar|Contexto|Contexte|Contesto|Condiţii|Conditii|Cenário|Cenario|Cefndir|Bối cảnh|Blokes|Bakgrunn|Bakgrund|Baggrund|Background|B4|Antecedents|Antecedentes|All y'all|Achtergrond|Abstrakt Scenario|Abstract Scenario)(:)(.*)$`

// examplesKeywords is the pattern for the header line of an examples section.
//
// Group 1 captures the leading whitespace, group 2 the keyword in one of the
// listed languages (for example "Examples", "Scenarios" or "Beispiele"),
// group 3 the colon and group 4 the rest of the line.
//
// Declared as a constant because the pattern is fixed and package g is shared
// with other lexers; nothing should be able to reassign it.
const examplesKeywords = `^(\s*)(예|例子|例|サンプル|امثلة|דוגמאות|Сценарији|Примери|Приклади|Мисоллар|Значения|Örnekler|Voorbeelden|Variantai|Tapaukset|Scenarios|Scenariji|Scenarijai|Příklady|Példák|Príklady|Przykłady|Primjeri|Primeri|Piemēri|Pavyzdžiai|Paraugs|Juhtumid|Exemplos|Exemples|Exemplele|Exempel|Examples|Esempi|Enghreifftiau|Ekzemploj|Eksempler|Ejemplos|EXAMPLZ|Dữ liệu|Contoh|Cobber|Beispiele)(:)(.*)$`

// Gherkin lexer.
//
// Registered under the name "Gherkin" with the aliases "cucumber" and
// "gherkin" (in both capitalisations) for *.feature files and the
// text/x-gherkin MIME type.
//
// Rules within a state are tried in the order written. Most states end with a
// single-character catch-all rule so that text no other rule recognises is
// still consumed one character at a time.
var Gherkin = internal.Register(MustNewLexer(
	&Config{
		Name:      "Gherkin",
		Aliases:   []string{"cucumber", "Cucumber", "gherkin", "Gherkin"},
		Filenames: []string{"*.feature", "*.FEATURE"},
		MimeTypes: []string{"text/x-gherkin"},
	},
	Rules{
		// Entry state: dispatches on whatever starts at the current position.
		"root": {
			{`\n`, NameFunction, nil},
			Include("comments"),
			{`"""`, Keyword, Push("pyString")},
			{`\s+\|`, Keyword, Push("tableContent")},
			{`"`, NameFunction, Push("doubleString")},
			Include("tableVars"),
			Include("numbers"),
			// Tags such as @some_tag, optionally preceded by whitespace.
			{`(\s*)(@[^@\r\n\t ]+)`, ByGroups(NameFunction, NameTag), nil},
			{stepKeywords, ByGroups(NameFunction, Keyword), Push("stepContentRoot")},
			{featureKeywords, ByGroups(Keyword, Keyword, NameFunction), Push("narrative")},
			{featureElementKeywords, ByGroups(NameFunction, Keyword, Keyword, NameFunction), Push("featureElements")},
			{examplesKeywords, ByGroups(NameFunction, Keyword, Keyword, NameFunction), Push("examplesTable")},
			{`(\s|.)`, NameFunction, nil},
		},

		// Free text following a feature header. The comments rule is not
		// included here, so a '#' inside the narrative is ordinary text. The
		// state is left only when a feature element header is found.
		"narrative": {
			Include("scenarioSectionsOnStack"),
			{`(\s|.)`, NameFunction, nil},
		},
		"scenarioSectionsOnStack": {
			{featureElementKeywords, ByGroups(NameFunction, Keyword, Keyword, NameFunction), Push("featureElementsOnStack")},
		},

		// Description lines under a feature element header that was reached
		// from "root". The first step keyword pushes "stepContentStack".
		"featureElements": {
			{stepKeywords, Keyword, Push("stepContentStack")},
			Include("comments"),
			{`(\s|.)`, NameFunction, nil},
		},
		// Same as "featureElements" but reached through "narrative": the first
		// step keyword pops two states (this one and "narrative").
		"featureElementsOnStack": {
			{stepKeywords, Keyword, Pop(2)},
			Include("comments"),
			{`(\s|.)`, NameFunction, nil},
		},

		// Remainder of a step line entered from "root"; end of line pops this
		// state only.
		"stepContentRoot": {
			{`$`, Keyword, Pop(1)},
			Include("stepContent"),
		},
		// Remainder of a step line entered from "featureElements"; end of line
		// pops two states (this one and "featureElements").
		"stepContentStack": {
			{`$`, Keyword, Pop(2)},
			Include("stepContent"),
		},
		// Rules shared by both step content states.
		"stepContent": {
			{`"`, NameFunction, Push("doubleString")},
			Include("tableVars"),
			Include("numbers"),
			Include("comments"),
			{`(\s|.)`, NameFunction, nil},
		},

		// Description text after an examples header, up to the first table
		// pipe, which pushes "examplesTableHeader".
		"examplesTable": {
			{`\s+\|`, Keyword, Push("examplesTableHeader")},
			Include("comments"),
			{`(\s|.)`, NameFunction, nil},
		},
		// Cells of an examples table row, emitted as variables. A pipe followed
		// only by whitespace up to the end of the line pops two states (this
		// one and "examplesTable").
		"examplesTableHeader": {
			{`\s+\|\s*$`, Keyword, Pop(2)},
			Include("comments"),
			{`\\\|`, NameVariable, nil}, // escaped pipe inside a cell
			{`\s*\|`, Keyword, nil},
			{`[^|]`, NameVariable, nil},
		},

		// Cells of a table entered from "root", emitted as strings. A pipe
		// followed only by whitespace up to the end of the line pops this state.
		"tableContent": {
			{`\s+\|\s*$`, Keyword, Pop(1)},
			Include("comments"),
			{`\\\|`, LiteralString, nil}, // escaped pipe inside a cell
			{`\s*\|`, Keyword, nil},
			{`"`, LiteralString, Push("doubleStringTable")},
			Include("string"),
		},

		// Triple-quoted block; closed by the next `"""`.
		"pyString": {
			{`"""`, Keyword, Pop(1)},
			Include("string"),
		},
		// Double-quoted string outside a table; the quotes are NameFunction.
		"doubleString": {
			{`"`, NameFunction, Pop(1)},
			Include("string"),
		},
		// Double-quoted string inside a table cell; the quotes are LiteralString.
		"doubleStringTable": {
			{`"`, LiteralString, Pop(1)},
			Include("string"),
		},
		// String body: <placeholder> variables are picked out, everything else
		// is string text.
		"string": {
			Include("tableVars"),
			{`(\s|.)`, LiteralString, nil},
		},

		// Small building blocks included by the states above.
		"tableVars": {
			{`(<[^>]+>)`, NameVariable, nil},
		},
		"numbers": {
			{`(\d+\.?\d*|\d*\.\d+)([eE][+-]?[0-9]+)?`, LiteralString, nil},
		},
		"comments": {
			{`\s*#.*$`, Comment, nil},
		},
	},
))
88 changes: 88 additions & 0 deletions lexers/testdata/gherkin.actual
@@ -0,0 +1,88 @@
@foo34 @bar # some comment
Feature: proof of concept

In order to read Cucumber features faster and eaiser on Github
As a Cucumber user
I want to have syntax highlighting for Gherkin
This is part of the narrative, even if I have the # symbol, it is still the narrative- not a comment

Background: this is multi-line description
do you like it?
Given cheese is good

Scenario Outline: this is a test
this the second line of the desctiption
Given I have a <var1> and some "string"
And the following table and some 'string'
| header 1 | header 2 | # here is a comment after a step table header
| cell 1-1 | cell 1-2 | # here is a comment after a step table row
| cell 2-1 | "cell 2-2" |

When I do <var2>
And use apostrophes then the feature's syntax still looks good
And here is a string with single and double quotes- "i'll be back"
And here is a string with the opposite 'the quote is "Foo"'
And what about a var in a quote like so: "<var2>"
And what about a multiline string with a var in it like so:
"""
Hello!
I am a multiple line string, often called a pystring.
I can have regular "double quotes" without a problem...
I can also sub in vars as well like: <var1> and <var2>
"""

Then I should see something...
But not something else...

Examples:
| var1 | var2 | # i am a comment
| foo | bar | # so am i
| dog | food |

Scenarios: some other examples with a description
and guess waht?!? I can have multilines as well! # look at me, I'm a comment
Who would have thunk?
| var1 | var2 |
| foo | bar |
| dog | food |

@some_tag @another_tag
Scenario: more examples
will follow after this multi-line description
Given some context# this is an inline comment
# This is a comment
# So is this with no space at front...

Funktionalität: Addition
Um dumme Fehler zu vermeiden
möchte ich als Matheidiot
die Summe zweier Zahlen gesagt bekommen

Szenariogrundriss: Zwei Zahlen hinzufügen
Gegeben sei ich habe <Eingabe_1> in den Taschenrechner eingegeben
Und ich habe <Eingabe_2> in den Taschenrechner eingegeben
Wenn ich <Knopf> drücke
Dann sollte das Ergebniss auf dem Bildschirm <Ausgabe> sein

Beispiele:
| Eingabe_1 | Eingabe_2 | Knopf | Ausgabe |
| 20 | 30 | add | 50 |
| 2 | 5 | add | 7 |
| 0 | 40 | add | 40 |

Feature: Addition
In order to avoid silly mistakes
As a math idiot
I want to be told the sum of two numbers

Scenario Outline: Add two numbers
Given I have entered <input_1> into the calculator
* I have entered <input_2> into the calculator
When I press <button>
Then the result should be <output> on the screen

Examples:
| input_1 | input_2 | button | output |
| 20 | 30 | add | 50 |
| 2 | 5 | add | 7 |
| 0 | 40 | add | 40 |

0 comments on commit 3bd9228

Please sign in to comment.