Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add ECL lexer #1396

Merged
merged 17 commits into from
Feb 20, 2020
Merged
Show file tree
Hide file tree
Changes from 14 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
18 changes: 18 additions & 0 deletions lib/rouge/demos/ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
/*
Example code - use without restriction.
*/
// Record layout with three fields: a numeric PersonID and two
// fixed-width string fields, FirstName and LastName.
Layout_Person := RECORD
UNSIGNED1 PersonID;
STRING15 FirstName;
STRING25 LastName;
END;
pyrmont marked this conversation as resolved.
Show resolved Hide resolved

// Inline dataset of three rows, each shaped by Layout_Person.
allPeople := DATASET([ {1,'Fred','Smith'},
{2,'Joe','Blow'},
{3,'Jane','Smith'}],Layout_Person);

// allPeople restricted by the condition LastName = 'Smith'.
somePeople := allPeople(LastName = 'Smith');

// Outputs ---
somePeople;

177 changes: 177 additions & 0 deletions lib/rouge/lexers/ecl.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,177 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
module Lexers
class ECL < RegexLexer
# Lexer identification: short tag, filename glob and MIME type.
tag 'ecl'
filenames '*.ecl'
mimetypes 'application/x-ecl'

# Human-readable title and description.
title "ECL"
desc "Enterprise Control Language (hpccsystems.com)"

# Case-insensitive identifier pattern used by the last rule of the :root state.
#   group 1 - optional leading "#"
#   group 2 - the name itself (letter or underscore first, matched lazily)
#   group 3 - any trailing run of digits, captured separately so that a
#             numeric suffix (as in "string25") can be inspected apart from
#             the base name
id = /(#?)\b([a-z_][\w]*?)(\d*)\b/i

# Names accepted as the middle segment of a three-part dotted name whose
# first segment is "std" (see the dotted-name rule in the :root state).
#
# @return [Set<String>] memoized set of lower-case names
def self.class_first
  return @class1 if @class1

  @class1 = Set.new(%w[
    file date str math metaphone metaphone3 uni audit blas system
  ])
end

# Names accepted as the last segment of a three-part dotted name whose
# first segment is "std" (see the dotted-name rule in the :root state).
#
# @return [Set<String>] memoized set of lower-case names
def self.class_second
  return @class2 if @class2

  @class2 = Set.new(%w[
    debug email job log thorlib util workunit
  ])
end

# Identifiers that the :root identifier rule tokenizes as Name::Function
# (after the type and keyword sets have been consulted).
#
# The word list contains a few repeated entries; the Set collapses them.
#
# @return [Set<String>] memoized set of lower-case names
def self.functions
  return @functions if @functions

  @functions = Set.new(%w[
    abs acos aggregate allnodes apply ascii asin asstring atan _token ave
    case catch choose choosen choosesets clustersize combine correlation
    cos cosh count covariance cron dataset dedup define denormalize
    dictionary distribute distributed distribution ebcdic enth error
    evaluate event eventextra eventname exists exp failcode failmessage
    fetch fromunicode fromxml getenv getisvalid global graph group hash
    hashcrc having httpcall httpheader if iff index intformat isvalid
    iterate join keyunicode length library limit ln local log loop map
    matched matchlength matchposition matchtext matchunicode max merge
    mergejoin min nofold nolocal nonempty normalize parse pipe power
    preload process project pull random range rank ranked realformat
    recordof regexfind regexreplace regroup rejected rollup round roundup
    row rowdiff sample set sin sinh sizeof soapcall sort sorted sqrt
    stepped stored sum table tan tanh thisnode topn tounicode toxml
    transfer transform trim truncate typeof ungroup unicodeorder variance
    which workunit xmldecode xmlencode xmltext xmlunicode apply assert
    build buildindex evaluate fail keydiff keypatch loadxml nothor notify
    output parallel sequential soapcall wait
  ])
end

# Identifiers that the :root identifier rule tokenizes as Keyword
# (checked after the type set and before the function set).
#
# @return [Set<String>] memoized set of lower-case names
def self.keywords
  return @keywords if @keywords

  @keywords = Set.new(%w[
    and or in not all any as from atmost before best between case const
    counter csv descend encrypt end endmacro enum except exclusive expire
    export extend fail few first flat full function functionmacro group
    heading hole ifblock import joined keep keyed last left limit load
    local locale lookup many maxcount maxlength _token module interface
    named nocase noroot noscan nosort of only opt outer overwrite packed
    partition penalty physicallength pipe quote record repeat return
    right rows scan self separator service shared skew skip sql store
    terminator thor threshold token transform trim type unicodeorder
    unsorted validate virtual whole wild within xml xpath after cluster
    compressed compression default encoding escape fileposition forward
    grouped inner internal linkcounted literal lzw mofn multiple
    namespace wnotrim noxpath onfail prefetch retry rowset scope smart
    soapaction stable timelimit timeout unordered unstable update use
    width
  ])
end

# Names that are accepted after a leading "#" by the :root identifier rule;
# a "#"-prefixed name outside this set is tokenized as Error.
#
# @return [Set<String>] memoized set of lower-case names
def self.template
  return @template if @template

  @template = Set.new(%w[
    append apply break constant debug declare demangle else elseif end
    endregion error expand export exportxml for forall getdatatype if
    ifdefined inmodule isdefined isvalid line link loop mangle onwarning
    option region set stored text trace uniquename warning webservice
    workunit loadxml
  ])
end

# Identifiers that the :root identifier rule tokenizes as Keyword::Type
# when the full name (including any digit suffix) is in this set.
#
# @return [Set<String>] memoized set of lower-case names
def self.type
  return @type if @type

  @type = Set.new(%w[
    ascii big_endian boolean data decimal ebcdic grouped integer
    linkcounted pattern qstring real record rule set of streamed string
    token udecimal unicode utf8 unsigned varstring varunicode
  ])
end

# Base type names that the :root identifier rule treats as Keyword::Type
# when they carry a digit suffix (for example "string25").
#
# @return [Set<String>] memoized set of lower-case names
def self.typed
  return @typed if @typed

  @typed = Set.new(%w[
    data string qstring varstring varunicode unicode utf8
  ])
end

# Single-quoted literals and stray backslash escapes.
state :single_quote do
  # A complete quoted literal with an optional one-letter prefix
  # (x, D, Q, U or V). Backslash-escaped characters are allowed inside the
  # body, so \' does not terminate the literal.
  rule %r([xDQUV]?'([^'\\]*(?:\\.[^'\\]*)*)'), Str::Single

  # A backslash escape that was not consumed by the rule above: \xHH (two
  # hex digits), an octal value up to 377, any other single character, or a
  # backslash at end of line.
  # The hex alternative uses \h (hex digit); it was previously written as
  # "x\\h{2}", which only matched the literal text "x\hh".
  rule %r/\\(x\h{2}|[0-2][0-7]{,2}|3[0-6][0-7]?|37[0-7]?|[4-7][0-7]?|.|$)/, Text
end

# Whitespace that does not end a line, plus block comments.
state :inline_whitespace do
# Runs of spaces, tabs and carriage returns.
rule %r/[ \t\r]+/, Text
rule %r/\\\n/, Text # line continuation
# /* ... */ comments: the m flag lets "." cross newlines and the lazy
# quantifier stops at the first closing "*/".
rule %r(/[*].*?[*]/)m, Comment::Multiline
end

# Newlines and // comments, followed by every rule of :inline_whitespace.
state :whitespace do
rule %r/\n+/m, Text
# Single-line comment: "//" up to, but not including, the end of the line.
rule %r(//.*), Comment::Single
mixin :inline_whitespace
end

# Default lexer state for ECL source.
#
# Rule order is significant: whitespace/comments and quoted literals are
# tried first, then operators and punctuation, embedded-code blocks, dotted
# names, numeric literals and finally the generic identifier rule.
state :root do
  mixin :whitespace
  mixin :single_quote

  # Word operators, matched case-insensitively.
  rule %r(\b(?i:(and|not|or|in))\b), Operator::Word

  # Single-character operators. "-" is placed first in the bracket
  # expression so that it is a literal member: the class used to spell it
  # as "|-|", which is a range from "|" to "|" and therefore never matched a
  # minus sign. "*" is listed too so multiplication has a matching rule.
  rule %r([-+*/\\|<>=:]), Operator
  rule %r([\[\]{}();,\&,\.,\%]), Punctuation

  # BEGINC++ ... ENDC++ is emitted as one opaque token.
  rule %r(\b(?i:(beginc\+\+.*?endc\+\+)))m, Str::Single

  # EMBED(<argument>) <body> ENDEMBED
  rule %r(\b(embed)([(])(.*?)([)])(.*?)(endembed))im do
    groups Keyword, Punctuation, Name::Other, Punctuation, Name::Other, Keyword
  end

  # Three-part dotted names. "std.<first>.<second>" with both segments in
  # the known sets is a class name; anything else is a variable. Segments
  # are compared in lower case, matching how the identifier rule below
  # treats names.
  rule %r(\b(\w+)\.(\w+)\.(\w+)) do |m|
    if m[1].downcase == "std" &&
       self.class.class_first.include?(m[2].downcase) &&
       self.class.class_second.include?(m[3].downcase)
      token Name::Class
    else
      token Name::Variable
    end
  end

  # decimalN / udecimalN / decimalN_M
  rule %r(\b(?i:(u)?decimal)(\d+(_\d+)?)\b), Keyword::Type

  rule %r/\d+\.\d+(e[\+\-]?\d+)?/i, Num::Float
  # The trailing \b keeps identifiers that merely start with "x" plus hex
  # characters (e.g. "xdata") from being split into a hex number and a name.
  rule %r/x[0-9a-f]+\b/i, Num::Hex

  rule %r/0x[0-9a-f]+/i, Num::Hex
  rule %r/0[0-9a-f]+x/i, Num::Hex
  rule %r(0[bB][01]+), Num::Bin
  rule %r([01]+[bB]), Num::Bin
  rule %r(\d+), Num::Integer

  # Identifiers: m[1] is an optional "#", m[2] the base name and m[3] an
  # optional digit suffix. The branch order below decides precedence when a
  # name appears in more than one word set.
  rule id do |m|
    name_only = m[2].downcase
    name = name_only + m[3]
    number = m[3].empty? ? nil : m[3].to_i

    if m[1] == "#"
      if self.class.template.include? name
        token Keyword::Type
      else
        token Error
      end
    elsif self.class.typed.include?(name_only) && number
      token Keyword::Type
    elsif self.class.type.include? name
      token Keyword::Type
    elsif self.class.keywords.include? name
      token Keyword
    elsif self.class.functions.include? name
      token Name::Function
    elsif ["integer", "unsigned"].include?(name_only) && (1..8).cover?(number)
      token Keyword::Type
    elsif name_only == "real" && [4, 8].include?(number)
      token Keyword::Type
    elsif ["true", "false"].include? name
      token Keyword::Constant
    else
      token Name::Other
    end
  end
end
end
end
end
26 changes: 26 additions & 0 deletions spec/lexers/ecl_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Spec for the ECL lexer: guessing by filename / MIME type, and a basic
# single-line-comment tokenization check.
describe Rouge::Lexers::ECL do
  let(:subject) { Rouge::Lexers::ECL.new }

  describe 'guessing' do
    include Support::Guessing

    it 'guesses by filename' do
      assert_guess(filename: 'foo.ecl')
    end

    it 'guesses by mimetype' do
      assert_guess(mimetype: 'application/x-ecl')
    end
  end

  describe 'lexing' do
    include Support::Lexing

    # The input deliberately has no trailing newline.
    it 'recognizes one-line comments not followed by a newline (#796)' do
      expected = ['Comment.Single', '// comment']
      assert_tokens_equal '// comment', expected
    end
  end
end
146 changes: 146 additions & 0 deletions spec/visual/samples/ecl
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
// hpccsystems.com

/******************
Multi-line comment
******************/

import std.math.job;
IMPORT Str;

/****************** TYPES ******************/

R1 := RECORD
integer1 F1;
UNSIGNED4 F2;
STRING100 F3;
REAL4 R1;
REAL8 R2;
REAL9 R2; // This should not match a type
boolean B1;
decimal23 D1;
udecimal3 D2;
END;

D1 := dataset('RTTEMP::SomeFile',R1,THOR);
D1 := DATASET('RTTEMP::SomeFile',R1,THOR);

/****************** TEMPLATES ******************/

#append
#APPEND
#inmodule

/****************** NUMBERS AND STRINGS ******************/

ArcCosine := acos(0.55);

DATA2 MyHexString := x'0D0A'; // a 2-byte hexadecimal string
MyDataString := D'abcd'; // same as: (DATA)'abcd'
MyUnicodeString := U'abcd'; // same as: (UNICODE)'abcd'
MyUnicodeString := U'abcd\353'; // becomes 'abcdë'
MyVarString := V'abcd'; // same as: (VARSTRING)'abcd'
MyQString := Q'ABCD'; // same as: (QSTRING)'ABCD'

MyInt1 := 10; // value of MyInt1 is the INTEGER value 10
MyInt2 := 0x0A; // value of MyInt2 is the INTEGER value 10
MyInt3 := 0Ax; // value of MyInt3 is the INTEGER value 10
MyInt4 := 0b1010; // value of MyInt4 is the INTEGER value 10
MyInt5 := 1010b; // value of MyInt5 is the INTEGER value 10
MyReal := 10.0; // value of MyReal is the REAL value 10.0
MyRea2 := 1.0e1; // value of MyRea2 is the REAL value 10.0
MyRea3 := 1.0E1; // value of MyRea3 is the REAL value 10.0

MyFunc(STRING1 val, SET OF STRING1 S=ALL) := val IN S;

eMails := REGEXFINDSET('\\w+@[a-zA-Z_]+?\\.[a-zA-Z]{2,3}' , sampleStr);
eMails2 := REGEXFINDSET(U'\\w+@[a-zA-Z_]+?\\.[a-zA-Z]{2,3}', sampleStr2);

//check for presence in passed set, if passed

SET OF INTEGER4 MySet := IF(SomeCondition=TRUE,[88888,99999,66666,33333,55555],ALL);
MyRecs := MyFile(Zip IN MySet);

str := '---\'---';

/****************** BLOCKS (not working currently) ******************/

INTEGER addone(INTEGER p) := EMBED(Python)
# Python code that returns one more than the value passed to it
pyrmont marked this conversation as resolved.
Show resolved Hide resolved
if p < 10:
return p+1
else:
return 0
ENDEMBED;

REAL8 colors_should_be_different_here := 10.5;

BEGINC++
#option action
struct tm localt; // localtime in "tm" structure
time_t timeinsecs; // variable to store time in secs
time(&timeinsecs);
localtime_r(&timeinsecs,&localt);
char temp[15];
strftime(temp , 15, "%Y%m%d%H%M%S", &localt); // Formats the localtime to YYYYMMDDhhmmss
pyrmont marked this conversation as resolved.
Show resolved Hide resolved
strncpy(__result, temp, 14);
ENDC++;

REAL8 colors_should_be_different_here := 10.5;

AddOne(num) := FUNCTIONMACRO
LOCAL numPlus := num + 1; //LOCAL required here
RETURN numPlus;
ENDMACRO;

/****************** NESTED ******************/
MyMod := MODULE
SHARED x := 88;
y := 42;
EXPORT InMod := MODULE //nested MODULE
EXPORT Val1 := x + 10;
EXPORT Val2 := y + 10;
END;
END;

/****************** PATTERNS ******************/

PATTERN ws := PATTERN('[ \t\r\n]');
PATTERN arb := PATTERN('[-!.,\t a-zA-Z0-9]')+;
PATTERN number := PATTERN('[0-9]')+;
PATTERN age := '(' number OPT('/I') ')';
PATTERN role := '[' arb ']';
PATTERN m_rank := '<' number '>';
PATTERN actor := arb OPT(ws '(I)' ws);

MyFunc(STRING1 val, SET OF STRING1 S=ALL) := val IN S;

/****************** JOINS *******************/
Bld1 := BUILD(AlphaKey ,OVERWRITE);
Bld2 := BUILD(SeqKey,OVERWRITE);
peopleRecord := RECORD
INTEGER8 id;
STRING20 addr;
END;

peopleDataset := DATASET([{3000,'LONDON'},{3500,'SMITH'},
{30,'TAYLOR'}], peopleRecord);

joinedRecord := RECORD
PtblRec;
peopleRecord;
END;

joinedRecord doJoin(peopleRecord l, Ptbl r) := TRANSFORM
SELF := l;
SELF := r;
END;

FilledRecs1 := JOIN(peopleDataset, Ptbl,LEFT.id=RIGHT.seq,
doJoin(LEFT,RIGHT), KEYED(SeqKey));
FilledRecs2 := JOIN(peopleDataset, Ptbl,LEFT.addr=RIGHT.Lname,
doJoin(LEFT,RIGHT), KEYED(AlphaKey));

SEQUENTIAL(PtblOut,Bld1,Bld2,OUTPUT(FilledRecs1),OUTPUT(FilledRecs2))

/****************** OUTPUT *******************/
OUTPUT(d, {Label := Subject, Value := Result}, NAMED('BarChart'));