pygments · jeanas · Sep 17, 2022 · Sep 14, 2022 · Sep 14, 2022 · Sep 14, 2022
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
@@ -167,6 +167,7 @@
     'FortranLexer': ('pygments.lexers.fortran', 'Fortran', ('fortran', 'f90'), ('*.f03', '*.f90', '*.F03', '*.F90'), ('text/x-fortran',)),
     'FoxProLexer': ('pygments.lexers.foxpro', 'FoxPro', ('foxpro', 'vfp', 'clipper', 'xbase'), ('*.PRG', '*.prg'), ()),
     'FreeFemLexer': ('pygments.lexers.freefem', 'Freefem', ('freefem',), ('*.edp',), ('text/x-freefem',)),
+    'FuncLexer': ('pygments.lexers.func', 'FunC', ('func', 'fc'), ('*.fc', '*.func'), ()),
     'FutharkLexer': ('pygments.lexers.futhark', 'Futhark', ('futhark',), ('*.fut',), ('text/x-futhark',)),
     'GAPConsoleLexer': ('pygments.lexers.algebra', 'GAP session', ('gap-console', 'gap-repl'), ('*.tst',), ()),
     'GAPLexer': ('pygments.lexers.algebra', 'GAP', ('gap',), ('*.g', '*.gd', '*.gi', '*.gap'), ()),

diff --git a/pygments/lexers/func.py b/pygments/lexers/func.py
@@ -0,0 +1,126 @@
+"""
+    pygments.lexers.func
+    ~~~~~~~~~~~~~~~~~~~~
+
+    Lexers for FunC.
+"""
+
+from pygments.lexer import RegexLexer, bygroups, include, words
+from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
+    Number, Whitespace, Punctuation
+
+__all__ = ['FuncLexer']
+
+
+class FuncLexer(RegexLexer):
+    """
+    Lexer for source code written in the FunC language.
+    """
+
+    name = 'FunC'
+    aliases = ['func', 'fc']
+    filenames = ['*.fc', '*.func']
+
+    # Characters allowed inside a plain identifier: anything except
+    # whitespace and the punctuation ; , [ ] ( ) ~ .
+    identifier_allowed_symbols = r'([^;,\[\]\(\)\s~.]+)'
+    # An identifier
+    # 1. never starts with a double quote, and
+    # 2. is either a backtick-quoted run of non-backtick characters, or
+    # 3. a run of allowed symbols; when that run starts with "_", "{" or
+    #    "}", at least one more allowed symbol has to follow.
+    identifier = '(?!")(`([^`]+)`|((?=_)_|(?={{){{|(?=}})}}|(?![_`{{}}])){})'.format(identifier_allowed_symbols)
+
+    tokens = {
+        'root': [
+            (r'\n', Whitespace),
+            (r'\s+', Whitespace),
+
+            include('keywords'),
+            include('strings'),
+            include('directives'),
+            include('numeric'),
+            include('comments'),
+            include('storage'),
+            # 'functions' has to come before 'variables': both build on the
+            # identifier pattern and rules are tried in order.
+            include('functions'),
+            include('variables'),
+
+            (r'[.;(),\[\]~{}]', Punctuation)
+        ],
+        'keywords': [
+            # Operators are only recognised between whitespace, because most
+            # of their characters are also allowed inside identifiers.
+            (words((
+                '<=>', '>=', '<=', '!=', '==', '^>>', '~>>',
+                '>>', '<<', '/%', '^%', '~%', '^/', '~/', '+=',
+                '-=', '*=', '/=', '~/=', '^/=', '%=', '^%=', '<<=',
+                '>>=', '~>>=', '^>>=', '&=', '|=', '^=', '^', '=',
+                '~', '/', '%', '-', '*', '+', '>',
+                '<', '&', '|', ':', '?'), prefix=r'(?<=\s)', suffix=r'(?=\s)'),
+             Operator),
+            (words((
+                'if', 'ifnot',
+                'else', 'elseif', 'elseifnot',
+                'while', 'do', 'until', 'repeat',
+                'return', 'impure', 'method_id',
+                'forall', 'asm', 'inline', 'inline_ref'), prefix=r'\b', suffix=r'\b'),
+             Keyword),
+            (words(('true', 'false'), prefix=r'\b', suffix=r'\b'), Keyword.Constant),
+        ],
+        'directives': [
+            (r'#include|#pragma', Keyword, 'directive'),
+        ],
+        'directive': [
+            include('strings'),
+            # Blanks between a directive and its arguments; without this rule
+            # they come out as Error tokens.  Newlines are deliberately left
+            # unmatched so that RegexLexer resets to 'root' at the end of the
+            # line when the terminating ";" is missing.
+            (r'[^\S\n]+', Whitespace),
+            (r'version|not-version', Keyword),
+            # Optional comparison operator followed by a dotted version.
+            (r'(>=|<=|=|>|<|\^)?([0-9]+)(\.[0-9]+)?(\.[0-9]+)?', Number),
+            (r';', Text, '#pop')
+        ],
+        'strings': [
+            # Double-quoted literal (possibly empty, no embedded newline or
+            # quote), optionally followed by one of the letters H h c u s a.
+            (r'\"([^\n\"]*)\"[Hhcusa]?', String),
+        ],
+        'numeric': [
+            # Decimal, 0x or 0b literal that does not end with "_" and is
+            # followed by whitespace or one of ) ] , ;
+            (r'\b(-?(?!_)([\d_]+|0x[\d_a-fA-F]+)|0b[1_0]+)(?<!_)(?=[\s\)\],;])', Number)
+        ],
+        'comments': [
+            (r';;([^\n]*)', Comment.Singleline),
+            (r'\{-', Comment.Multiline, 'comment'),
+        ],
+        'comment': [
+            # Block comments nest.  "{" is excluded from the bulk rule so that
+            # it cannot swallow the "{" of a nested "{-" before the '#push'
+            # rule gets a chance to match.
+            (r'[^-{}]+', Comment.Multiline),
+            (r'\{-', Comment.Multiline, '#push'),
+            (r'-\}', Comment.Multiline, '#pop'),
+            (r'[-{}]', Comment.Multiline),
+        ],
+        'storage': [
+            (words((
+                'var', 'int', 'slice', 'tuple',
+                'cell', 'builder', 'cont', '_'), prefix=r'\b', suffix=r'(?=[\s\(\),\[\]])'),
+             Keyword.Type),
+            # The boundaries must be raw strings: a plain '\b' is a backspace
+            # character, not a regex word boundary.
+            (words(('global', 'const'), prefix=r'\b', suffix=r'\b'), Keyword.Constant),
+        ],
+        'variables': [
+            (identifier, Name.Variable),
+        ],
+        'functions': [
+            # identifier followed by (
+            (identifier + r'(?=[\(])', Name.Function),
+        ]
+    }
diff --git a/tests/test_func.py b/tests/test_func.py
@@ -0,0 +1,110 @@
+import pytest
+from pygments.lexers.func import FuncLexer
+from pygments.token import Token, Name
+
+
+@pytest.fixture(scope='module')
+def lexer_func():
+    """Provide one FuncLexer instance shared by every test in this module."""
+    yield FuncLexer()
+
+
+def test_simple_func_contract(lexer_func):
+    """A complete sample contract must lex without producing Error tokens."""
+
+    fragment = '''
+#include "../";
+#pragma version >=1.0.0;
+
+global int k;
+const int k = 1;
+
+() recv_internal(int my_balance, int msg_value, cell in_msg_full, slice in_msg_body) impure {
+    slice cs = in_msg_full.begin_parse();
+    int flags = cs~load_uint(0x4_1_0);
+
+    if ((flags & 1) == true) { ;; ignore all bounced messages
+        return ();
+    }
+
+    {-
+    {-
+     Test comment
+    -}
+    -}
+
+    slice sender_address = cs~load_msg_addr();
+
+    ;; Send message
+    var message = begin_cell()
+        .store_uint(0x18, 6)
+        .store_slice(sender_address)
+        .store_coins(0)
+        .store_uint(0, 1 + 4 + 4 + 64 + 32 + 1 + 1)
+        .store_slice("Hello, world!"s)
+        .end_cell();
+
+    send_raw_message(message, 64);
+
+    ;; Update counter
+    var cs = get_data().begin_parse();
+    var counter = data~load_uint(32);
+
+    store_data(
+        begin_cell()
+            .store_uint(counter + 1, 32)
+        .end_cell()
+    );
+}
+
+() recv_external(slice in_msg) impure {
+    throw(0xffff);
+}
+
+int counter() method_id {
+    var data = get_data().begin_parse();
+    return data~load_uint(32);
+}
+    '''
+
+    tokens = list(lexer_func.get_tokens(fragment))
+    # get_tokens() yields (token_type, value) pairs, so the type to check is
+    # the first item of each pair, not the second.
+    assert all(token_type != Token.Error for token_type, _ in tokens)
+
+
+@pytest.mark.parametrize('text', (
+    'take(first)Entry', '"not_a_string', 'msg.sender', 'send_message,then_terminate', '_'))
+def test_func_not_identifier(lexer_func, text):
+    """Test text that should **not** be tokenized as identifier."""
+    assert list(lexer_func.get_tokens(text))[0] != (Name.Variable, text)
+
+
+@pytest.mark.parametrize('text', (
+    '`test identifier`', 'simple_identifier', 'query\'\'',
+    '_internal_value', 'get_pubkeys&signatures',
+    'dict::udict_set_builder', '2+2=2*2', '-alsovalidname', '{hehehe}'))
+def test_func_identifier(lexer_func, text):
+    """Test text that should be tokenized as identifier."""
+    assert list(lexer_func.get_tokens(text))[0] == (Name.Variable, text)
+
+
+@pytest.mark.parametrize('text', (
+    '`test identifier`(', 'simple_identifier(', 'query\'\'(',
+    '_internal_value(', 'get_pubkeys&signatures(',
+    'dict::udict_set_builder(', '2+2=2*2(', '-alsovalidname(', '{hehehe}('))
+def test_func_function(lexer_func, text):
+    """Test text that should be tokenized as a function name."""
+    assert list(lexer_func.get_tokens(text))[0] == (Name.Function, text[:-1])
+
+
+@pytest.mark.parametrize('text', ('0x0f', '0x1_2', '123', '0b10', '0xffff_fff', '1'))
+def test_func_number(lexer_func, text):
+    """Test text that should be tokenized as number."""
+    assert list(lexer_func.get_tokens(text))[0] == (Token.Literal.Number, text)
+
+
+@pytest.mark.parametrize('text', ('0x0f_m', '0X1_2', '12d3', '0b1_0f', '0bff_fff', '0b'))
+def test_func_not_number(lexer_func, text):
+    """Test text that should *not* be tokenized as number."""
+    assert list(lexer_func.get_tokens(text))[0] != (Token.Literal.Number, text)