Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add support for PortugolLexer #2300

Merged
merged 2 commits into from Dec 10, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Expand Up @@ -365,6 +365,7 @@
'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
'PointlessLexer': ('pygments.lexers.pointless', 'Pointless', ('pointless',), ('*.ptls',), ()),
'PonyLexer': ('pygments.lexers.pony', 'Pony', ('pony',), ('*.pony',), ()),
'PortugolLexer': ('pygments.lexers.pascal', 'Portugol', ('portugol',), ('*.alg', '*.portugol'), ()),
'PostScriptLexer': ('pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),
Expand Down
2 changes: 1 addition & 1 deletion pygments/lexers/compiled.py
Expand Up @@ -16,7 +16,7 @@
from pygments.lexers.go import GoLexer
from pygments.lexers.rust import RustLexer
from pygments.lexers.c_like import ECLexer, ValaLexer, CudaLexer
from pygments.lexers.pascal import DelphiLexer, Modula2Lexer
from pygments.lexers.pascal import DelphiLexer, PortugolLexer, Modula2Lexer
from pygments.lexers.ada import AdaLexer
from pygments.lexers.business import CobolLexer, CobolFreeformatLexer
from pygments.lexers.fortran import FortranLexer
Expand Down
241 changes: 187 additions & 54 deletions pygments/lexers/pascal.py
Expand Up @@ -19,7 +19,21 @@
# compatibility import
from pygments.lexers.modula2 import Modula2Lexer

__all__ = ['DelphiLexer']
__all__ = ['DelphiLexer', 'PortugolLexer']


class PortugolLexer(Lexer):
    """Lexer for Portugol source files (``*.alg``, ``*.portugol``).

    This class does no tokenizing of its own: it builds a `DelphiLexer`
    with the ``portugol`` option enabled and forwards every call to it.
    """
    name = 'Portugol'
    aliases = ['portugol']
    filenames = ['*.alg', '*.portugol']
    mimetypes = []

    def __init__(self, **options):
        """Create the wrapper and the delegate lexer.

        ``options`` are passed both to `Lexer.__init__` and to the
        delegate `DelphiLexer`; ``portugol`` is always forced to ``True``
        for the delegate.
        """
        Lexer.__init__(self, **options)
        # Merge rather than writing ``DelphiLexer(**options, portugol=True)``:
        # if the caller already supplied a ``portugol`` option, the latter
        # form raises "TypeError: got multiple values for keyword argument".
        delegate_options = dict(options, portugol=True)
        self.lexer = DelphiLexer(**delegate_options)

    def get_tokens_unprocessed(self, text):
        """Return whatever the delegate lexer yields for ``text``."""
        return self.lexer.get_tokens_unprocessed(text)


class DelphiLexer(Lexer):
Expand Down Expand Up @@ -296,18 +310,109 @@ class DelphiLexer(Lexer):
'xlatb', 'xor'
}

# Names treated as keywords when the lexer runs in Portugol mode.
#
# Entries must be lowercase: the visible lookups in this lexer compare the
# lowercased form of each identifier against its tables, so a mixed-case
# entry (previously 'Exp' and 'Tan') could never match.  Each name is
# listed once; the earlier repeated 'mod' and 'int' entries were redundant
# because the tuple is only ever merged into a set.
PORTUGOL_KEYWORDS = (
    'aleatorio',
    'algoritmo',
    'arquivo',
    'ate',
    'caso',
    'cronometro',
    'debug',
    'e',
    'eco',
    'enquanto',
    'entao',
    'escolha',
    'escreva',
    'escreval',
    'faca',
    'falso',
    'fimalgoritmo',
    'fimenquanto',
    'fimescolha',
    'fimfuncao',
    'fimpara',
    'fimprocedimento',
    'fimrepita',
    'fimse',
    'funcao',
    'inicio',
    'int',
    'interrompa',
    'leia',
    'limpatela',
    'mod',
    'nao',
    'ou',
    'outrocaso',
    'para',
    'passo',
    'pausa',
    'procedimento',
    'repita',
    'retorne',
    'se',
    'senao',
    'timer',
    'var',
    'vetor',
    'verdadeiro',
    'xou',
    'div',
    'abs',
    'arccos',
    'arcsen',
    'arctan',
    'cos',
    'cotan',
    'exp',
    'grauprad',
    'log',
    'logn',
    'pi',
    'quad',
    'radpgrau',
    'raizq',
    'rand',
    'randi',
    'sen',
    'tan',
    'asc',
    'carac',
    'caracpnum',
    'compr',
    'copia',
    'maiusc',
    'minusc',
    'numpcarac',
    'pos',
)

# Type names that Portugol mode highlights as Keyword.Type; they are also
# merged into the instance's ``builtins`` set when ``portugol`` is enabled.
PORTUGOL_BUILTIN_TYPES = {
    'inteiro',
    'real',
    'caractere',
    'logico',
}

# Build the per-instance ``keywords`` and ``builtins`` sets from the lexer
# options, and record in ``self.is_portugol`` whether Portugol mode is on.
#
# Options read here: ``turbopascal``, ``delphi``, ``freepascal`` (booleans,
# default True), ``units`` (list, default: every key of BUILTIN_UNITS) and
# ``portugol`` (boolean, default False).
#
# NOTE(review): this span comes from a rendered diff with its +/- markers
# and indentation stripped, so the keyword/builtin updates appear twice.
# Presumably the first group is the pre-change code and the group after the
# ``portugol`` check is its replacement, nested under ``else`` -- confirm
# against the real file before relying on statement order.
def __init__(self, **options):
Lexer.__init__(self, **options)
self.keywords = set()
if get_bool_opt(options, 'turbopascal', True):
self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
if get_bool_opt(options, 'delphi', True):
self.keywords.update(self.DELPHI_KEYWORDS)
if get_bool_opt(options, 'freepascal', True):
self.keywords.update(self.FREE_PASCAL_KEYWORDS)
self.builtins = set()
for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
self.builtins.update(self.BUILTIN_UNITS[unit])
# Portugol mode: load the Portugol keyword and type tables and set the flag
# that the tokenizer checks.
if get_bool_opt(options, 'portugol', False):
self.keywords.update(self.PORTUGOL_KEYWORDS)
self.builtins.update(self.PORTUGOL_BUILTIN_TYPES)
self.is_portugol = True
else:
self.is_portugol = False

# Pascal dialect tables, each gated by its own option.
if get_bool_opt(options, 'turbopascal', True):
self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
if get_bool_opt(options, 'delphi', True):
self.keywords.update(self.DELPHI_KEYWORDS)
if get_bool_opt(options, 'freepascal', True):
self.keywords.update(self.FREE_PASCAL_KEYWORDS)
# Add the builtin names of every requested unit.
for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
self.builtins.update(self.BUILTIN_UNITS[unit])

def get_tokens_unprocessed(self, text):
scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
Expand All @@ -327,14 +432,16 @@ def get_tokens_unprocessed(self, text):
if stack[-1] == 'initial':
if scanner.scan(r'\s+'):
token = Whitespace
elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
elif not self.is_portugol and scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
if scanner.match.startswith('$'):
token = Comment.Preproc
else:
token = Comment.Multiline
elif scanner.scan(r'//.*?$'):
token = Comment.Single
elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
elif self.is_portugol and scanner.scan(r'(<\-)|(>=)|(<=)|%|<|>|-|\+|\*|\=|(<>)|\/|\.|:|,'):
token = Operator
elif not self.is_portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'):
token = Operator
# stop label highlighting on next ";"
if collect_labels and scanner.match == ';':
Expand Down Expand Up @@ -364,35 +471,40 @@ def get_tokens_unprocessed(self, text):
# if we are in a special block and a
# block ending keyword occurs (and the parenthesis
# is balanced) we end the current block context
if (in_function_block or in_property_block) and \
lowercase_name in self.BLOCK_KEYWORDS and \
brace_balance[0] <= 0 and \
brace_balance[1] <= 0:
in_function_block = False
in_property_block = False
brace_balance = [0, 0]
block_labels = set()
if lowercase_name in ('label', 'goto'):
collect_labels = True
elif lowercase_name == 'asm':
stack.append('asm')
elif lowercase_name == 'property':
in_property_block = True
next_token_is_property = True
elif lowercase_name in ('procedure', 'operator',
'function', 'constructor',
'destructor'):
in_function_block = True
next_token_is_function = True
if self.is_portugol:
if lowercase_name in ('funcao', 'procedimento'):
in_function_block = True
next_token_is_function = True
else:
if (in_function_block or in_property_block) and \
lowercase_name in self.BLOCK_KEYWORDS and \
brace_balance[0] <= 0 and \
brace_balance[1] <= 0:
in_function_block = False
in_property_block = False
brace_balance = [0, 0]
block_labels = set()
if lowercase_name in ('label', 'goto'):
collect_labels = True
elif lowercase_name == 'asm':
stack.append('asm')
elif lowercase_name == 'property':
in_property_block = True
next_token_is_property = True
elif lowercase_name in ('procedure', 'operator',
'function', 'constructor',
'destructor'):
in_function_block = True
next_token_is_function = True
# we are in a function block and the current name
# is in the set of registered modifiers. highlight
# it as pseudo keyword
elif in_function_block and \
elif not self.is_portugol and in_function_block and \
lowercase_name in self.FUNCTION_MODIFIERS:
token = Keyword.Pseudo
# if we are in a property highlight some more
# modifiers
elif in_property_block and \
elif not self.is_portugol and in_property_block and \
lowercase_name in ('read', 'write'):
token = Keyword.Pseudo
next_token_is_function = True
Expand All @@ -403,40 +515,49 @@ def get_tokens_unprocessed(self, text):
# Look if the next token is a dot. If yes it's
# not a function, but a class name and the
# part after the dot a function name
if scanner.test(r'\s*\.\s*'):
if not self.is_portugol and scanner.test(r'\s*\.\s*'):
token = Name.Class
# it's not a dot, our job is done
else:
token = Name.Function
next_token_is_function = False

if self.is_portugol:
block_labels.add(scanner.match.lower())

# same for properties
elif next_token_is_property:
elif not self.is_portugol and next_token_is_property:
token = Name.Property
next_token_is_property = False
# Highlight this token as label and add it
# to the list of known labels
elif collect_labels:
elif not self.is_portugol and collect_labels:
token = Name.Label
block_labels.add(scanner.match.lower())
# name is in list of known labels
elif lowercase_name in block_labels:
token = Name.Label
elif lowercase_name in self.BUILTIN_TYPES:
elif self.is_portugol and lowercase_name in self.PORTUGOL_BUILTIN_TYPES:
token = Keyword.Type
elif not self.is_portugol and lowercase_name in self.BUILTIN_TYPES:
token = Keyword.Type
elif lowercase_name in self.DIRECTIVES:
elif not self.is_portugol and lowercase_name in self.DIRECTIVES:
token = Keyword.Pseudo
# builtins are just builtins if the token
# before isn't a dot
elif not was_dot and lowercase_name in self.builtins:
elif not self.is_portugol and not was_dot and lowercase_name in self.builtins:
token = Name.Builtin
else:
token = Name
elif scanner.scan(r"'"):
elif self.is_portugol and scanner.scan(r"\""):
token = String
stack.append('string')
elif not self.is_portugol and scanner.scan(r"'"):
token = String
stack.append('string')
elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
elif not self.is_portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
token = String.Char
elif scanner.scan(r'\$[0-9A-Fa-f]+'):
elif not self.is_portugol and scanner.scan(r'\$[0-9A-Fa-f]+'):
token = Number.Hex
elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
token = Number.Integer
Expand All @@ -449,18 +570,29 @@ def get_tokens_unprocessed(self, text):
scanner.get_char()

elif stack[-1] == 'string':
if scanner.scan(r"''"):
token = String.Escape
elif scanner.scan(r"'"):
token = String
stack.pop()
elif scanner.scan(r"[^']*"):
token = String
if self.is_portugol:
if scanner.scan(r"''"):
token = String.Escape
elif scanner.scan(r"\""):
token = String
stack.pop()
elif scanner.scan(r"[^\"]*"):
token = String
else:
scanner.get_char()
stack.pop()
else:
scanner.get_char()
stack.pop()

elif stack[-1] == 'asm':
if scanner.scan(r"''"):
token = String.Escape
elif scanner.scan(r"'"):
token = String
stack.pop()
elif scanner.scan(r"[^']*"):
token = String
else:
scanner.get_char()
stack.pop()
elif not self.is_portugol and stack[-1] == 'asm':
if scanner.scan(r'\s+'):
token = Whitespace
elif scanner.scan(r'end'):
Expand Down Expand Up @@ -501,6 +633,7 @@ def get_tokens_unprocessed(self, text):
stack.pop()

# save the dot!!!11
if scanner.match.strip():
if not self.is_portugol and scanner.match.strip():
was_dot = scanner.match == '.'

yield scanner.start_pos, token, scanner.match or ''