pygments · jeanas · Dec 10, 2022 · Dec 6, 2022 · Dec 10, 2022
diff --git a/pygments/lexers/_mapping.py b/pygments/lexers/_mapping.py
@@ -365,6 +365,7 @@
     'PlPgsqlLexer': ('pygments.lexers.sql', 'PL/pgSQL', ('plpgsql',), (), ('text/x-plpgsql',)),
     'PointlessLexer': ('pygments.lexers.pointless', 'Pointless', ('pointless',), ('*.ptls',), ()),
     'PonyLexer': ('pygments.lexers.pony', 'Pony', ('pony',), ('*.pony',), ()),
+    'PortugolLexer': ('pygments.lexers.pascal', 'Portugol', ('portugol',), ('*.alg', '*.portugol'), ()),
     'PostScriptLexer': ('pygments.lexers.graphics', 'PostScript', ('postscript', 'postscr'), ('*.ps', '*.eps'), ('application/postscript',)),
     'PostgresConsoleLexer': ('pygments.lexers.sql', 'PostgreSQL console (psql)', ('psql', 'postgresql-console', 'postgres-console'), (), ('text/x-postgresql-psql',)),
     'PostgresLexer': ('pygments.lexers.sql', 'PostgreSQL SQL dialect', ('postgresql', 'postgres'), (), ('text/x-postgresql',)),

diff --git a/pygments/lexers/compiled.py b/pygments/lexers/compiled.py
@@ -16,7 +16,7 @@
 from pygments.lexers.go import GoLexer
 from pygments.lexers.rust import RustLexer
 from pygments.lexers.c_like import ECLexer, ValaLexer, CudaLexer
-from pygments.lexers.pascal import DelphiLexer, Modula2Lexer
+from pygments.lexers.pascal import DelphiLexer, PortugolLexer, Modula2Lexer
 from pygments.lexers.ada import AdaLexer
 from pygments.lexers.business import CobolLexer, CobolFreeformatLexer
 from pygments.lexers.fortran import FortranLexer

diff --git a/pygments/lexers/pascal.py b/pygments/lexers/pascal.py
@@ -19,7 +19,21 @@
 # compatibility import
 from pygments.lexers.modula2 import Modula2Lexer
 
-__all__ = ['DelphiLexer']
+__all__ = ['DelphiLexer', 'PortugolLexer']
+
+
+class PortugolLexer(Lexer):
+    name = 'Portugol'
+    aliases = ['portugol']
+    filenames = ['*.alg', '*.portugol']
+    mimetypes = []
+
+    def __init__(self, **options):
+        Lexer.__init__(self, **options)
+        self.lexer = DelphiLexer(**options, portugol=True)
+
+    def get_tokens_unprocessed(self, text):
+        return self.lexer.get_tokens_unprocessed(text)
 
 
 class DelphiLexer(Lexer):
@@ -296,18 +310,109 @@ class DelphiLexer(Lexer):
         'xlatb', 'xor'
     }
 
+    PORTUGOL_KEYWORDS = (
+        'aleatorio',
+        'algoritmo',
+        'arquivo',
+        'ate',
+        'caso',
+        'cronometro',
+        'debug',
+        'e',
+        'eco',
+        'enquanto',
+        'entao',
+        'escolha',
+        'escreva',
+        'escreval',
+        'faca',
+        'falso',
+        'fimalgoritmo',
+        'fimenquanto',
+        'fimescolha',
+        'fimfuncao',
+        'fimpara',
+        'fimprocedimento',
+        'fimrepita',
+        'fimse',
+        'funcao',
+        'inicio',
+        'int',
+        'interrompa',
+        'leia',
+        'limpatela',
+        'mod',
+        'nao',
+        'ou',
+        'outrocaso',
+        'para',
+        'passo',
+        'pausa',
+        'procedimento',
+        'repita',
+        'retorne',
+        'se',
+        'senao',
+        'timer',
+        'var',
+        'vetor',
+        'verdadeiro',
+        'xou',
+        'div',
+        'mod',
+        'abs',
+        'arccos',
+        'arcsen',
+        'arctan',
+        'cos',
+        'cotan',
+        'Exp',
+        'grauprad',
+        'int',
+        'log',
+        'logn',
+        'pi',
+        'quad',
+        'radpgrau',
+        'raizq',
+        'rand',
+        'randi',
+        'sen',
+        'Tan',
+        'asc',
+        'carac',
+        'caracpnum',
+        'compr',
+        'copia',
+        'maiusc',
+        'minusc',
+        'numpcarac',
+        'pos',
+    )
+
+    PORTUGOL_BUILTIN_TYPES = {
+        'inteiro', 'real', 'caractere', 'logico'
+    }
+
     def __init__(self, **options):
         Lexer.__init__(self, **options)
         self.keywords = set()
-        if get_bool_opt(options, 'turbopascal', True):
-            self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
-        if get_bool_opt(options, 'delphi', True):
-            self.keywords.update(self.DELPHI_KEYWORDS)
-        if get_bool_opt(options, 'freepascal', True):
-            self.keywords.update(self.FREE_PASCAL_KEYWORDS)
         self.builtins = set()
-        for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
-            self.builtins.update(self.BUILTIN_UNITS[unit])
+        if get_bool_opt(options, 'portugol', False):
+            self.keywords.update(self.PORTUGOL_KEYWORDS)
+            self.builtins.update(self.PORTUGOL_BUILTIN_TYPES)
+            self.is_portugol = True
+        else:
+            self.is_portugol = False
+
+            if get_bool_opt(options, 'turbopascal', True):
+                self.keywords.update(self.TURBO_PASCAL_KEYWORDS)
+            if get_bool_opt(options, 'delphi', True):
+                self.keywords.update(self.DELPHI_KEYWORDS)
+            if get_bool_opt(options, 'freepascal', True):
+                self.keywords.update(self.FREE_PASCAL_KEYWORDS)
+            for unit in get_list_opt(options, 'units', list(self.BUILTIN_UNITS)):
+                self.builtins.update(self.BUILTIN_UNITS[unit])
 
     def get_tokens_unprocessed(self, text):
         scanner = Scanner(text, re.DOTALL | re.MULTILINE | re.IGNORECASE)
@@ -327,14 +432,16 @@ def get_tokens_unprocessed(self, text):
             if stack[-1] == 'initial':
                 if scanner.scan(r'\s+'):
                     token = Whitespace
-                elif scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
+                elif not self.is_portugol and scanner.scan(r'\{.*?\}|\(\*.*?\*\)'):
                     if scanner.match.startswith('$'):
                         token = Comment.Preproc
                     else:
                         token = Comment.Multiline
                 elif scanner.scan(r'//.*?$'):
                     token = Comment.Single
-                elif scanner.scan(r'[-+*\/=<>:;,.@\^]'):
+                elif self.is_portugol and scanner.scan(r'(<\-)|(>=)|(<=)|%|<|>|-|\+|\*|\=|(<>)|\/|\.|:|,'):
+                    token = Operator
+                elif not self.is_portugol and scanner.scan(r'[-+*\/=<>:;,.@\^]'):
                     token = Operator
                     # stop label highlighting on next ";"
                     if collect_labels and scanner.match == ';':
@@ -364,35 +471,40 @@ def get_tokens_unprocessed(self, text):
                         # if we are in a special block and a
                         # block ending keyword occurs (and the parenthesis
                         # is balanced) we end the current block context
-                        if (in_function_block or in_property_block) and \
-                           lowercase_name in self.BLOCK_KEYWORDS and \
-                           brace_balance[0] <= 0 and \
-                           brace_balance[1] <= 0:
-                            in_function_block = False
-                            in_property_block = False
-                            brace_balance = [0, 0]
-                            block_labels = set()
-                        if lowercase_name in ('label', 'goto'):
-                            collect_labels = True
-                        elif lowercase_name == 'asm':
-                            stack.append('asm')
-                        elif lowercase_name == 'property':
-                            in_property_block = True
-                            next_token_is_property = True
-                        elif lowercase_name in ('procedure', 'operator',
-                                                'function', 'constructor',
-                                                'destructor'):
-                            in_function_block = True
-                            next_token_is_function = True
+                        if self.is_portugol:
+                            if lowercase_name in ('funcao', 'procedimento'):
+                                in_function_block = True
+                                next_token_is_function = True
+                        else:
+                            if (in_function_block or in_property_block) and \
+                                    lowercase_name in self.BLOCK_KEYWORDS and \
+                                    brace_balance[0] <= 0 and \
+                                    brace_balance[1] <= 0:
+                                in_function_block = False
+                                in_property_block = False
+                                brace_balance = [0, 0]
+                                block_labels = set()
+                            if lowercase_name in ('label', 'goto'):
+                                collect_labels = True
+                            elif lowercase_name == 'asm':
+                                stack.append('asm')
+                            elif lowercase_name == 'property':
+                                in_property_block = True
+                                next_token_is_property = True
+                            elif lowercase_name in ('procedure', 'operator',
+                                                    'function', 'constructor',
+                                                    'destructor'):
+                                in_function_block = True
+                                next_token_is_function = True
                     # we are in a function block and the current name
                     # is in the set of registered modifiers. highlight
                     # it as pseudo keyword
-                    elif in_function_block and \
+                    elif not self.is_portugol and in_function_block and \
                             lowercase_name in self.FUNCTION_MODIFIERS:
                         token = Keyword.Pseudo
                     # if we are in a property highlight some more
                     # modifiers
-                    elif in_property_block and \
+                    elif not self.is_portugol and in_property_block and \
                             lowercase_name in ('read', 'write'):
                         token = Keyword.Pseudo
                         next_token_is_function = True
@@ -403,40 +515,49 @@ def get_tokens_unprocessed(self, text):
                         # Look if the next token is a dot. If yes it's
                         # not a function, but a class name and the
                         # part after the dot a function name
-                        if scanner.test(r'\s*\.\s*'):
+                        if not self.is_portugol and scanner.test(r'\s*\.\s*'):
                             token = Name.Class
                         # it's not a dot, our job is done
                         else:
                             token = Name.Function
                             next_token_is_function = False
+
+                            if self.is_portugol:
+                                block_labels.add(scanner.match.lower())
+
                     # same for properties
-                    elif next_token_is_property:
+                    elif not self.is_portugol and next_token_is_property:
                         token = Name.Property
                         next_token_is_property = False
                     # Highlight this token as label and add it
                     # to the list of known labels
-                    elif collect_labels:
+                    elif not self.is_portugol and collect_labels:
                         token = Name.Label
                         block_labels.add(scanner.match.lower())
                     # name is in list of known labels
                     elif lowercase_name in block_labels:
                         token = Name.Label
-                    elif lowercase_name in self.BUILTIN_TYPES:
+                    elif self.is_portugol and lowercase_name in self.PORTUGOL_BUILTIN_TYPES:
+                        token = Keyword.Type
+                    elif not self.is_portugol and lowercase_name in self.BUILTIN_TYPES:
                         token = Keyword.Type
-                    elif lowercase_name in self.DIRECTIVES:
+                    elif not self.is_portugol and lowercase_name in self.DIRECTIVES:
                         token = Keyword.Pseudo
                     # builtins are just builtins if the token
                     # before isn't a dot
-                    elif not was_dot and lowercase_name in self.builtins:
+                    elif not self.is_portugol and not was_dot and lowercase_name in self.builtins:
                         token = Name.Builtin
                     else:
                         token = Name
-                elif scanner.scan(r"'"):
+                elif self.is_portugol and scanner.scan(r"\""):
+                    token = String
+                    stack.append('string')
+                elif not self.is_portugol and scanner.scan(r"'"):
                     token = String
                     stack.append('string')
-                elif scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
+                elif not self.is_portugol and scanner.scan(r'\#(\d+|\$[0-9A-Fa-f]+)'):
                     token = String.Char
-                elif scanner.scan(r'\$[0-9A-Fa-f]+'):
+                elif not self.is_portugol and scanner.scan(r'\$[0-9A-Fa-f]+'):
                     token = Number.Hex
                 elif scanner.scan(r'\d+(?![eE]|\.[^.])'):
                     token = Number.Integer
@@ -449,18 +570,29 @@ def get_tokens_unprocessed(self, text):
                     scanner.get_char()
 
             elif stack[-1] == 'string':
-                if scanner.scan(r"''"):
-                    token = String.Escape
-                elif scanner.scan(r"'"):
-                    token = String
-                    stack.pop()
-                elif scanner.scan(r"[^']*"):
-                    token = String
+                if self.is_portugol:
+                    if scanner.scan(r"''"):
+                        token = String.Escape
+                    elif scanner.scan(r"\""):
+                        token = String
+                        stack.pop()
+                    elif scanner.scan(r"[^\"]*"):
+                        token = String
+                    else:
+                        scanner.get_char()
+                        stack.pop()
                 else:
-                    scanner.get_char()
-                    stack.pop()
-
-            elif stack[-1] == 'asm':
+                    if scanner.scan(r"''"):
+                        token = String.Escape
+                    elif scanner.scan(r"'"):
+                        token = String
+                        stack.pop()
+                    elif scanner.scan(r"[^']*"):
+                        token = String
+                    else:
+                        scanner.get_char()
+                        stack.pop()
+            elif not self.is_portugol and stack[-1] == 'asm':
                 if scanner.scan(r'\s+'):
                     token = Whitespace
                 elif scanner.scan(r'end'):
@@ -501,6 +633,7 @@ def get_tokens_unprocessed(self, text):
                     stack.pop()
 
             # save the dot!!!11
-            if scanner.match.strip():
+            if not self.is_portugol and scanner.match.strip():
                 was_dot = scanner.match == '.'
+
             yield scanner.start_pos, token, scanner.match or ''