Skip to content

Commit

Permalink
Update javascript lexer (#1814)
Browse files Browse the repository at this point in the history
* make ts lexer extend js lexer

* add regex's d flag for js lexers

cf. https://v8.dev/features/regexp-match-indices

* update js builtins, operators, exceptions

* fixup! update js builtins, operators, exceptions

* add typescript override keyword

* Update _mapping.py
  • Loading branch information
Mesteery committed Jul 18, 2021
1 parent 6d9b79e commit fe572dc
Show file tree
Hide file tree
Showing 7 changed files with 263 additions and 133 deletions.
2 changes: 1 addition & 1 deletion pygments/lexers/_mapping.py
Expand Up @@ -471,7 +471,7 @@
'TurtleLexer': ('pygments.lexers.rdf', 'Turtle', ('turtle',), ('*.ttl',), ('text/turtle', 'application/x-turtle')),
'TwigHtmlLexer': ('pygments.lexers.templates', 'HTML+Twig', ('html+twig',), ('*.twig',), ('text/html+twig',)),
'TwigLexer': ('pygments.lexers.templates', 'Twig', ('twig',), (), ('application/x-twig',)),
'TypeScriptLexer': ('pygments.lexers.javascript', 'TypeScript', ('typescript', 'ts'), ('*.ts', '*.tsx'), ('text/x-typescript',)),
'TypeScriptLexer': ('pygments.lexers.javascript', 'TypeScript', ('typescript', 'ts'), ('*.ts',), ('application/x-typescript', 'text/x-typescript')),
'TypoScriptCssDataLexer': ('pygments.lexers.typoscript', 'TypoScriptCssData', ('typoscriptcssdata',), (), ()),
'TypoScriptHtmlDataLexer': ('pygments.lexers.typoscript', 'TypoScriptHtmlData', ('typoscripthtmldata',), (), ()),
'TypoScriptLexer': ('pygments.lexers.typoscript', 'TypoScript', ('typoscript',), ('*.typoscript',), ('text/x-typoscript',)),
Expand Down
199 changes: 77 additions & 122 deletions pygments/lexers/javascript.py
Expand Up @@ -10,16 +10,16 @@

import re

from pygments.lexer import RegexLexer, include, bygroups, default, using, \
from pygments.lexer import RegexLexer, include, bygroups, default, inherit, using, \
this, words, combined
from pygments.token import Text, Comment, Operator, Keyword, Name, String, \
Number, Punctuation, Other
from pygments.util import get_bool_opt
import pygments.unistring as uni

__all__ = ['JavascriptLexer', 'KalLexer', 'LiveScriptLexer', 'DartLexer',
'TypeScriptLexer', 'LassoLexer', 'ObjectiveJLexer',
'CoffeeScriptLexer', 'MaskLexer', 'EarlGreyLexer', 'JuttleLexer']
__all__ = ['JavascriptLexer', 'TypeScriptLexer', 'KalLexer', 'LiveScriptLexer',
'DartLexer', 'LassoLexer', 'ObjectiveJLexer', 'CoffeeScriptLexer',
'MaskLexer', 'EarlGreyLexer', 'JuttleLexer']

JS_IDENT_START = ('(?:[$_' + uni.combine('Lu', 'Ll', 'Lt', 'Lm', 'Lo', 'Nl') +
']|\\\\u[a-fA-F0-9]{4})')
Expand Down Expand Up @@ -52,7 +52,7 @@ class JavascriptLexer(RegexLexer):
'slashstartsregex': [
include('commentsandwhitespace'),
(r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gimuys]+\b|\B)', String.Regex, '#pop'),
r'([gimuysd]+\b|\B)', String.Regex, '#pop'),
(r'(?=/)', Text, ('#pop', 'badregex')),
default('#pop')
],
Expand All @@ -75,23 +75,43 @@ class JavascriptLexer(RegexLexer):
(r'(\.[0-9]+|[0-9]+\.[0-9]*|[0-9]+)([eE][-+]?[0-9]+)?', Number.Float),

(r'\.\.\.|=>', Punctuation),
(r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
(r'\+\+|--|~|\?\?=?|\?|:|\\(?=\n)|'
r'(<<|>>>?|==?|!=?|(?:\*\*|\|\||&&|[-<>+*%&|^/]))=?', Operator, 'slashstartsregex'),
(r'[{(\[;,]', Punctuation, 'slashstartsregex'),
(r'[})\].]', Punctuation),

(r'(typeof|instanceof|in|void|delete|new)\b', Operator.Word, 'slashstartsregex'),

# Match stuff like: constructor
(r'\b(constructor|from|as)\b', Keyword.Reserved),

(r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
r'throw|try|catch|finally|new|delete|typeof|instanceof|void|yield|await|async|'
r'this|of|static|export|import|from|as|debugger|extends|super)\b', Keyword, 'slashstartsregex'),
r'throw|try|catch|finally|yield|await|async|this|of|static|export|'
r'import|debugger|extends|super)\b', Keyword, 'slashstartsregex'),
(r'(var|let|const|with|function|class)\b', Keyword.Declaration, 'slashstartsregex'),
(r'(abstract|boolean|byte|char|double|enum|final|float|goto'
r'implements|int|interface|long|native|package|private|protected'

(r'(abstract|boolean|byte|char|double|enum|final|float|goto|'
r'implements|int|interface|long|native|package|private|protected|'
r'public|short|synchronized|throws|transient|volatile)\b', Keyword.Reserved),
(r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
(r'(Array|Boolean|Date|BigInt|Error|Function|Math|'

(r'(Array|Boolean|Date|BigInt|Function|Math|ArrayBuffer|'
r'Number|Object|RegExp|String|Promise|Proxy|decodeURI|'
r'decodeURIComponent|encodeURI|encodeURIComponent|'
r'Error|eval|isFinite|isNaN|isSafeInteger|parseFloat|parseInt|'
r'document|this|window|globalThis|Symbol)\b', Name.Builtin),
r'eval|isFinite|isNaN|parseFloat|parseInt|DataView|'
r'document|window|globalThis|global|Symbol|Intl|'
r'WeakSet|WeakMap|Set|Map|Reflect|JSON|Atomics|'
r'Int(?:8|16|32)Array|BigInt64Array|Float32Array|Float64Array|'
r'Uint8ClampedArray|Uint(?:8|16|32)Array|BigUint64Array)\b', Name.Builtin),

(r'((?:Eval|Internal|Range|Reference|Syntax|Type|URI)?Error)\b', Name.Exception),

# Match stuff like: super(argument, list)
(r'(super)(\s*)(\([\w,?.$\s]+\s*\))',
bygroups(Keyword, Text), 'slashstartsregex'),
# Match stuff like: function() {...}
(r'([a-zA-Z_?.$][\w?.$]*)(?=\(\) \{)', Name.Other, 'slashstartsregex'),

(JS_IDENT, Name.Other),
(r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
(r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
Expand All @@ -112,6 +132,43 @@ class JavascriptLexer(RegexLexer):
}


class TypeScriptLexer(JavascriptLexer):
"""
For `TypeScript <http://typescriptlang.org/>`_ source code.
.. versionadded:: 1.6
"""

# Lexer registration metadata (display name, lookup aliases, filename
# globs and MIME types).
# NOTE(review): only '*.ts' is listed as a filename glob; confirm that
# leaving out '*.tsx' is intentional.
name = 'TypeScript'
aliases = ['typescript', 'ts']
filenames = ['*.ts']
mimetypes = ['application/x-typescript', 'text/x-typescript']

# Higher priority than the TypoScriptLexer, as TypeScript is far more
# common these days
priority = 0.5

# Only the 'root' state is overridden. The TypeScript-specific rules are
# listed first so they are tried before the JavaScript ones; the trailing
# `inherit` marker pulls in the 'root' rules of JavascriptLexer at that
# position. Every other state (e.g. 'slashstartsregex') comes from the
# parent class unchanged.
tokens = {
'root': [
# Modifier keywords. 'slashstartsregex' is pushed so that a following
# '/' is first tried as the start of a regex literal.
(r'(abstract|implements|private|protected|public|readonly)\b',
Keyword, 'slashstartsregex'),
# Keywords that introduce (or qualify) a declaration
(r'(enum|interface|override)\b', Keyword.Declaration, 'slashstartsregex'),
# Remaining TypeScript-only keywords
(r'\b(declare|type)\b', Keyword.Reserved),
# Match variable type keywords
(r'\b(string|boolean|number)\b', Keyword.Type),
# Match stuff like: module name {...}
(r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)',
bygroups(Keyword.Reserved, Text, Name.Other, Text), 'slashstartsregex'),
# Match stuff like: (function: return type)
(r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)',
bygroups(Name.Other, Text, Keyword.Type)),
# Match stuff like: Decorators
(r'@' + JS_IDENT, Keyword.Declaration),
# Fall through to the JavaScript 'root' rules
inherit,
],
}


class KalLexer(RegexLexer):
"""
For `Kal`_ source code.
Expand Down Expand Up @@ -157,7 +214,7 @@ class KalLexer(RegexLexer):
'root': [
include('commentsandwhitespace'),
(r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gimuys]+\b|\B)', String.Regex),
r'([gimuysd]+\b|\B)', String.Regex),
(r'\?|:|_(?=\n)|==?|!=|-(?!>)|[<>+*/-]=?',
Operator),
(r'\b(and|or|isnt|is|not|but|bitwise|mod|\^|xor|exists|'
Expand Down Expand Up @@ -250,15 +307,15 @@ class LiveScriptLexer(RegexLexer):
],
'multilineregex': [
include('commentsandwhitespace'),
(r'//([gimuys]+\b|\B)', String.Regex, '#pop'),
(r'//([gimuysd]+\b|\B)', String.Regex, '#pop'),
(r'/', String.Regex),
(r'[^/#]+', String.Regex)
],
'slashstartsregex': [
include('commentsandwhitespace'),
(r'//', String.Regex, ('#pop', 'multilineregex')),
(r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gimuys]+\b|\B)', String.Regex, '#pop'),
r'([gimuysd]+\b|\B)', String.Regex, '#pop'),
(r'/', Operator, '#pop'),
default('#pop'),
],
Expand Down Expand Up @@ -441,108 +498,6 @@ class DartLexer(RegexLexer):
}


class TypeScriptLexer(RegexLexer):
"""
For `TypeScript <http://typescriptlang.org/>`_ source code.
.. versionadded:: 1.6
"""

# Lexer registration metadata (display name, lookup aliases, filename
# globs and MIME types).
name = 'TypeScript'
aliases = ['typescript', 'ts']
filenames = ['*.ts', '*.tsx']
mimetypes = ['text/x-typescript']

# DOTALL lets '.' span newlines (the '/\*.*?\*/' comment rule relies on
# it); MULTILINE lets '^' match at the start of every line (used by the
# first 'root' rule).
flags = re.DOTALL | re.MULTILINE

# Higher priority than the TypoScriptLexer, as TypeScript is far more
# common these days
priority = 0.5

# State machine. This class defines every state itself; the rules are
# tried in the order listed, so earlier rules shadow later ones.
tokens = {
# Whitespace and the comment forms; included by other states.
'commentsandwhitespace': [
(r'\s+', Text),
(r'<!--', Comment),
(r'//.*?\n', Comment.Single),
(r'/\*.*?\*/', Comment.Multiline)
],
# Pushed after tokens that may be followed by a regex literal: try to
# match a complete /regex/flags, otherwise pop back (via 'badregex' if a
# '/' is next, or silently through default()).
'slashstartsregex': [
include('commentsandwhitespace'),
(r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gimuys]+\b|\B)', String.Regex, '#pop'),
(r'(?=/)', Text, ('#pop', 'badregex')),
default('#pop')
],
# Entered when a '/' could not be lexed as a regex; pops at a newline.
'badregex': [
(r'\n', Text, '#pop')
],
'root': [
# At the start of a line that begins with whitespace, '/' or '<!--',
# allow a regex literal.
(r'^(?=\s|/|<!--)', Text, 'slashstartsregex'),
include('commentsandwhitespace'),
# Operators
(r'\+\+|--|~|&&|\?|:|\|\||\\(?=\n)|'
r'(<<|>>>?|==?|!=?|[-<>+*%&|^/])=?', Operator, 'slashstartsregex'),
# Opening / separating punctuation may be followed by a regex literal;
# closing punctuation and '.' may not.
(r'[{(\[;,]', Punctuation, 'slashstartsregex'),
(r'[})\].]', Punctuation),
# Statement and modifier keywords
(r'(for|in|while|do|break|return|continue|switch|case|default|if|else|'
r'throw|try|catch|finally|new|delete|typeof|instanceof|void|of|'
r'this|async|await|debugger|yield|abstract|static|import|export|'
r'from|implements|super|extends|private|protected|public|readonly)\b',
Keyword, 'slashstartsregex'),
# Declaration keywords
(r'(var|let|const|with|function|class|type|enum|interface)\b',
Keyword.Declaration, 'slashstartsregex'),
# Reserved words
(r'(boolean|byte|char|double|final|float|goto|int|long|native|'
r'package|short|synchronized|throws|transient|volatile)\b', Keyword.Reserved),
(r'(true|false|null|NaN|Infinity|undefined)\b', Keyword.Constant),
# Built-in globals.
# NOTE(review): 'Error' appears twice in this alternation, and 'this' is
# already matched by the Keyword rule above, so it cannot match here.
(r'(Array|Boolean|Date|Error|Function|Math|'
r'Number|Object|RegExp|String|decodeURI|'
r'decodeURIComponent|encodeURI|encodeURIComponent|'
r'Error|eval|isFinite|isNaN|parseFloat|parseInt|document|this|'
r'window|globalThis|Symbol|BigInt)\b', Name.Builtin),
# Match stuff like: module name {...}
(r'\b(module)(\s*)(\s*[\w?.$][\w?.$]*)(\s*)',
bygroups(Keyword.Reserved, Text, Name.Other, Text), 'slashstartsregex'),
# Match variable type keywords
# NOTE(review): this lists 'bool', while 'boolean' is matched earlier as
# Keyword.Reserved -- presumably 'boolean' was intended here; confirm.
(r'\b(string|bool|number)\b', Keyword.Type),
# Match stuff like: constructor
# NOTE(review): 'interface' is already matched by the Keyword.Declaration
# rule above, so that alternative cannot match here.
(r'\b(constructor|declare|interface|as)\b', Keyword.Reserved),
# Match stuff like: super(argument, list)
# NOTE(review): the pattern has three groups but bygroups() receives only
# two actions, and 'super' is already matched by the Keyword rule above,
# so this rule looks unreachable -- confirm.
(r'(super)(\s*)(\([\w,?.$\s]+\s*\))',
bygroups(Keyword.Reserved, Text), 'slashstartsregex'),
# Match stuff like: function() {...}
(r'([a-zA-Z_?.$][\w?.$]*)(?=\(\) \{)', Name.Other, 'slashstartsregex'),
# Match stuff like: (function: return type)
(r'([\w?.$][\w?.$]*)(\s*:\s*)([\w?.$][\w?.$]*)',
bygroups(Name.Other, Text, Keyword.Type)),
# Plain identifiers
(r'[$a-zA-Z_]\w*', Name.Other),
# Numeric literals; a trailing 'n' is accepted on the integer forms
(r'0[bB][01]+n?', Number.Bin),
(r'0[oO]?[0-7]+n?', Number.Oct), # Browsers support "0o7" and "07" (< ES5) notations
(r'0[xX][0-9a-fA-F]+n?', Number.Hex),
(r'[0-9]+n', Number.Integer),
(r'(\.[0-9]+|[0-9]+\.[0-9]*|[0-9]+)([eE][-+]?[0-9]+)?', Number.Float),
# String literals; a backtick opens a template string ('interp' state)
(r'"(\\\\|\\[^\\]|[^"\\])*"', String.Double),
(r"'(\\\\|\\[^\\]|[^'\\])*'", String.Single),
(r'`', String.Backtick, 'interp'),
# Match stuff like: Decorators
(r'@\w+', Keyword.Declaration),
],

# The 'interp*' rules match those in JavascriptLexer. Changes made
# there should be reflected here as well.
'interp': [
(r'`', String.Backtick, '#pop'),
(r'\\.', String.Backtick),
(r'\$\{', String.Interpol, 'interp-inside'),
(r'\$', String.Backtick),
(r'[^`\\$]+', String.Backtick),
],
# Inside '${ ... }': lex as normal code until the closing '}'.
'interp-inside': [
# TODO: should this include single-line comments and allow nesting strings?
(r'\}', String.Interpol, '#pop'),
include('root'),
],
}


class LassoLexer(RegexLexer):
"""
For `Lasso <http://www.lassosoft.com/>`_ source code, covering both Lasso 9
Expand Down Expand Up @@ -1051,15 +1006,15 @@ class CoffeeScriptLexer(RegexLexer):
],
'multilineregex': [
(r'[^/#]+', String.Regex),
(r'///([gimuys]+\b|\B)', String.Regex, '#pop'),
(r'///([gimuysd]+\b|\B)', String.Regex, '#pop'),
(r'#\{', String.Interpol, 'interpoling_string'),
(r'[/#]', String.Regex),
],
'slashstartsregex': [
include('commentsandwhitespace'),
(r'///', String.Regex, ('#pop', 'multilineregex')),
(r'/(?! )(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gimuys]+\b|\B)', String.Regex, '#pop'),
r'([gimuysd]+\b|\B)', String.Regex, '#pop'),
# This isn't really guarding against mishighlighting well-formed
# code, just the ability to infinite-loop between root and
# slashstartsregex.
Expand Down Expand Up @@ -1493,7 +1448,7 @@ class JuttleLexer(RegexLexer):
'slashstartsregex': [
include('commentsandwhitespace'),
(r'/(\\.|[^[/\\\n]|\[(\\.|[^\]\\\n])*])+/'
r'([gimuys]+\b|\B)', String.Regex, '#pop'),
r'([gimuysd]+\b|\B)', String.Regex, '#pop'),
(r'(?=/)', Text, ('#pop', 'badregex')),
default('#pop')
],
Expand Down
2 changes: 1 addition & 1 deletion tests/examplefiles/duel/jbst_example1.jbst.output

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Expand Up @@ -39,8 +39,34 @@ yield;
export class Node {
}

class A {
constructor() {
super()
}

constructor(test) {
super(test);
}
}

isFinite();
isNaN();
isSafeInteger();
x = new Promise(...a);
x = new Proxy(...a);

x ??= 1;
x &&= 2 ?? 3;
x **= 2**3|2&4;
x ||= 2;

throw new Error();
throw new TypeError();

new Uint8ClampedArray();
new DataView();
new Map();
new WeakMap();

Intl.DateTimeFormat();

globalThis = window = global = this;

0 comments on commit fe572dc

Please sign in to comment.