This repository has been archived by the owner on Sep 8, 2023. It is now read-only.
forked from rouge-ruby/rouge
-
Notifications
You must be signed in to change notification settings - Fork 0
/
python.rb
249 lines (212 loc) · 7.38 KB
/
python.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
# -*- coding: utf-8 -*- #
# frozen_string_literal: true
module Rouge
module Lexers
class Python < RegexLexer
# Lexer metadata, declared through class-level DSL calls (the DSL methods
# themselves are defined outside this file).
# Human-readable title and one-line description.
title "Python"
desc "The Python programming language (python.org)"
# Primary tag plus the short alias `py`.
tag 'python'
aliases 'py'
# File globs and exact file names associated with this lexer. Besides
# `*.py`/`*.pyw` this lists several build-tool file names (SConstruct,
# SConscript, BUCK, BUILD, BUILD.bazel, WORKSPACE) and the `*.sc`, `*.tac`
# and `*.bzl` extensions.
filenames '*.py', '*.pyw', '*.sc', 'SConstruct', 'SConscript', '*.tac',
'*.bzl', 'BUCK', 'BUILD', 'BUILD.bazel', 'WORKSPACE'
# MIME types associated with this lexer.
mimetypes 'text/x-python', 'application/x-python'
# Content-sniffing hook: claims the text when `text.shebang?` accepts an
# interpreter pattern of `python` or `pythonw`, optionally followed by a
# 2.x/3.x version (e.g. `python3`, `python2.7`).
#
# @param text [#shebang?] object responding to `shebang?(pattern)`
# @return [true, nil] `true` when the pattern is accepted, otherwise `nil`
def self.detect?(text)
  true if text.shebang?(/pythonw?(?:[23](?:\.\d+)?)?/)
end
# Reserved words that the identifier rule in `:root` tags as `Keyword`.
#
# The list is built once and memoized on the class. Every word is listed
# exactly once, so the array can be used directly for counting or for
# building lookup structures.
#
# @return [Array<String>] the keyword list
def self.keywords
  @keywords ||= %w(
    assert break continue del elif else except exec
    finally for global if lambda pass print raise
    return try while yield as with from import
    async await nonlocal
  )
end
# Names of built-in functions and types (Python 2 and Python 3 names are
# both present). The identifier rule in `:root` tags them as `Name::Builtin`.
# The list is built once and memoized on the class.
#
# @return [Array<String>] the built-in names, in alphabetical order
def self.builtins
  @builtins ||= %w[
    __import__
    abs all any apply ascii
    basestring bin bool buffer bytearray bytes
    callable chr classmethod cmp coerce compile complex
    delattr dict dir divmod
    enumerate eval execfile exit
    file filter float format frozenset
    getattr globals
    hasattr hash hex
    id input int intern isinstance issubclass iter
    len list locals long
    map max memoryview min
    next
    object oct open ord
    pow property
    range raw_input reduce reload repr reversed round
    set setattr slice sorted staticmethod str sum super
    tuple type
    unichr unicode
    vars
    xrange
    zip
  ]
end
# Names that the identifier rule in `:root` tags as `Name::Builtin::Pseudo`:
# `self` plus the built-in constants. Built once and memoized on the class.
#
# @return [Array<String>] the pseudo-builtin names
def self.builtins_pseudo
  @builtins_pseudo ||= %w[
    self None Ellipsis NotImplemented False True
  ]
end
# Names of the built-in exception and warning classes (Python 2 and
# Python 3 names are both present). The identifier rule in `:root` tags
# them as `Name::Builtin`. Built once and memoized on the class.
#
# `NotImplemented` is deliberately absent: it is a constant, not an
# exception class, and it is listed in `builtins_pseudo`. Because `:root`
# consults this list before `builtins_pseudo`, listing it here would keep
# it from ever being tagged as a pseudo-builtin.
#
# @return [Array<String>] the exception names, in alphabetical order
def self.exceptions
  @exceptions ||= %w(
    ArithmeticError AssertionError AttributeError
    BaseException BlockingIOError BrokenPipeError BufferError
    BytesWarning ChildProcessError ConnectionAbortedError
    ConnectionError ConnectionRefusedError ConnectionResetError
    DeprecationWarning EOFError EnvironmentError
    Exception FileExistsError FileNotFoundError
    FloatingPointError FutureWarning GeneratorExit IOError
    ImportError ImportWarning IndentationError IndexError
    InterruptedError IsADirectoryError KeyError KeyboardInterrupt
    LookupError MemoryError ModuleNotFoundError NameError
    NotADirectoryError NotImplementedError OSError
    OverflowError OverflowWarning PendingDeprecationWarning
    PermissionError ProcessLookupError RecursionError ReferenceError
    ResourceWarning RuntimeError RuntimeWarning StandardError
    StopAsyncIteration StopIteration SyntaxError SyntaxWarning
    SystemError SystemExit TabError TimeoutError TypeError
    UnboundLocalError UnicodeDecodeError UnicodeEncodeError UnicodeError
    UnicodeTranslateError UnicodeWarning UserWarning ValueError VMSError
    Warning WindowsError ZeroDivisionError
  )
end
# A bare name: a letter or underscore followed by any number of letters,
# digits or underscores (the /i flag makes the a-z ranges case-insensitive).
identifier = /[a-z_][a-z0-9_]*/i
# Like `identifier`, but a dot is accepted at every position (including the
# first), so it also matches dotted paths such as `os.path`.
dotted_identifier = /[a-z_.][a-z0-9_.]*/i
# Returns the register that tracks the string literals currently being
# lexed, creating it on first use and reusing it afterwards.
#
# @return [StringRegister] the register held by this lexer instance
def current_string
  return @string_register if @string_register

  @string_register = StringRegister.new
end
# Top-level lexing state. Rules are tried from top to bottom and the first
# one that matches wins, so the ordering below is significant.
state :root do
  rule %r/\n+/m, Text

  # A triple-quoted string that follows a line-leading colon is tagged as a
  # docstring.
  rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do
    groups Punctuation, Text, Str::Doc
  end

  rule %r/[^\S\n]+/, Text
  rule %r(#(.*)?\n?), Comment::Single
  rule %r/[\[\]{}:(),;.]/, Punctuation
  rule %r/\\\n/, Text
  rule %r/\\/, Text

  # Decorators have to be tried before the operator rules: the
  # single-character operator rule below also accepts `@` (matrix
  # multiplication), so placed after it this rule could never match.
  # The negative lookbehind leaves `@` to the operator rule when it directly
  # follows an operand (a word character, `)` or `]`), so an unspaced `a@b`
  # is still an operator followed by a name.
  rule %r/(?<![\w)\]])@#{dotted_identifier}/i, Name::Decorator

  rule %r/(in|is|and|or|not)\b/, Operator::Word
  rule %r/(<<|>>|\/\/|\*\*)=?/, Operator
  rule %r/[-~+\/*%=<>&^|@]=?|!=/, Operator

  # `from <module> import`: whitespace may include backslash continuations.
  rule %r/(from)((?:\\\s|\s)+)(#{dotted_identifier})((?:\\\s|\s)+)(import)/ do
    groups Keyword::Namespace,
           Text,
           Name::Namespace,
           Text,
           Keyword::Namespace
  end

  rule %r/(import)(\s+)(#{dotted_identifier})/ do
    groups Keyword::Namespace, Text, Name::Namespace
  end

  # `def` / `class` hand the following identifier to a dedicated state so
  # it is tagged as a function or class name.
  rule %r/(def)((?:\s|\\\s)+)/ do
    groups Keyword, Text
    push :funcname
  end

  rule %r/(class)((?:\s|\\\s)+)/ do
    groups Keyword, Text
    push :classname
  end

  # TODO: not in python 3
  rule %r/`.*?`/, Str::Backtick

  # Opening of a string literal: remember its prefix letters and delimiter
  # so :generic_string knows which quote closes it and whether it is a raw
  # or formatted string.
  rule %r/([rfbu]{0,2})('''|"""|['"])/i do |m|
    token Str
    current_string.register type: m[1].downcase, delim: m[2]
    push :generic_string
  end

  # using negative lookbehind so we don't match property names
  rule %r/(?<!\.)#{identifier}/ do |m|
    if self.class.keywords.include? m[0]
      token Keyword
    elsif self.class.exceptions.include? m[0]
      token Name::Builtin
    elsif self.class.builtins.include? m[0]
      token Name::Builtin
    elsif self.class.builtins_pseudo.include? m[0]
      token Name::Builtin::Pseudo
    else
      token Name
    end
  end

  # Identifiers directly after a dot (attribute access) are plain names.
  rule identifier, Name

  # Numeric literals; `_` is accepted between digits.
  digits = /[0-9](_?[0-9])*/
  decimal = /((#{digits})?\.#{digits}|#{digits}\.)/
  exponent = /e[+-]?#{digits}/i

  # Floats and imaginary literals (optional trailing `j`).
  rule %r/#{decimal}(#{exponent})?j?/i, Num::Float
  rule %r/#{digits}#{exponent}j?/i, Num::Float
  rule %r/#{digits}j/i, Num::Float

  rule %r/0b(_?[0-1])+/i, Num::Bin
  rule %r/0o(_?[0-7])+/i, Num::Oct
  rule %r/0x(_?[a-f0-9])+/i, Num::Hex
  # Integer with an `L` suffix.
  rule %r/\d+L/, Num::Integer::Long
  rule %r/([1-9](_?[0-9])*|0(_?0)*)/, Num::Integer
end
# Pushed by the `def` rule in :root: the next identifier is emitted as the
# function name and the lexer returns to the previous state.
state :funcname do
  rule identifier do
    token Name::Function
    pop!
  end
end
# Pushed by the `class` rule in :root: the next identifier is emitted as the
# class name and the lexer returns to the previous state.
state :classname do
  rule identifier do
    token Name::Class
    pop!
  end
end
# Statement-level state: tags `from`, `raise` and `yield` as keywords, pops
# at the end of the statement, and otherwise lexes like :root.
# NOTE(review): no rule in the visible source pushes :raise - confirm whether
# this state is still reachable.
state :raise do
rule %r/from\b/, Keyword
rule %r/raise\b/, Keyword
rule %r/yield\b/, Keyword
# A newline or `;` ends the statement and returns to the previous state.
rule %r/\n/, Text, :pop!
rule %r/;/, Punctuation, :pop!
# Everything else is handled by the :root rules; the mixin is listed last so
# the rules above are tried first.
mixin :root
end
# Alias state: contains exactly the rules of :raise via the mixin.
# NOTE(review): no rule in the visible source pushes :yield - confirm whether
# this state is still reachable.
state :yield do
mixin :raise
end
# Body of a string literal. The literal's prefix letters and opening
# delimiter were recorded in `current_string` by the rule that pushed this
# state.
state :generic_string do
  # Plain content: anything except quotes, backslashes and `{`.
  rule %r/[^'"\\{]+/, Str
  # A doubled opening brace is emitted as ordinary string text.
  rule %r/{{/, Str

  # Every quote run is string text; only a run equal to the recorded opening
  # delimiter closes the literal and leaves this state.
  rule %r/'''|"""|['"]/ do |m|
    closes_literal = current_string.delim?(m[0])
    token Str
    current_string.remove if closes_literal
    pop! if closes_literal
  end

  # Zero-width lookahead: the backslash itself is consumed in
  # :generic_escape.
  rule %r/(?=\\)/, Str, :generic_escape

  # A single `{` starts an interpolation only when the literal's prefix
  # contains "f"; otherwise it is plain string text.
  rule %r/{/ do
    interpolating = current_string.type?("f")
    token(interpolating ? Str::Interpol : Str)
    push :generic_interpol if interpolating
  end
end
# Consumes one backslash sequence inside a string literal and then pops.
# :generic_string pushes this state from a zero-width lookahead, so the
# backslash is still unconsumed on entry.
state :generic_escape do
# Recognized escapes: single-character escapes, backslash-newline, \N{name},
# \uXXXX, \UXXXXXXXX, \xXX and one to three octal digits.
# NOTE(review): the `newline` alternative matches that literal word, but `n`
# is already accepted by the character class before it, so the branch looks
# unreachable - confirm.
rule %r(\\
( [\\abfnrtv"']
| \n
| newline
| N{[a-zA-Z][a-zA-Z ]+[a-zA-Z]}
| u[a-fA-F0-9]{4}
| U[a-fA-F0-9]{8}
| x[a-fA-F0-9]{2}
| [0-7]{1,3}
)
)x do
# When the current literal's prefix contains "r" the sequence is emitted as
# plain Str; otherwise it is highlighted as Str::Escape.
token (current_string.type?("r") ? Str : Str::Escape)
pop!
end
# Any other backslash + character pair is emitted as plain string text.
rule %r/\\./, Str, :pop!
end
# Inside the braces of a formatted string. Nested braces push this state
# again, so each `}` closes only its own level.
state :generic_interpol do
  # Everything between braces is handed to `recurse`.
  rule(%r/[^{}]+/) { |m| recurse m[0] }

  rule %r/{/ do
    token Str::Interpol
    push :generic_interpol
  end

  rule %r/}/ do
    token Str::Interpol
    pop!
  end
end
# Stack of the string literals currently being lexed. Each entry is a
# `[type, delim]` pair (prefix letters and opening quote sequence); the last
# entry describes the innermost open literal.
class StringRegister < Array
  # @param delim [String] a quote sequence
  # @return [Boolean] whether it equals the delimiter of the innermost literal
  def delim?(delim)
    last.last == delim
  end

  # Records a newly opened literal on top of the stack.
  #
  # @param type [String] prefix letters of the literal
  # @param delim [String] opening quote sequence
  # @return [StringRegister] the register itself
  def register(type: "u", delim: "'")
    self << [type, delim]
  end

  # Drops the innermost literal.
  #
  # @return [Array, nil] the removed `[type, delim]` pair
  def remove
    pop
  end

  # @param type [String] a prefix letter such as "r" or "f"
  # @return [Boolean] whether the innermost literal's prefix contains it
  def type?(type)
    last.first.include?(type)
  end
end
private_constant :StringRegister
end
end
end