From feeab321e2422a92565c360324f8d113e978c26c Mon Sep 17 00:00:00 2001 From: Niko Strijbol Date: Mon, 27 Apr 2020 14:54:18 +0200 Subject: [PATCH 1/2] Allow unicode in Python identifiers The official specification is located at https://docs.python.org/3/reference/lexical_analysis.html#identifiers Note that this is not yet 100% conforming, as some rare Unicode characters won't be recognized, but this should be sufficient for most purposes.[1] [1]: https://stackoverflow.com/questions/5474008 --- lib/rouge/lexers/python.rb | 4 ++-- spec/visual/samples/python | 5 +++++ 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index 6eeea88655..768ab11257 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -67,8 +67,8 @@ def self.exceptions ) end - identifier = /[a-z_][a-z0-9_]*/i - dotted_identifier = /[a-z_.][a-z0-9_.]*/i + identifier = /[[:alpha:]_][[:alpha:][:digit:]_]*/i + dotted_identifier = /[[:alpha:]_.][[:alpha:][:digit:]_.]*/i def current_string @string_register ||= StringRegister.new diff --git a/spec/visual/samples/python b/spec/visual/samples/python index 8331581809..7a59935014 100644 --- a/spec/visual/samples/python +++ b/spec/visual/samples/python @@ -150,3 +150,8 @@ x @= y f'{hello} world {int(x) + 1}' f'{{ {4*10} }}' f'result: {value:{width}.{precision}}' + +# Unicode identifiers +α = 10 +def coöperative(б): + return f"{б} is Russian" \ No newline at end of file From 49d411251457933b71c3662dc21e7b86033eef35 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 12 May 2020 18:27:30 +0900 Subject: [PATCH 2/2] Simplify POSIX bracket expressions --- lib/rouge/lexers/python.rb | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index 768ab11257..01b826b624 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -67,8 +67,8 @@ def self.exceptions ) end - identifier = 
/[[:alpha:]_][[:alpha:][:digit:]_]*/i - dotted_identifier = /[[:alpha:]_.][[:alpha:][:digit:]_.]*/i + identifier = /[[:alpha:]_][[:alnum:]_]*/ + dotted_identifier = /[[:alpha:]_.][[:alnum:]_.]*/ def current_string @string_register ||= StringRegister.new