From 9f1f582fc13d16d7c02d83d6fbc61ddb05c1e3d5 Mon Sep 17 00:00:00 2001
From: Niko Strijbol
Date: Tue, 12 May 2020 11:45:01 +0200
Subject: [PATCH] Allow Unicode in Python identifiers (#1510)

Python supports the use of Unicode in identifiers. This commit uses POSIX
bracket expressions that match against Unicode characters rather than the
more common character classes that only match ASCII characters.

Co-authored-by: Michael Camilleri
---
 lib/rouge/lexers/python.rb | 4 ++--
 spec/visual/samples/python | 5 +++++
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb
index 6eeea88655..01b826b624 100644
--- a/lib/rouge/lexers/python.rb
+++ b/lib/rouge/lexers/python.rb
@@ -67,8 +67,8 @@ def self.exceptions
         )
       end
 
-      identifier = /[a-z_][a-z0-9_]*/i
-      dotted_identifier = /[a-z_.][a-z0-9_.]*/i
+      identifier = /[[:alpha:]_][[:alnum:]_]*/
+      dotted_identifier = /[[:alpha:]_.][[:alnum:]_.]*/
 
       def current_string
         @string_register ||= StringRegister.new
diff --git a/spec/visual/samples/python b/spec/visual/samples/python
index 8331581809..7a59935014 100644
--- a/spec/visual/samples/python
+++ b/spec/visual/samples/python
@@ -150,3 +150,8 @@ x @= y
 f'{hello} world {int(x) + 1}'
 f'{{ {4*10} }}'
 f'result: {value:{width}.{precision}}'
+
+# Unicode identifiers
+α = 10
+def coöperative(б):
+    return f"{б} is Russian"
\ No newline at end of file