From e8d9b158c810d7fe305cdd8531a6353ffbab5ef0 Mon Sep 17 00:00:00 2001 From: Christopher Pence Date: Sun, 20 Aug 2023 12:19:32 -0700 Subject: [PATCH 1/4] Initial fix --- lib/rouge/lexers/python.rb | 57 ++++++++++++++++++++++++++------------ spec/visual/samples/python | 10 +++++++ 2 files changed, 49 insertions(+), 18 deletions(-) diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index b47b64aa76..0c6b983f71 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -68,12 +68,32 @@ def self.exceptions end identifier = /[[:alpha:]_][[:alnum:]_]*/ + lower_identifier = /[[:lower:]_][[:alnum:]_]*/ + upper_identifier = /[[:upper:]_][[:alnum:]_]*/ dotted_identifier = /[[:alpha:]_.][[:alnum:]_.]*/ def current_string @string_register ||= StringRegister.new end + def id(match, type) + if self.class.keywords.include? match + token Keyword + elsif not in_state?(:dot) and self.class.exceptions.include? match + token Name::Builtin + elsif not in_state?(:dot) and self.class.builtins.include? match + token Name::Builtin + elsif not in_state?(:dot) and self.class.builtins_pseudo.include? match + token Name::Builtin::Pseudo + else + token type + end + + if in_state?(:dot) + pop! + end + end + state :root do rule %r/\n+/m, Text rule %r/^(:)(\s*)([ru]{,2}""".*?""")/mi do @@ -84,10 +104,15 @@ def current_string rule %r/[^\S\n]+/, Text rule %r(#(.*)?\n?), Comment::Single - rule %r/[\[\]{}:(),;.]/, Punctuation + rule %r/[\[\]{}:(),;]/, Punctuation rule %r/\\\n/, Text rule %r/\\/, Text + rule /\./ do + token Punctuation + push :dot if not (in_state?(:generic_string) or in_state?(:dot)) + end + rule %r/@#{dotted_identifier}/i, Name::Decorator rule %r/(in|is|and|or|not)\b/, Operator::Word @@ -116,9 +141,6 @@ def current_string push :classname end - rule %r/([a-z_]\w*)[ \t]*(?=(\(.*\)))/m, Name::Function - rule %r/([A-Z_]\w*)[ \t]*(?=(\(.*\)))/m, Name::Class - # TODO: not in python 3 rule %r/`.*?`/, Str::Backtick rule %r/([rfbu]{0,2})('''|"""|['"])/i do |m| @@ -127,22 +149,17 @@ def current_string push :generic_string end - # using negative lookbehind so we don't match property names - rule %r/(? Date: Sun, 20 Aug 2023 12:40:29 -0700 Subject: [PATCH 2/4] Fix ambiguous regexp literal --- lib/rouge/lexers/python.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index 0c6b983f71..fc16bcc2bb 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -108,7 +108,7 @@ def id(match, type) rule %r/\\\n/, Text rule %r/\\/, Text - rule /\./ do + rule %r/\./ do token Punctuation push :dot if not (in_state?(:generic_string) or in_state?(:dot)) end From 86cc28b1958d8c1c73157f3242f46abe714293cd Mon Sep 17 00:00:00 2001 From: Christopher Pence Date: Sun, 20 Aug 2023 13:18:42 -0700 Subject: [PATCH 3/4] Add comments, rename --- lib/rouge/lexers/python.rb | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index fc16bcc2bb..b8ba11d70b 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -76,19 +76,21 @@ def current_string @string_register ||= StringRegister.new end - def id(match, type) - if self.class.keywords.include? match + # Yield a token for an identifier. Handle keywords/builtins, attr accesses + def token_for_identifier(word, fallback) + if self.class.keywords.include? word token Keyword - elsif not in_state?(:dot) and self.class.exceptions.include? match + elsif not in_state?(:dot) and self.class.exceptions.include? word token Name::Builtin - elsif not in_state?(:dot) and self.class.builtins.include? match + elsif not in_state?(:dot) and self.class.builtins.include? word token Name::Builtin - elsif not in_state?(:dot) and self.class.builtins_pseudo.include? match + elsif not in_state?(:dot) and self.class.builtins_pseudo.include? word token Name::Builtin::Pseudo else - token type + token fallback end + # Reset attr access state if in_state?(:dot) pop! end @@ -108,6 +110,7 @@ def id(match, type) rule %r/\\\n/, Text rule %r/\\/, Text + # Push the :dot state to the stack to keep track of attr accesses rule %r/\./ do token Punctuation push :dot if not (in_state?(:generic_string) or in_state?(:dot)) @@ -149,16 +152,17 @@ def id(match, type) push :generic_string end - # Handle identifiers that look like a call + # Identifiers used in a call expr rule %r/#{lower_identifier}(?=[[:blank:]]*\()/m do |m| - id m[0], Name::Function + token_for_identifier m[0], Name::Function end rule %r/#{upper_identifier}(?=[[:blank:]]*\()/m do |m| - id m[0], Name::Class + token_for_identifier m[0], Name::Class end + # All other identifiers rule identifier do |m| - id m[0], Name + token_for_identifier m[0], Name end digits = /[0-9](_?[0-9])*/ From 9c941a1609cbdd8033c47a857f5057e50d246997 Mon Sep 17 00:00:00 2001 From: Christopher Pence Date: Mon, 21 Aug 2023 16:52:09 -0700 Subject: [PATCH 4/4] Appease line linter --- spec/visual/samples/python | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/spec/visual/samples/python b/spec/visual/samples/python index 7090e0ace6..22eee55e4a 100644 --- a/spec/visual/samples/python +++ b/spec/visual/samples/python @@ -227,4 +227,4 @@ def take_first(pairs): self.for('nonsense') # Distinguish between builtins and method calls -self.filter(filter(lambda x: x == 1, items)) \ No newline at end of file +self.filter(filter(lambda x: x == 1, items))