From ead03fa7e395320629383a8f00be474229bc6856 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 12 May 2020 18:08:39 +0900 Subject: [PATCH] Fix escape sequences in Python's strings (#1508) Version 3.18.0 of Rouge included a new mechanism for handling strings in the Python lexer. One of the consequences of that change was that raw strings would break if they included 'invalid' escape sequences. This is a mistake as raw strings do not have 'invalid' escape sequences. This commit fixes this error by changing the approach that the Python lexer takes to escape sequences more generally. Rather than dividing recognised and unrecognised escape sequences into 'valid' and 'invalid', it simply treats unrecognised escape sequences as ordinary strings. The result of this is that all escape sequences in raw strings produce `Str` tokens. In other types of strings, recognised escape sequences produce `Str::Escape` tokens and unrecognised escape sequences produce `Str` tokens. --- lib/rouge/lexers/python.rb | 20 ++++++-------------- spec/visual/samples/python | 2 ++ 2 files changed, 8 insertions(+), 14 deletions(-) diff --git a/lib/rouge/lexers/python.rb b/lib/rouge/lexers/python.rb index 9a718299c4..6eeea88655 100644 --- a/lib/rouge/lexers/python.rb +++ b/lib/rouge/lexers/python.rb @@ -186,14 +186,7 @@ def current_string end end - rule %r/\\/ do |m| - if current_string.type? "r" - token Str - else - token Str::Interpol - end - push :generic_escape - end + rule %r/(?=\\)/, Str, :generic_escape rule %r/{/ do |m| if current_string.type? "f" @@ -206,9 +199,10 @@ def current_string end state :generic_escape do - rule %r( + rule %r(\\ ( [\\abfnrtv"'] | \n + | newline | N{[a-zA-Z][a-zA-Z ]+[a-zA-Z]} | u[a-fA-F0-9]{4} | U[a-fA-F0-9]{8} @@ -216,13 +210,11 @@ def current_string | [0-7]{1,3} ) )x do - if current_string.type? "r" - token Str - else - token Str::Escape - end + token (current_string.type?("r") ? Str : Str::Escape) pop! end + + rule %r/\\./, Str, :pop! end state :generic_interpol do diff --git a/spec/visual/samples/python b/spec/visual/samples/python index 148e113939..8331581809 100644 --- a/spec/visual/samples/python +++ b/spec/visual/samples/python @@ -47,6 +47,7 @@ def baz(): '\UaaaaAF09' '\xaf\xAF\x09' '\007' + '.*\[p00t_(d\d{4})\].*' # There are no escape sequences in this string # escaped characters in raw strings def baz(): @@ -56,6 +57,7 @@ def baz(): r'\UaaaaAF09' r'\xaf\xAF\x09' r'\007' + r'.*\[p00t_(d\d{4})\].*' # line continuations apple.filter(x, y)