Skip to content

Commit

Permalink
Fix non-ascii characters in names in Crystal and Ruby (#1894)
Browse files (browse the repository at this point in the history)
Fixes #1893
  • Loading branch information
kolesar-andras committed Apr 1, 2024
1 parent 1953972 commit f9a174d
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 35 deletions.
34 changes: 17 additions & 17 deletions lib/rouge/lexers/crystal.rb
Expand Up @@ -21,7 +21,7 @@ def self.detect?(text)
rule %r(
: # initial :
@{0,2} # optional ivar, for :@foo and :@@foo
[a-z_]\w*[!?]? # the symbol
[\p{Ll}_]\p{Word}*[!?]? # the symbol
)xi, Str::Symbol

# special symbols
Expand All @@ -35,7 +35,7 @@ def self.detect?(text)
# %-sigiled strings
# %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
rule %r/%([rqswQWxiI])?([^\w\s}])/ do |m|
rule %r/%([rqswQWxiI])?([^\p{Word}\s}])/ do |m|
open = Regexp.escape(m[2])
close = Regexp.escape(delimiter_map[m[2]] || m[2])
interp = /[rQWxI]/ === m[1]
Expand Down Expand Up @@ -77,7 +77,7 @@ def self.detect?(text)

state :strings do
mixin :symbols
rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/"/, Str::Double, :simple_string
rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
rule %r/(')(\\u[a-fA-F0-9]{4}|\\u\{[a-fA-F0-9]{1,6}\}|\\[abefnrtv])?(\\\\|\\'|[^'])*(')/ do
Expand Down Expand Up @@ -166,9 +166,9 @@ def self.detect?(text)
rule %r/@\[([^\]]+)\]/, Name::Decorator

# names
rule %r/@@[a-z_]\w*/i, Name::Variable::Class
rule %r/@[a-z_]\w*/i, Name::Variable::Instance
rule %r/\$\w+/, Name::Variable::Global
rule %r/@@[\p{Ll}_]\p{Word}*/i, Name::Variable::Class
rule %r/@[\p{Ll}_]\p{Word}*/i, Name::Variable::Instance
rule %r/\$\p{Word}+/, Name::Variable::Global
rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), Name::Variable::Global
rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global
rule %r/::/, Operator
Expand All @@ -181,7 +181,7 @@ def self.detect?(text)
rule %r(
(module)
(\s+)
([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
([\p{L}_][\p{L}0-9_]*(::[\p{L}_][\p{L}0-9_]*)*)
)x do
groups Keyword, Text, Name::Namespace
end
Expand All @@ -207,14 +207,14 @@ def self.detect?(text)
# Otherwise, they will be parsed as :method_call
rule %r/\.{2,3}/, Operator, :expr_start

rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do
rule %r/[\p{Lu}][\p{L}0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([\p{Ll}_]\p{Word}*[!?]?|[*%&^`~+-\/\[<>=])/ do
groups Punctuation, Text, Name::Function
push :method_call
end

rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start
rule %r/[a-zA-Z_]\w*/, Name, :method_call
rule %r/[\p{L}_]\p{Word}*[?!]/, Name, :expr_start
rule %r/[\p{L}_]\p{Word}*/, Name, :method_call
rule %r/\*\*|\/\/|>=|<=|<=>|<<?|>>?|=~|={3}|!~|&&?|\|\||\./,
Operator, :expr_start
rule %r/{%|%}/, Punctuation
Expand All @@ -225,7 +225,7 @@ def self.detect?(text)
end

state :has_heredocs do
rule %r/(?<!\w)(<<[-~]?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
rule %r/(?<!\p{Word})(<<[-~]?)(["`']?)([\p{L}_]\p{Word}*)(\2)/ do |m|
token Operator, m[1]
token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
@heredoc_queue << [['<<-', '<<~'].include?(m[1]), m[3]]
Expand Down Expand Up @@ -282,9 +282,9 @@ def self.detect?(text)
rule %r/\s+/, Text
rule %r/\(/, Punctuation, :defexpr
rule %r(
(?:([a-zA-Z_]\w*)(\.))?
(?:([\p{L}_]\p{Word}*)(\.))?
(
[a-zA-Z_]\w*[!?]? |
[\p{L}_]\p{Word}*[!?]? |
\*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
<<? | >>? | <=>? | >= | ===?
)
Expand All @@ -311,7 +311,7 @@ def self.detect?(text)
goto :expr_start
end

rule %r/[A-Z_]\w*/, Name::Class, :pop!
rule %r/[\p{Lu}_]\p{Word}*/, Name::Class, :pop!

rule(//) { pop! }
end
Expand Down Expand Up @@ -343,7 +343,7 @@ def self.detect?(text)

state :string_intp do
rule %r/[#][{]/, Str::Interpol, :in_interp
rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol
rule %r/#(@@?|\$)[\p{Ll}_]\p{Word}*/i, Str::Interpol
end

state :string_intp_escaped do
Expand Down Expand Up @@ -399,7 +399,7 @@ def self.detect?(text)
rule %r(
[?](\\[MC]-)* # modifiers
(\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
(?!\w)
(?!\p{Word})
)x, Str::Char, :pop!

# special case for using a single space. Ruby demands that
Expand Down
36 changes: 18 additions & 18 deletions lib/rouge/lexers/ruby.rb
Expand Up @@ -24,7 +24,7 @@ def self.detect?(text)
rule %r(
: # initial :
@{0,2} # optional ivar, for :@foo and :@@foo
[a-z_]\w*[!?]? # the symbol
[\p{Ll}_]\p{Word}*[!?]? # the symbol
)xi, Str::Symbol

# special symbols
Expand All @@ -39,7 +39,7 @@ def self.detect?(text)
# %-sigiled strings
# %(abc), %[abc], %<abc>, %.abc., %r.abc., etc
delimiter_map = { '{' => '}', '[' => ']', '(' => ')', '<' => '>' }
rule %r/%([rqswQWxiI])?([^\w\s])/ do |m|
rule %r/%([rqswQWxiI])?([^\p{Word}\s])/ do |m|
open = Regexp.escape(m[2])
close = Regexp.escape(delimiter_map[m[2]] || m[2])
interp = /[rQWxI]/ === m[1] || !m[1]
Expand Down Expand Up @@ -83,7 +83,7 @@ def self.detect?(text)

state :strings do
mixin :symbols
rule %r/\b[a-z_]\w*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/\b[\p{Ll}_]\p{Word}*?[?!]?:\s+/, Str::Symbol, :expr_start
rule %r/'(\\\\|\\'|[^'])*'/, Str::Single
rule %r/"/, Str::Double, :simple_string
rule %r/(?<!\.)`/, Str::Backtick, :simple_backtick
Expand Down Expand Up @@ -177,9 +177,9 @@ def self.detect?(text)
rule decimal, Num::Integer

# names
rule %r/@@[a-z_]\w*/i, Name::Variable::Class
rule %r/@[a-z_]\w*/i, Name::Variable::Instance
rule %r/\$\w+/, Name::Variable::Global
rule %r/@@[\p{Ll}_]\p{Word}*/i, Name::Variable::Class
rule %r/@[\p{Ll}_]\p{Word}*/i, Name::Variable::Instance
rule %r/\$\p{Word}+/, Name::Variable::Global
rule %r(\$[!@&`'+~=/\\,;.<>_*\$?:"]), Name::Variable::Global
rule %r/\$-[0adFiIlpvw]/, Name::Variable::Global
rule %r/::/, Operator
Expand All @@ -193,7 +193,7 @@ def self.detect?(text)
rule %r(
(module)
(\s+)
([a-zA-Z_][a-zA-Z0-9_]*(::[a-zA-Z_][a-zA-Z0-9_]*)*)
([\p{L}_][\p{L}0-9_]*(::[\p{L}_][\p{L}0-9_]*)*)
)x do
groups Keyword, Text, Name::Namespace
end
Expand All @@ -219,14 +219,14 @@ def self.detect?(text)
# Otherwise, they will be parsed as :method_call
rule %r/\.{2,3}/, Operator, :expr_start

rule %r/[A-Z][a-zA-Z0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([a-z_]\w*[!?]?|[*%&^`~+-\/\[<>=])/ do
rule %r/[\p{Lu}][\p{L}0-9_]*/, Name::Constant, :method_call
rule %r/(\.|::)(\s*)([\p{Ll}_]\p{Word}*[!?]?|[*%&^`~+-\/\[<>=])/ do
groups Punctuation, Text, Name::Function
push :method_call
end

rule %r/[a-zA-Z_]\w*[?!]/, Name, :expr_start
rule %r/[a-zA-Z_]\w*/, Name, :method_call
rule %r/[\p{L}_]\p{Word}*[?!]/, Name, :expr_start
rule %r/[\p{L}_]\p{Word}*/, Name, :method_call
rule %r/\*\*|<<?|>>?|>=|<=|<=>|=~|={3}|!~|&&?|\|\||\./,
Operator, :expr_start
rule %r/[-+\/*%=<>&!^|~]=?/, Operator, :expr_start
Expand All @@ -236,7 +236,7 @@ def self.detect?(text)
end

state :has_heredocs do
rule %r/(?<!\w)(<<[-~]?)(["`']?)([a-zA-Z_]\w*)(\2)/ do |m|
rule %r/(?<!\p{Word})(<<[-~]?)(["`']?)([\p{L}_]\p{Word}*)(\2)/ do |m|
token Operator, m[1]
token Name::Constant, "#{m[2]}#{m[3]}#{m[4]}"
@heredoc_queue << [['<<-', '<<~'].include?(m[1]), m[3]]
Expand Down Expand Up @@ -293,9 +293,9 @@ def self.detect?(text)
rule %r/\s+/, Text
rule %r/\(/, Punctuation, :defexpr
rule %r(
(?:([a-zA-Z_]\w*)(\.))?
(?:([\p{L}_]\p{Word}*)(\.))?
(
[a-zA-Z_]\w*[!?]? |
[\p{L}_]\p{Word}*[!?]? |
\*\*? | [-+]@? | [/%&\|^`~] | \[\]=? |
<<? | >>? | <=>? | >= | ===?
)
Expand All @@ -310,7 +310,7 @@ def self.detect?(text)

state :classname do
rule %r/\s+/, Text
rule %r/\w+(::\w+)+/, Name::Class
rule %r/\p{Word}+(::\p{Word}+)+/, Name::Class

rule %r/\(/ do
token Punctuation
Expand All @@ -324,7 +324,7 @@ def self.detect?(text)
goto :expr_start
end

rule %r/[A-Z_]\w*/, Name::Class, :pop!
rule %r/[\p{Lu}_]\p{Word}*/, Name::Class, :pop!

rule(//) { pop! }
end
Expand Down Expand Up @@ -364,7 +364,7 @@ def self.detect?(text)

state :string_intp do
rule %r/[#][{]/, Str::Interpol, :in_interp
rule %r/#(@@?|\$)[a-z_]\w*/i, Str::Interpol
rule %r/#(@@?|\$)[\p{Ll}_]\p{Word}*/i, Str::Interpol
end

state :string_intp_escaped do
Expand Down Expand Up @@ -419,7 +419,7 @@ def self.detect?(text)
rule %r(
[?](\\[MC]-)* # modifiers
(\\([\\abefnrstv\#"']|x[a-fA-F0-9]{1,2}|[0-7]{1,3})|\S)
(?!\w)
(?!\p{Word})
)x, Str::Char, :pop!

# special case for using a single space. Ruby demands that
Expand Down

0 comments on commit f9a174d

Please sign in to comment.