From cbf2ed37bf41cd6469fda02ef1c7a38d8086fcfb Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Thu, 4 Jun 2020 05:40:00 +0900 Subject: [PATCH] Improve support for single quotes in Haskell lexer (#1524) Haskell uses matching single quotes to express character literals (e.g. `'A'`). However, the unmatched single quote character, `'`, is also used for various purposes (e.g. as part of a variable name like `x'` or in the name of a promoted type like `'Bar`). Currently, Rouge's Haskell lexer permits the use of an unmatched single quote by having separate rules for naming and then a general rule of lower precedence for when the lexer encounters `'`. A better approach would be to only treat `'` as being part of a character literal if the lexer detects that there is a matching `'` within an appropriate number of characters. This commit does that and then creates a more general rule for matching names. --- lib/rouge/lexers/haskell.rb | 46 ++++++++++++++++++++++--------------- spec/visual/samples/haskell | 2 ++ 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/lib/rouge/lexers/haskell.rb b/lib/rouge/lexers/haskell.rb index 594cd927f1..6891847d15 100644 --- a/lib/rouge/lexers/haskell.rb +++ b/lib/rouge/lexers/haskell.rb @@ -17,8 +17,8 @@ def self.detect?(text) end reserved = %w( - _ case class data default deriving do else if in - infix[lr]? instance let newtype of then type where + _ case class data default deriving do else if in infix infixl infixr + instance let newtype of then type where ) ascii = %w( @@ -54,14 +54,31 @@ def self.detect?(text) state :root do mixin :basic - rule %r/\bimport\b/, Keyword::Reserved, :import - rule %r/\bmodule\b/, Keyword::Reserved, :module - rule %r/\b(?:#{reserved.join('|')})\b/, Keyword::Reserved - # not sure why, but ^ doesn't work here - # rule %r/^[_a-z][\w']*/, Name::Function - rule %r/[_a-z][\w']*/, Name - rule %r/[A-Z][\w']*/, Keyword::Type - rule %r/'[A-Z]\w+'?/, Keyword::Type # promoted data constructor + rule %r/'(?=(?:.|\\\S+)')/, Str::Char, :character + rule %r/"/, Str, :string + + rule %r/\d+e[+-]?\d+/i, Num::Float + rule %r/\d+\.\d+(e[+-]?\d+)?/i, Num::Float + rule %r/0o[0-7]+/i, Num::Oct + rule %r/0x[\da-f]+/i, Num::Hex + rule %r/\d+/, Num::Integer + + rule %r/[\w']+/ do |m| + match = m[0] + if match == "import" + token Keyword::Reserved + push :import + elsif match == "module" + token Keyword::Reserved + push :module + elsif reserved.include?(match) + token Keyword::Reserved + elsif match =~ /\A'?[A-Z]/ + token Keyword::Type + else + token Name + end + end # lambda operator rule %r(\\(?![:!#\$\%&*+.\\/<=>?@^\|~-]+)), Name::Function @@ -72,15 +89,6 @@ def self.detect?(text) # other operators rule %r([:!#\$\%&*+.\\/<=>?@^\|~-]+), Operator - rule %r/\d+e[+-]?\d+/i, Num::Float - rule %r/\d+\.\d+(e[+-]?\d+)?/i, Num::Float - rule %r/0o[0-7]+/i, Num::Oct - rule %r/0x[\da-f]+/i, Num::Hex - rule %r/\d+/, Num::Integer - - rule %r/'/, Str::Char, :character - rule %r/"/, Str, :string - rule %r/\[\s*\]/, Keyword::Type rule %r/\(\s*\)/, Name::Builtin diff --git a/spec/visual/samples/haskell b/spec/visual/samples/haskell index a0ac7d1ce0..6d7a9e1f3e 100644 --- a/spec/visual/samples/haskell +++ b/spec/visual/samples/haskell @@ -399,3 +399,5 @@ quasi3 = [here|Newlines are part of the quotation |] + +[ H.label [ H.for' hName, H.class' "col-sm-2 col-form-label" ] [ text msg ]