diff --git a/CHANGELOG.md b/CHANGELOG.md index 9c067e2ed..de56c59df 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -8,6 +8,7 @@ * [#109](https://github.com/rubocop-hq/rubocop-ast/pull/109): Add `NodePattern` debugging rake tasks: `test_pattern`, `compile`, `parse`. See also [this app](https://nodepattern.herokuapp.com) ([@marcandre][]) * [#110](https://github.com/rubocop-hq/rubocop-ast/pull/110): Add `NodePattern` support for multiple terms unions. ([@marcandre][]) * [#111](https://github.com/rubocop-hq/rubocop-ast/pull/111): Optimize some `NodePattern`s by using `Set`s. ([@marcandre][]) +* [#112](https://github.com/rubocop-hq/rubocop-ast/pull/112): Add `NodePattern` support for Regexp literals. ([@marcandre][]) ## 0.6.0 (2020-09-26) diff --git a/docs/modules/ROOT/pages/node_pattern.adoc b/docs/modules/ROOT/pages/node_pattern.adoc index 1b65c16f5..56f23620c 100644 --- a/docs/modules/ROOT/pages/node_pattern.adoc +++ b/docs/modules/ROOT/pages/node_pattern.adoc @@ -194,6 +194,13 @@ Branches of the union can contain more than one term: If all the branches have a single term, you can omit the `|`, so `{int | float}` can be simplified to `{int float}`. 
+When checking for symbols or strings, you can use regexp literals for a similar effect: + +[source,sh] +---- +(send _ /to_s|inspect/) # => matches calls to `to_s` or `inspect` +---- + == `[]` for "AND" Imagine you want to check if the number is `odd?` and also positive numbers: diff --git a/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb b/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb index 027827d4f..0f9cf77ea 100644 --- a/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb +++ b/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb @@ -23,6 +23,7 @@ def visit_symbol end alias visit_number visit_symbol alias visit_string visit_symbol + alias visit_regexp visit_symbol def visit_const node.child diff --git a/lib/rubocop/ast/node_pattern/lexer.rb b/lib/rubocop/ast/node_pattern/lexer.rb index 698ee0912..12183906a 100644 --- a/lib/rubocop/ast/node_pattern/lexer.rb +++ b/lib/rubocop/ast/node_pattern/lexer.rb @@ -18,6 +18,14 @@ class NodePattern class Lexer < LexerRex Error = ScanError + REGEXP_OPTIONS = { + 'i' => ::Regexp::IGNORECASE, + 'm' => ::Regexp::MULTILINE, + 'x' => ::Regexp::EXTENDED, + 'o' => 0 + }.freeze + private_constant :REGEXP_OPTIONS + attr_reader :source_buffer, :comments, :tokens def initialize(source) @@ -41,6 +49,13 @@ def emit_comment nil end + def emit_regexp + body, options = ss.captures + flag = options.each_char.map { |c| REGEXP_OPTIONS[c] }.sum + + emit(:tREGEXP) { Regexp.new(body, flag) } + end + def do_parse # Called by the generated `parse` method, do nothing here. end diff --git a/lib/rubocop/ast/node_pattern/lexer.rex b/lib/rubocop/ast/node_pattern/lexer.rex index 29079b4fd..92c020f0c 100644 --- a/lib/rubocop/ast/node_pattern/lexer.rex +++ b/lib/rubocop/ast/node_pattern/lexer.rex @@ -13,6 +13,8 @@ class RuboCop::AST::NodePattern::LexerRex macros SYMBOL_NAME /[\w+@*\/?!<>=~|%^-]+|\[\]=?/ IDENTIFIER /[a-zA-Z_][a-zA-Z0-9_-]*/ + REGEXP_BODY /(?:[^\/]|\\\/)*/ + REGEXP /\/(#{REGEXP_BODY})(? $ ! ^ ` ... + * ? 
," )}/o { emit ss.matched, &:to_sym } + /#{REGEXP}/o { emit_regexp } /%([A-Z:][a-zA-Z_:]+)/ { emit :tPARAM_CONST } /%([a-z_]+)/ { emit :tPARAM_NAMED } /%(\d*)/ { emit(:tPARAM_NUMBER) { |s| s.empty? ? 1 : s.to_i } } # Map `%` to `%1` diff --git a/lib/rubocop/ast/node_pattern/parser.y b/lib/rubocop/ast/node_pattern/parser.y index 56de0abea..a3a42bbd3 100644 --- a/lib/rubocop/ast/node_pattern/parser.y +++ b/lib/rubocop/ast/node_pattern/parser.y @@ -1,7 +1,7 @@ class RuboCop::AST::NodePattern::Parser options no_result_var token tSYMBOL tNUMBER tSTRING tWILDCARD tPARAM_NAMED tPARAM_CONST tPARAM_NUMBER - tFUNCTION_CALL tPREDICATE tNODE_TYPE tARG_LIST tUNIFY + tFUNCTION_CALL tPREDICATE tNODE_TYPE tARG_LIST tUNIFY tREGEXP rule node_pattern # @return Node : node_pattern_no_union @@ -28,6 +28,7 @@ rule | tPARAM_CONST { emit_atom :const, *val } | tPARAM_NAMED { emit_atom :named_parameter, *val } | tPARAM_NUMBER { emit_atom :positional_parameter, *val } + | tREGEXP { emit_atom :regexp, *val } | tWILDCARD { emit_atom :wildcard, *val } | tUNIFY { emit_atom :unify, *val } ; diff --git a/spec/rubocop/ast/node_pattern/lexer_spec.rb b/spec/rubocop/ast/node_pattern/lexer_spec.rb index 53ad93443..230290737 100644 --- a/spec/rubocop/ast/node_pattern/lexer_spec.rb +++ b/spec/rubocop/ast/node_pattern/lexer_spec.rb @@ -36,4 +36,28 @@ %i[( array sym $ int + x )] end end + + [ + /test/, + /[abc]+\/()?/x, # rubocop:disable Style/RegexpLiteral + /back\\slash/ + ].each do |regexp| + context "when given a regexp #{regexp.inspect}" do + let(:source) { regexp.inspect } + + it 'round trips' do + token = tokens.first + value = token.last.first + expect(value.inspect).to eq regexp.inspect + end + end + end + + context 'when given a regexp ending with a backslash' do + let(:source) { '/tricky\\/' } + + it 'does not lexes it properly' do + expect { tokens }.to raise_error(RuboCop::AST::NodePattern::LexerRex::ScanError) + end + end end diff --git a/spec/rubocop/ast/node_pattern_spec.rb 
b/spec/rubocop/ast/node_pattern_spec.rb index d08f171f4..511d07689 100644 --- a/spec/rubocop/ast/node_pattern_spec.rb +++ b/spec/rubocop/ast/node_pattern_spec.rb @@ -1896,6 +1896,16 @@ def withargs(foo, bar, qux) end end + describe 'regexp' do + it 'matches symbols or strings' do + expect('(_ _ $/abc|def|foo/i ...)').to match_codes( + 'Foo(42)', 'foo(42)' + ).and not_match_codes( + 'bar(42)' + ) + end + end + describe 'bad syntax' do context 'with empty parentheses' do let(:pattern) { '()' }