diff --git a/changelog/new_add_new_cop_styleselectbyregexp.md b/changelog/new_add_new_cop_styleselectbyregexp.md new file mode 100644 index 00000000000..f1a12043953 --- /dev/null +++ b/changelog/new_add_new_cop_styleselectbyregexp.md @@ -0,0 +1 @@ +* [#8327](https://github.com/rubocop/rubocop/issues/8327): Add new cop `Style/SelectByRegexp`. ([@dvandersluis][]) diff --git a/config/default.yml b/config/default.yml index 19e5fd203da..25008cd165f 100644 --- a/config/default.yml +++ b/config/default.yml @@ -4562,6 +4562,12 @@ Style/Sample: Enabled: true VersionAdded: '0.30' +Style/SelectByRegexp: + Description: 'Prefer grep/grep_v to select/reject with a regexp match.' + Enabled: pending + SafeAutoCorrect: false + VersionAdded: '<>' + Style/SelfAssignment: Description: >- Checks for places where self-assignment shorthand should have diff --git a/lib/rubocop.rb b/lib/rubocop.rb index 2815286fca1..c1d105d6c57 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -591,6 +591,7 @@ require_relative 'rubocop/cop/style/return_nil' require_relative 'rubocop/cop/style/safe_navigation' require_relative 'rubocop/cop/style/sample' +require_relative 'rubocop/cop/style/select_by_regexp' require_relative 'rubocop/cop/style/self_assignment' require_relative 'rubocop/cop/style/semicolon' require_relative 'rubocop/cop/style/send' diff --git a/lib/rubocop/cop/style/select_by_regexp.rb b/lib/rubocop/cop/style/select_by_regexp.rb new file mode 100644 index 00000000000..573b4fe9ef7 --- /dev/null +++ b/lib/rubocop/cop/style/select_by_regexp.rb @@ -0,0 +1,101 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Style + # This cop looks for places where an subset of an array + # is calculated based on a `Regexp` match, and suggests `grep` or + # `grep_v` instead. + # + # NOTE: `grep` and `grep_v` were optimized when used without a block + # in Ruby 3.0, but may be slower in previous versions. + # See https://bugs.ruby-lang.org/issues/17030 + # + # @safety + # Autocorrection is marked as unsafe because `MatchData` will + # not be created by `grep`, but may have previously been relied + # upon after the `match?` or `=~` call. + # + # @example + # # bad (select or find_all) + # array.select { |x| x.match? /regexp/ } + # array.select { |x| /regexp/.match?(x) } + # array.select { |x| x =~ /regexp/ } + # array.select { |x| /regexp/ =~ x } + # + # # bad (reject) + # array.reject { |x| x.match? /regexp/ } + # array.reject { |x| /regexp/.match?(x) } + # array.reject { |x| x =~ /regexp/ } + # array.reject { |x| /regexp/ =~ x } + # + # # good + # array.grep(regexp) + # array.grep_v(regexp) + class SelectByRegexp < Base + extend AutoCorrector + include RangeHelp + + MSG = 'Prefer `%s` to `%s` with a regexp match.' + RESTRICT_ON_SEND = %i[select find_all reject].freeze + REPLACEMENTS = { select: 'grep', find_all: 'grep', reject: 'grep_v' }.freeze + REGEXP_METHODS = %i[match? =~].to_set.freeze + + # @!method regexp_match?(node) + def_node_matcher :regexp_match?, <<~PATTERN + (block send (args (arg $_)) ${(send _ %REGEXP_METHODS _) match-with-lvasgn}) + PATTERN + + # @!method calls_lvar?(node, name) + def_node_matcher :calls_lvar?, <<~PATTERN + { + (send (lvar %1) ...) + (send ... (lvar %1)) + (match-with-lvasgn regexp (lvar %1)) + } + PATTERN + + def on_send(node) + return unless (block_node = node.block_node) + return if block_node.body.begin_type? + return unless (regexp_method_send_node = extract_send_node(block_node)) + + regexp = find_regexp(regexp_method_send_node) + register_offense(node, block_node, regexp) + end + + private + + def register_offense(node, block_node, regexp) + replacement = REPLACEMENTS[node.method_name.to_sym] + message = format(MSG, replacement: replacement, original_method: node.method_name) + + add_offense(block_node, message: message) do |corrector| + # Only correct if it can be determined what the regexp is + if regexp + range = range_between(node.loc.selector.begin_pos, block_node.loc.end.end_pos) + corrector.replace(range, "#{replacement}(#{regexp.source})") + end + end + end + + def extract_send_node(block_node) + return unless (block_arg_name, regexp_method_send_node = regexp_match?(block_node)) + return unless calls_lvar?(regexp_method_send_node, block_arg_name) + + regexp_method_send_node + end + + def find_regexp(node) + return node.child_nodes.first if node.match_with_lvasgn_type? + + if node.receiver.lvar_type? + node.first_argument + elsif node.first_argument.lvar_type? + node.receiver + end + end + end + end + end +end diff --git a/spec/rubocop/cop/style/select_by_regexp_spec.rb b/spec/rubocop/cop/style/select_by_regexp_spec.rb new file mode 100644 index 00000000000..f65f5c99bfa --- /dev/null +++ b/spec/rubocop/cop/style/select_by_regexp_spec.rb @@ -0,0 +1,128 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Style::SelectByRegexp, :config do + { 'select' => 'grep', 'find_all' => 'grep', 'reject' => 'grep_v' }.each do |method, correction| + message = "Prefer `#{correction}` to `#{method}` with a regexp match." + + context "with #{method}" do + it 'registers an offense and corrects for `match?`' do + expect_offense(<<~RUBY, method: method) + array.#{method} { |x| x.match? /regexp/ } + ^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + array.#{correction}(/regexp/) + RUBY + end + + it 'registers an offense and corrects for `Regexp#match?`' do + expect_offense(<<~RUBY, method: method) + array.#{method} { |x| /regexp/.match? x } + ^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + array.#{correction}(/regexp/) + RUBY + end + + it 'registers an offense and corrects for `blockvar =~ regexp`' do + expect_offense(<<~RUBY, method: method) + array.#{method} { |x| x =~ /regexp/ } + ^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + array.#{correction}(/regexp/) + RUBY + end + + it 'registers an offense and corrects for `regexp =~ blockvar`' do + expect_offense(<<~RUBY, method: method) + array.#{method} { |x| /regexp/ =~ x } + ^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + array.#{correction}(/regexp/) + RUBY + end + + it 'registers an offense and corrects when there is no explicit regexp' do + expect_offense(<<~RUBY, method: method) + array.#{method} { |x| x =~ y } + ^^^^^^^{method}^^^^^^^^^^^^^^^ #{message} + array.#{method} { |x| x =~ REGEXP } + ^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^ #{message} + array.#{method} { |x| x =~ foo.bar.baz(quux) } + ^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + array.#{correction}(y) + array.#{correction}(REGEXP) + array.#{correction}(foo.bar.baz(quux)) + RUBY + end + + it 'registers an offense and corrects with a multiline block' do + expect_offense(<<~RUBY, method: method) + array.#{method} do |x| + ^^^^^^^{method}^^^^^^^ #{message} + x.match? /regexp/ + end + RUBY + + expect_correction(<<~RUBY) + array.#{correction}(/regexp/) + RUBY + end + + it 'does not register an offense when there is no block' do + expect_no_offenses(<<~RUBY) + array.#{method} + RUBY + end + + it 'does not register an offense when given a proc' do + expect_no_offenses(<<~RUBY) + array.#{method}(&:even?) + RUBY + end + + it 'does not register an offense when the block does not match a regexp' do + expect_no_offenses(<<~RUBY) + array.#{method} { |x| x.even? } + RUBY + end + + it 'does not register an offense when the block has multiple expressions' do + expect_no_offenses(<<~RUBY) + array.#{method} do |x| + next if x.even? + x.match? /regexp/ + end + RUBY + end + + it 'does not register an offense when the block arity is not 1' do + expect_no_offenses(<<~RUBY) + obj.#{method} { |x, y| y.match? /regexp/ } + RUBY + end + + it 'does not register an offense when the block uses an external variable in a regexp match' do + expect_no_offenses(<<~RUBY) + array.#{method} { |x| y.match? /regexp/ } + RUBY + end + + it 'does not register an offense when the block param is a method argument' do + expect_no_offenses(<<~RUBY) + array.#{method} { |x| /regexp/.match?(foo(x)) } + RUBY + end + end + end +end