From 31ea6847cdd12e1269b57350104791d237e8ab3b Mon Sep 17 00:00:00 2001 From: Daniel Vandersluis Date: Fri, 1 Oct 2021 14:23:29 -0400 Subject: [PATCH] [Fix #10145] Update `Style/SelectByRegexp` to ignore cases where the receiver appears to be a hash. --- ...ix_update_styleselectbyregexp_to_ignore.md | 1 + lib/rubocop/cop/style/select_by_regexp.rb | 33 ++++++- .../cop/style/select_by_regexp_spec.rb | 99 +++++++++++++++++++ 3 files changed, 130 insertions(+), 3 deletions(-) create mode 100644 changelog/fix_update_styleselectbyregexp_to_ignore.md diff --git a/changelog/fix_update_styleselectbyregexp_to_ignore.md b/changelog/fix_update_styleselectbyregexp_to_ignore.md new file mode 100644 index 00000000000..a9bcd547191 --- /dev/null +++ b/changelog/fix_update_styleselectbyregexp_to_ignore.md @@ -0,0 +1 @@ +* [#10145](https://github.com/rubocop/rubocop/issues/10145): Update `Style/SelectByRegexp` to ignore cases where the receiver appears to be a hash. ([@dvandersluis][]) diff --git a/lib/rubocop/cop/style/select_by_regexp.rb b/lib/rubocop/cop/style/select_by_regexp.rb index 10dae8801b6..50489dfb371 100644 --- a/lib/rubocop/cop/style/select_by_regexp.rb +++ b/lib/rubocop/cop/style/select_by_regexp.rb @@ -3,9 +3,15 @@ module RuboCop module Cop module Style - # This cop looks for places where an subset of an array - # is calculated based on a `Regexp` match, and suggests `grep` or - # `grep_v` instead. + # This cop looks for places where an subset of an Enumerable (array, + # range, set, etc.; see note below) is calculated based on a `Regexp` + # match, and suggests `grep` or `grep_v` instead. + # + # NOTE: Hashes do not behave as you may expect with `grep`, which + # means that `hash.grep` is not equivalent to `hash.select`. Although + # RuboCop is limited by static analysis, this cop attempts to avoid + # registering an offense when the receiver is a hash (hash literal, + # `Hash.new`, `Hash#[]`, or `to_h`/`to_hash`). # # NOTE: `grep` and `grep_v` were optimized when used without a block # in Ruby 3.0, but may be slower in previous versions. @@ -16,6 +22,10 @@ module Style # not be created by `grep`, but may have previously been relied # upon after the `match?` or `=~` call. # + # Additionally, the cop cannot guarantee that the receiver of + # `select` or `reject` is actually an array by static analysis, + # so the correction may not be actually equivalent. + # # @example # # bad (select or find_all) # array.select { |x| x.match? /regexp/ } @@ -49,6 +59,16 @@ class SelectByRegexp < Base } PATTERN + # Returns true if a node appears to return a hash + # @!method creates_hash?(node) + def_node_matcher :creates_hash?, <<~PATTERN + { + (send (const _ :Hash) {:new :[]} ...) + (block (send (const _ :Hash) :new ...) ...) + (send _ { :to_h :to_hash } ...) + } + PATTERN + # @!method calls_lvar?(node, name) def_node_matcher :calls_lvar?, <<~PATTERN { @@ -61,6 +81,7 @@ class SelectByRegexp < Base def on_send(node) return unless (block_node = node.block_node) return if block_node.body.begin_type? + return if receiver_allowed?(block_node.receiver) return unless (regexp_method_send_node = extract_send_node(block_node)) regexp = find_regexp(regexp_method_send_node) @@ -69,6 +90,12 @@ def on_send(node) private + def receiver_allowed?(node) + return false unless node + + node.hash_type? || creates_hash?(node) + end + def register_offense(node, block_node, regexp) replacement = REPLACEMENTS[node.method_name.to_sym] message = format(MSG, replacement: replacement, original_method: node.method_name) diff --git a/spec/rubocop/cop/style/select_by_regexp_spec.rb b/spec/rubocop/cop/style/select_by_regexp_spec.rb index c5fea0a98a9..b3a2b51e98b 100644 --- a/spec/rubocop/cop/style/select_by_regexp_spec.rb +++ b/spec/rubocop/cop/style/select_by_regexp_spec.rb @@ -124,6 +124,105 @@ RUBY end + it 'registers an offense and corrects without a receiver' do + expect_offense(<<~RUBY, method: method) + #{method} { |x| x.match?(/regexp/) } + ^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + #{correction}(/regexp/) + RUBY + end + + it 'registers an offense and corrects when the receiver is an array' do + expect_offense(<<~RUBY, method: method) + [].#{method} { |x| x.match?(/regexp/) } + ^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + foo.to_a.#{method} { |x| x.match?(/regexp/) } + ^^^^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + [].#{correction}(/regexp/) + foo.to_a.#{correction}(/regexp/) + RUBY + end + + it 'registers an offense and corrects when the receiver is a range' do + expect_offense(<<~RUBY, method: method) + ('aaa'...'abc').#{method} { |x| x.match?(/ab/) } + ^^^^^^^^^^^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + ('aaa'...'abc').#{correction}(/ab/) + RUBY + end + + it 'registers an offense and corrects when the receiver is a set' do + expect_offense(<<~RUBY, method: method) + Set.new.#{method} { |x| x.match?(/regexp/) } + ^^^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + [].to_set.#{method} { |x| x.match?(/regexp/) } + ^^^^^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + Set.new.#{correction}(/regexp/) + [].to_set.#{correction}(/regexp/) + RUBY + end + + it 'does not register an offense when the receiver is a hash literal' do + expect_no_offenses(<<~RUBY) + {}.#{method} { |x| x.match? /regexp/ } + { foo: :bar }.#{method} { |x| x.match? /regexp/ } + RUBY + end + + it 'does not register an offense when the receiver is `Hash.new`' do + expect_no_offenses(<<~RUBY) + Hash.new.#{method} { |x| x.match? /regexp/ } + Hash.new(:default).#{method} { |x| x.match? /regexp/ } + Hash.new { |hash, key| :default }.#{method} { |x| x.match? /regexp/ } + RUBY + end + + it 'does not register an offense when the receiver is `Hash[]`' do + expect_no_offenses(<<~RUBY) + Hash[h].#{method} { |x| x.match? /regexp/ } + Hash[:foo, 0, :bar, 1].#{method} { |x| x.match? /regexp/ } + RUBY + end + + it 'does not register an offense when the receiver is `to_h`' do + expect_no_offenses(<<~RUBY) + to_h.#{method} { |x| x.match? /regexp/ } + foo.to_h.#{method} { |x| x.match? /regexp/ } + RUBY + end + + it 'does not register an offense when the receiver is `to_hash`' do + expect_no_offenses(<<~RUBY) + to_hash.#{method} { |x| x.match? /regexp/ } + foo.to_hash.#{method} { |x| x.match? /regexp/ } + RUBY + end + + it 'registers an offense if `to_h` is in the receiver chain but not the actual receiver' do + # Although there is a `to_h` in the chain, we cannot be sure + # of the type of the ultimate receiver. + expect_offense(<<~RUBY, method: method) + foo.to_h.bar.#{method} { |x| x.match? /regexp/ } + ^^^^^^^^^^^^^^{method}^^^^^^^^^^^^^^^^^^^^^^^^^^ #{message} + RUBY + + expect_correction(<<~RUBY) + foo.to_h.bar.#{correction}(/regexp/) + RUBY + end + context 'with `numblock`s', :ruby27 do it 'registers an offense and corrects for `match?`' do expect_offense(<<~RUBY, method: method)