From d0bee923b113327e1208e9525409e7e72668f2f1 Mon Sep 17 00:00:00 2001 From: Eugene Kenny Date: Wed, 5 Aug 2020 22:48:07 +0100 Subject: [PATCH] Set valid ref range when regexp is matched Last match global variables are set when the regexp is matched, not when it's defined. This meant that the previous implementation gave incorrect results when those things happened separately, e.g. when a regexp was assigned to a local variable or a constant and then matched elsewhere. This change means that the cop only handles matching on regexp literals, but that was the only case where it worked correctly before anyway. It should be possible to improve the cop to handle more complex cases with `VariableForce`, but that can be done as a separate change. --- CHANGELOG.md | 4 ++ .../cop/lint/out_of_range_regexp_ref.rb | 25 +++++++-- .../cop/lint/out_of_range_regexp_ref_spec.rb | 55 +++++++++++++++++++ 3 files changed, 80 insertions(+), 4 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 08fa6635f92..932953641e5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## master (unreleased) +### Bug fixes + +* [#8463](https://github.com/rubocop-hq/rubocop/pull/8463): Fix false positives for `Lint/OutOfRangeRegexpRef` when a regexp is defined and matched in separate steps. ([@eugeneius][]) + ## 0.89.0 (2020-08-05) ### New features diff --git a/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb b/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb index 8a81e2a2f9b..4c3a52a6064 100644 --- a/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb +++ b/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb @@ -21,16 +21,26 @@ module Lint class OutOfRangeRegexpRef < Base MSG = 'Do not use out of range reference for the Regexp.' + REGEXP_CAPTURE_METHODS = %i[=~ === match].to_set.freeze + def on_new_investigation @valid_ref = 0 end - def on_regexp(node) + def on_match_with_lvasgn(node) + check_regexp(node.children.first) + end + + def on_send(node) + return unless REGEXP_CAPTURE_METHODS.include?(node.method_name) + @valid_ref = nil - return if contain_non_literal?(node) - tree = Regexp::Parser.parse(node.content) - @valid_ref = regexp_captures(tree) + if node.receiver&.regexp_type? + check_regexp(node.receiver) + elsif node.first_argument&.regexp_type? && node.method?(:=~) + check_regexp(node.first_argument) + end end def on_nth_ref(node) @@ -42,6 +52,13 @@ def on_nth_ref(node) private + def check_regexp(regexp) + return if contain_non_literal?(regexp) + + tree = Regexp::Parser.parse(regexp.content) + @valid_ref = regexp_captures(tree) + end + def contain_non_literal?(node) node.children.size != 2 || !node.children.first.str_type? end diff --git a/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb b/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb index c386b08b8c0..7c748988f5a 100644 --- a/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb +++ b/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb @@ -78,4 +78,59 @@ puts $2 RUBY end + + it 'registers an offense when the regexp comes after `=~`' do + expect_offense(<<~RUBY) + "foobar" =~ /(foo)(bar)/ + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'registers an offense when the regexp is matched with `===`' do + expect_offense(<<~RUBY) + /(foo)(bar)/ === "foobar" + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'registers an offense when the regexp is matched with `match`' do + expect_offense(<<~RUBY) + /(foo)(bar)/.match("foobar") + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'ignores calls to `match?`' do + expect_offense(<<~RUBY) + /(foo)(bar)/.match("foobar") + /(foo)(bar)(baz)/.match?("foobarbaz") + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'handles `match` with no arguments' do + expect_no_offenses(<<~RUBY) + foo.match + RUBY + end + + it 'handles `match` with no receiver' do + expect_no_offenses(<<~RUBY) + match(bar) + RUBY + end + + it 'only registers an offense when the regexp is matched as a literal' do + expect_no_offenses(<<~RUBY) + foo_bar_regexp = /(foo)(bar)/ + foo_regexp = /(foo)/ + + foo_bar_regexp =~ 'foobar' + puts $2 + RUBY + end end