From a678aba56c6703756139650a651d9c2ab9323f45 Mon Sep 17 00:00:00 2001 From: Sonali Navlakhe <40335339+sonalinavlakhe@users.noreply.github.com> Date: Wed, 5 Aug 2020 23:01:19 +0530 Subject: [PATCH] [Fixes #7755] Add `Lint/OutOfRangeRefInRegexp` cop (#8407) --- CHANGELOG.md | 2 + config/default.yml | 6 ++ docs/modules/ROOT/pages/cops.adoc | 1 + docs/modules/ROOT/pages/cops_lint.adoc | 30 +++++++ lib/rubocop.rb | 1 + .../cop/lint/out_of_range_regexp_ref.rb | 61 ++++++++++++++ .../cop/lint/out_of_range_regexp_ref_spec.rb | 81 +++++++++++++++++++ 7 files changed, 182 insertions(+) create mode 100644 lib/rubocop/cop/lint/out_of_range_regexp_ref.rb create mode 100644 spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 613f859a6ba..8896c4390c6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -21,6 +21,7 @@ * [#8417](https://github.com/rubocop-hq/rubocop/pull/8417): Add new `Style/GlobalStdStream` cop. ([@fatkodima][]) * [#7949](https://github.com/rubocop-hq/rubocop/issues/7949): Add new `Style/SingleArgumentDig` cop. ([@volfgox][]) * [#8341](https://github.com/rubocop-hq/rubocop/pull/8341): Add new `Lint/EmptyConditionalBody` cop. ([@fatkodima][]) +* [#7755](https://github.com/rubocop-hq/rubocop/issues/7755): Add new `Lint/OutOfRangeRegexpRef` cop. ([@sonalinavlakhe][]) ### Bug fixes @@ -4750,3 +4751,4 @@ [@iamravitejag]: https://github.com/iamravitejag [@volfgox]: https://github.com/volfgox [@dsavochkin]: https://github.com/dmytro-savochkin +[@sonalinavlakhe]: https://github.com/sonalinavlakhe diff --git a/config/default.yml b/config/default.yml index d12af9e9150..d8e2d72c4d7 100644 --- a/config/default.yml +++ b/config/default.yml @@ -1640,6 +1640,12 @@ Lint/OrderedMagicComments: Enabled: true VersionAdded: '0.53' +Lint/OutOfRangeRegexpRef: + Description: 'Checks for out of range reference for Regexp because it always returns nil.' 
+ Enabled: pending + Safe: false + VersionAdded: '0.89' + Lint/ParenthesesAsGroupedExpression: Description: >- Checks for method calls with a space before the opening diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index b971767730f..f900a87f38b 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -230,6 +230,7 @@ In the following section you find all available cops: * xref:cops_lint.adoc#lintnonlocalexitfromiterator[Lint/NonLocalExitFromIterator] * xref:cops_lint.adoc#lintnumberconversion[Lint/NumberConversion] * xref:cops_lint.adoc#lintorderedmagiccomments[Lint/OrderedMagicComments] +* xref:cops_lint.adoc#lintoutofrangeregexpref[Lint/OutOfRangeRegexpRef] * xref:cops_lint.adoc#lintparenthesesasgroupedexpression[Lint/ParenthesesAsGroupedExpression] * xref:cops_lint.adoc#lintpercentstringarray[Lint/PercentStringArray] * xref:cops_lint.adoc#lintpercentsymbolarray[Lint/PercentSymbolArray] diff --git a/docs/modules/ROOT/pages/cops_lint.adoc b/docs/modules/ROOT/pages/cops_lint.adoc index 440dfe09301..aad7d1e956d 100644 --- a/docs/modules/ROOT/pages/cops_lint.adoc +++ b/docs/modules/ROOT/pages/cops_lint.adoc @@ -2356,6 +2356,36 @@ p [''.frozen?, ''.encoding] #=> [true, #] p [''.frozen?, ''.encoding] #=> [true, #] ---- +== Lint/OutOfRangeRegexpRef + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Pending +| No +| No +| 0.89 +| - +|=== + +This cop looks for references of Regexp captures that are out of range +and thus always return nil. 
+ +=== Examples + +[source,ruby] +---- +/(foo)bar/ =~ 'foobar' + +# bad - always returns nil + +puts $2 # => nil + +# good + +puts $1 # => foo +---- + == Lint/ParenthesesAsGroupedExpression |=== diff --git a/lib/rubocop.rb b/lib/rubocop.rb index aee3e7f71b1..e96e16eb34f 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -288,6 +288,7 @@ require_relative 'rubocop/cop/lint/non_local_exit_from_iterator' require_relative 'rubocop/cop/lint/number_conversion' require_relative 'rubocop/cop/lint/ordered_magic_comments' +require_relative 'rubocop/cop/lint/out_of_range_regexp_ref' require_relative 'rubocop/cop/lint/parentheses_as_grouped_expression' require_relative 'rubocop/cop/lint/percent_string_array' require_relative 'rubocop/cop/lint/percent_symbol_array' diff --git a/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb b/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb new file mode 100644 index 00000000000..8a81e2a2f9b --- /dev/null +++ b/lib/rubocop/cop/lint/out_of_range_regexp_ref.rb @@ -0,0 +1,61 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Lint + # This cop looks for references of Regexp captures that are out of range + # and thus always return nil. + # + # @example + # + # /(foo)bar/ =~ 'foobar' + # + # # bad - always returns nil + # + # puts $2 # => nil + # + # # good + # + # puts $1 # => foo + # + class OutOfRangeRegexpRef < Base + MSG = 'Do not use out of range reference for the Regexp.' + + def on_new_investigation + @valid_ref = 0 + end + + def on_regexp(node) + @valid_ref = nil + return if contain_non_literal?(node) + + tree = Regexp::Parser.parse(node.content) + @valid_ref = regexp_captures(tree) + end + + def on_nth_ref(node) + backref, = *node + return if @valid_ref.nil? + + add_offense(node) if backref > @valid_ref + end + + private + + def contain_non_literal?(node) + node.children.size != 2 || !node.children.first.str_type? 
+ end + + def regexp_captures(tree) + named_capture = numbered_capture = 0 + tree.each_expression do |e| + if e.type?(:group) + e.respond_to?(:name) ? named_capture += 1 : numbered_capture += 1 + end + end + named_capture.positive? ? named_capture : numbered_capture + end + end + end + end +end diff --git a/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb b/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb new file mode 100644 index 00000000000..c386b08b8c0 --- /dev/null +++ b/spec/rubocop/cop/lint/out_of_range_regexp_ref_spec.rb @@ -0,0 +1,81 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Lint::OutOfRangeRegexpRef do + subject(:cop) { described_class.new(config) } + + let(:config) { RuboCop::Config.new } + + it 'registers an offense when references are used before any Regexp' do + expect_offense(<<~RUBY) + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'registers an offense when out of range references are used for named captures' do + expect_offense(<<~RUBY) + /(?FOO)(?BAR)/ =~ "FOOBAR" + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'registers an offense when out of range references are used for numbered captures' do + expect_offense(<<~RUBY) + /(foo)(bar)/ =~ "foobar" + puts $3 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'registers an offense when out of range references are used for mix of numbered and named captures' do + expect_offense(<<~RUBY) + /(?FOO)(BAR)/ =~ "FOOBAR" + puts $2 + ^^ Do not use out of range reference for the Regexp. + RUBY + end + + it 'registers an offense when out of range references are used for non captures' do + expect_offense(<<~RUBY) + /bar/ =~ 'foo' + puts $1 + ^^ Do not use out of range reference for the Regexp. 
+ RUBY + end + + it 'does not register offense to a regexp with valid references for named captures' do + expect_no_offenses(<<~RUBY) + /(?FOO)(?BAR)/ =~ "FOOBAR" + puts $1 + puts $2 + RUBY + end + + it 'does not register offense to a regexp with valid references for numbered captures' do + expect_no_offenses(<<~RUBY) + /(foo)(bar)/ =~ "foobar" + puts $1 + puts $2 + RUBY + end + + it 'does not register offense to a regexp with valid references for a mix named and numbered captures' do + expect_no_offenses(<<~RUBY) + /(?FOO)(BAR)/ =~ "FOOBAR" + puts $1 + RUBY + end + + # RuboCop does not know the value that a variable will contain in the regexp literal. + # For example, `/(?#{var}*)` is interpreted as `/(?*)`. + # So it does not register an offense when variables are used in regexp literals. + it 'does not register an offence Regexp containing non literal' do + expect_no_offenses(<<~'RUBY') + var = '(\d+)' + /(?#{var}*)/ =~ "12" + puts $1 + puts $2 + RUBY + end +end