forked from rubocop/rubocop
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new Lint/OutOfRangeRefInRegexp cop rubocop#7755
- Loading branch information
1 parent
1fc1981
commit d1e1f64
Showing
8 changed files
with
207 additions
and
1 deletion.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,4 @@ | ||
= Installation | ||
= Installation | ||
|
||
RuboCop's installation is pretty standard: | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
# frozen_string_literal: true | ||
|
||
module RuboCop
  module Cop
    module Lint
      # This cop looks for out-of-range numbered references (`$1`, `$2`, ...)
      # to Regexp capture groups. A reference whose number is greater than
      # the number of groups captured by the regexp always returns nil.
      #
      # @example
      #   /(foo)bar/ =~ 'foobar'
      #
      #   # bad - always returns nil
      #   puts $2 # => nil
      #
      #   # good
      #   puts $1 # => foo
      #
      class OutOfRangeRefInRegexp < Cop
        MSG = 'Do not use out of range reference for the Regexp.'

        # Walks every node of the file, remembering how many groups the most
        # recently seen regexp literal captures, and registers an offense for
        # each numbered reference above that count.
        #
        # The count is `nil` ("unknown") when it cannot be determined: before
        # any regexp literal is seen with no `Count` configured, after a
        # regexp containing interpolated variables/method calls/constants, or
        # after a regexp the parser rejects. References are not checked while
        # the count is unknown; inspection resumes at the next regexp literal
        # that can be analysed.
        #
        # @param processed_source the source being inspected; only `#ast` is read
        def investigate(processed_source)
          ast = processed_source.ast
          # Nothing to walk without an AST (presumably an empty file).
          return if ast.nil?

          valid_ref = cop_config['Count']
          ast.each_node do |node|
            if node.regexp_type?
              valid_ref = capture_count(node)
            elsif node.nth_ref_type? && valid_ref
              backref = node.children.first
              add_offense(node) if backref > valid_ref
            end
          end
        end

        private

        # Number of groups the regexp literal captures, or nil when the
        # literal cannot be analysed statically.
        def capture_count(regexp_node)
          return if contain_non_literal?(regexp_node)

          tree = parse_node(regexp_node.content)
          tree && regexp_captures(tree)
        end

        # True when the node, or anything nested inside it, is a variable,
        # a method call or a constant. Children that are not AST nodes
        # (they respond to neither `type` nor `children`) count as literal.
        def contain_non_literal?(node)
          if node.respond_to?(:type) &&
             (node.variable? || node.send_type? || node.const_type?)
            return true
          end
          return false unless node.respond_to?(:children)

          node.children.any? { |child| contain_non_literal?(child) }
        end

        # Parses the regexp source; returns nil when the scanner rejects it.
        def parse_node(content)
          Regexp::Parser.parse(content)
        rescue Regexp::Scanner::ScannerError
          nil
        end

        # Counts the capture groups in a parsed regexp. When at least one
        # named group is present only named groups are counted (as in Ruby,
        # where unnamed groups do not capture alongside named ones);
        # otherwise the plain numbered groups are counted.
        def regexp_captures(tree)
          named_capture = numbered_capture = 0
          tree.each_expression do |e|
            # `instance_of?` (exact class match) keeps the two tallies disjoint.
            named_capture += 1 if e.instance_of?(Regexp::Expression::Group::Named)
            numbered_capture += 1 if e.instance_of?(Regexp::Expression::Group::Capture)
          end

          named_capture.zero? ? numbered_capture : named_capture
        end
      end
    end
  end
end
111 changes: 111 additions & 0 deletions
111
spec/rubocop/cop/lint/out_of_range_ref_in_regexp_spec.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,111 @@ | ||
# frozen_string_literal: true | ||
|
||
# Specs for Lint/OutOfRangeRefInRegexp. In the `expect_offense` heredocs the
# `^^` marker must sit directly under the offending `$N` reference.
RSpec.describe RuboCop::Cop::Lint::OutOfRangeRefInRegexp do
  subject(:cop) { described_class.new(config) }

  let(:config) { RuboCop::Config.new }

  it 'registers an offense when out of range references are used for named captures' do
    expect_offense(<<~RUBY)
      /(?<foo>FOO)(?<bar>BAR)/ =~ "FOOBAR"
      puts $3
           ^^ Do not use out of range reference for the Regexp.
    RUBY
  end

  it 'registers an offense when out of range references are used for numbered captures' do
    expect_offense(<<~RUBY)
      /(foo)(bar)/ =~ "foobar"
      puts $3
           ^^ Do not use out of range reference for the Regexp.
    RUBY
  end

  it 'registers an offense when out of range references are used for mix of numbered and named captures' do
    expect_offense(<<~RUBY)
      /(?<foo>FOO)(BAR)/ =~ "FOOBAR"
      puts $2
           ^^ Do not use out of range reference for the Regexp.
    RUBY
  end

  it 'registers an offense when out of range references are used for non captures' do
    expect_offense(<<~RUBY)
      /bar/ =~ 'foo'
      puts $1
           ^^ Do not use out of range reference for the Regexp.
    RUBY
  end

  it 'does not register offense to a regexp with valid references for named captures' do
    expect_no_offenses(<<~RUBY)
      /(?<foo>FOO)(?<bar>BAR)/ =~ "FOOBAR"
      puts $1
      puts $2
    RUBY
  end

  it 'does not register offense to a regexp with valid references for numbered captures' do
    expect_no_offenses(<<~RUBY)
      /(foo)(bar)/ =~ "foobar"
      puts $1
      puts $2
    RUBY
  end

  it 'does not register offense to a regexp with valid references for a mix named and numbered captures' do
    expect_no_offenses(<<~RUBY)
      /(?<foo>FOO)(BAR)/ =~ "FOOBAR"
      puts $1
    RUBY
  end

  # See https://github.com/rubocop-hq/rubocop/issues/8083
  it 'does not register offense when using a Regexp cannot be processed by regexp_parser gem' do
    expect_no_offenses(<<~'RUBY')
      /data = ({"words":.+}}}[^}]*})/m
    RUBY
  end

  # RuboCop cannot know the runtime value of anything interpolated into a
  # regexp literal. For example, `/(?<foo>#{var}*)/` is seen as `/(?<foo>*)/`.
  # Therefore no offense is registered when variables are used in regexp
  # literals.
  context 'when containing a non-regexp literal' do
    it 'does not register an offence when containing a lvar' do
      expect_no_offenses(<<~'RUBY')
        var = '(\d+)'
        /(?<foo>#{var}*)/
      RUBY
    end

    it 'does not register an offence when containing a ivar' do
      expect_no_offenses(<<~'RUBY')
        /(?<foo>#{@var}*)/
      RUBY
    end

    it 'does not register an offence when containing a cvar' do
      expect_no_offenses(<<~'RUBY')
        /(?<foo>#{@@var}*)/
      RUBY
    end

    it 'does not register an offence when containing a gvar' do
      expect_no_offenses(<<~'RUBY')
        /(?<foo>#{$var}*)/
      RUBY
    end

    it 'does not register an offence when containing a method' do
      expect_no_offenses(<<~'RUBY')
        /(?<foo>#{do_something}*)/
      RUBY
    end

    it 'does not register an offence when containing a constant' do
      expect_no_offenses(<<~'RUBY')
        /(?<foo>#{CONST}*)/
      RUBY
    end
  end
end