Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Add new `Lint/DuplicateRegexpCharacterClassElement` cop (#8896)
- Loading branch information
Showing 9 changed files with 283 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
77 changes: 77 additions & 0 deletions
77
lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
# frozen_string_literal: true | ||
|
||
module RuboCop
  module Cop
    module Lint
      # This cop checks for duplicate elements in Regexp character classes.
      #
      # An element is reported when its source text has already appeared
      # earlier in the same character class. The comparison is textual, so
      # an element that is merely covered by a range (the `b` in `/[a-cb]/`)
      # is not reported. Set expressions whose token is `:intersection` and
      # elements that overlap an interpolation are skipped.
      #
      # @example
      #
      #   # bad
      #   r = /[xyx]/
      #
      #   # bad
      #   r = /[0-9x0-9]/
      #
      #   # good
      #   r = /[xy]/
      #
      #   # good
      #   r = /[0-9x]/
      class DuplicateRegexpCharacterClassElement < Base
        include RangeHelp
        extend AutoCorrector

        MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'

        # Adds one offense per duplicated character class element found in
        # `node`; the auto-correction removes the duplicated source range.
        def on_regexp(node)
          each_repeated_character_class_element_loc(node) do |duplicate_loc|
            add_offense(duplicate_loc, message: MSG_REPEATED_ELEMENT) do |corrector|
              corrector.remove(duplicate_loc)
            end
          end
        end

        # Walks every expression of the regexp's parsed tree and yields the
        # location of each character class element whose text was already
        # seen in the same character class.
        #
        # Does nothing when `node.parsed_tree` is nil.
        #
        # @yield [loc] the result of `node.parsed_tree_expr_loc` for the
        #   duplicated element
        def each_repeated_character_class_element_loc(node)
          tree = node.parsed_tree
          return if tree.nil?

          tree.each_expression do |expr|
            next unless expr.type == :set
            next if expr.token == :intersection

            # Duplicates are tracked per character class, never across classes.
            seen_sources = Set.new

            expr.expressions.each do |element|
              next if within_interpolation?(node, element)

              # `Set#add?` returns nil when the text is already in the set.
              yield node.parsed_tree_expr_loc(element) unless seen_sources.add?(element.to_s)
            end
          end
        end

        private

        # Interpolations are blanked with one space per character of the
        # interpolation, so without this check every one of those spaces
        # (except the first) would be reported as a duplicate. Any
        # regexp_parser node overlapping an interpolation is therefore skipped.
        def within_interpolation?(node, child)
          child_loc = node.parsed_tree_expr_loc(child)

          interpolation_locs(node).any? { |interpolation_loc| interpolation_loc.overlaps?(child_loc) }
        end

        # Source ranges of the `begin`-type children (the interpolations) of
        # `node`, memoized per node.
        def interpolation_locs(node)
          @interpolation_locs ||= {}

          # Keyed by the node's location rather than by the regexp content,
          # because the same content can appear in several patterns.
          @interpolation_locs[node.loc] ||=
            node.children.select(&:begin_type?).map(&:loc).map(&:expression)
        end
      end
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
138 changes: 138 additions & 0 deletions
138
spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,138 @@ | ||
# frozen_string_literal: true | ||
|
||
# Specs for Lint/DuplicateRegexpCharacterClassElement.
#
# In the `expect_offense` heredocs, each `^` annotation line is aligned
# directly beneath the source columns that the offense is expected to cover.
RSpec.describe RuboCop::Cop::Lint::DuplicateRegexpCharacterClassElement do
  subject(:cop) { described_class.new }

  context 'with a repeated character class element' do
    it 'registers an offense and corrects' do
      expect_offense(<<~RUBY)
        foo = /[xyx]/
                  ^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~RUBY)
        foo = /[xy]/
      RUBY
    end
  end

  context 'with a repeated character class element with quantifier' do
    it 'registers an offense and corrects' do
      expect_offense(<<~RUBY)
        foo = /[xyx]+/
                  ^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~RUBY)
        foo = /[xy]+/
      RUBY
    end
  end

  context 'with no repeated character class elements' do
    it 'does not register an offense' do
      expect_no_offenses(<<~RUBY)
        foo = /[xyz]/
      RUBY
    end
  end

  context 'with repeated elements in different character classes' do
    # Duplicates are only meaningful within a single character class.
    it 'does not register an offense' do
      expect_no_offenses(<<~RUBY)
        foo = /[xyz][xyz]/
      RUBY
    end
  end

  context 'with a repeated character class element and %r{} literal' do
    it 'registers an offense and corrects' do
      expect_offense(<<~RUBY)
        foo = %r{[xyx]}
                    ^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~RUBY)
        foo = %r{[xy]}
      RUBY
    end
  end

  context 'with a repeated character class element inside a group' do
    it 'registers an offense and corrects' do
      expect_offense(<<~RUBY)
        foo = /([xyx])/
                   ^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~RUBY)
        foo = /([xy])/
      RUBY
    end
  end

  context 'with a repeated character posix character class inside a group' do
    it 'registers an offense and corrects' do
      expect_offense(<<~RUBY)
        foo = /([[:alnum:]y[:alnum:]])/
                           ^^^^^^^^^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~RUBY)
        foo = /([[:alnum:]y])/
      RUBY
    end
  end

  context 'with a repeated character class element with interpolation' do
    # Only the literal `a` elements are duplicates; the interpolations
    # themselves must be left in place by the correction.
    it 'registers an offense and corrects' do
      expect_offense(<<~'RUBY')
        foo = /([a#{foo}a#{bar}a])/
                        ^ Duplicate element inside regexp character class
                               ^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~'RUBY')
        foo = /([a#{foo}#{bar}])/
      RUBY
    end
  end

  context 'with a repeated range element' do
    it 'registers an offense and corrects' do
      expect_offense(<<~RUBY)
        foo = /[0-9x0-9]/
                    ^^^ Duplicate element inside regexp character class
      RUBY

      expect_correction(<<~RUBY)
        foo = /[0-9x]/
      RUBY
    end
  end

  context 'with a repeated intersection character class' do
    it 'does not register an offense' do
      expect_no_offenses(<<~RUBY)
        foo = /[ab&&ab]/
      RUBY
    end
  end

  context 'with a range that covers a repeated element character class' do
    # `b` lies inside `a-c`, but it is not a textual repeat of any element.
    it 'does not register an offense' do
      expect_no_offenses(<<~RUBY)
        foo = /[a-cb]/
      RUBY
    end
  end

  context 'with multiple regexps with the same interpolation' do
    # Two distinct regexp literals with identical content in one source.
    it 'does not register an offense' do
      expect_no_offenses(<<~'RUBY')
        a_field.gsub!(/[#{bad_chars}]/, '')
        some_other_field.gsub!(/[#{bad_chars}]/, '')
      RUBY
    end
  end
end
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters