Skip to content

Commit

Permalink
Add new Lint/DuplicateRegexpCharacterClassElement cop (#8896)
Browse files Browse the repository at this point in the history
  • Loading branch information
owst committed Oct 26, 2020
1 parent 291aec7 commit fd65a6b
Show file tree
Hide file tree
Showing 9 changed files with 283 additions and 0 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -4,6 +4,7 @@

### New features

* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Lint/DuplicateRegexpCharacterClassElement` cop. ([@owst][])
* [#8895](https://github.com/rubocop-hq/rubocop/pull/8895): Add new `Lint/EmptyBlock` cop. ([@fatkodima][])
* [#8934](https://github.com/rubocop-hq/rubocop/pull/8934): Add new `Style/SwapValues` cop. ([@fatkodima][])
* [#7549](https://github.com/rubocop-hq/rubocop/issues/7549): Add new `Style/ArgumentsForwarding` cop. ([@koic][])
Expand Down
5 changes: 5 additions & 0 deletions config/default.yml
Expand Up @@ -1454,6 +1454,11 @@ Lint/DuplicateMethods:
Enabled: true
VersionAdded: '0.29'

Lint/DuplicateRegexpCharacterClassElement:
Description: 'Checks for duplicate elements in Regexp character classes.'
Enabled: pending
VersionAdded: '1.1'

Lint/DuplicateRequire:
Description: 'Check for duplicate `require`s and `require_relative`s.'
Enabled: true
Expand Down
1 change: 1 addition & 0 deletions docs/modules/ROOT/pages/cops.adoc
Expand Up @@ -200,6 +200,7 @@ In the following section you find all available cops:
* xref:cops_lint.adoc#lintduplicateelsifcondition[Lint/DuplicateElsifCondition]
* xref:cops_lint.adoc#lintduplicatehashkey[Lint/DuplicateHashKey]
* xref:cops_lint.adoc#lintduplicatemethods[Lint/DuplicateMethods]
* xref:cops_lint.adoc#lintduplicateregexpcharacterclasselement[Lint/DuplicateRegexpCharacterClassElement]
* xref:cops_lint.adoc#lintduplicaterequire[Lint/DuplicateRequire]
* xref:cops_lint.adoc#lintduplicaterescueexception[Lint/DuplicateRescueException]
* xref:cops_lint.adoc#linteachwithobjectargument[Lint/EachWithObjectArgument]
Expand Down
31 changes: 31 additions & 0 deletions docs/modules/ROOT/pages/cops_lint.adoc
Expand Up @@ -848,6 +848,37 @@ end
alias bar foo
----

== Lint/DuplicateRegexpCharacterClassElement

|===
| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged

| Pending
| Yes
| Yes
| 1.1
| -
|===

This cop checks for duplicate elements in Regexp character classes.

=== Examples

[source,ruby]
----
# bad
r = /[xyx]/
# bad
r = /[0-9x0-9]/
# good
r = /[xy]/
# good
r = /[0-9x]/
----

== Lint/DuplicateRequire

|===
Expand Down
1 change: 1 addition & 0 deletions lib/rubocop.rb
Expand Up @@ -262,6 +262,7 @@
require_relative 'rubocop/cop/lint/duplicate_elsif_condition'
require_relative 'rubocop/cop/lint/duplicate_hash_key'
require_relative 'rubocop/cop/lint/duplicate_methods'
require_relative 'rubocop/cop/lint/duplicate_regexp_character_class_element'
require_relative 'rubocop/cop/lint/duplicate_require'
require_relative 'rubocop/cop/lint/duplicate_rescue_exception'
require_relative 'rubocop/cop/lint/each_with_object_argument'
Expand Down
77 changes: 77 additions & 0 deletions lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb
@@ -0,0 +1,77 @@
# frozen_string_literal: true

module RuboCop
module Cop
module Lint
# This cop checks for duplicate elements in Regexp character classes.
#
# @example
#
# # bad
# r = /[xyx]/
#
# # bad
# r = /[0-9x0-9]/
#
# # good
# r = /[xy]/
#
# # good
# r = /[0-9x]/
class DuplicateRegexpCharacterClassElement < Base
include RangeHelp
extend AutoCorrector

MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'

def on_regexp(node)
each_repeated_character_class_element_loc(node) do |loc|
add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector|
corrector.remove(loc)
end
end
end

def each_repeated_character_class_element_loc(node)
node.parsed_tree&.each_expression do |expr|
next if expr.type != :set || expr.token == :intersection

seen = Set.new

expr.expressions.each do |child|
next if within_interpolation?(node, child)

child_source = child.to_s

yield node.parsed_tree_expr_loc(child) if seen.include?(child_source)

seen << child_source
end
end
end

private

# Since we blank interpolations with a space for every char of the interpolation, we would
# mark every space (except the first) as duplicate if we do not skip regexp_parser nodes
# that are within an interpolation.
def within_interpolation?(node, child)
parse_tree_child_loc = node.parsed_tree_expr_loc(child)

interpolation_locs(node).any? { |il| il.overlaps?(parse_tree_child_loc) }
end

def interpolation_locs(node)
@interpolation_locs ||= {}

# Cache by loc, not by regexp content, as content can be repeated in multiple patterns
key = node.loc

@interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation|
interpolation.loc.expression
end
end
end
end
end
end
5 changes: 5 additions & 0 deletions lib/rubocop/ext/regexp_node.rb
Expand Up @@ -38,6 +38,11 @@ def each_capture(named: ANY)
self
end

# @return [Parser::Source::Range] the range of the parse-tree expression
def parsed_tree_expr_loc(expr)
loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length)
end

private

def with_interpolations_blanked
Expand Down
138 changes: 138 additions & 0 deletions spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb
@@ -0,0 +1,138 @@
# frozen_string_literal: true

RSpec.describe RuboCop::Cop::Lint::DuplicateRegexpCharacterClassElement do
subject(:cop) { described_class.new }

context 'with a repeated character class element' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /[xyx]/
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /[xy]/
RUBY
end
end

context 'with a repeated character class element with quantifier' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /[xyx]+/
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /[xy]+/
RUBY
end
end

context 'with no repeated character class elements' do
it 'registers an offense and corrects' do
expect_no_offenses(<<~RUBY)
foo = /[xyz]/
RUBY
end
end

context 'with repeated elements in different character classes' do
it 'registers an offense and corrects' do
expect_no_offenses(<<~RUBY)
foo = /[xyz][xyz]/
RUBY
end
end

context 'with a repeated character class element and %r{} literal' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = %r{[xyx]}
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = %r{[xy]}
RUBY
end
end

context 'with a repeated character class element inside a group' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /([xyx])/
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /([xy])/
RUBY
end
end

context 'with a repeated character posix character class inside a group' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /([[:alnum:]y[:alnum:]])/
^^^^^^^^^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /([[:alnum:]y])/
RUBY
end
end

context 'with a repeated character class element with interpolation' do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
foo = /([a#{foo}a#{bar}a])/
^ Duplicate element inside regexp character class
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~'RUBY')
foo = /([a#{foo}#{bar}])/
RUBY
end
end

context 'with a repeated range element' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /[0-9x0-9]/
^^^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /[0-9x]/
RUBY
end
end

context 'with a repeated intersection character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
foo = /[ab&&ab]/
RUBY
end
end

context 'with a range that covers a repeated element character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
foo = /[a-cb]/
RUBY
end
end

context 'with multiple regexps with the same interpolation' do
it 'does not register an offense' do
expect_no_offenses(<<~'RUBY')
a_field.gsub!(/[#{bad_chars}]/, '')
some_other_field.gsub!(/[#{bad_chars}]/, '')
RUBY
end
end
end
24 changes: 24 additions & 0 deletions spec/rubocop/ext/regexp_node_spec.rb
Expand Up @@ -97,4 +97,28 @@
end
end
end

describe '#parsed_node_loc' do
let(:source) { '/([a-z]+)\d*\s?(?:foo)/' }

it 'returns the correct loc for each node in the parsed_tree' do
loc_sources = node.parsed_tree.each_expression.map do |regexp_node|
node.parsed_tree_expr_loc(regexp_node).source
end

expect(loc_sources).to eq(
[
'([a-z]+)',
'[a-z]+',
'a-z',
'a',
'z',
'\d*',
'\s?',
'(?:foo)',
'foo'
]
)
end
end
end

0 comments on commit fd65a6b

Please sign in to comment.