Skip to content

Commit

Permalink
Add new Lint/DuplicateRegexpCharacterClassElement cop
Browse files Browse the repository at this point in the history
  • Loading branch information
owst committed Oct 15, 2020
1 parent 171eaf3 commit 52d3d2b
Show file tree
Hide file tree
Showing 9 changed files with 269 additions and 0 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Expand Up @@ -2,6 +2,10 @@

## master (unreleased)

### New features

* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Style/DuplicateRegexpCharacterClassElement` cop. ([@owst][])

### Bug fixes

* [#8892](https://github.com/rubocop-hq/rubocop/issues/8892): Fix an error for `Style/StringConcatenation` when correcting nested concatenable parts. ([@fatkodima][])
Expand Down
5 changes: 5 additions & 0 deletions config/default.yml
Expand Up @@ -2951,6 +2951,11 @@ Style/DoubleNegation:
- allowed_in_returns
- forbidden

Style/DuplicateRegexpCharacterClassElement:
Description: 'Checks for duplicate elements in Regexp character classes.'
Enabled: pending
VersionAdded: '0.94'

Style/EachForSimpleLoop:
Description: >-
Use `Integer#times` for a simple loop which iterates a fixed
Expand Down
1 change: 1 addition & 0 deletions docs/modules/ROOT/pages/cops.adoc
Expand Up @@ -373,6 +373,7 @@ In the following section you find all available cops:
* xref:cops_style.adoc#styledocumentationmethod[Style/DocumentationMethod]
* xref:cops_style.adoc#styledoublecopdisabledirective[Style/DoubleCopDisableDirective]
* xref:cops_style.adoc#styledoublenegation[Style/DoubleNegation]
* xref:cops_style.adoc#styleduplicateregexpcharacterclasselement[Style/DuplicateRegexpCharacterClassElement]
* xref:cops_style.adoc#styleeachforsimpleloop[Style/EachForSimpleLoop]
* xref:cops_style.adoc#styleeachwithobject[Style/EachWithObject]
* xref:cops_style.adoc#styleemptyblockparameter[Style/EmptyBlockParameter]
Expand Down
31 changes: 31 additions & 0 deletions docs/modules/ROOT/pages/cops_style.adoc
Expand Up @@ -2425,6 +2425,37 @@ end

* https://rubystyle.guide#no-bang-bang

== Style/DuplicateRegexpCharacterClassElement

|===
| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged

| Pending
| Yes
| Yes
| 0.94
| -
|===

This cop checks for duplicate elements in Regexp character classes.

=== Examples

[source,ruby]
----
# bad
r = /[xyx]/
# bad
r = /[0-9x0-9]/
# good
r = /[xy]/
# good
r = /[0-9x]/
----

== Style/EachForSimpleLoop

|===
Expand Down
1 change: 1 addition & 0 deletions lib/rubocop.rb
Expand Up @@ -421,6 +421,7 @@
require_relative 'rubocop/cop/style/documentation'
require_relative 'rubocop/cop/style/double_cop_disable_directive'
require_relative 'rubocop/cop/style/double_negation'
require_relative 'rubocop/cop/style/duplicate_regexp_character_class_element'
require_relative 'rubocop/cop/style/each_for_simple_loop'
require_relative 'rubocop/cop/style/each_with_object'
require_relative 'rubocop/cop/style/empty_block_parameter'
Expand Down
73 changes: 73 additions & 0 deletions lib/rubocop/cop/style/duplicate_regexp_character_class_element.rb
@@ -0,0 +1,73 @@
# frozen_string_literal: true

module RuboCop
module Cop
module Style
# This cop checks for duplicate elements in Regexp character classes.
#
# @example
#
# # bad
# r = /[xyx]/
#
# # bad
# r = /[0-9x0-9]/
#
# # good
# r = /[xy]/
#
# # good
# r = /[0-9x]/
class DuplicateRegexpCharacterClassElement < Base
include RangeHelp
extend AutoCorrector

MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'

def on_regexp(node)
each_repeated_character_class_element_loc(node) do |loc|
add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector|
corrector.remove(loc)
end
end
end

def each_repeated_character_class_element_loc(node)
node.parsed_tree&.each_expression do |expr|
next if expr.type != :set || expr.token == :intersection

seen = Set.new

expr.expressions.each do |child|
next if within_interpolation?(node, child)

child_source = child.to_s

yield node.parsed_tree_expr_loc(child) if seen.include?(child_source)

seen << child_source
end
end
end

private

# Since we blank interpolations with a space for every char of the interpolation, we would
# mark every space (except the first) as duplicate if we do not skip regexp_parser nodes
# that are within an interpolation.
def within_interpolation?(node, child)
@interpolation_locs ||= {}

# Cache by loc, not by regexp content, as content can be repeated in multiple patterns
key = node.loc

@interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation|
interpolation.loc.expression
end

@interpolation_locs[key].any? { |il| il.overlaps?(node.parsed_tree_expr_loc(child)) }
end
end
end
end
end
5 changes: 5 additions & 0 deletions lib/rubocop/ext/regexp_node.rb
Expand Up @@ -38,6 +38,11 @@ def each_capture(named: ANY)
self
end

# @return [Parser::Source::Range] the range of the parse-tree expression
def parsed_tree_expr_loc(expr)
loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length)
end

private

def with_interpolations_blanked
Expand Down
@@ -0,0 +1,125 @@
# frozen_string_literal: true

RSpec.describe RuboCop::Cop::Style::DuplicateRegexpCharacterClassElement do
subject(:cop) { described_class.new }

context 'with a repeated character class element' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /[xyx]/
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /[xy]/
RUBY
end
end

context 'with no repeated character class elements' do
it 'registers an offense and corrects' do
expect_no_offenses(<<~RUBY)
foo = /[xyz]/
RUBY
end
end

context 'with repeated elements in different character classes' do
it 'registers an offense and corrects' do
expect_no_offenses(<<~RUBY)
foo = /[xyz][xyz]/
RUBY
end
end

context 'with a repeated character class element and %r{} literal' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = %r{[xyx]}
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = %r{[xy]}
RUBY
end
end

context 'with a repeated character class element inside a group' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /([xyx])/
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /([xy])/
RUBY
end
end

context 'with a repeated character posix character class inside a group' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /([[:alnum:]y[:alnum:]])/
^^^^^^^^^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /([[:alnum:]y])/
RUBY
end
end

context 'with a repeated character class element with interpolation' do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
foo = /([a#{foo}a#{bar}a])/
^ Duplicate element inside regexp character class
^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~'RUBY')
foo = /([a#{foo}#{bar}])/
RUBY
end
end

context 'with a repeated range element' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /[0-9x0-9]/
^^^ Duplicate element inside regexp character class
RUBY

expect_correction(<<~RUBY)
foo = /[0-9x]/
RUBY
end
end

context 'with a repeated intersection character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
foo = /[ab&&ab]/
RUBY
end
end

context 'with a range that covers a repeated element character class' do
it 'does not register an offense' do
expect_no_offenses(<<~RUBY)
foo = /[a-cb]/
RUBY
end
end

context 'with multiple regexps with the same interpolation' do
it 'does not register an offense' do
expect_no_offenses(<<~'RUBY')
a_field.gsub!(/[#{bad_chars}]/, '')
some_other_field.gsub!(/[#{bad_chars}]/, '')
RUBY
end
end
end
24 changes: 24 additions & 0 deletions spec/rubocop/ext/regexp_node_spec.rb
Expand Up @@ -97,4 +97,28 @@
end
end
end

describe '#parsed_node_loc' do
let(:source) { '/([a-z]+)\d*\s?(?:foo)/' }

it 'returns the correct loc for each node in the parsed_tree' do
loc_sources = node.parsed_tree.each_expression.map do |regexp_node|
node.parsed_tree_expr_loc(regexp_node).source
end

expect(loc_sources).to eq(
[
'([a-z]+)',
'[a-z]+',
'a-z',
'a',
'z',
'\d*',
'\s?',
'(?:foo)',
'foo'
]
)
end
end
end

0 comments on commit 52d3d2b

Please sign in to comment.