Add new Lint/DuplicateRegexpCharacterClassElement cop

rubocop · Oct 15, 2020 · 52d3d2b · 52d3d2b
1 parent 171eaf3
commit 52d3d2b
Show file tree

Hide file tree

Showing 9 changed files with 269 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -2,6 +2,10 @@
 
 ## master (unreleased)
 
+### New features
+
+* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Style/DuplicateRegexpCharacterClassElement` cop. ([@owst][])
+
 ### Bug fixes
 
 * [#8892](https://github.com/rubocop-hq/rubocop/issues/8892): Fix an error for `Style/StringConcatenation` when correcting nested concatenable parts. ([@fatkodima][])

diff --git a/config/default.yml b/config/default.yml
@@ -2951,6 +2951,11 @@ Style/DoubleNegation:
     - allowed_in_returns
     - forbidden
 
+Style/DuplicateRegexpCharacterClassElement:
+  Description: 'Checks for duplicate elements in Regexp character classes.'
+  Enabled: pending
+  VersionAdded: '0.94'
+
 Style/EachForSimpleLoop:
   Description: >-
                  Use `Integer#times` for a simple loop which iterates a fixed

diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc
@@ -373,6 +373,7 @@ In the following section you find all available cops:
 * xref:cops_style.adoc#styledocumentationmethod[Style/DocumentationMethod]
 * xref:cops_style.adoc#styledoublecopdisabledirective[Style/DoubleCopDisableDirective]
 * xref:cops_style.adoc#styledoublenegation[Style/DoubleNegation]
+* xref:cops_style.adoc#styleduplicateregexpcharacterclasselement[Style/DuplicateRegexpCharacterClassElement]
 * xref:cops_style.adoc#styleeachforsimpleloop[Style/EachForSimpleLoop]
 * xref:cops_style.adoc#styleeachwithobject[Style/EachWithObject]
 * xref:cops_style.adoc#styleemptyblockparameter[Style/EmptyBlockParameter]

diff --git a/docs/modules/ROOT/pages/cops_style.adoc b/docs/modules/ROOT/pages/cops_style.adoc
@@ -2425,6 +2425,37 @@ end
 
 * https://rubystyle.guide#no-bang-bang
 
+== Style/DuplicateRegexpCharacterClassElement
+
+|===
+| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged
+
+| Pending
+| Yes
+| Yes
+| 0.94
+| -
+|===
+
+This cop checks for duplicate elements in Regexp character classes.
+
+=== Examples
+
+[source,ruby]
+----
+# bad
+r = /[xyx]/
+
+# bad
+r = /[0-9x0-9]/
+
+# good
+r = /[xy]/
+
+# good
+r = /[0-9x]/
+----
+
 == Style/EachForSimpleLoop
 
 |===

diff --git a/lib/rubocop.rb b/lib/rubocop.rb
@@ -421,6 +421,7 @@
 require_relative 'rubocop/cop/style/documentation'
 require_relative 'rubocop/cop/style/double_cop_disable_directive'
 require_relative 'rubocop/cop/style/double_negation'
+require_relative 'rubocop/cop/style/duplicate_regexp_character_class_element'
 require_relative 'rubocop/cop/style/each_for_simple_loop'
 require_relative 'rubocop/cop/style/each_with_object'
 require_relative 'rubocop/cop/style/empty_block_parameter'

diff --git a/lib/rubocop/cop/style/duplicate_regexp_character_class_element.rb b/lib/rubocop/cop/style/duplicate_regexp_character_class_element.rb
@@ -0,0 +1,73 @@
+# frozen_string_literal: true
+
+module RuboCop
+  module Cop
+    module Style
+      # This cop checks for duplicate elements in Regexp character classes.
+      #
+      # @example
+      #
+      #   # bad
+      #   r = /[xyx]/
+      #
+      #   # bad
+      #   r = /[0-9x0-9]/
+      #
+      #   # good
+      #   r = /[xy]/
+      #
+      #   # good
+      #   r = /[0-9x]/
+      class DuplicateRegexpCharacterClassElement < Base
+        include RangeHelp
+        extend AutoCorrector
+
+        MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'
+
+        def on_regexp(node)
+          each_repeated_character_class_element_loc(node) do |loc|
+            add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector|
+              corrector.remove(loc)
+            end
+          end
+        end
+
+        def each_repeated_character_class_element_loc(node)
+          node.parsed_tree&.each_expression do |expr|
+            next if expr.type != :set || expr.token == :intersection
+
+            seen = Set.new
+
+            expr.expressions.each do |child|
+              next if within_interpolation?(node, child)
+
+              child_source = child.to_s
+
+              yield node.parsed_tree_expr_loc(child) if seen.include?(child_source)
+
+              seen << child_source
+            end
+          end
+        end
+
+        private
+
+        # Since we blank interpolations with a space for every char of the interpolation, we would
+        # mark every space (except the first) as duplicate if we do not skip regexp_parser nodes
+        # that are within an interpolation.
+        def within_interpolation?(node, child)
+          @interpolation_locs ||= {}
+
+          # Cache by loc, not by regexp content, as content can be repeated in multiple patterns
+          key = node.loc
+
+          @interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation|
+            interpolation.loc.expression
+          end
+
+          @interpolation_locs[key].any? { |il| il.overlaps?(node.parsed_tree_expr_loc(child)) }
+        end
+      end
+    end
+  end
+end
diff --git a/lib/rubocop/ext/regexp_node.rb b/lib/rubocop/ext/regexp_node.rb
@@ -38,6 +38,11 @@ def each_capture(named: ANY)
         self
       end
 
+      # @return [Parser::Source::Range] the range of the parse-tree expression
+      def parsed_tree_expr_loc(expr)
+        loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length)
+      end
+
       private
 
       def with_interpolations_blanked

diff --git a/spec/rubocop/cop/style/duplicate_regexp_character_class_element_spec.rb b/spec/rubocop/cop/style/duplicate_regexp_character_class_element_spec.rb
@@ -0,0 +1,125 @@
+# frozen_string_literal: true
+
+RSpec.describe RuboCop::Cop::Style::DuplicateRegexpCharacterClassElement do
+  subject(:cop) { described_class.new }
+
+  context 'with a repeated character class element' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /[xyx]/
+                  ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /[xy]/
+      RUBY
+    end
+  end
+
+  context 'with no repeated character class elements' do
+    it 'registers an offense and corrects' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[xyz]/
+      RUBY
+    end
+  end
+
+  context 'with repeated elements in different character classes' do
+    it 'registers an offense and corrects' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[xyz][xyz]/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element and %r{} literal' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = %r{[xyx]}
+                    ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = %r{[xy]}
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element inside a group' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /([xyx])/
+                   ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /([xy])/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character posix character class inside a group' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /([[:alnum:]y[:alnum:]])/
+                           ^^^^^^^^^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /([[:alnum:]y])/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element with interpolation' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~'RUBY')
+        foo = /([a#{foo}a#{bar}a])/
+                        ^ Duplicate element inside regexp character class
+                               ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~'RUBY')
+        foo = /([a#{foo}#{bar}])/
+      RUBY
+    end
+  end
+
+  context 'with a repeated range element' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /[0-9x0-9]/
+                    ^^^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /[0-9x]/
+      RUBY
+    end
+  end
+
+  context 'with a repeated intersection character class' do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[ab&&ab]/
+      RUBY
+    end
+  end
+
+  context 'with a range that covers a repeated element character class' do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[a-cb]/
+      RUBY
+    end
+  end
+
+  context 'with multiple regexps with the same interpolation' do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~'RUBY')
+        a_field.gsub!(/[#{bad_chars}]/, '')
+        some_other_field.gsub!(/[#{bad_chars}]/, '')
+      RUBY
+    end
+  end
+end
diff --git a/spec/rubocop/ext/regexp_node_spec.rb b/spec/rubocop/ext/regexp_node_spec.rb
@@ -97,4 +97,28 @@
       end
     end
   end
+
+  describe '#parsed_node_loc' do
+    let(:source) { '/([a-z]+)\d*\s?(?:foo)/' }
+
+    it 'returns the correct loc for each node in the parsed_tree' do
+      loc_sources = node.parsed_tree.each_expression.map do |regexp_node|
+        node.parsed_tree_expr_loc(regexp_node).source
+      end
+
+      expect(loc_sources).to eq(
+        [
+          '([a-z]+)',
+          '[a-z]+',
+          'a-z',
+          'a',
+          'z',
+          '\d*',
+          '\s?',
+          '(?:foo)',
+          'foo'
+        ]
+      )
+    end
+  end
 end