Add new Lint/DuplicateRegexpCharacterClassElement cop (#8896)

rubocop · Oct 26, 2020 · fd65a6b · fd65a6b
1 parent 291aec7
commit fd65a6b
Show file tree

Hide file tree

Showing 9 changed files with 283 additions and 0 deletions.
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -4,6 +4,7 @@
 
 ### New features
 
+* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Lint/DuplicateRegexpCharacterClassElement` cop. ([@owst][])
 * [#8895](https://github.com/rubocop-hq/rubocop/pull/8895): Add new `Lint/EmptyBlock` cop. ([@fatkodima][])
 * [#8934](https://github.com/rubocop-hq/rubocop/pull/8934): Add new `Style/SwapValues` cop. ([@fatkodima][])
 * [#7549](https://github.com/rubocop-hq/rubocop/issues/7549): Add new `Style/ArgumentsForwarding` cop. ([@koic][])

diff --git a/config/default.yml b/config/default.yml
@@ -1454,6 +1454,11 @@ Lint/DuplicateMethods:
   Enabled: true
   VersionAdded: '0.29'
 
+Lint/DuplicateRegexpCharacterClassElement:
+  Description: 'Checks for duplicate elements in Regexp character classes.'
+  Enabled: pending
+  VersionAdded: '1.1'
+
 Lint/DuplicateRequire:
   Description: 'Check for duplicate `require`s and `require_relative`s.'
   Enabled: true

diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc
@@ -200,6 +200,7 @@ In the following section you find all available cops:
 * xref:cops_lint.adoc#lintduplicateelsifcondition[Lint/DuplicateElsifCondition]
 * xref:cops_lint.adoc#lintduplicatehashkey[Lint/DuplicateHashKey]
 * xref:cops_lint.adoc#lintduplicatemethods[Lint/DuplicateMethods]
+* xref:cops_lint.adoc#lintduplicateregexpcharacterclasselement[Lint/DuplicateRegexpCharacterClassElement]
 * xref:cops_lint.adoc#lintduplicaterequire[Lint/DuplicateRequire]
 * xref:cops_lint.adoc#lintduplicaterescueexception[Lint/DuplicateRescueException]
 * xref:cops_lint.adoc#linteachwithobjectargument[Lint/EachWithObjectArgument]

diff --git a/docs/modules/ROOT/pages/cops_lint.adoc b/docs/modules/ROOT/pages/cops_lint.adoc
@@ -848,6 +848,37 @@ end
 alias bar foo
 ----
 
+== Lint/DuplicateRegexpCharacterClassElement
+
+|===
+| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged
+
+| Pending
+| Yes
+| Yes
+| 1.1
+| -
+|===
+
+This cop checks for duplicate elements in Regexp character classes.
+
+=== Examples
+
+[source,ruby]
+----
+# bad
+r = /[xyx]/
+
+# bad
+r = /[0-9x0-9]/
+
+# good
+r = /[xy]/
+
+# good
+r = /[0-9x]/
+----
+
 == Lint/DuplicateRequire
 
 |===

diff --git a/lib/rubocop.rb b/lib/rubocop.rb
@@ -262,6 +262,7 @@
 require_relative 'rubocop/cop/lint/duplicate_elsif_condition'
 require_relative 'rubocop/cop/lint/duplicate_hash_key'
 require_relative 'rubocop/cop/lint/duplicate_methods'
+require_relative 'rubocop/cop/lint/duplicate_regexp_character_class_element'
 require_relative 'rubocop/cop/lint/duplicate_require'
 require_relative 'rubocop/cop/lint/duplicate_rescue_exception'
 require_relative 'rubocop/cop/lint/each_with_object_argument'

diff --git a/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb
@@ -0,0 +1,77 @@
+# frozen_string_literal: true
+
+module RuboCop
+  module Cop
+    module Lint
+      # This cop checks for duplicate elements in Regexp character classes.
+      #
+      # @example
+      #
+      #   # bad
+      #   r = /[xyx]/
+      #
+      #   # bad
+      #   r = /[0-9x0-9]/
+      #
+      #   # good
+      #   r = /[xy]/
+      #
+      #   # good
+      #   r = /[0-9x]/
+      class DuplicateRegexpCharacterClassElement < Base
+        include RangeHelp
+        extend AutoCorrector
+
+        MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'
+
+        def on_regexp(node)
+          each_repeated_character_class_element_loc(node) do |loc|
+            add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector|
+              corrector.remove(loc)
+            end
+          end
+        end
+
+        def each_repeated_character_class_element_loc(node)
+          node.parsed_tree&.each_expression do |expr|
+            next if expr.type != :set || expr.token == :intersection
+
+            seen = Set.new
+
+            expr.expressions.each do |child|
+              next if within_interpolation?(node, child)
+
+              child_source = child.to_s
+
+              yield node.parsed_tree_expr_loc(child) if seen.include?(child_source)
+
+              seen << child_source
+            end
+          end
+        end
+
+        private
+
+        # Since we blank interpolations with a space for every char of the interpolation, we would
+        # mark every space (except the first) as duplicate if we do not skip regexp_parser nodes
+        # that are within an interpolation.
+        def within_interpolation?(node, child)
+          parse_tree_child_loc = node.parsed_tree_expr_loc(child)
+
+          interpolation_locs(node).any? { |il| il.overlaps?(parse_tree_child_loc) }
+        end
+
+        def interpolation_locs(node)
+          @interpolation_locs ||= {}
+
+          # Cache by loc, not by regexp content, as content can be repeated in multiple patterns
+          key = node.loc
+
+          @interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation|
+            interpolation.loc.expression
+          end
+        end
+      end
+    end
+  end
+end
diff --git a/lib/rubocop/ext/regexp_node.rb b/lib/rubocop/ext/regexp_node.rb
@@ -38,6 +38,11 @@ def each_capture(named: ANY)
         self
       end
 
+      # @return [Parser::Source::Range] the range of the parse-tree expression
+      def parsed_tree_expr_loc(expr)
+        loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length)
+      end
+
       private
 
       def with_interpolations_blanked

diff --git a/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb b/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb
@@ -0,0 +1,138 @@
+# frozen_string_literal: true
+
+RSpec.describe RuboCop::Cop::Lint::DuplicateRegexpCharacterClassElement do
+  subject(:cop) { described_class.new }
+
+  context 'with a repeated character class element' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /[xyx]/
+                  ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /[xy]/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element with quantifier' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /[xyx]+/
+                  ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /[xy]+/
+      RUBY
+    end
+  end
+
+  context 'with no repeated character class elements' do
+    it 'registers an offense and corrects' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[xyz]/
+      RUBY
+    end
+  end
+
+  context 'with repeated elements in different character classes' do
+    it 'registers an offense and corrects' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[xyz][xyz]/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element and %r{} literal' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = %r{[xyx]}
+                    ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = %r{[xy]}
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element inside a group' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /([xyx])/
+                   ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /([xy])/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character posix character class inside a group' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /([[:alnum:]y[:alnum:]])/
+                           ^^^^^^^^^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /([[:alnum:]y])/
+      RUBY
+    end
+  end
+
+  context 'with a repeated character class element with interpolation' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~'RUBY')
+        foo = /([a#{foo}a#{bar}a])/
+                        ^ Duplicate element inside regexp character class
+                               ^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~'RUBY')
+        foo = /([a#{foo}#{bar}])/
+      RUBY
+    end
+  end
+
+  context 'with a repeated range element' do
+    it 'registers an offense and corrects' do
+      expect_offense(<<~RUBY)
+        foo = /[0-9x0-9]/
+                    ^^^ Duplicate element inside regexp character class
+      RUBY
+
+      expect_correction(<<~RUBY)
+        foo = /[0-9x]/
+      RUBY
+    end
+  end
+
+  context 'with a repeated intersection character class' do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[ab&&ab]/
+      RUBY
+    end
+  end
+
+  context 'with a range that covers a repeated element character class' do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~RUBY)
+        foo = /[a-cb]/
+      RUBY
+    end
+  end
+
+  context 'with multiple regexps with the same interpolation' do
+    it 'does not register an offense' do
+      expect_no_offenses(<<~'RUBY')
+        a_field.gsub!(/[#{bad_chars}]/, '')
+        some_other_field.gsub!(/[#{bad_chars}]/, '')
+      RUBY
+    end
+  end
+end
diff --git a/spec/rubocop/ext/regexp_node_spec.rb b/spec/rubocop/ext/regexp_node_spec.rb
@@ -97,4 +97,28 @@
       end
     end
   end
+
+  describe '#parsed_node_loc' do
+    let(:source) { '/([a-z]+)\d*\s?(?:foo)/' }
+
+    it 'returns the correct loc for each node in the parsed_tree' do
+      loc_sources = node.parsed_tree.each_expression.map do |regexp_node|
+        node.parsed_tree_expr_loc(regexp_node).source
+      end
+
+      expect(loc_sources).to eq(
+        [
+          '([a-z]+)',
+          '[a-z]+',
+          'a-z',
+          'a',
+          'z',
+          '\d*',
+          '\s?',
+          '(?:foo)',
+          'foo'
+        ]
+      )
+    end
+  end
 end