diff --git a/CHANGELOG.md b/CHANGELOG.md index 01fbbf882ca..d90176874a1 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New features +* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Lint/DuplicateRegexpCharacterClassElement` cop. ([@owst][]) * [#8895](https://github.com/rubocop-hq/rubocop/pull/8895): Add new `Lint/EmptyBlock` cop. ([@fatkodima][]) * [#7549](https://github.com/rubocop-hq/rubocop/issues/7549): Add new `Style/ArgumentsForwarding` cop. ([@koic][]) diff --git a/config/default.yml b/config/default.yml index c4b197e88a2..84329ecc6fd 100644 --- a/config/default.yml +++ b/config/default.yml @@ -1454,6 +1454,11 @@ Lint/DuplicateMethods: Enabled: true VersionAdded: '0.29' +Lint/DuplicateRegexpCharacterClassElement: + Description: 'Checks for duplicate elements in Regexp character classes.' + Enabled: pending + VersionAdded: '1.1' + Lint/DuplicateRequire: Description: 'Check for duplicate `require`s and `require_relative`s.' Enabled: true diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index 962acf8a32a..0511bd2092b 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -200,6 +200,7 @@ In the following section you find all available cops: * xref:cops_lint.adoc#lintduplicateelsifcondition[Lint/DuplicateElsifCondition] * xref:cops_lint.adoc#lintduplicatehashkey[Lint/DuplicateHashKey] * xref:cops_lint.adoc#lintduplicatemethods[Lint/DuplicateMethods] +* xref:cops_lint.adoc#lintduplicateregexpcharacterclasselement[Lint/DuplicateRegexpCharacterClassElement] * xref:cops_lint.adoc#lintduplicaterequire[Lint/DuplicateRequire] * xref:cops_lint.adoc#lintduplicaterescueexception[Lint/DuplicateRescueException] * xref:cops_lint.adoc#linteachwithobjectargument[Lint/EachWithObjectArgument] diff --git a/docs/modules/ROOT/pages/cops_lint.adoc b/docs/modules/ROOT/pages/cops_lint.adoc index f7c34a760ca..731f191ea64 100644 --- a/docs/modules/ROOT/pages/cops_lint.adoc +++ b/docs/modules/ROOT/pages/cops_lint.adoc @@ -848,6 +848,37 @@ end alias bar foo ---- +== Lint/DuplicateRegexpCharacterClassElement + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Pending +| Yes +| Yes +| 1.1 +| - +|=== + +This cop checks for duplicate elements in Regexp character classes. + +=== Examples + +[source,ruby] +---- +# bad +r = /[xyx]/ + +# bad +r = /[0-9x0-9]/ + +# good +r = /[xy]/ + +# good +r = /[0-9x]/ +---- + == Lint/DuplicateRequire |=== diff --git a/lib/rubocop.rb b/lib/rubocop.rb index beade3dc188..53d7e988302 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -262,6 +262,7 @@ require_relative 'rubocop/cop/lint/duplicate_elsif_condition' require_relative 'rubocop/cop/lint/duplicate_hash_key' require_relative 'rubocop/cop/lint/duplicate_methods' +require_relative 'rubocop/cop/lint/duplicate_regexp_character_class_element' require_relative 'rubocop/cop/lint/duplicate_require' require_relative 'rubocop/cop/lint/duplicate_rescue_exception' require_relative 'rubocop/cop/lint/each_with_object_argument' diff --git a/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb new file mode 100644 index 00000000000..3358af83fad --- /dev/null +++ b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Lint + # This cop checks for duplicate elements in Regexp character classes. + # + # @example + # + # # bad + # r = /[xyx]/ + # + # # bad + # r = /[0-9x0-9]/ + # + # # good + # r = /[xy]/ + # + # # good + # r = /[0-9x]/ + class DuplicateRegexpCharacterClassElement < Base + include RangeHelp + extend AutoCorrector + + MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class' + + def on_regexp(node) + each_repeated_character_class_element_loc(node) do |loc| + add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector| + corrector.remove(loc) + end + end + end + + def each_repeated_character_class_element_loc(node) + node.parsed_tree&.each_expression do |expr| + next if expr.type != :set || expr.token == :intersection + + seen = Set.new + + expr.expressions.each do |child| + next if within_interpolation?(node, child) + + child_source = child.to_s + + yield node.parsed_tree_expr_loc(child) if seen.include?(child_source) + + seen << child_source + end + end + end + + private + + # Since we blank interpolations with a space for every char of the interpolation, we would + # mark every space (except the first) as duplicate if we do not skip regexp_parser nodes + # that are within an interpolation. + def within_interpolation?(node, child) + parse_tree_child_loc = node.parsed_tree_expr_loc(child) + + interpolation_locs(node).any? { |il| il.overlaps?(parse_tree_child_loc) } + end + + def interpolation_locs(node) + @interpolation_locs ||= {} + + # Cache by loc, not by regexp content, as content can be repeated in multiple patterns + key = node.loc + + @interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation| + interpolation.loc.expression + end + end + end + end + end +end diff --git a/lib/rubocop/ext/regexp_node.rb b/lib/rubocop/ext/regexp_node.rb index 197bb06bf9d..2318a0bcfda 100644 --- a/lib/rubocop/ext/regexp_node.rb +++ b/lib/rubocop/ext/regexp_node.rb @@ -38,6 +38,11 @@ def each_capture(named: ANY) self end + # @return [Parser::Source::Range] the range of the parse-tree expression + def parsed_tree_expr_loc(expr) + loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length) + end + private def with_interpolations_blanked diff --git a/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb b/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb new file mode 100644 index 00000000000..be10a74a84a --- /dev/null +++ b/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb @@ -0,0 +1,138 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Lint::DuplicateRegexpCharacterClassElement do + subject(:cop) { described_class.new } + + context 'with a repeated character class element' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[xyx]/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[xy]/ + RUBY + end + end + + context 'with a repeated character class element with quantifier' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[xyx]+/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[xy]+/ + RUBY + end + end + + context 'with no repeated character class elements' do + it 'registers an offense and corrects' do + expect_no_offenses(<<~RUBY) + foo = /[xyz]/ + RUBY + end + end + + context 'with repeated elements in different character classes' do + it 'registers an offense and corrects' do + expect_no_offenses(<<~RUBY) + foo = /[xyz][xyz]/ + RUBY + end + end + + context 'with a repeated character class element and %r{} literal' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = %r{[xyx]} + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = %r{[xy]} + RUBY + end + end + + context 'with a repeated character class element inside a group' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /([xyx])/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /([xy])/ + RUBY + end + end + + context 'with a repeated character posix character class inside a group' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /([[:alnum:]y[:alnum:]])/ + ^^^^^^^^^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /([[:alnum:]y])/ + RUBY + end + end + + context 'with a repeated character class element with interpolation' do + it 'registers an offense and corrects' do + expect_offense(<<~'RUBY') + foo = /([a#{foo}a#{bar}a])/ + ^ Duplicate element inside regexp character class + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~'RUBY') + foo = /([a#{foo}#{bar}])/ + RUBY + end + end + + context 'with a repeated range element' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[0-9x0-9]/ + ^^^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[0-9x]/ + RUBY + end + end + + context 'with a repeated intersection character class' do + it 'does not register an offense' do + expect_no_offenses(<<~RUBY) + foo = /[ab&&ab]/ + RUBY + end + end + + context 'with a range that covers a repeated element character class' do + it 'does not register an offense' do + expect_no_offenses(<<~RUBY) + foo = /[a-cb]/ + RUBY + end + end + + context 'with multiple regexps with the same interpolation' do + it 'does not register an offense' do + expect_no_offenses(<<~'RUBY') + a_field.gsub!(/[#{bad_chars}]/, '') + some_other_field.gsub!(/[#{bad_chars}]/, '') + RUBY + end + end +end diff --git a/spec/rubocop/ext/regexp_node_spec.rb b/spec/rubocop/ext/regexp_node_spec.rb index bf131068228..fe4eb11856e 100644 --- a/spec/rubocop/ext/regexp_node_spec.rb +++ b/spec/rubocop/ext/regexp_node_spec.rb @@ -97,4 +97,28 @@ end end end + + describe '#parsed_node_loc' do + let(:source) { '/([a-z]+)\d*\s?(?:foo)/' } + + it 'returns the correct loc for each node in the parsed_tree' do + loc_sources = node.parsed_tree.each_expression.map do |regexp_node| + node.parsed_tree_expr_loc(regexp_node).source + end + + expect(loc_sources).to eq( + [ + '([a-z]+)', + '[a-z]+', + 'a-z', + 'a', + 'z', + '\d*', + '\s?', + '(?:foo)', + 'foo' + ] + ) + end + end end