diff --git a/CHANGELOG.md b/CHANGELOG.md index 3551215a559..d592a12a761 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -2,6 +2,10 @@ ## master (unreleased) +### New features + +* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Style/DuplicateRegexpCharacterClassElement` cop. ([@owst][]) + ### Bug fixes * [#8892](https://github.com/rubocop-hq/rubocop/issues/8892): Fix an error for `Style/StringConcatenation` when correcting nested concatenable parts. ([@fatkodima][]) diff --git a/config/default.yml b/config/default.yml index f57334fa916..df77c4cd966 100644 --- a/config/default.yml +++ b/config/default.yml @@ -2951,6 +2951,11 @@ Style/DoubleNegation: - allowed_in_returns - forbidden +Style/DuplicateRegexpCharacterClassElement: + Description: 'Checks for duplicate elements in Regexp character classes.' + Enabled: pending + VersionAdded: '0.94' + Style/EachForSimpleLoop: Description: >- Use `Integer#times` for a simple loop which iterates a fixed diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index 80a3f3de169..ca3cae8af3a 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -373,6 +373,7 @@ In the following section you find all available cops: * xref:cops_style.adoc#styledocumentationmethod[Style/DocumentationMethod] * xref:cops_style.adoc#styledoublecopdisabledirective[Style/DoubleCopDisableDirective] * xref:cops_style.adoc#styledoublenegation[Style/DoubleNegation] +* xref:cops_style.adoc#styleduplicateregexpcharacterclasselement[Style/DuplicateRegexpCharacterClassElement] * xref:cops_style.adoc#styleeachforsimpleloop[Style/EachForSimpleLoop] * xref:cops_style.adoc#styleeachwithobject[Style/EachWithObject] * xref:cops_style.adoc#styleemptyblockparameter[Style/EmptyBlockParameter] diff --git a/docs/modules/ROOT/pages/cops_style.adoc b/docs/modules/ROOT/pages/cops_style.adoc index 7b0b980ecb2..743f40afc3b 100644 --- a/docs/modules/ROOT/pages/cops_style.adoc +++ b/docs/modules/ROOT/pages/cops_style.adoc @@ -2425,6 +2425,37 @@ end * https://rubystyle.guide#no-bang-bang +== Style/DuplicateRegexpCharacterClassElement + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Pending +| Yes +| Yes +| 0.94 +| - +|=== + +This cop checks for duplicate elements in Regexp character classes. + +=== Examples + +[source,ruby] +---- +# bad +r = /[xyx]/ + +# bad +r = /[0-9x0-9]/ + +# good +r = /[xy]/ + +# good +r = /[0-9x]/ +---- + == Style/EachForSimpleLoop |=== diff --git a/lib/rubocop.rb b/lib/rubocop.rb index a3e1b9b630b..c3e4f8d623c 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -421,6 +421,7 @@ require_relative 'rubocop/cop/style/documentation' require_relative 'rubocop/cop/style/double_cop_disable_directive' require_relative 'rubocop/cop/style/double_negation' +require_relative 'rubocop/cop/style/duplicate_regexp_character_class_element' require_relative 'rubocop/cop/style/each_for_simple_loop' require_relative 'rubocop/cop/style/each_with_object' require_relative 'rubocop/cop/style/empty_block_parameter' diff --git a/lib/rubocop/cop/style/duplicate_regexp_character_class_element.rb b/lib/rubocop/cop/style/duplicate_regexp_character_class_element.rb new file mode 100644 index 00000000000..ab425bf027b --- /dev/null +++ b/lib/rubocop/cop/style/duplicate_regexp_character_class_element.rb @@ -0,0 +1,73 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Style + # This cop checks for duplicate elements in Regexp character classes. + # + # @example + # + # # bad + # r = /[xyx]/ + # + # # bad + # r = /[0-9x0-9]/ + # + # # good + # r = /[xy]/ + # + # # good + # r = /[0-9x]/ + class DuplicateRegexpCharacterClassElement < Base + include RangeHelp + extend AutoCorrector + + MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class' + + def on_regexp(node) + each_repeated_character_class_element_loc(node) do |loc| + add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector| + corrector.remove(loc) + end + end + end + + def each_repeated_character_class_element_loc(node) + node.parsed_tree&.each_expression do |expr| + next if expr.type != :set || expr.token == :intersection + + seen = Set.new + + expr.expressions.each do |child| + next if within_interpolation?(node, child) + + child_source = child.to_s + + yield node.parsed_tree_expr_loc(child) if seen.include?(child_source) + + seen << child_source + end + end + end + + private + + # Since we blank interpolations with a space for every char of the interpolation, we would + # mark every space (except the first) as duplicate if we do not skip regexp_parser nodes + # that are within an interpolation. + def within_interpolation?(node, child) + @interpolation_locs ||= {} + + # Cache by loc, not by regexp content, as content can be repeated in multiple patterns + key = node.loc + + @interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation| + interpolation.loc.expression + end + + @interpolation_locs[key].any? { |il| il.overlaps?(node.parsed_tree_expr_loc(child)) } + end + end + end + end +end diff --git a/lib/rubocop/ext/regexp_node.rb b/lib/rubocop/ext/regexp_node.rb index 197bb06bf9d..2318a0bcfda 100644 --- a/lib/rubocop/ext/regexp_node.rb +++ b/lib/rubocop/ext/regexp_node.rb @@ -38,6 +38,11 @@ def each_capture(named: ANY) self end + # @return [Parser::Source::Range] the range of the parse-tree expression + def parsed_tree_expr_loc(expr) + loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length) + end + private def with_interpolations_blanked diff --git a/spec/rubocop/cop/style/duplicate_regexp_character_class_element_spec.rb b/spec/rubocop/cop/style/duplicate_regexp_character_class_element_spec.rb new file mode 100644 index 00000000000..ff8040c1ebf --- /dev/null +++ b/spec/rubocop/cop/style/duplicate_regexp_character_class_element_spec.rb @@ -0,0 +1,125 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Style::DuplicateRegexpCharacterClassElement do + subject(:cop) { described_class.new } + + context 'with a repeated character class element' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[xyx]/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[xy]/ + RUBY + end + end + + context 'with no repeated character class elements' do + it 'registers an offense and corrects' do + expect_no_offenses(<<~RUBY) + foo = /[xyz]/ + RUBY + end + end + + context 'with repeated elements in different character classes' do + it 'registers an offense and corrects' do + expect_no_offenses(<<~RUBY) + foo = /[xyz][xyz]/ + RUBY + end + end + + context 'with a repeated character class element and %r{} literal' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = %r{[xyx]} + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = %r{[xy]} + RUBY + end + end + + context 'with a repeated character class element inside a group' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /([xyx])/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /([xy])/ + RUBY + end + end + + context 'with a repeated character posix character class inside a group' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /([[:alnum:]y[:alnum:]])/ + ^^^^^^^^^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /([[:alnum:]y])/ + RUBY + end + end + + context 'with a repeated character class element with interpolation' do + it 'registers an offense and corrects' do + expect_offense(<<~'RUBY') + foo = /([a#{foo}a#{bar}a])/ + ^ Duplicate element inside regexp character class + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~'RUBY') + foo = /([a#{foo}#{bar}])/ + RUBY + end + end + + context 'with a repeated range element' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[0-9x0-9]/ + ^^^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[0-9x]/ + RUBY + end + end + + context 'with a repeated intersection character class' do + it 'does not register an offense' do + expect_no_offenses(<<~RUBY) + foo = /[ab&&ab]/ + RUBY + end + end + + context 'with a range that covers a repeated element character class' do + it 'does not register an offense' do + expect_no_offenses(<<~RUBY) + foo = /[a-cb]/ + RUBY + end + end + + context 'with multiple regexps with the same interpolation' do + it 'does not register an offense' do + expect_no_offenses(<<~'RUBY') + a_field.gsub!(/[#{bad_chars}]/, '') + some_other_field.gsub!(/[#{bad_chars}]/, '') + RUBY + end + end +end diff --git a/spec/rubocop/ext/regexp_node_spec.rb b/spec/rubocop/ext/regexp_node_spec.rb index bf131068228..fe4eb11856e 100644 --- a/spec/rubocop/ext/regexp_node_spec.rb +++ b/spec/rubocop/ext/regexp_node_spec.rb @@ -97,4 +97,28 @@ end end end + + describe '#parsed_node_loc' do + let(:source) { '/([a-z]+)\d*\s?(?:foo)/' } + + it 'returns the correct loc for each node in the parsed_tree' do + loc_sources = node.parsed_tree.each_expression.map do |regexp_node| + node.parsed_tree_expr_loc(regexp_node).source + end + + expect(loc_sources).to eq( + [ + '([a-z]+)', + '[a-z]+', + 'a-z', + 'a', + 'z', + '\d*', + '\s?', + '(?:foo)', + 'foo' + ] + ) + end + end end