From fd65a6bb99f2cb0eef46b5e99824b725a2d34288 Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Mon, 26 Oct 2020 23:13:28 +0000 Subject: [PATCH] Add new `Lint/DuplicateRegexpCharacterClassElement` cop (#8896) --- CHANGELOG.md | 1 + config/default.yml | 5 + docs/modules/ROOT/pages/cops.adoc | 1 + docs/modules/ROOT/pages/cops_lint.adoc | 31 ++++ lib/rubocop.rb | 1 + ...uplicate_regexp_character_class_element.rb | 77 ++++++++++ lib/rubocop/ext/regexp_node.rb | 5 + ...ate_regexp_character_class_element_spec.rb | 138 ++++++++++++++++++ spec/rubocop/ext/regexp_node_spec.rb | 24 +++ 9 files changed, 283 insertions(+) create mode 100644 lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb create mode 100644 spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 94c8b5dd1ed..3a9f751bf45 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,7 @@ ### New features +* [#8896](https://github.com/rubocop-hq/rubocop/pull/8896): Add new `Lint/DuplicateRegexpCharacterClassElement` cop. ([@owst][]) * [#8895](https://github.com/rubocop-hq/rubocop/pull/8895): Add new `Lint/EmptyBlock` cop. ([@fatkodima][]) * [#8934](https://github.com/rubocop-hq/rubocop/pull/8934): Add new `Style/SwapValues` cop. ([@fatkodima][]) * [#7549](https://github.com/rubocop-hq/rubocop/issues/7549): Add new `Style/ArgumentsForwarding` cop. ([@koic][]) diff --git a/config/default.yml b/config/default.yml index 8de50edd42b..763c178aa79 100644 --- a/config/default.yml +++ b/config/default.yml @@ -1454,6 +1454,11 @@ Lint/DuplicateMethods: Enabled: true VersionAdded: '0.29' +Lint/DuplicateRegexpCharacterClassElement: + Description: 'Checks for duplicate elements in Regexp character classes.' + Enabled: pending + VersionAdded: '1.1' + Lint/DuplicateRequire: Description: 'Check for duplicate `require`s and `require_relative`s.' Enabled: true diff --git a/docs/modules/ROOT/pages/cops.adoc b/docs/modules/ROOT/pages/cops.adoc index a5dc4552690..a761ca25e75 100644 --- a/docs/modules/ROOT/pages/cops.adoc +++ b/docs/modules/ROOT/pages/cops.adoc @@ -200,6 +200,7 @@ In the following section you find all available cops: * xref:cops_lint.adoc#lintduplicateelsifcondition[Lint/DuplicateElsifCondition] * xref:cops_lint.adoc#lintduplicatehashkey[Lint/DuplicateHashKey] * xref:cops_lint.adoc#lintduplicatemethods[Lint/DuplicateMethods] +* xref:cops_lint.adoc#lintduplicateregexpcharacterclasselement[Lint/DuplicateRegexpCharacterClassElement] * xref:cops_lint.adoc#lintduplicaterequire[Lint/DuplicateRequire] * xref:cops_lint.adoc#lintduplicaterescueexception[Lint/DuplicateRescueException] * xref:cops_lint.adoc#linteachwithobjectargument[Lint/EachWithObjectArgument] diff --git a/docs/modules/ROOT/pages/cops_lint.adoc b/docs/modules/ROOT/pages/cops_lint.adoc index e2857c5f618..fbd2182b35a 100644 --- a/docs/modules/ROOT/pages/cops_lint.adoc +++ b/docs/modules/ROOT/pages/cops_lint.adoc @@ -848,6 +848,37 @@ end alias bar foo ---- +== Lint/DuplicateRegexpCharacterClassElement + +|=== +| Enabled by default | Safe | Supports autocorrection | VersionAdded | VersionChanged + +| Pending +| Yes +| Yes +| 1.1 +| - +|=== + +This cop checks for duplicate elements in Regexp character classes. + +=== Examples + +[source,ruby] +---- +# bad +r = /[xyx]/ + +# bad +r = /[0-9x0-9]/ + +# good +r = /[xy]/ + +# good +r = /[0-9x]/ +---- + == Lint/DuplicateRequire |=== diff --git a/lib/rubocop.rb b/lib/rubocop.rb index a2c86564e8f..c41bb147219 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -262,6 +262,7 @@ require_relative 'rubocop/cop/lint/duplicate_elsif_condition' require_relative 'rubocop/cop/lint/duplicate_hash_key' require_relative 'rubocop/cop/lint/duplicate_methods' +require_relative 'rubocop/cop/lint/duplicate_regexp_character_class_element' require_relative 'rubocop/cop/lint/duplicate_require' require_relative 'rubocop/cop/lint/duplicate_rescue_exception' require_relative 'rubocop/cop/lint/each_with_object_argument' diff --git a/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb new file mode 100644 index 00000000000..3358af83fad --- /dev/null +++ b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module RuboCop + module Cop + module Lint + # This cop checks for duplicate elements in Regexp character classes. + # + # @example + # + # # bad + # r = /[xyx]/ + # + # # bad + # r = /[0-9x0-9]/ + # + # # good + # r = /[xy]/ + # + # # good + # r = /[0-9x]/ + class DuplicateRegexpCharacterClassElement < Base + include RangeHelp + extend AutoCorrector + + MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class' + + def on_regexp(node) + each_repeated_character_class_element_loc(node) do |loc| + add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector| + corrector.remove(loc) + end + end + end + + def each_repeated_character_class_element_loc(node) + node.parsed_tree&.each_expression do |expr| + next if expr.type != :set || expr.token == :intersection + + seen = Set.new + + expr.expressions.each do |child| + next if within_interpolation?(node, child) + + child_source = child.to_s + + yield node.parsed_tree_expr_loc(child) if seen.include?(child_source) + + seen << child_source + end + end + end + + private + + # Since we blank interpolations with a space for every char of the interpolation, we would + # mark every space (except the first) as duplicate if we do not skip regexp_parser nodes + # that are within an interpolation. + def within_interpolation?(node, child) + parse_tree_child_loc = node.parsed_tree_expr_loc(child) + + interpolation_locs(node).any? { |il| il.overlaps?(parse_tree_child_loc) } + end + + def interpolation_locs(node) + @interpolation_locs ||= {} + + # Cache by loc, not by regexp content, as content can be repeated in multiple patterns + key = node.loc + + @interpolation_locs[key] ||= node.children.select(&:begin_type?).map do |interpolation| + interpolation.loc.expression + end + end + end + end + end +end diff --git a/lib/rubocop/ext/regexp_node.rb b/lib/rubocop/ext/regexp_node.rb index 197bb06bf9d..2318a0bcfda 100644 --- a/lib/rubocop/ext/regexp_node.rb +++ b/lib/rubocop/ext/regexp_node.rb @@ -38,6 +38,11 @@ def each_capture(named: ANY) self end + # @return [Parser::Source::Range] the range of the parse-tree expression + def parsed_tree_expr_loc(expr) + loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length) + end + private def with_interpolations_blanked diff --git a/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb b/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb new file mode 100644 index 00000000000..be10a74a84a --- /dev/null +++ b/spec/rubocop/cop/lint/duplicate_regexp_character_class_element_spec.rb @@ -0,0 +1,138 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::Cop::Lint::DuplicateRegexpCharacterClassElement do + subject(:cop) { described_class.new } + + context 'with a repeated character class element' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[xyx]/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[xy]/ + RUBY + end + end + + context 'with a repeated character class element with quantifier' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[xyx]+/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[xy]+/ + RUBY + end + end + + context 'with no repeated character class elements' do + it 'registers an offense and corrects' do + expect_no_offenses(<<~RUBY) + foo = /[xyz]/ + RUBY + end + end + + context 'with repeated elements in different character classes' do + it 'registers an offense and corrects' do + expect_no_offenses(<<~RUBY) + foo = /[xyz][xyz]/ + RUBY + end + end + + context 'with a repeated character class element and %r{} literal' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = %r{[xyx]} + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = %r{[xy]} + RUBY + end + end + + context 'with a repeated character class element inside a group' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /([xyx])/ + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /([xy])/ + RUBY + end + end + + context 'with a repeated character posix character class inside a group' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /([[:alnum:]y[:alnum:]])/ + ^^^^^^^^^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /([[:alnum:]y])/ + RUBY + end + end + + context 'with a repeated character class element with interpolation' do + it 'registers an offense and corrects' do + expect_offense(<<~'RUBY') + foo = /([a#{foo}a#{bar}a])/ + ^ Duplicate element inside regexp character class + ^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~'RUBY') + foo = /([a#{foo}#{bar}])/ + RUBY + end + end + + context 'with a repeated range element' do + it 'registers an offense and corrects' do + expect_offense(<<~RUBY) + foo = /[0-9x0-9]/ + ^^^ Duplicate element inside regexp character class + RUBY + + expect_correction(<<~RUBY) + foo = /[0-9x]/ + RUBY + end + end + + context 'with a repeated intersection character class' do + it 'does not register an offense' do + expect_no_offenses(<<~RUBY) + foo = /[ab&&ab]/ + RUBY + end + end + + context 'with a range that covers a repeated element character class' do + it 'does not register an offense' do + expect_no_offenses(<<~RUBY) + foo = /[a-cb]/ + RUBY + end + end + + context 'with multiple regexps with the same interpolation' do + it 'does not register an offense' do + expect_no_offenses(<<~'RUBY') + a_field.gsub!(/[#{bad_chars}]/, '') + some_other_field.gsub!(/[#{bad_chars}]/, '') + RUBY + end + end +end diff --git a/spec/rubocop/ext/regexp_node_spec.rb b/spec/rubocop/ext/regexp_node_spec.rb index bf131068228..fe4eb11856e 100644 --- a/spec/rubocop/ext/regexp_node_spec.rb +++ b/spec/rubocop/ext/regexp_node_spec.rb @@ -97,4 +97,28 @@ end end end + + describe '#parsed_node_loc' do + let(:source) { '/([a-z]+)\d*\s?(?:foo)/' } + + it 'returns the correct loc for each node in the parsed_tree' do + loc_sources = node.parsed_tree.each_expression.map do |regexp_node| + node.parsed_tree_expr_loc(regexp_node).source + end + + expect(loc_sources).to eq( + [ + '([a-z]+)', + '[a-z]+', + 'a-z', + 'a', + 'z', + '\d*', + '\s?', + '(?:foo)', + 'foo' + ] + ) + end + end end