From 269ac4c3f027eb60ce814e0c24633c4237ea174a Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sun, 4 Oct 2020 11:33:29 +0100 Subject: [PATCH] Use `regexp_parser` to improve `Style/RedundantRegexp...` cops (#8625) As mentioned in #8593 by @marcandre --- CHANGELOG.md | 1 + lib/rubocop.rb | 1 - lib/rubocop/cop/mixin/regexp_literal_help.rb | 43 ------------ .../style/redundant_regexp_character_class.rb | 63 +++++++++++------- .../cop/style/redundant_regexp_escape.rb | 23 +++---- .../redundant_regexp_character_class_spec.rb | 65 +++++++++++++++++++ .../cop/style/redundant_regexp_escape_spec.rb | 36 +++++++++- 7 files changed, 148 insertions(+), 84 deletions(-) delete mode 100644 lib/rubocop/cop/mixin/regexp_literal_help.rb diff --git a/CHANGELOG.md b/CHANGELOG.md index 460a457cf2a..897aada697d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -20,6 +20,7 @@ ### Changes * [#8803](https://github.com/rubocop-hq/rubocop/pull/8803): **(Breaking)** `RegexpNode#parsed_tree` now processes regexps including interpolation (by blanking the interpolation before parsing, rather than skipping). ([@owst][]) +* [#8625](https://github.com/rubocop-hq/rubocop/pull/8625): Improve `Style/RedundantRegexpCharacterClass` and `Style/RedundantRegexpEscape` by using `regexp_parser` gem. ([@owst][]) * [#8646](https://github.com/rubocop-hq/rubocop/issues/8646): Faster find of all files in `TargetFinder` class which improves initial startup speed. ([@tleish][]) * [#8102](https://github.com/rubocop-hq/rubocop/issues/8102): Consider class length instead of block length for `Struct.new`. ([@tejasbubane][]) diff --git a/lib/rubocop.rb b/lib/rubocop.rb index 88984b1e257..5dc90731237 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -105,7 +105,6 @@ require_relative 'rubocop/cop/mixin/preceding_following_alignment' require_relative 'rubocop/cop/mixin/preferred_delimiters' require_relative 'rubocop/cop/mixin/rational_literal' -require_relative 'rubocop/cop/mixin/regexp_literal_help' require_relative 'rubocop/cop/mixin/rescue_node' require_relative 'rubocop/cop/mixin/safe_assignment' require_relative 'rubocop/cop/mixin/space_after_punctuation' diff --git a/lib/rubocop/cop/mixin/regexp_literal_help.rb b/lib/rubocop/cop/mixin/regexp_literal_help.rb deleted file mode 100644 index 5f00caa6b7b..00000000000 --- a/lib/rubocop/cop/mixin/regexp_literal_help.rb +++ /dev/null @@ -1,43 +0,0 @@ -# frozen_string_literal: true - -module RuboCop - module Cop - # Common functionality for handling Regexp literals. - module RegexpLiteralHelp - private - - def freespace_mode_regexp?(node) - regopt = node.children.find(&:regopt_type?) - - regopt.children.include?(:x) - end - - def pattern_source(node) - freespace_mode = freespace_mode_regexp?(node) - - node.children.reject(&:regopt_type?).map do |child| - source_with_comments_and_interpolations_blanked(child, freespace_mode) - end.join - end - - def source_with_comments_and_interpolations_blanked(child, freespace_mode) - source = child.source - - # We don't want to consider the contents of interpolations or free-space mode comments as - # part of the pattern source, but need to preserve their width, to allow offsets to - # correctly line up with the original source: spaces have no effect, and preserve width. - if child.begin_type? - replace_match_with_spaces(source, /.*/m) # replace all content - elsif freespace_mode - replace_match_with_spaces(source, /(?s` can be replaced with `%s`.' - PATTERN = / - ( - (? + expect_offense(<<~'RUBY') + foo = /[a]\g/ + ^^^ Redundant single-element character class, `[a]` can be replaced with `a`. + RUBY + + expect_correction(<<~'RUBY') + foo = /a\g/ + RUBY + end + end + context 'with a character class containing an escaped ]' do it 'registers an offense and corrects' do expect_offense(<<~'RUBY') @@ -224,6 +276,19 @@ end end + context 'with a redundant character class after an interpolation' do + it 'registers an offense and corrects' do + expect_offense(<<~'RUBY') + foo = /#{x}[a]/ + ^^^ Redundant single-element character class, `[a]` can be replaced with `a`. + RUBY + + expect_correction(<<~'RUBY') + foo = /#{x}a/ + RUBY + end + end + context 'with a multi-line interpolation' do it 'ignores offenses in the interpolated expression' do expect_no_offenses(<<~'RUBY') diff --git a/spec/rubocop/cop/style/redundant_regexp_escape_spec.rb b/spec/rubocop/cop/style/redundant_regexp_escape_spec.rb index f56596ec2b0..f081d983ded 100644 --- a/spec/rubocop/cop/style/redundant_regexp_escape_spec.rb +++ b/spec/rubocop/cop/style/redundant_regexp_escape_spec.rb @@ -29,7 +29,7 @@ end [ - ('a'..'z').to_a - %w[c n p u x], + ('a'..'z').to_a - %w[c g k n p u x], ('A'..'Z').to_a - %w[C M P], %w[n101 x41 u0041 u{0041} cc C-c p{alpha} P{alpha}] ].flatten.each do |escape| @@ -46,6 +46,14 @@ end end + context "with an invalid \g escape" do + it 'does not register an offense' do + # See https://ruby-doc.org/core-2.7.1/Regexp.html#class-Regexp-label-Subexpression+Calls + # \g should be \g + expect_no_offenses('foo = /\g/') + end + end + context "with an escaped 'M-a' outside a character class" do it 'does not register an offense' do expect_no_offenses('foo = /\\M-a/n') @@ -79,6 +87,19 @@ end end + context "with an escaped '+' inside a character class inside a group" do + it 'registers an offense and corrects' do + expect_offense(<<~'RUBY') + foo = /([\+])/ + ^^ Redundant escape inside regexp literal + RUBY + + expect_correction(<<~RUBY) + foo = /([+])/ + RUBY + end + end + context 'with an escaped . inside a character class beginning with :' do it 'registers an offense and corrects' do expect_offense(<<~'RUBY') @@ -421,6 +442,19 @@ end end + context 'with a # inside a character class' do + it 'does not register an offense' do + # See https://github.com/rubocop-hq/rubocop/issues/8805 - the # inside the character class + # must not be treated as starting a comment (which makes the following \. redundant) + expect_no_offenses(<<~'RUBY') + regexp = %r{ + \A[a-z#] # letters or # + \.[a-z]\z # dot + letters + }x + RUBY + end + end + context 'with redundantly-escaped slashes' do it 'registers an offense and corrects' do expect_offense(<<~'RUBY')