Skip to content

Commit

Permalink
Use regexp_parser to improve Style/RedundantRegexp... cops (#8625)
Browse files Browse the repository at this point in the history
As mentioned in #8593 by @marcandre
  • Loading branch information
owst committed Oct 4, 2020
1 parent 918f7bb commit 269ac4c
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 84 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Expand Up @@ -20,6 +20,7 @@
### Changes

* [#8803](https://github.com/rubocop-hq/rubocop/pull/8803): **(Breaking)** `RegexpNode#parsed_tree` now processes regexps including interpolation (by blanking the interpolation before parsing, rather than skipping). ([@owst][])
* [#8625](https://github.com/rubocop-hq/rubocop/pull/8625): Improve `Style/RedundantRegexpCharacterClass` and `Style/RedundantRegexpEscape` by using `regexp_parser` gem. ([@owst][])
* [#8646](https://github.com/rubocop-hq/rubocop/issues/8646): Faster find of all files in `TargetFinder` class which improves initial startup speed. ([@tleish][])
* [#8102](https://github.com/rubocop-hq/rubocop/issues/8102): Consider class length instead of block length for `Struct.new`. ([@tejasbubane][])

Expand Down
1 change: 0 additions & 1 deletion lib/rubocop.rb
Expand Up @@ -105,7 +105,6 @@
require_relative 'rubocop/cop/mixin/preceding_following_alignment'
require_relative 'rubocop/cop/mixin/preferred_delimiters'
require_relative 'rubocop/cop/mixin/rational_literal'
require_relative 'rubocop/cop/mixin/regexp_literal_help'
require_relative 'rubocop/cop/mixin/rescue_node'
require_relative 'rubocop/cop/mixin/safe_assignment'
require_relative 'rubocop/cop/mixin/space_after_punctuation'
Expand Down
43 changes: 0 additions & 43 deletions lib/rubocop/cop/mixin/regexp_literal_help.rb

This file was deleted.

63 changes: 39 additions & 24 deletions lib/rubocop/cop/style/redundant_regexp_character_class.rb
Expand Up @@ -22,32 +22,14 @@ module Style
# # good
# r = /[ab]/
class RedundantRegexpCharacterClass < Base
include MatchRange
include RegexpLiteralHelp
extend AutoCorrector

REQUIRES_ESCAPE_OUTSIDE_CHAR_CLASS_CHARS = '.*+?{}()|$'.chars.freeze
MSG_REDUNDANT_CHARACTER_CLASS = 'Redundant single-element character class, ' \
'`%<char_class>s` can be replaced with `%<element>s`.'

PATTERN = /
(
(?<!\\) # No \-prefix (i.e. not escaped)
\[ # Literal [
(?!\#\{) # Not (the start of) an interpolation
(?: # Either...
\\[^b] | # Any escaped character except b (which would change behaviour)
[^.*+?{}()|$] | # or one that doesn't require escaping outside the character class
\\[upP]\{[^}]+\} # or a unicode code-point or property
)
(?<!\\) # No \-prefix (i.e. not escaped)
\] # Literal ]
)
/x.freeze

def on_regexp(node)
each_redundant_character_class(node) do |loc|
next if whitespace_in_free_space_mode?(node, loc)

add_offense(
loc, message: format(
MSG_REDUNDANT_CHARACTER_CLASS,
Expand All @@ -63,19 +45,52 @@ def on_regexp(node)
private

def each_redundant_character_class(node)
pattern_source(node).scan(PATTERN) do
yield match_range(node.loc.begin.end, Regexp.last_match)
each_single_element_character_class(node) do |char_class|
next unless redundant_single_element_character_class?(node, char_class)

yield node.loc.begin.adjust(begin_pos: 1 + char_class.ts, end_pos: char_class.te)
end
end

def each_single_element_character_class(node)
node.parsed_tree&.each_expression do |expr|
next if expr.type != :set || expr.expressions.size != 1
next if expr.negative?
next if %i[set posixclass nonposixclass].include?(expr.expressions.first.type)

yield expr
end
end

def redundant_single_element_character_class?(node, char_class)
class_elem = char_class.expressions.first.text

non_redundant =
whitespace_in_free_space_mode?(node, class_elem) ||
backslash_b?(class_elem) ||
requires_escape_outside_char_class?(class_elem)

!non_redundant
end

def without_character_class(loc)
loc.source[1..-2]
end

def whitespace_in_free_space_mode?(node, loc)
return false unless freespace_mode_regexp?(node)
def whitespace_in_free_space_mode?(node, elem)
return false unless node.extended?

/\s/.match?(elem)
end

def backslash_b?(elem)
# \b's behaviour is different inside and outside of a character class, matching word
# boundaries outside but backspace (0x08) when inside.
elem == '\b'
end

/\[\s\]/.match?(loc.source)
def requires_escape_outside_char_class?(elem)
REQUIRES_ESCAPE_OUTSIDE_CHAR_CLASS_CHARS.include?(elem)
end
end
end
Expand Down
23 changes: 8 additions & 15 deletions lib/rubocop/cop/style/redundant_regexp_escape.rb
Expand Up @@ -34,7 +34,6 @@ module Style
# /[+\-]\d/
class RedundantRegexpEscape < Base
include RangeHelp
include RegexpLiteralHelp
extend AutoCorrector

MSG_REDUNDANT_ESCAPE = 'Redundant escape inside regexp literal'
Expand All @@ -59,9 +58,9 @@ def on_regexp(node)

def allowed_escape?(node, char, within_character_class)
# Strictly speaking a few single-letter metachars are currently
# unnecessary to "escape", e.g. g, i, E, F, but enumerating them is
# unnecessary to "escape", e.g. i, E, F, but enumerating them is
# rather difficult, and their behaviour could change over time with
# different versions of Ruby so that e.g. /\g/ != /g/
# different versions of Ruby so that e.g. /\i/ != /i/
return true if /[[:alnum:]]/.match?(char)
return true if ALLOWED_ALWAYS_ESCAPES.include?(char) || delimiter?(node, char)

Expand All @@ -82,19 +81,13 @@ def delimiter?(node, char)
end

def each_escape(node)
pattern_source(node).each_char.with_index.reduce(
[nil, 0]
) do |(previous, char_class_depth), (current, index)|
if previous == '\\'
yield [current, index - 1, !char_class_depth.zero?]

[nil, char_class_depth]
elsif previous == '['
[current, char_class_depth + 1]
elsif current == ']'
[current, char_class_depth - 1]
node.parsed_tree&.traverse&.reduce(0) do |char_class_depth, (event, expr)|
yield(expr.text[1], expr.ts, !char_class_depth.zero?) if expr.type == :escape

if expr.type == :set
char_class_depth + (event == :enter ? 1 : -1)
else
[current, char_class_depth]
char_class_depth
end
end
end
Expand Down
65 changes: 65 additions & 0 deletions spec/rubocop/cop/style/redundant_regexp_character_class_spec.rb
Expand Up @@ -30,6 +30,58 @@
end
end

context 'with a character class containing a single character inside a group' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
foo = /([a])/
^^^ Redundant single-element character class, `[a]` can be replaced with `a`.
RUBY

expect_correction(<<~RUBY)
foo = /(a)/
RUBY
end
end

context 'with a character class containing a single range' do
it 'does not register an offense' do
expect_no_offenses('foo = /[a-z]/')
end
end

context 'with a character class containing a posix bracket expression' do
it 'does not register an offense' do
expect_no_offenses('foo = /[[:alnum:]]/')
end
end

context 'with a character class containing a negated posix bracket expression' do
it 'does not register an offense' do
expect_no_offenses('foo = /[[:^alnum:]]/')
end
end

context 'with a character class containing set intersection' do
it 'does not register an offense' do
expect_no_offenses('foo = /[[:alnum:]&&a-d]/')
end
end

context "with a regexp containing invalid \g escape" do
it 'registers an offense and corrects' do
# See https://ruby-doc.org/core-2.7.1/Regexp.html#class-Regexp-label-Subexpression+Calls
# \g should be \g<name>
expect_offense(<<~'RUBY')
foo = /[a]\g/
^^^ Redundant single-element character class, `[a]` can be replaced with `a`.
RUBY

expect_correction(<<~'RUBY')
foo = /a\g/
RUBY
end
end

context 'with a character class containing an escaped ]' do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
Expand Down Expand Up @@ -224,6 +276,19 @@
end
end

context 'with a redundant character class after an interpolation' do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
foo = /#{x}[a]/
^^^ Redundant single-element character class, `[a]` can be replaced with `a`.
RUBY

expect_correction(<<~'RUBY')
foo = /#{x}a/
RUBY
end
end

context 'with a multi-line interpolation' do
it 'ignores offenses in the interpolated expression' do
expect_no_offenses(<<~'RUBY')
Expand Down
36 changes: 35 additions & 1 deletion spec/rubocop/cop/style/redundant_regexp_escape_spec.rb
Expand Up @@ -29,7 +29,7 @@
end

[
('a'..'z').to_a - %w[c n p u x],
('a'..'z').to_a - %w[c g k n p u x],
('A'..'Z').to_a - %w[C M P],
%w[n101 x41 u0041 u{0041} cc C-c p{alpha} P{alpha}]
].flatten.each do |escape|
Expand All @@ -46,6 +46,14 @@
end
end

context "with an invalid \g escape" do
it 'does not register an offense' do
# See https://ruby-doc.org/core-2.7.1/Regexp.html#class-Regexp-label-Subexpression+Calls
# \g should be \g<name>
expect_no_offenses('foo = /\g/')
end
end

context "with an escaped 'M-a' outside a character class" do
it 'does not register an offense' do
expect_no_offenses('foo = /\\M-a/n')
Expand Down Expand Up @@ -79,6 +87,19 @@
end
end

context "with an escaped '+' inside a character class inside a group" do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
foo = /([\+])/
^^ Redundant escape inside regexp literal
RUBY

expect_correction(<<~RUBY)
foo = /([+])/
RUBY
end
end

context 'with an escaped . inside a character class beginning with :' do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
Expand Down Expand Up @@ -421,6 +442,19 @@
end
end

context 'with a # inside a character class' do
it 'does not register an offense' do
# See https://github.com/rubocop-hq/rubocop/issues/8805 - the # inside the character class
# must not be treated as starting a comment (which makes the following \. redundant)
expect_no_offenses(<<~'RUBY')
regexp = %r{
\A[a-z#] # letters or #
\.[a-z]\z # dot + letters
}x
RUBY
end
end

context 'with redundantly-escaped slashes' do
it 'registers an offense and corrects' do
expect_offense(<<~'RUBY')
Expand Down

0 comments on commit 269ac4c

Please sign in to comment.