forked from rubocop/rubocop
/
select_by_regexp.rb
139 lines (121 loc) · 5.06 KB
/
select_by_regexp.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
# frozen_string_literal: true
module RuboCop
module Cop
module Style
# This cop looks for places where a subset of an Enumerable (array,
# range, set, etc.; see note below) is calculated based on a `Regexp`
# match, and suggests `grep` or `grep_v` instead.
#
# NOTE: Hashes do not behave as you may expect with `grep`, which
# means that `hash.grep` is not equivalent to `hash.select`. Although
# RuboCop is limited by static analysis, this cop attempts to avoid
# registering an offense when the receiver is a hash (hash literal,
# `Hash.new`, `Hash#[]`, or `to_h`/`to_hash`).
#
# NOTE: `grep` and `grep_v` were optimized when used without a block
# in Ruby 3.0, but may be slower in previous versions.
# See https://bugs.ruby-lang.org/issues/17030
#
# @safety
# Autocorrection is marked as unsafe because `MatchData` will
# not be created by `grep`, but may have previously been relied
# upon after the `match?` or `=~` call.
#
# Additionally, the cop cannot guarantee that the receiver of
# `select` or `reject` is actually an array by static analysis,
# so the correction may not actually be equivalent.
#
# @example
# # bad (select or find_all)
# array.select { |x| x.match? /regexp/ }
# array.select { |x| /regexp/.match?(x) }
# array.select { |x| x =~ /regexp/ }
# array.select { |x| /regexp/ =~ x }
#
# # bad (reject)
# array.reject { |x| x.match? /regexp/ }
# array.reject { |x| /regexp/.match?(x) }
# array.reject { |x| x =~ /regexp/ }
# array.reject { |x| /regexp/ =~ x }
#
# # good
# array.grep(regexp)
# array.grep_v(regexp)
class SelectByRegexp < Base
# NOTE(review): AutoCorrector presumably enables the corrector block passed
# to `add_offense`, and RangeHelp presumably supplies `range_between`; both
# are used in `register_offense` but defined outside this file - confirm.
extend AutoCorrector
include RangeHelp
# Offense message template; `register_offense` fills in both placeholders.
MSG = 'Prefer `%<replacement>s` to `%<original_method>s` with a regexp match.'
# Method names this cop is interested in.
# NOTE(review): presumably read by the cop base class to pre-filter the
# calls handed to `on_send` - confirm against the framework.
RESTRICT_ON_SEND = %i[select find_all reject].freeze
# Maps each inspected method name to the `grep`-family method suggested
# in its place (looked up in `register_offense`).
REPLACEMENTS = { select: 'grep', find_all: 'grep', reject: 'grep_v' }.freeze
# Method names treated as a regexp match; referenced from the
# `regexp_match?` node pattern as `%REGEXP_METHODS`.
REGEXP_METHODS = %i[match? =~].to_set.freeze
# Matches a block whose entire body is a single regexp match and yields
# two captures: the block parameter and the matching node.
#
# Accepted block shapes:
# * a regular block with exactly one argument - the argument name is
#   captured;
# * a `numblock` - the `$1` term captures the node's second child, which
#   must be the integer `1` (presumably the numbered-parameter count;
#   `extract_send_node` turns it into the name `_1`).
#
# The body must be either a call to one of REGEXP_METHODS with exactly one
# argument (the receiver is unconstrained and may be absent) or a
# `match-with-lvasgn` node.
# @!method regexp_match?(node)
def_node_matcher :regexp_match?, <<~PATTERN
{
(block send (args (arg $_)) ${(send _ %REGEXP_METHODS _) match-with-lvasgn})
(numblock send $1 ${(send _ %REGEXP_METHODS _) match-with-lvasgn})
}
PATTERN
# Returns true if a node appears to return a hash
#
# Recognized forms:
# * `Hash.new(...)` and `Hash[...]`, with any scope on the `Hash` constant;
# * `Hash.new(...) { ... }`, i.e. `Hash.new` called with a block;
# * a call to `to_h` or `to_hash` on any receiver.
#
# Used by `receiver_allowed?` so that likely-hash receivers are skipped.
# @!method creates_hash?(node)
def_node_matcher :creates_hash?, <<~PATTERN
{
(send (const _ :Hash) {:new :[]} ...)
(block (send (const _ :Hash) :new ...) ...)
(send _ { :to_h :to_hash } ...)
}
PATTERN
# Checks that a match node actually involves the local variable `name`
# (passed as the matcher's extra argument and referenced as `%1`):
# * a call whose receiver is that local variable;
# * a call whose last child is that local variable;
# * a `match-with-lvasgn` node with a regexp on the left-hand side and
#   that local variable on the right-hand side.
#
# `extract_send_node` uses this to make sure the regexp is matched against
# the block parameter rather than some unrelated value.
# @!method calls_lvar?(node, name)
def_node_matcher :calls_lvar?, <<~PATTERN
{
(send (lvar %1) ...)
(send ... (lvar %1))
(match-with-lvasgn regexp (lvar %1))
}
PATTERN
# Inspects a method call (RESTRICT_ON_SEND lists `select`, `find_all` and
# `reject`) and registers an offense when its block is nothing but a regexp
# match against the block parameter.
#
# The call is ignored when it has no block, when the block body is a
# `begin` node, when the receiver looks like a hash, when the body is not a
# recognized regexp match, or when the match is a receiver-less `match?`.
#
# @param node [RuboCop::AST::Node] the method call being inspected
# @return whatever `register_offense` returns, or nil when the call is ignored
def on_send(node)
  return unless (block_node = node.block_node)
  # An empty block (`array.select { |x| }`) has no body node at all, so
  # `body` is nil; safe navigation keeps the cop from raising NoMethodError.
  return if block_node.body&.begin_type?
  return if receiver_allowed?(block_node.receiver)
  return unless (regexp_method_send_node = extract_send_node(block_node))
  return if match_predicate_without_receiver?(regexp_method_send_node)

  regexp = find_regexp(regexp_method_send_node, block_node)
  register_offense(node, block_node, regexp)
end
private
# Tells whether the receiver of the inspected call should be left alone
# because it looks like a hash (a hash literal, or anything `creates_hash?`
# recognizes). A missing receiver is never allowed.
#
# @param node [RuboCop::AST::Node, nil] receiver of the block call
# @return [Boolean] false when there is no receiver
def receiver_allowed?(node)
  if node
    node.hash_type? || creates_hash?(node)
  else
    false
  end
end
# Reports the offense on the whole block and, when the regexp operand is
# known, replaces everything from the method name through the end of the
# block with `grep(...)` / `grep_v(...)`.
#
# @param node [RuboCop::AST::Node] the `select`/`find_all`/`reject` call
# @param block_node [RuboCop::AST::Node] the block attached to that call
# @param regexp [RuboCop::AST::Node, nil] node whose source becomes the
#   argument of the replacement call; nil disables the correction
def register_offense(node, block_node, regexp)
  original_method = node.method_name
  replacement = REPLACEMENTS[original_method.to_sym]
  message = format(MSG, replacement: replacement, original_method: original_method)

  add_offense(block_node, message: message) do |corrector|
    # Without a known regexp there is nothing to hand to grep/grep_v, so
    # the offense is reported but the code is left untouched.
    next unless regexp

    start_pos = node.loc.selector.begin_pos
    end_pos = block_node.loc.end.end_pos
    corrector.replace(range_between(start_pos, end_pos), "#{replacement}(#{regexp.source})")
  end
end
# Returns the regexp-match node found in the block body, but only when that
# match involves the block's own parameter; returns nil otherwise.
#
# @param block_node [RuboCop::AST::Node] block attached to the inspected call
# @return [RuboCop::AST::Node, nil] the match node, or nil when the block
#   is not a plain regexp match on its parameter
def extract_send_node(block_node)
  captures = regexp_match?(block_node)
  return unless captures

  param_name, match_node = captures
  # For a numblock the capture is the bare number, so rebuild the `_N`
  # local-variable name that the body actually refers to.
  param_name = :"_#{param_name}" if block_node.numblock_type?

  match_node if calls_lvar?(match_node, param_name)
end
# Picks the operand of the match that is NOT the block parameter, i.e. the
# node whose source should become the argument of `grep`/`grep_v`.
#
# * For `match-with-lvasgn` the first child node is returned.
# * When the receiver is the block parameter, the first argument is
#   returned (`x.match?(re)`, `x =~ re`).
# * Otherwise, when the first argument is a local variable, the receiver is
#   returned (`re.match?(x)`, `/re/ =~ x`).
#
# @param node [RuboCop::AST::Node] the match node from `extract_send_node`
# @param block [RuboCop::AST::Node] the enclosing block or numblock
# @return [RuboCop::AST::Node, nil] the regexp operand, or nil when it
#   cannot be determined
def find_regexp(node, block)
  return node.child_nodes.first if node.match_with_lvasgn_type?

  receiver = node.receiver
  argument = node.first_argument
  if receiver.lvar_type? && block_parameter_source?(receiver.source, block)
    argument
  elsif argument.lvar_type?
    receiver
  end
end

# Whether `source` is the text of the block's own parameter.
#
# A numblock has no explicit argument to compare against, so the source is
# checked against the numbered-parameter names (`_1`..`_9`) instead. This
# keeps a regexp held in a local variable (`pattern.match?(_1)`) from being
# mistaken for the block parameter.
def block_parameter_source?(source, block)
  return source.match?(/\A_[1-9]\z/) if block.numblock_type?

  source == block.arguments.first.source
end
# Detects a bare `match?(x)` call, i.e. a `match?` send with no receiver.
# `on_send` skips such nodes, since there is no regexp operand to extract.
#
# @param node [RuboCop::AST::Node] the match node from `extract_send_node`
# @return [Boolean]
def match_predicate_without_receiver?(node)
  match_predicate_call = node.send_type? && node.method?(:match?)
  match_predicate_call && node.receiver.nil?
end
end
end
end
end