From c6ef6247e5ba3da21b2328d9973bef532007720c Mon Sep 17 00:00:00 2001 From: Marc-Andre Lafortune Date: Sat, 6 Apr 2019 17:43:30 -0400 Subject: [PATCH] NodePattern: Optimize by using placeholders for post processing This as multiple benefits: - allows more flexible handling of future multiple match patterns. Handling of '...' was already using a less flexible post-processing. - deals automatically and better with generated temporary variables - much less argument passing to the various compiling methods - factorizes the `#{'.type' if seq_head}` pattern that was everywhere. - makes debugging easier with simpler & nicer generated code --- lib/rubocop/node_pattern.rb | 239 ++++++++++++++++-------------- spec/rubocop/node_pattern_spec.rb | 22 +++ 2 files changed, 153 insertions(+), 108 deletions(-) diff --git a/lib/rubocop/node_pattern.rb b/lib/rubocop/node_pattern.rb index fe03f0747e2..fa923879796 100644 --- a/lib/rubocop/node_pattern.rb +++ b/lib/rubocop/node_pattern.rb @@ -125,6 +125,12 @@ class Compiler SEQ_HEAD_INDEX = -1 + # Placeholders while compiling, see with_..._context methods + CUR_PLACEHOLDER = '@@@cur'.freeze + CUR_NODE = "#{CUR_PLACEHOLDER} node@@@".freeze + CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@".freeze + SEQ_HEAD_GUARD = '@@@seq guard head@@@'.freeze + def initialize(str, node_var = 'node0') @string = str @root = node_var @@ -140,33 +146,39 @@ def initialize(str, node_var = 'node0') def run(node_var) @tokens = Compiler.tokens(@string) - @match_code = compile_expr(node_var, false) + @match_code = with_context(compile_expr, node_var, use_temp_node: false) fail_due_to('unbalanced pattern') unless tokens.empty? end # rubocop:disable Metrics/MethodLength, Metrics/AbcSize - def compile_expr(cur_node, seq_head) + def compile_expr # read a single pattern-matching expression from the token stream, # return Ruby code which performs the corresponding matching operation - # on 'cur_node' (which is Ruby code which evaluates to an AST node) # # the 'pattern-matching' expression may be a composite which - # contains an arbitrary number of sub-expressions + # contains an arbitrary number of sub-expressions, but that composite + # must all have precedence higher or equal to that of `&&` + # + # Expressions may use placeholders like: + # CUR_NODE: Ruby code that evaluates to an AST node + # CUR_ELEMENT: Either the node or the type if in first element of + # a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...") + token = tokens.shift case token - when '(' then compile_seq(cur_node, seq_head) - when '{' then compile_union(cur_node, seq_head) - when '[' then compile_intersect(cur_node, seq_head) - when '!' then compile_negation(cur_node, seq_head) - when '$' then compile_capture(cur_node, seq_head) - when '^' then compile_ascend(cur_node) - when WILDCARD then compile_wildcard(cur_node, token[1..-1], seq_head) - when FUNCALL then compile_funcall(cur_node, token, seq_head) - when LITERAL then compile_literal(cur_node, token, seq_head) - when PREDICATE then compile_predicate(cur_node, token, seq_head) - when NODE then compile_nodetype(cur_node, token) - when PARAM then compile_param(cur_node, token[1..-1], seq_head) + when '(' then compile_seq + when '{' then compile_union + when '[' then compile_intersect + when '!' then compile_negation + when '$' then compile_capture + when '^' then compile_ascend + when WILDCARD then compile_wildcard(token[1..-1]) + when FUNCALL then compile_funcall(token) + when LITERAL then compile_literal(token) + when PREDICATE then compile_predicate(token) + when NODE then compile_nodetype(token) + when PARAM then compile_param(token[1..-1]) when CLOSING then fail_due_to("#{token} in invalid position") when nil then fail_due_to('pattern ended prematurely') else fail_due_to("invalid token #{token.inspect}") @@ -174,44 +186,36 @@ def compile_expr(cur_node, seq_head) end # rubocop:enable Metrics/MethodLength, Metrics/AbcSize - def compile_seq(cur_node, seq_head) + def compile_seq fail_due_to('empty parentheses') if tokens.first == ')' - fail_due_to('parentheses at sequence head') if seq_head - # 'cur_node' is a Ruby expression which evaluates to an AST node, - # but we don't know how expensive it is - # to be safe, cache the node in a temp variable and then use the - # temp variable as 'cur_node' - with_temp_node(cur_node) do |init, temp_node| - terms = compile_seq_terms(temp_node) - terms.unshift(compile_guard_clause(temp_node)) - - join_terms(init, terms, " &&\n") - end + terms = compile_seq_terms + terms.unshift(compile_guard_clause) + terms.join(" &&\n") << SEQ_HEAD_GUARD end - def compile_guard_clause(cur_node) - "#{cur_node}.is_a?(RuboCop::AST::Node)" + def compile_guard_clause + "#{CUR_NODE}.is_a?(RuboCop::AST::Node)" end - def compile_seq_terms(cur_node) + def compile_seq_terms ret = - compile_seq_terms_with_size(cur_node) do |token, terms, index| + compile_seq_terms_with_size do |token, terms, index| capture = next_capture if token == CAPTURED_REST if capture || token == REST index = 0 if index == SEQ_HEAD_INDEX # Consider ($...) as (_ $...) - return compile_ellipsis(cur_node, terms, index, capture) + return compile_ellipsis(terms, index, capture) end end - ret << "(#{cur_node}.children.size == #{ret.size - 1})" + ret << "#{CUR_NODE}.children.size == #{ret.size - 1}" end - def compile_seq_terms_with_size(cur_node) + def compile_seq_terms_with_size index = SEQ_HEAD_INDEX terms = [] until tokens.first == ')' yield tokens.first, terms, index - term = compile_expr_with_index(cur_node, index) + term = compile_expr_with_index(index) index += 1 terms << term end @@ -220,61 +224,51 @@ def compile_seq_terms_with_size(cur_node) terms end - def compile_expr_with_index(cur_node, index) + def compile_expr_with_index(index) if index == SEQ_HEAD_INDEX - # in 'sequence head' position; some expressions are compiled - # differently at 'sequence head' (notably 'node type' expressions) - # grep for seq_head to see where it makes a difference - compile_expr(cur_node, true) + with_seq_head_context(compile_expr) else - child_node = "#{cur_node}.children[#{index}]" - compile_expr(child_node, false) + with_child_context(compile_expr, index) end end - def compile_ellipsis(cur_node, terms, index, capture = nil) + def compile_ellipsis(terms, index, capture = nil) tokens.shift # drop ellipsis - tail = compile_seq_tail(cur_node) - terms << "(#{cur_node}.children.size >= #{index + tail.size})" + tail = compile_seq_tail + terms << "#{CUR_NODE}.children.size >= #{index + tail.size}" terms.concat tail if capture range = index..-tail.size - 1 - terms << "(#{capture} = #{cur_node}.children[#{range}])" + terms << "(#{capture} = #{CUR_NODE}.children[#{range}])" end terms end - def compile_seq_tail(cur_node) - child_node = "#{cur_node}.children[%i]" + def compile_seq_tail terms = [] - until tokens.first == ')' - terms << compile_expr(child_node, false) - end + terms << compile_expr until tokens.first == ')' tokens.shift # drop ')' - # E.g. for terms.size == 3, we want to replace the three [%i] - # with [-3], [-2] and [-1] - terms.map.with_index { |term, i| format term, revindex: i - terms.size } + terms.map.with_index do |term, i| + with_child_context(term, i - terms.size) + end end - def compile_union(cur_node, seq_head) + def compile_union fail_due_to('empty union') if tokens.first == '}' - with_temp_node(cur_node) do |init, temp_node| - terms = union_terms(temp_node, seq_head) - join_terms(init, terms, ' || ') - end + union = union_terms.join(' || ') + "(#{union})" end - def union_terms(temp_node, seq_head) + def union_terms # we need to ensure that each branch of the {} contains the same # number of captures (since only one branch of the {} can actually # match, the same variables are used to hold the captures for each # branch) - compile_expr_with_captures(temp_node, seq_head) do |term, before, after| + compile_expr_with_captures do |term, before, after| terms = [term] until tokens.first == '}' - terms << compile_expr_with_capture_check(temp_node, - seq_head, before, after) + terms << compile_expr_with_capture_check(before, after) end tokens.shift @@ -282,17 +276,16 @@ def union_terms(temp_node, seq_head) end end - def compile_expr_with_captures(temp_node, seq_head) + def compile_expr_with_captures captures_before = @captures - expr = compile_expr(temp_node, seq_head) + expr = compile_expr yield expr, captures_before, @captures end - def compile_expr_with_capture_check(temp_node, seq_head, before, - after) + def compile_expr_with_capture_check(before, after) @captures = before - expr = compile_expr(temp_node, seq_head) + expr = compile_expr if @captures != after fail_due_to('each branch of {} must have same # of captures') end @@ -300,84 +293,77 @@ def compile_expr_with_capture_check(temp_node, seq_head, before, expr end - def compile_intersect(cur_node, seq_head) + def compile_intersect fail_due_to('empty intersection') if tokens.first == ']' - with_temp_node(cur_node) do |init, temp_node| - terms = [] - until tokens.first == ']' - terms << compile_expr(temp_node, seq_head) - end - tokens.shift + terms = [] + terms << compile_expr until tokens.first == ']' + tokens.shift - join_terms(init, terms, ' && ') - end + terms.join(' && ') end - def compile_capture(cur_node, seq_head) - "(#{next_capture} = #{cur_node}#{'.type' if seq_head}; " \ - "#{compile_expr(cur_node, seq_head)})" + def compile_capture + "(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})" end - def compile_negation(cur_node, seq_head) - "(!#{compile_expr(cur_node, seq_head)})" + def compile_negation + "!(#{compile_expr})" end - def compile_ascend(cur_node) - "(#{cur_node}.parent && " \ - "#{compile_expr("#{cur_node}.parent", false)})" + def compile_ascend + with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent") end - def compile_wildcard(cur_node, name, seq_head) + def compile_wildcard(name) if name.empty? 'true' elsif @unify.key?(name) # we have already seen a wildcard with this name before # so the value it matched the first time will already be stored # in a temp. check if this value matches the one stored in the temp - "(#{cur_node}#{'.type' if seq_head} == temp#{@unify[name]})" + "#{CUR_ELEMENT} == temp#{@unify[name]}" else n = @unify[name] = next_temp_value # double assign to temp#{n} to avoid "assigned but unused variable" - "(temp#{n} = #{cur_node}#{'.type' if seq_head}; " \ + "(temp#{n} = #{CUR_ELEMENT}; " \ "temp#{n} = temp#{n}; true)" end end - def compile_literal(cur_node, literal, seq_head) - "(#{cur_node}#{'.type' if seq_head} == #{literal})" + def compile_literal(literal) + "#{CUR_ELEMENT} == #{literal}" end - def compile_predicate(cur_node, predicate, seq_head) + def compile_predicate(predicate) if predicate.end_with?('(') # is there an arglist? args = compile_args(tokens) predicate = predicate[0..-2] # drop the trailing ( - "(#{cur_node}#{'.type' if seq_head}.#{predicate}(#{args.join(',')}))" + "#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})" else - "(#{cur_node}#{'.type' if seq_head}.#{predicate})" + "#{CUR_ELEMENT}.#{predicate}" end end - def compile_funcall(cur_node, method, seq_head) + def compile_funcall(method) # call a method in the context which this pattern-matching # code is used in. pass target value as an argument method = method[1..-1] # drop the leading # if method.end_with?('(') # is there an arglist? args = compile_args(tokens) method = method[0..-2] # drop the trailing ( - "(#{method}(#{cur_node}#{'.type' if seq_head},#{args.join(',')}))" + "#{method}(#{CUR_ELEMENT},#{args.join(',')})" else - "(#{method}(#{cur_node}#{'.type' if seq_head}))" + "#{method}(#{CUR_ELEMENT})" end end - def compile_nodetype(cur_node, type) - "(#{cur_node}.is_a?(RuboCop::AST::Node) && " \ - "#{cur_node}.#{type.tr('-', '_')}_type?)" + def compile_nodetype(type) + "#{compile_guard_clause} && #{CUR_NODE}.#{type.tr('-', '_')}_type?" end - def compile_param(cur_node, number, seq_head) - "(#{cur_node}#{'.type' if seq_head} == #{get_param(number)})" + def compile_param(number) + "#{CUR_ELEMENT} == #{get_param(number)}" end def compile_args(tokens) @@ -414,10 +400,6 @@ def get_param(number) number.zero? ? @root : "param#{number}" end - def join_terms(init, terms, operator) - "(#{init};#{terms.join(operator)})" - end - def emit_capture_list (1..@captures).map { |n| "capture#{n}" }.join(',') end @@ -454,9 +436,9 @@ def fail_due_to(message) def with_temp_node(cur_node) with_temp_variable do |temp_var| - # double assign to temp#{n} to avoid "assigned but unused variable" - yield "#{temp_var} = #{cur_node}; #{temp_var} = #{temp_var}", temp_var + yield "(#{temp_var} = #{cur_node})", temp_var end + .gsub("\n", "\n ") # Nicer indent for debugging end def with_temp_variable @@ -467,6 +449,47 @@ def next_temp_value @temps += 1 end + def auto_use_temp_node?(code) + code.scan(CUR_PLACEHOLDER).count > 1 + end + + # with_<...>_context methods are used whenever the context, + # i.e the current node or the current element can be determined. + + def with_child_context(code, child_index) + with_context(code, "#{CUR_NODE}.children[#{child_index}]") + end + + def with_context(code, cur_node, + use_temp_node: auto_use_temp_node?(code)) + if use_temp_node + with_temp_node(cur_node) do |init, temp_var| + substitute_cur_node(code, temp_var, first_cur_node: init) + end + else + substitute_cur_node(code, cur_node) + end + end + + def with_seq_head_context(code) + if code.include?(SEQ_HEAD_GUARD) + fail_due_to('parentheses at sequence head') + end + + code.gsub CUR_ELEMENT, "#{CUR_NODE}.type" + end + + def substitute_cur_node(code, cur_node, first_cur_node: cur_node) + iter = 0 + code + .gsub(CUR_ELEMENT, CUR_NODE) + .gsub(CUR_NODE) do + iter += 1 + iter == 1 ? first_cur_node : cur_node + end + .gsub(SEQ_HEAD_GUARD, '') + end + def self.tokens(pattern) pattern.scan(TOKEN).reject { |token| token =~ /\A#{SEPARATORS}\Z/ } end diff --git a/spec/rubocop/node_pattern_spec.rb b/spec/rubocop/node_pattern_spec.rb index 9368a7940a3..cbb0fe7b4aa 100644 --- a/spec/rubocop/node_pattern_spec.rb +++ b/spec/rubocop/node_pattern_spec.rb @@ -223,6 +223,21 @@ it_behaves_like 'matching' end + + describe 'bare literal' do + let(:ruby) { ':bar' } + let(:pattern) { ':bar' } + + context 'on a node' do + it_behaves_like 'nonmatching' + end + + context 'on a matching literal' do + let(:node) { root_node.children[0] } + + it_behaves_like 'matching' + end + end end describe 'nil' do @@ -473,6 +488,13 @@ it_behaves_like 'matching' end + context 'containing mixed node and literals' do + let(:pattern) { '(send {int nil?} ...)' } + let(:ruby) { 'obj' } + + it_behaves_like 'matching' + end + context 'containing multiple []' do let(:pattern) { '{[(int odd?) int] [!nil float]}' }