diff --git a/.github/workflows/rubocop.yml b/.github/workflows/rubocop.yml index 35927de4c..f9d181be7 100644 --- a/.github/workflows/rubocop.yml +++ b/.github/workflows/rubocop.yml @@ -74,7 +74,7 @@ jobs: run: bundle exec rake spec - name: internal investigation if: matrix.internal_investigation - run: bundle exec rake internal_investigation + run: bundle exec rake generate internal_investigation rubocop_specs: name: >- Main Gem Specs | RuboCop: ${{ matrix.rubocop }} | ${{ matrix.ruby }} (${{ matrix.os }}) @@ -98,6 +98,8 @@ jobs: ruby-version: ${{ matrix.ruby }} - name: install dependencies run: bundle install --jobs 3 --retry 3 + - name: generate lexer and parser + run: bundle exec rake generate - name: clone rubocop from source for full specs -- master if: matrix.rubocop == 'master' run: git clone --branch ${{ matrix.rubocop }} https://github.com/rubocop-hq/rubocop.git ../rubocop diff --git a/.gitignore b/.gitignore index be1d4d69e..778367d25 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,8 @@ +# generated parser / lexer +/lib/rubocop/ast/node_pattern/parser.racc.rb +/lib/rubocop/ast/node_pattern/parser.output +/lib/rubocop/ast/node_pattern/lexer.rex.rb + # rcov generated coverage coverage.data diff --git a/.rubocop.yml b/.rubocop.yml index 018e4ba80..0be2dabd2 100644 --- a/.rubocop.yml +++ b/.rubocop.yml @@ -13,6 +13,9 @@ AllCops: - 'spec/fixtures/**/*' - 'tmp/**/*' - '.git/**/*' + - 'lib/rubocop/ast/node_pattern/parser.racc.rb' + - 'lib/rubocop/ast/node_pattern/lexer.rex.rb' + - 'spec/rubocop/ast/node_pattern/parse_helper.rb' TargetRubyVersion: 2.4 Naming/PredicateName: diff --git a/.rubocop_todo.yml b/.rubocop_todo.yml index 99518ef9b..2aa934c16 100644 --- a/.rubocop_todo.yml +++ b/.rubocop_todo.yml @@ -32,7 +32,7 @@ Metrics/MethodLength: # Offense count: 1 # Configuration parameters: CountComments. 
Metrics/ModuleLength: - Max: 101 + Max: 108 # Offense count: 1 # Configuration parameters: ExpectMatchingDefinition, Regex, IgnoreExecutableScripts, AllowedAcronyms. @@ -65,6 +65,7 @@ RSpec/ContextWording: - 'spec/rubocop/ast/resbody_node_spec.rb' - 'spec/rubocop/ast/token_spec.rb' - 'spec/spec_helper.rb' + - 'spec/rubocop/ast/node_pattern/helper.rb' # Offense count: 6 # Configuration parameters: Max. @@ -73,6 +74,7 @@ RSpec/ExampleLength: - 'spec/rubocop/ast/node_pattern_spec.rb' - 'spec/rubocop/ast/processed_source_spec.rb' - 'spec/rubocop/ast/send_node_spec.rb' + - 'spec/rubocop/ast/node_pattern/parser_spec.rb' # Offense count: 6 RSpec/LeakyConstantDeclaration: diff --git a/Gemfile b/Gemfile index c4c2e780d..e9cce0d05 100644 --- a/Gemfile +++ b/Gemfile @@ -5,8 +5,10 @@ source 'https://rubygems.org' gemspec gem 'bump', require: false +gem 'oedipus_lex', require: false gem 'pry' -gem 'rake', '~> 12.0' +gem 'racc' +gem 'rake', '~> 13.0' gem 'rspec', '~> 3.7' local_ast = File.expand_path('../rubocop', __dir__) if Dir.exist? local_ast @@ -20,7 +22,6 @@ gem 'rubocop-rspec', '~> 1.0' # Stop upgrading SimpleCov until the following issue will be resolved. 
# https://github.com/codeclimate/test-reporter/issues/418 gem 'simplecov', '~> 0.10', '< 0.18' - local_gemfile = File.expand_path('Gemfile.local', __dir__) eval_gemfile local_gemfile if File.exist?(local_gemfile) diff --git a/Rakefile b/Rakefile index adba5a44f..53c412254 100644 --- a/Rakefile +++ b/Rakefile @@ -15,7 +15,7 @@ end require 'rspec/core/rake_task' -RSpec::Core::RakeTask.new(:spec) do |spec| +RSpec::Core::RakeTask.new(spec: :generate) do |spec| spec.pattern = FileList['spec/**/*_spec.rb'] end diff --git a/docs/modules/ROOT/pages/node_pattern_compiler.adoc b/docs/modules/ROOT/pages/node_pattern_compiler.adoc new file mode 100644 index 000000000..ee0447944 --- /dev/null +++ b/docs/modules/ROOT/pages/node_pattern_compiler.adoc @@ -0,0 +1,252 @@ += Hacker's guide to the `NodePattern` compiler + +This documentation is aimed at anyone wanting to understand / modify the `NodePattern` compiler. +It assumes some familiarity with the syntax of https://github.com/rubocop-hq/rubocop-ast/blob/master/doc/modules/ROOT/pages/node_pattern.md[`NodePattern`], as well as the AST produced by the `parser` gem. + +== High level view + +The `NodePattern` compiler uses the same techniques as the `parser` gem: + +* a `Lexer` that breaks source into tokens +* a `Parser` that uses tokens and a `Builder` to emit an AST +* a `Compiler` that converts this AST into Ruby code + +Example: + +* Pattern: `+(send nil? {:puts :p} $...)+` +* Tokens: `+'(', [:tNODE_TYPE, :send], [:tPREDICATE, :nil?], '{', ...+` +* AST: `+s(:sequence, s(:node_type, :send), s(:predicate, :nil?), s(:union, ...+` +* Ruby code: ++ +[source,ruby] +---- +node.is_a?(::RuboCop::AST::Node) && node.children.size >= 2 && +node.send_type? && +node.children[0].nil?() && +(union2 = node.children[1]; ... +---- + +The different parts are described below + +== Vocabulary + +*"node pattern"*: something that can be matched against a single `AST::Node`. 
+While `(int 42)` and `#is_fun?` both correspond to node patterns, `+...+` (without the parentheses) is not a node pattern. + +*"sequence"*: a node pattern that describes the sequence of children of a node (and its type): `+(type first_child second_child ...)+` + +*"variadic"*: element of a sequence that can match a variable number of children. +`+(send _ int* ...)+` has two variadic elements (`int*` and `+...+`). +`(send _ :name)` contains no variadic element. +Note that a sequence is itself never variadic. + +*"atom"*: element of a pattern that corresponds with a simple Ruby object. +`(send nil? +:puts (str 'hello'))` has two atoms: `:puts` and `'hello'`. + +== Lexer + +The `lexer.rb` defines `Lexer` and has the few definitions needed for the lexer to work. +The bulk of the processing is in the inherited class that is generated by https://github.com/seattlerb/oedipus_lex[`oedipus_lex`] + +[discrete] +==== Rules + +https://github.com/seattlerb/oedipus_lex[`oedipus_lex`] generates the Ruby file `lexer.rex.rb` from the rules defined in `lexer.rex`. + +These rules map a Regexp to code that emits a token. + +`oedipus_lex` aims to be simple and the generated file is readable. +It uses https://ruby-doc.org/stdlib-2.7.1/libdoc/strscan/rdoc/StringScanner.html[`StringScanner`] behind the scenes. +It selects the first rule that matches, contrary to many lexing tools that prioritize longest match. + +[discrete] +==== Tokens + +The `Lexer` emits tokens with types that are: + +* string for the syntactic symbols (e.g. +`'('`, `'$'`, `+'...'+`) +* symbols of the form `:tTOKEN_TYPE` for the rest (e.g. +`:tPREDICATE`) + +Tokens are stored as `[type, value]`, or `[type, [value, location]]` if locations are emitted. + +[discrete] +==== Generation + +Use `rake generate:lexer` to generate the `lexer.rex.rb` from `lexer.rex` file. +This is done automatically by `rake spec`. + +NOTE: the `lexer.rex.rb` is not under source control, but is included in the gem. 
+ +== Parser + +Similarly to the `Lexer`, the `parser.rb` defines `Parser` and has the few definitions needed for the parser to work. +The bulk of the processing is in the inherited class `parser.racc.rb` that is generated by https://ruby-doc.org/stdlib-2.7.0/libdoc/racc/parser/rdoc/Racc.html#module-Racc-label-Writing+A+Racc+Grammar+File[`racc`] from the rules in `parser.y`. + +[discrete] +==== Nodes + +The `Parser` emits `NodePattern::Node` which are similar to RuboCop's node. +They both inherit from ``parser``'s `Parser::AST::Node`, and share additional methods too. + +Like for RuboCop's nodes, some nodes have specialized classes (e.g. +`Sequence`) while other nodes use the base class directly (e.g. +`s(:number, 42)`) + +[discrete] +==== Rules + +The rules follow closely the definitions above. +In particular, a distinction is made between `node_pattern_list`, which is a list of node patterns (each term can match a single node), and the more generic `variadic_pattern_list`, which is a list of elements, some of which could be variadic, others simple node patterns. + +[discrete] +==== Generation + +Similarly to the lexer, use `rake generate:parser` to generate the `parser.racc.rb` from `parser.y` file. +This is done automatically by `rake spec`. + +NOTE: the `parser.racc.rb` is not under source control, but is included in the gem. + +== Compiler + +The compiler's core is the `Compiler` class. +It holds the global state (e.g. +references to named arguments). +The goal of the compiler is to produce `matching_code`, Ruby code that can be run against an `AST::Node`, or any Ruby object for that matter. + +Packaging of that `matching_code` into code for a `lambda`, or method `def` is handled separately by the `MethodDefiner` module. 
+ +The compilation itself is handled by three subcompilers: + +* `NodePatternSubcompiler` +* `AtomSubcompiler` +* `SequenceSubcompiler` + +=== Visitors + +The subcompilers use the https://en.wikipedia.org/wiki/Visitor_pattern[visitor pattern]. + +The methods starting with `visit_` are used to process the different types of nodes. +For a node of type `:capture`, the method `visit_capture` will be called, or if none is defined then `visit_other_type` will be called. + +No argument is passed, as the visited node is accessible with the `node` attribute reader. + +=== NodePatternSubcompiler + +Given any `NodePattern::Node`, it generates the Ruby code that can return `true` or `false` for the given node, or node type for sequence head. + +==== `var` vs `access` + +The subcompiler can be called with the current node stored either in a variable (provided with the `var:` keyword argument) or via a Ruby expression (e.g. +`access: 'current_node.children[2]'`). + +The subcompiler will not generate code that executes this `access` expression more than once or twice. +If it might access the node more than that, `multiple_access` will store the result in a temporary variable (e.g. +`union`). + +==== Sequences + +Sequences are the most difficult elements to handle and are deferred to the `SequenceSubcompiler`. + +==== Atoms + +Atoms are handled with `visit_other_type`, which defers to the `AtomSubcompiler` and converts that result to a node pattern by appending `=== cur_node` (or `=== cur_node.type` if in sequence head). + +This way, the two arguments in `(_ #func?(%1) %2)` would be compiled differently; +`%1` would be compiled as `param1`, while `%2` gets compiled as `param2 === node.children[1]`. + +==== Precedence + +The code generated has higher or equal precedence to `&&`, so as to make chaining convenient. + +=== AtomSubcompiler + +This subcompiler produces Ruby code that gets evaluated to a Ruby object. +E.g. +`"42"`, `:a_symbol`, `param1`. 
+ +A good way to think about it is when it has to be passed as arguments to a function call. +For example: + +[source,ruby] +---- +# Pattern '#func(42, %1)' compiles to +func(node, 42, param1) +---- + +Note that any node pattern can be output by this subcompiler, but those that don't correspond to a Ruby literal will be output as a lambda so they can be combined. +For example: + +[source,ruby] +---- +# Pattern '#func(int)' compiles to +func(node, ->(compare) { compare.is_a?(::RuboCop::AST::Node) && compare.int_type? }) +---- + +=== SequenceSubcompiler + +The subcompiler compiles the sequences' terms in turn, keeping track of which children of the `AST::Node` are being matched. + +==== Variadic terms + +The complexity comes from variadic elements, which have complex processing _and_ may make it impossible to know at compile time which children are matched by the subsequent terms. + +*Example* (no variadic terms) + +---- +(_type int _ str) +---- + +First child must match `int`, third child must match `str`. +The subcompiler will use `children[0]` and `children[2]`. + +*Example* (one variadic term) + +---- +(_type int _* str) +---- + +First child must match `int` and _last_ child must match `str`. +The subcompiler will use `children[0]` and `children[-1]`. + +*Example* (multiple variadic terms) + +---- +(_type int+ sym str+) +---- + +The subcompiler cannot use a fixed integer index into `children[]` to match `sym`. +This must be tracked at runtime in a variable (`cur_index`). + +The subcompiler will use fixed indices before the first variadic element and after the last one. + +==== Node pattern terms + +The node pattern terms are delegated to the `NodePatternSubcompiler`. + +In the pattern `(:sym :sym)`, both `:sym` will be compiled differently because the first `:sym` is in "sequence head": `:sym === node.type` and `:sym === node.children[0]` respectively. 
+The subcompiler indicates if the pattern is in "sequence head" or not, so the `NodePatternSubcompiler` can produce the right code. + +Variadic elements may not (currently) cover the sequence head. +As a convenience, `+(...)+` is understood as `+(_ ...)+`. +Other types of nodes will raise an error (e.g. +`()`; +see `Node#in_sequence_head`) + +==== Precedence + +Like the node pattern subcompiler, it generates code that has higher or equal precedence to `&&`, so as to make chaining convenient. + +== Variant: WithMeta + +These variants of the Parser / Builder / Lexer generate `location` information (exactly like the `parser` gem) for AST nodes as well as comments with their locations (like the `parser` gem). + +Since this information is not typically used when one only wants to define methods, it is not loaded by default. + +== Variant: Debug + +These variants of the Compiler / Subcompilers work by adding tracing code before and after each compilation of `NodePatternSubcompiler` and `SequenceSubcompiler`. +A unique ID is assigned to each node and the tracing code flips a corresponding switch when the expression is about to be evaluated, and after (joined with `&&` so it only flips the switch if the node was a match). 
+Atoms are not compiled differently as they are not really matchable (when not compiled as a node pattern) diff --git a/lib/rubocop/ast.rb b/lib/rubocop/ast.rb index a3e78f5c1..a4eb269fa 100644 --- a/lib/rubocop/ast.rb +++ b/lib/rubocop/ast.rb @@ -6,8 +6,20 @@ require_relative 'ast/ext/range' require_relative 'ast/ext/set' +require_relative 'ast/node_pattern/method_definer' require_relative 'ast/node_pattern' require_relative 'ast/node/mixin/descendence' +require_relative 'ast/node_pattern/builder' +require_relative 'ast/node_pattern/comment' +require_relative 'ast/node_pattern/compiler' +require_relative 'ast/node_pattern/compiler/subcompiler' +require_relative 'ast/node_pattern/compiler/atom_subcompiler' +require_relative 'ast/node_pattern/compiler/binding' +require_relative 'ast/node_pattern/compiler/node_pattern_subcompiler' +require_relative 'ast/node_pattern/compiler/sequence_subcompiler' +require_relative 'ast/node_pattern/lexer' +require_relative 'ast/node_pattern/node' +require_relative 'ast/node_pattern/parser' require_relative 'ast/sexp' require_relative 'ast/node' require_relative 'ast/node/mixin/method_identifier_predicates' @@ -67,3 +79,6 @@ require_relative 'ast/token' require_relative 'ast/traversal' require_relative 'ast/version' + +::RuboCop::AST::NodePattern::Parser.autoload :WithMeta, "#{__dir__}/ast/node_pattern/with_meta" +::RuboCop::AST::NodePattern::Compiler.autoload :Debug, "#{__dir__}/ast/node_pattern/compiler/debug" diff --git a/lib/rubocop/ast/node_pattern.rb b/lib/rubocop/ast/node_pattern.rb index c95b32f66..1254a5c35 100644 --- a/lib/rubocop/ast/node_pattern.rb +++ b/lib/rubocop/ast/node_pattern.rb @@ -1,13 +1,13 @@ # frozen_string_literal: true require 'delegate' -require 'erb' -# rubocop:disable Metrics/ClassLength, Metrics/CyclomaticComplexity module RuboCop module AST # This class performs a pattern-matching operation on an AST node. 
# + # Detailed syntax: /doc/modules/ROOT/pages/node_pattern.md + # # Initialize a new `NodePattern` with `NodePattern.new(pattern_string)`, then # pass an AST node to `NodePattern#match`. Alternatively, use one of the class # macros in `NodePattern::Macros` to define your own pattern-matching method. @@ -23,838 +23,7 @@ module AST # - With no block, but multiple captures: captures are returned as an array. # - With no block and no captures: #match returns `true`. # - # ## Pattern string format examples - # - # ':sym' # matches a literal symbol - # '1' # matches a literal integer - # 'nil' # matches a literal nil - # 'send' # matches (send ...) - # '(send)' # matches (send) - # '(send ...)' # matches (send ...) - # '(op-asgn)' # node types with hyphenated names also work - # '{send class}' # matches (send ...) or (class ...) - # '({send class})' # matches (send) or (class) - # '(send const)' # matches (send (const ...)) - # '(send _ :new)' # matches (send :new) - # '(send $_ :new)' # as above, but whatever matches the $_ is captured - # '(send $_ $_)' # you can use as many captures as you want - # '(send !const ...)' # ! negates the next part of the pattern - # '$(send const ...)' # arbitrary matching can be performed on a capture - # '(send _recv _msg)' # wildcards can be named (for readability) - # '(send ... :new)' # you can match against the last children - # '(array )' # you can match children in any order. This - # # would match `['x', :y]` as well as `[:y, 'x'] - # '(_ )' # will match if arguments have at least a `str` and - # # a `sym` node, but can have more. - # '(array <$str $_>)' # captures are in the order of the pattern, - # # irrespective of the actual order of the children - # '(array int*)' # will match an array of 0 or more integers - # '(array int ?)' # will match 0 or 1 integer. - # # Note: Space needed to distinguish from int? 
- # '(array int+)' # will match an array of 1 or more integers - # '(array (int $_)+)' # as above and will capture the numbers in an array - # '(send $...)' # capture all the children as an array - # '(send $... int)' # capture all children but the last as an array - # '(send _x :+ _x)' # unification is performed on named wildcards - # # (like Prolog variables...) - # # (#== is used to see if values unify) - # '(int odd?)' # words which end with a ? are predicate methods, - # # are are called on the target to see if it matches - # # any Ruby method which the matched object supports - # # can be used - # # if a truthy value is returned, the match succeeds - # '(int [!1 !2])' # [] contains multiple patterns, ALL of which must - # # match in that position - # # in other words, while {} is pattern union (logical - # # OR), [] is intersection (logical AND) - # '(send %1 _)' # % stands for a parameter which must be supplied to - # # #match at matching time - # # it will be compared to the corresponding value in - # # the AST using #=== so you can pass Procs, Regexp, - # # etc. in addition to Nodes or literals. - # # `Array#===` will never match a node element, but - # # `Set#===` is an alias to `Set#include?` (Ruby 2.5+ - # # only), and so can be very useful to match within - # # many possible literals / Nodes. - # # a bare '%' is the same as '%1' - # # the number of extra parameters passed to #match - # # must equal the highest % value in the pattern - # # for consistency, %0 is the 'root node' which is - # # passed as the 1st argument to #match, where the - # # matching process starts - # '(send _ %named)' # arguments can also be passed as named - # # parameters (see `%1`) - # # Note that the macros `def_node_matcher` and - # # `def_node_search` accept default values for these. - # '(send _ %CONST)' # the named constant will act like `%1` and `%named`. 
- # '^^send' # each ^ ascends one level in the AST - # # so this matches against the grandparent node - # '`send' # descends any number of level in the AST - # # so this matches against any descendant node - # '#method' # we call this a 'funcall'; it calls a method in the - # # context where a pattern-matching method is defined - # # if that returns a truthy value, the match succeeds - # 'equal?(%1)' # predicates can be given 1 or more extra args - # '#method(%0, 1)' # funcalls can also be given 1 or more extra args - # # These arguments can be patterns themselves, in - # # which case a matcher responding to === will be - # # passed. - # '# comment' # comments are accepted at the end of lines - # - # You can nest arbitrarily deep: - # - # # matches node parsed from 'Const = Class.new' or 'Const = Module.new': - # '(casgn nil? :Const (send (const nil? {:Class :Module}) :new))' - # # matches a node parsed from an 'if', with a '==' comparison, - # # and no 'else' branch: - # '(if (send _ :== _) _ nil?)' - # - # Note that patterns like 'send' are implemented by calling `#send_type?` on - # the node being matched, 'const' by `#const_type?`, 'int' by `#int_type?`, - # and so on. Therefore, if you add methods which are named like - # `#prefix_type?` to the AST node class, then 'prefix' will become usable as - # a pattern. class NodePattern - # @private - Invalid = Class.new(StandardError) - - # @private - # Builds Ruby code which implements a pattern - class Compiler - SYMBOL = %r{:(?:[\w+@*/?!<>=~|%^-]+|\[\]=?)}.freeze - IDENTIFIER = /[a-zA-Z_][a-zA-Z0-9_-]*/.freeze - COMMENT = /#\s.*$/.freeze - - META = Regexp.union( - %w"( ) { } [ ] $< < > $... $ ! ^ ` ... + * ?" 
- ).freeze - NUMBER = /-?\d+(?:\.\d+)?/.freeze - STRING = /".+?"/.freeze - METHOD_NAME = /\#?#{IDENTIFIER}[!?]?\(?/.freeze - PARAM_CONST = /%[A-Z:][a-zA-Z_:]+/.freeze - KEYWORD_NAME = /%[a-z_]+/.freeze - PARAM_NUMBER = /%\d*/.freeze - - SEPARATORS = /\s+/.freeze - ONLY_SEPARATOR = /\A#{SEPARATORS}\Z/.freeze - - TOKENS = Regexp.union(META, PARAM_CONST, KEYWORD_NAME, PARAM_NUMBER, NUMBER, - METHOD_NAME, SYMBOL, STRING) - - TOKEN = /\G(?:#{SEPARATORS}|#{TOKENS}|.)/.freeze - - NODE = /\A#{IDENTIFIER}\Z/.freeze - PREDICATE = /\A#{IDENTIFIER}\?\(?\Z/.freeze - WILDCARD = /\A_(?:#{IDENTIFIER})?\Z/.freeze - - FUNCALL = /\A\##{METHOD_NAME}/.freeze - LITERAL = /\A(?:#{SYMBOL}|#{NUMBER}|#{STRING})\Z/.freeze - PARAM = /\A#{PARAM_NUMBER}\Z/.freeze - CONST = /\A#{PARAM_CONST}\Z/.freeze - KEYWORD = /\A#{KEYWORD_NAME}\Z/.freeze - CLOSING = /\A(?:\)|\}|\])\Z/.freeze - - REST = '...' - CAPTURED_REST = '$...' - - attr_reader :match_code, :tokens, :captures - - SEQ_HEAD_INDEX = -1 - - # Placeholders while compiling, see with_..._context methods - CUR_PLACEHOLDER = '@@@cur' - CUR_NODE = "#{CUR_PLACEHOLDER} node@@@" - CUR_ELEMENT = "#{CUR_PLACEHOLDER} element@@@" - SEQ_HEAD_GUARD = '@@@seq guard head@@@' - MULTIPLE_CUR_PLACEHOLDER = /#{CUR_PLACEHOLDER}.*#{CUR_PLACEHOLDER}/.freeze - - line = __LINE__ - ANY_ORDER_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>') - <% if capture_rest %>(<%= capture_rest %> = []) && <% end -%> - <% if capture_all %>(<%= capture_all %> = <% end -%> - <%= CUR_NODE %>.children[<%= range %>]<% if capture_all %>)<% end -%> - .each_with_object({}) { |<%= child %>, <%= matched %>| - case - <% patterns.each_with_index do |pattern, i| -%> - when !<%= matched %>[<%= i %>] && <%= - with_context(pattern, child, use_temp_node: false) - %> then <%= matched %>[<%= i %>] = true - <% end -%> - <% if !rest %> else break({}) - <% elsif capture_rest %> else <%= capture_rest %> << <%= child %> - <% end -%> - end - }.size == <%= patterns.size -%> - RUBY - ANY_ORDER_TEMPLATE.location 
= [__FILE__, line + 1] - - line = __LINE__ - REPEATED_TEMPLATE = ERB.new <<~RUBY.gsub("-%>\n", '%>') - <% if captured %>(<%= accumulate %> = Array.new) && <% end %> - <%= CUR_NODE %>.children[<%= range %>].all? do |<%= child %>| - <%= with_context(expr, child, use_temp_node: false) %><% if captured %>&& - <%= accumulate %>.push(<%= captured %>)<% end %> - end <% if captured %>&& - (<%= captured %> = if <%= accumulate %>.empty? - <%= captured %>.map{[]} # Transpose hack won't work for empty case - else - <%= accumulate %>.transpose - end) <% end -%> - RUBY - REPEATED_TEMPLATE.location = [__FILE__, line + 1] - - def initialize(str, root = 'node0', node_var = root) - @string = str - # For def_node_matcher, root == node_var - # For def_node_search, root is the root node to search on, - # and node_var is the current descendant being searched. - @root = root - @node_var = node_var - - @temps = 0 # avoid name clashes between temp variables - @captures = 0 # number of captures seen - @unify = {} # named wildcard -> temp variable - @params = 0 # highest % (param) number seen - @keywords = Set[] # keyword parameters seen - run - end - - def run - @tokens = Compiler.tokens(@string) - - @match_code = with_context(compile_expr, @node_var, use_temp_node: false) - @match_code.prepend("(captures = Array.new(#{@captures})) && ") \ - if @captures.positive? - - fail_due_to('unbalanced pattern') unless tokens.empty? 
- end - - # rubocop:disable Metrics/MethodLength, Metrics/AbcSize - def compile_expr(token = tokens.shift) - # read a single pattern-matching expression from the token stream, - # return Ruby code which performs the corresponding matching operation - # - # the 'pattern-matching' expression may be a composite which - # contains an arbitrary number of sub-expressions, but that composite - # must all have precedence higher or equal to that of `&&` - # - # Expressions may use placeholders like: - # CUR_NODE: Ruby code that evaluates to an AST node - # CUR_ELEMENT: Either the node or the type if in first element of - # a sequence (aka seq_head, e.g. "(seq_head first_node_arg ...") - if (atom = compile_atom(token)) - return atom_to_expr(atom) - end - - case token - when '(' then compile_seq - when '{' then compile_union - when '[' then compile_intersect - when '!' then compile_negation - when '$' then compile_capture - when '^' then compile_ascend - when '`' then compile_descend - when WILDCARD then compile_new_wildcard(token[1..-1]) - when FUNCALL then compile_funcall(token) - when PREDICATE then compile_predicate(token) - when NODE then compile_nodetype(token) - else fail_due_to("invalid token #{token.inspect}") - end - end - # rubocop:enable Metrics/MethodLength, Metrics/AbcSize - - def tokens_until(stop, what) - return to_enum __method__, stop, what unless block_given? 
- - fail_due_to("empty #{what}") if tokens.first == stop - yield until tokens.first == stop - tokens.shift - end - - def compile_seq - terms = tokens_until(')', 'sequence').map { variadic_seq_term } - Sequence.new(self, *terms).compile - end - - def compile_guard_clause - "#{CUR_NODE}.is_a?(RuboCop::AST::Node)" - end - - def variadic_seq_term - token = tokens.shift - case token - when CAPTURED_REST then compile_captured_ellipsis - when REST then compile_ellipsis - when '$<' then compile_any_order(next_capture) - when '<' then compile_any_order - else compile_repeated_expr(token) - end - end - - def compile_repeated_expr(token) - before = @captures - expr = compile_expr(token) - min, max = parse_repetition_token - return [1, expr] if min.nil? - - if @captures != before - captured = "captures[#{before}...#{@captures}]" - accumulate = next_temp_variable(:accumulate) - end - arity = min..max || Float::INFINITY - - [arity, repeated_generator(expr, captured, accumulate)] - end - - def repeated_generator(expr, captured, accumulate) - with_temp_variables do |child| - lambda do |range| - fail_due_to 'repeated pattern at beginning of sequence' if range.begin == SEQ_HEAD_INDEX - REPEATED_TEMPLATE.result(binding) - end - end - end - - def parse_repetition_token - case tokens.first - when '*' then min = 0 - when '+' then min = 1 - when '?' then min = 0 - max = 1 - else return - end - tokens.shift - [min, max] - end - - # @private - # Builds Ruby code for a sequence - # (head *first_terms variadic_term *last_terms) - class Sequence - extend Forwardable - def_delegators :@compiler, :compile_guard_clause, :with_seq_head_context, - :with_child_context, :fail_due_to - - def initialize(compiler, *arity_term_list) - @arities, @terms = arity_term_list.transpose - - @compiler = compiler - @variadic_index = @arities.find_index { |a| a.is_a?(Range) } - fail_due_to 'multiple variable patterns in same sequence' \ - if @variadic_index && !@arities.one? 
{ |a| a.is_a?(Range) } - end - - def compile - [ - compile_guard_clause, - compile_child_nb_guard, - compile_seq_head, - *compile_first_terms, - compile_variadic_term, - *compile_last_terms - ].compact.join(" &&\n") << SEQ_HEAD_GUARD - end - - private - - def first_terms_arity - first_terms_range { |r| @arities[r].inject(0, :+) } || 0 - end - - def last_terms_arity - last_terms_range { |r| @arities[r].inject(0, :+) } || 0 - end - - def variadic_term_min_arity - @variadic_index ? @arities[@variadic_index].begin : 0 - end - - def first_terms_range - yield 1..(@variadic_index || @terms.size) - 1 if seq_head? - end - - def last_terms_range - yield @variadic_index + 1...@terms.size if @variadic_index - end - - def seq_head? - @variadic_index != 0 - end - - def compile_child_nb_guard - fixed = first_terms_arity + last_terms_arity - min = fixed + variadic_term_min_arity - op = if @variadic_index - max_variadic = @arities[@variadic_index].end - if max_variadic != Float::INFINITY - range = min..fixed + max_variadic - return "(#{range}).cover?(#{CUR_NODE}.children.size)" - end - '>=' - else - '==' - end - "#{CUR_NODE}.children.size #{op} #{min}" - end - - def term(index, range) - t = @terms[index] - if t.respond_to? :call - t.call(range) - else - with_child_context(t, range.begin) - end - end - - def compile_seq_head - return unless seq_head? - - fail_due_to 'sequences cannot start with <' \ - if @terms[0].respond_to? 
:call - - with_seq_head_context(@terms[0]) - end - - def compile_first_terms - first_terms_range { |range| compile_terms(range, 0) } - end - - def compile_last_terms - last_terms_range { |r| compile_terms(r, -last_terms_arity) } - end - - def compile_terms(index_range, start) - index_range.map do |i| - current = start - start += @arities.fetch(i) - term(i, current..start - 1) - end - end - - def compile_variadic_term - variadic_arity { |arity| term(@variadic_index, arity) } - end - - def variadic_arity - return unless @variadic_index - - first = @variadic_index.positive? ? first_terms_arity : SEQ_HEAD_INDEX - yield first..-last_terms_arity - 1 - end - end - private_constant :Sequence - - def compile_captured_ellipsis - capture = next_capture - block = lambda { |range| - # Consider ($...) like (_ $...): - range = 0..range.end if range.begin == SEQ_HEAD_INDEX - "(#{capture} = #{CUR_NODE}.children[#{range}])" - } - [0..Float::INFINITY, block] - end - - def compile_ellipsis - [0..Float::INFINITY, 'true'] - end - - # rubocop:disable Metrics/MethodLength - def compile_any_order(capture_all = nil) - rest = capture_rest = nil - patterns = [] - with_temp_variables do |child, matched| - tokens_until('>', 'any child') do - fail_due_to 'ellipsis must be at the end of <>' if rest - token = tokens.shift - case token - when CAPTURED_REST then rest = capture_rest = next_capture - when REST then rest = true - else patterns << compile_expr(token) - end - end - [rest ? patterns.size..Float::INFINITY : patterns.size, - ->(range) { ANY_ORDER_TEMPLATE.result(binding) }] - end - end - # rubocop:enable Metrics/MethodLength - - def insure_same_captures(enum, what) - return to_enum __method__, enum, what unless block_given? 
- - captures_before = captures_after = nil - enum.each do - captures_before ||= @captures - @captures = captures_before - yield - captures_after ||= @captures - fail_due_to("each #{what} must have same # of captures") if captures_after != @captures - end - end - - def access_unify(name) - var = @unify[name] - - if var == :forbidden_unification - fail_due_to "Wildcard #{name} was first seen in a subset of a" \ - " union and can't be used outside that union" - end - var - end - - def forbid_unification(*names) - names.each do |name| - @unify[name] = :forbidden_unification - end - end - - # rubocop:disable Metrics/MethodLength, Metrics/AbcSize - def unify_in_union(enum) - # We need to reset @unify before each branch is processed. - # Moreover we need to keep track of newly encountered wildcards. - # Var `new_unify_intersection` will hold those that are encountered - # in all branches; these are not a problem. - # Var `partial_unify` will hold those encountered in only a subset - # of the branches; these can't be used outside of the union. - - return to_enum __method__, enum unless block_given? - - new_unify_intersection = nil - partial_unify = [] - unify_before = @unify.dup - - result = enum.each do |e| - @unify = unify_before.dup if new_unify_intersection - yield e - new_unify = @unify.keys - unify_before.keys - if new_unify_intersection.nil? 
- # First iteration - new_unify_intersection = new_unify - else - union = new_unify_intersection | new_unify - new_unify_intersection &= new_unify - partial_unify |= union - new_unify_intersection - end - end - - # At this point, all members of `new_unify_intersection` can be used - # for unification outside of the union, but partial_unify may not - - forbid_unification(*partial_unify) - - result - end - # rubocop:enable Metrics/MethodLength, Metrics/AbcSize - - def compile_union - # we need to ensure that each branch of the {} contains the same - # number of captures (since only one branch of the {} can actually - # match, the same variables are used to hold the captures for each - # branch) - enum = tokens_until('}', 'union') - enum = unify_in_union(enum) - terms = insure_same_captures(enum, 'branch of {}') - .map { compile_expr } - - "(#{terms.join(' || ')})" - end - - def compile_intersect - tokens_until(']', 'intersection') - .map { compile_expr } - .join(' && ') - end - - def compile_capture - "(#{next_capture} = #{CUR_ELEMENT}; #{compile_expr})" - end - - def compile_negation - "!(#{compile_expr})" - end - - def compile_ascend - with_context("#{CUR_NODE} && #{compile_expr}", "#{CUR_NODE}.parent") - end - - def compile_descend - with_temp_variables do |descendant| - pattern = with_context(compile_expr, descendant, - use_temp_node: false) - [ - "RuboCop::AST::NodePattern.descend(#{CUR_ELEMENT}).", - "any? do |#{descendant}|", - " #{pattern}", - 'end' - ].join("\n") - end - end - - # Known wildcards are considered atoms, see `compile_atom` - def compile_new_wildcard(name) - return 'true' if name.empty? - - n = @unify[name] = "unify_#{name.gsub('-', '__')}" - # double assign to avoid "assigned but unused variable" - "(#{n} = #{CUR_ELEMENT}; #{n} = #{n}; true)" - end - - def compile_predicate(predicate) - if predicate.end_with?('(') # is there an arglist? 
- args = compile_args - predicate = predicate[0..-2] # drop the trailing ( - "#{CUR_ELEMENT}.#{predicate}(#{args.join(',')})" - else - "#{CUR_ELEMENT}.#{predicate}" - end - end - - def compile_funcall(method) - # call a method in the context which this pattern-matching - # code is used in. pass target value as an argument - method = method[1..-1] # drop the leading # - if method.end_with?('(') # is there an arglist? - args = compile_args - method = method[0..-2] # drop the trailing ( - "#{method}(#{CUR_ELEMENT},#{args.join(',')})" - else - "#{method}(#{CUR_ELEMENT})" - end - end - - def compile_nodetype(type) - "#{compile_guard_clause} && #{CUR_NODE}.#{type.tr('-', '_')}_type?" - end - - def compile_args - tokens_until(')', 'call arguments').map do - arg = compile_arg - tokens.shift if tokens.first == ',' - arg - end - end - - def atom_to_expr(atom) - "#{atom} === #{CUR_ELEMENT}" - end - - def expr_to_atom(expr) - with_temp_variables do |compare| - in_context = with_context(expr, compare, use_temp_node: false) - "::RuboCop::AST::NodePattern::Matcher.new{|#{compare}| #{in_context}}" - end - end - - # @return compiled atom (e.g. ":literal" or "SOME_CONST") - # or nil if not a simple atom (unknown wildcard, other tokens) - def compile_atom(token) - case token - when WILDCARD then access_unify(token[1..-1]) # could be nil - when LITERAL then token - when KEYWORD then get_keyword(token[1..-1]) - when CONST then get_const(token[1..-1]) - when PARAM then get_param(token[1..-1]) - when CLOSING then fail_due_to("#{token} in invalid position") - when nil then fail_due_to('pattern ended prematurely') - end - end - - def compile_arg - token = tokens.shift - compile_atom(token) || expr_to_atom(compile_expr(token)) - end - - def next_capture - index = @captures - @captures += 1 - "captures[#{index}]" - end - - def get_param(number) - number = number.empty? ? 1 : Integer(number) - @params = number if number > @params - number.zero? ? 
@root : "param#{number}" - end - - def get_keyword(name) - @keywords << name - name - end - - def get_const(const) - const # Output the constant exactly as given - end - - def emit_yield_capture(when_no_capture = '') - yield_val = if @captures.zero? - when_no_capture - elsif @captures == 1 - 'captures[0]' # Circumvent https://github.com/jruby/jruby/issues/5710 - else - '*captures' - end - "yield(#{yield_val})" - end - - def emit_retval - if @captures.zero? - 'true' - elsif @captures == 1 - 'captures[0]' - else - 'captures' - end - end - - def emit_param_list - (1..@params).map { |n| "param#{n}" }.join(',') - end - - def emit_keyword_list(forwarding: false) - pattern = "%s: #{'%s' if forwarding}" - @keywords.map { |k| format(pattern, keyword: k) }.join(',') - end - - def emit_params(*first, forwarding: false) - params = emit_param_list - keywords = emit_keyword_list(forwarding: forwarding) - [*first, params, keywords].reject(&:empty?).join(',') - end - - def emit_method_code - <<~RUBY - return unless #{@match_code} - block_given? ? #{emit_yield_capture} : (return #{emit_retval}) - RUBY - end - - def fail_due_to(message) - raise Invalid, "Couldn't compile due to #{message}. Pattern: #{@string}" - end - - def with_temp_node(cur_node) - with_temp_variables do |node| - yield "(#{node} = #{cur_node})", node - end - .gsub("\n", "\n ") # Nicer indent for debugging - end - - def with_temp_variables(&block) - names = block.parameters.map { |_, name| next_temp_variable(name) } - yield(*names) - end - - def next_temp_variable(name) - "#{name}#{next_temp_value}" - end - - def next_temp_value - @temps += 1 - end - - def auto_use_temp_node?(code) - code.match?(MULTIPLE_CUR_PLACEHOLDER) - end - - # with_<...>_context methods are used whenever the context, - # i.e the current node or the current element can be determined. 
- - def with_child_context(code, child_index) - with_context(code, "#{CUR_NODE}.children[#{child_index}]") - end - - def with_context(code, cur_node, - use_temp_node: auto_use_temp_node?(code)) - if use_temp_node - with_temp_node(cur_node) do |init, temp_var| - substitute_cur_node(code, temp_var, first_cur_node: init) - end - else - substitute_cur_node(code, cur_node) - end - end - - def with_seq_head_context(code) - fail_due_to('parentheses at sequence head') if code.include?(SEQ_HEAD_GUARD) - - code.gsub CUR_ELEMENT, "#{CUR_NODE}.type" - end - - def substitute_cur_node(code, cur_node, first_cur_node: cur_node) - iter = 0 - code - .gsub(CUR_ELEMENT, CUR_NODE) - .gsub(CUR_NODE) do - iter += 1 - iter == 1 ? first_cur_node : cur_node - end - .gsub(SEQ_HEAD_GUARD, '') - end - - def self.tokens(pattern) - pattern.gsub(COMMENT, '').scan(TOKEN).grep_v(ONLY_SEPARATOR) - end - - # This method minimizes the closure for our method - def wrapping_block(method_name, **defaults) - proc do |*args, **values| - send method_name, *args, **defaults, **values - end - end - - def def_helper(base, method_name, **defaults) - location = caller_locations(3, 1).first - unless defaults.empty? 
- call = :"without_defaults_#{method_name}" - base.send :define_method, method_name, &wrapping_block(call, **defaults) - method_name = call - end - src = yield method_name - base.class_eval(src, location.path, location.lineno) - end - - def def_node_matcher(base, method_name, **defaults) - def_helper(base, method_name, **defaults) do |name| - <<~RUBY - def #{name}(#{emit_params('node = self')}) - #{emit_method_code} - end - RUBY - end - end - - def def_node_search(base, method_name, **defaults) - def_helper(base, method_name, **defaults) do |name| - emit_node_search(name) - end - end - - def emit_node_search(method_name) - if method_name.to_s.end_with?('?') - on_match = 'return true' - else - args = emit_params(":#{method_name}", @root, forwarding: true) - prelude = "return enum_for(#{args}) unless block_given?\n" - on_match = emit_yield_capture(@node_var) - end - emit_node_search_body(method_name, prelude: prelude, on_match: on_match) - end - - def emit_node_search_body(method_name, prelude:, on_match:) - <<~RUBY - def #{method_name}(#{emit_params(@root)}) - #{prelude} - #{@root}.each_node do |#{@node_var}| - if #{match_code} - #{on_match} - end - end - nil - end - RUBY - end - end - private_constant :Compiler - # Helpers for defining methods based on a pattern string module Macros # Define a method which applies a pattern to an AST node @@ -865,8 +34,7 @@ module Macros # If the node matches, and no block is provided, the new method will # return the captures, or `true` if there were none. def def_node_matcher(method_name, pattern_str, **keyword_defaults) - Compiler.new(pattern_str, 'node') - .def_node_matcher(self, method_name, **keyword_defaults) + NodePattern.new(pattern_str).def_node_matcher(self, method_name, **keyword_defaults) end # Define a method which recurses over the descendants of an AST node, @@ -876,48 +44,60 @@ def def_node_matcher(method_name, pattern_str, **keyword_defaults) # as soon as it finds a descendant which matches. 
Otherwise, it will # yield all descendants which match. def def_node_search(method_name, pattern_str, **keyword_defaults) - Compiler.new(pattern_str, 'node0', 'node') - .def_node_search(self, method_name, **keyword_defaults) + NodePattern.new(pattern_str).def_node_search(self, method_name, **keyword_defaults) end end - attr_reader :pattern + extend Forwardable + include MethodDefiner + Invalid = Class.new(StandardError) - def initialize(str) + VAR = 'node' + + attr_reader :pattern, :ast, :match_code + + def_delegators :@compiler, :captures, :named_parameters, :positional_parameters + + def initialize(str, compiler: Compiler.new) @pattern = str - compiler = Compiler.new(str, 'node0') - src = "def match(#{compiler.emit_params('node0')});" \ - "#{compiler.emit_method_code}end" - instance_eval(src, __FILE__, __LINE__ + 1) + @ast = compiler.parser.new.parse(str) + @compiler = compiler + @match_code = @compiler.compile_as_node_pattern(@ast, var: VAR) + @cache = {} end - def match(*args, **rest) - # If we're here, it's because the singleton method has not been defined, - # either because we've been dup'ed or serialized through YAML - initialize(pattern) - if rest.empty? - match(*args) - else - match(*args, **rest) - end + def match(*args, **rest, &block) + @cache[:lambda] ||= as_lambda + @cache[:lambda].call(*args, block: block, **rest) + end + + def ==(other) + other.is_a?(NodePattern) && other.ast == ast + end + alias eql? == + + def to_s + "#<#{self.class} #{pattern}>" end - def marshal_load(pattern) + def marshal_load(pattern) #:nodoc: initialize pattern end - def marshal_dump + def marshal_dump #:nodoc: pattern end - def ==(other) - other.is_a?(NodePattern) && - Compiler.tokens(other.pattern) == Compiler.tokens(pattern) + def as_json(_options = nil) #:nodoc: + pattern end - alias eql? 
== - def to_s - "#<#{self.class} #{pattern}>" + def encode_with(coder) #:nodoc: + coder['pattern'] = pattern + end + + def init_with(coder) #:nodoc: + initialize(coder['pattern']) end # Yields its argument and any descendants, depth-first. @@ -936,17 +116,11 @@ def self.descend(element, &block) nil end - # @api private - class Matcher - def initialize(&block) - @block = block - end - - def ===(compare) - @block.call(compare) - end + def freeze + @match_code.freeze + @compiler.freeze + super end end end end -# rubocop:enable Metrics/ClassLength, Metrics/CyclomaticComplexity diff --git a/lib/rubocop/ast/node_pattern/builder.rb b/lib/rubocop/ast/node_pattern/builder.rb new file mode 100644 index 000000000..999cbd477 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/builder.rb @@ -0,0 +1,42 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + # Responsible to build the AST nodes for `NoePattern` + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class Builder + def emit_capture(capture_token, node) + return node if capture_token.nil? 
+ + emit_unary_op(:capture, capture_token, node) + end + + def emit_atom(type, value) + n(type, [value]) + end + + def emit_unary_op(type, _operator = nil, *children) + n(type, children) + end + + def emit_list(type, _begin, children, _end) + n(type, children) + end + + def emit_call(type, selector, args = nil) + _begin_t, arg_nodes, _end_t = args + n(type, [selector, *arg_nodes]) + end + + private + + def n(type, *args) + Node::MAP[type].new(type, *args) + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/comment.rb b/lib/rubocop/ast/node_pattern/comment.rb new file mode 100644 index 000000000..9dc6f53f0 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/comment.rb @@ -0,0 +1,45 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + # A NodePattern comment, simplified version of ::Parser::Source::Comment + class Comment + attr_reader :location + alias loc location + + ## + # @param [Parser::Source::Range] range + # + def initialize(range) + @location = ::Parser::Source::Map.new(range) + freeze + end + + # @return [String] + def text + loc.expression.source.freeze + end + + ## + # Compares comments. Two comments are equal if they + # correspond to the same source range. 
+ # + # @param [Object] other + # @return [Boolean] + # + def ==(other) + other.is_a?(Comment) && + @location == other.location + end + + ## + # @return [String] a human-readable representation of this comment + # + def inspect + "#" + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/compiler.rb b/lib/rubocop/ast/node_pattern/compiler.rb new file mode 100644 index 000000000..8bb4b5d93 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler.rb @@ -0,0 +1,96 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + # The top-level compiler holding the global state + # Defers work to its subcompilers + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class Compiler + extend Forwardable + attr_reader :captures, :named_parameters, :positional_parameters, :binding + + def initialize + @temp_depth = 0 # avoid name clashes between temp variables + @captures = 0 # number of captures seen + @positional_parameters = 0 # highest % (param) number seen + @named_parameters = Set[] # keyword parameters + @binding = Binding.new # bound variables + @atom_subcompiler = self.class::AtomSubcompiler.new(self) + end + + def_delegators :binding, :bind, :union_bind + + def positional_parameter(number) + @positional_parameters = number if number > @positional_parameters + "param#{number}" + end + + def named_parameter(name) + @named_parameters << name + name + end + + def enforce_same_captures(enum) + return to_enum __method__, enum unless block_given? 
+ + captures_before = captures_after = nil + enum.each do |node| + captures_before ||= @captures + @captures = captures_before + yield node + captures_after ||= @captures + raise Invalid, 'each branch must have same # of captures' if captures_after != @captures + end + end + + def compile_as_atom(node) + @atom_subcompiler.compile(node) + end + + def compile_as_node_pattern(node, **options) + self.class::NodePatternSubcompiler.new(self, **options).compile(node) + end + + def compile_sequence(sequence, var:) + self.class::SequenceSubcompiler.new(self, sequence: sequence, var: var).compile_sequence + end + + def parser + Parser + end + + # Utilities + + def with_temp_variables(*names, &block) + @temp_depth += 1 + suffix = @temp_depth if @temp_depth > 1 + names = block.parameters.map(&:last) if names.empty? + names.map! { |name| "#{name}#{suffix}" } + yield(*names) + ensure + @temp_depth -= 1 + end + + def next_capture + "captures[#{new_capture}]" + end + + def freeze + @named_parameters.freeze + super + end + + private + + def new_capture + @captures + ensure + @captures += 1 + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb b/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb new file mode 100644 index 000000000..bca1875fb --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler/atom_subcompiler.rb @@ -0,0 +1,50 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + class Compiler + # Generates code that evaluates to a value (Ruby object) + # This value responds to `===`. 
+ # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class AtomSubcompiler < Subcompiler + private + + def visit_unify + compiler.bind(node.child) do + raise Invalid, 'unified variables can not appear first as argument' + end + end + + def visit_symbol + node.child.inspect + end + alias visit_number visit_symbol + alias visit_string visit_symbol + + def visit_const + node.child + end + + def visit_named_parameter + compiler.named_parameter(node.child) + end + + def visit_positional_parameter + compiler.positional_parameter(node.child) + end + + # Assumes other types are node patterns. + def visit_other_type + compiler.with_temp_variables do |compare| + code = compiler.compile_as_node_pattern(node, var: compare) + "->(#{compare}) { #{code} }" + end + end + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/compiler/binding.rb b/lib/rubocop/ast/node_pattern/compiler/binding.rb new file mode 100644 index 000000000..7359b014b --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler/binding.rb @@ -0,0 +1,78 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + class Compiler + # Holds the list of bound variable names + class Binding + def initialize + @bound = {} + end + + # Yields the first time a given name is bound + # + # @return [String] bound variable name + def bind(name) + var = @bound.fetch(name) do + yield n = @bound[name] = "unify_#{name.gsub('-', '__')}" + n + end + + if var == :forbidden_unification + raise Invalid, "Wildcard #{name} was first seen in a subset of a" \ + " union and can't be used outside that union" + end + var + end + + # rubocop:disable Metrics/MethodLength, Metrics/AbcSize + def union_bind(enum) + # We need to reset @bound before each branch is processed. + # Moreover we need to keep track of newly encountered wildcards. + # Var `newly_bound_intersection` will hold those that are encountered + # in all branches; these are not a problem. 
+ # Var `partially_bound` will hold those encountered in only a subset + # of the branches; these can't be used outside of the union. + + return to_enum __method__, enum unless block_given? + + newly_bound_intersection = nil + partially_bound = [] + bound_before = @bound.dup + + result = enum.each do |e| + @bound = bound_before.dup if newly_bound_intersection + yield e + newly_bound = @bound.keys - bound_before.keys + if newly_bound_intersection.nil? + # First iteration + newly_bound_intersection = newly_bound + else + union = newly_bound_intersection | newly_bound + newly_bound_intersection &= newly_bound + partially_bound |= union - newly_bound_intersection + end + end + + # At this point, all members of `newly_bound_intersection` can be used + # for unification outside of the union, but partially_bound may not + + forbid(partially_bound) + + result + end + # rubocop:enable Metrics/MethodLength, Metrics/AbcSize + + private + + def forbid(names) + names.each do |name| + @bound[name] = :forbidden_unification + end + end + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/compiler/debug.rb b/lib/rubocop/ast/node_pattern/compiler/debug.rb new file mode 100644 index 000000000..8e6665a28 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler/debug.rb @@ -0,0 +1,91 @@ +# frozen_string_literal: true + +require 'rainbow' + +module RuboCop + module AST + class NodePattern + class Compiler + # Variant of the Compiler with tracing information for nodes + class Debug < Compiler + attr_reader :trace, :node_ids + + # @api private + Colorizer = Struct.new(:compiler) do # rubocop:disable Metrics/BlockLength + def colorize(ast, color_map: self.color_map(ast)) + ast.loc.expression.source_buffer.source.chars.map.with_index do |char, i| + Rainbow(char).color((color_map[i] || COLORS[:not_visitable])) + end.join + end + + def color_map(ast) + ast.each_descendant + .map { |node| color_map_for(node) } + .inject(color_map_for(ast), :merge) + end + + private + + 
COLORS = { + not_visited: :yellow, + not_visitable: :lightseagreen, + nil => :red, + true => :green + }.freeze + + def color_map_for(node) + return {} unless (range = node.loc&.expression) + + color = COLORS.fetch(visited(node)) + range.to_a.to_h { |char| [char, color] } + end + + def visited(node) + id = compiler.node_ids.fetch(node) { return :not_visitable } + return :not_visited unless compiler.trace[:enter][id] + + compiler.trace[:success][id] + end + end + def initialize + super + @node_ids = Hash.new { |h, k| h[k] = h.size }.compare_by_identity + @trace = { enter: {}, success: {} } + end + + def named_parameters + super << :trace + end + + def parser + Parser::WithMeta + end + + # @api private + module InstrumentationSubcompiler + def do_compile + "#{tracer(:enter)} && #{super} && #{tracer(:success)}" + end + + private + + def tracer(kind) + id = compiler.node_ids[node] + "(trace[:#{kind}][#{id}] ||= true)" + end + end + + # @api private + class NodePatternSubcompiler < Compiler::NodePatternSubcompiler + include InstrumentationSubcompiler + end + + # @api private + class SequenceSubcompiler < Compiler::SequenceSubcompiler + include InstrumentationSubcompiler + end + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/compiler/node_pattern_subcompiler.rb b/lib/rubocop/ast/node_pattern/compiler/node_pattern_subcompiler.rb new file mode 100644 index 000000000..f1478bffe --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler/node_pattern_subcompiler.rb @@ -0,0 +1,147 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + class Compiler + # Compiles code that evalues to true or false + # for a given value `var` (typically a RuboCop::AST::Node) + # or it's `node.type` if `seq_head` is true + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class NodePatternSubcompiler < Subcompiler + attr_reader :access, :seq_head + + def initialize(compiler, var: nil, access: var, 
seq_head: false) + super(compiler) + @var = var + @access = access + @seq_head = seq_head + end + + private + + def visit_negation + expr = compile(node.child) + "!(#{expr})" + end + + def visit_ascend + compiler.with_temp_variables do |ascend| + expr = compiler.compile_as_node_pattern(node.child, var: ascend) + "(#{ascend} = #{access_node}) && (#{ascend} = #{ascend}.parent) && #{expr}" + end + end + + def visit_descend + compiler.with_temp_variables { |descendant| <<~RUBY.chomp } + ::RuboCop::AST::NodePattern.descend(#{access}).any? do |#{descendant}| + #{compiler.compile_as_node_pattern(node.child, var: descendant)} + end + RUBY + end + + def visit_wildcard + 'true' + end + + def visit_unify + name = compiler.bind(node.child) do |unify_name| + # double assign to avoid "assigned but unused variable" + return "(#{unify_name} = #{access_element}; #{unify_name} = #{unify_name}; true)" + end + + compile_value_match(name) + end + + def visit_capture + "(#{compiler.next_capture} = #{access_element}; #{compile(node.child)})" + end + + ### Lists + + def visit_union + multiple_access(:union) do + enum = compiler.union_bind(node.children) + terms = compiler.enforce_same_captures(enum) + .map { |child| compile(child) } + + "(#{terms.join(' || ')})" + end + end + + def visit_intersection + multiple_access(:intersection) do + node.children.map { |child| compile(child) } + .join(' && ') + end + end + + def visit_predicate + "#{access_element}.#{node.method_name}#{compile_args(node.arg_list)}" + end + + def visit_function_call + "#{node.method_name}#{compile_args(node.arg_list, first: access_element)}" + end + + def visit_node_type + "#{access_node}.#{node.child.to_s.tr('-', '_')}_type?" + end + + def visit_sequence + multiple_access(:sequence) do |var| + term = compiler.compile_sequence(node, var: var) + "#{compile_guard_clause} && #{term}" + end + end + + # Assumes other types are atoms. 
+ def visit_other_type + value = compiler.compile_as_atom(node) + compile_value_match(value) + end + + # Compiling helpers + + def compile_value_match(value) + "#{value} === #{access_element}" + end + + # @param [Array, nil] + # @return [String, nil] + def compile_args(arg_list, first: nil) + args = arg_list&.map { |arg| compiler.compile_as_atom(arg) } + args = [first, *args] if first + "(#{args.join(', ')})" if args + end + + def access_element + seq_head ? "#{access}.type" : access + end + + def access_node + return access if seq_head + + "#{compile_guard_clause} && #{access}" + end + + def compile_guard_clause + "#{access}.is_a?(::RuboCop::AST::Node)" + end + + def multiple_access(kind) + return yield @var if @var + + compiler.with_temp_variables(kind) do |var| + memo = "#{var} = #{access}" + @var = @access = var + "(#{memo}; #{yield @var})" + end + end + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/compiler/sequence_subcompiler.rb b/lib/rubocop/ast/node_pattern/compiler/sequence_subcompiler.rb new file mode 100644 index 000000000..279b6b954 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler/sequence_subcompiler.rb @@ -0,0 +1,326 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + class Compiler + # Compiles terms within a sequence to code that evalues to true or false. + # Compilation of the nodes that can match only a single term is deferred to + # `NodePatternSubcompiler`; only nodes that can match multiple terms are + # compiled here. + # Assumes the given `var` is a `::RuboCop::AST::Node` + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + # + # rubocop:disable Metrics/ClassLength + class SequenceSubcompiler < Subcompiler + DELTA = 1 + # Calls `compile_sequence`; the actual `compile` method + # will be used for the different terms of the sequence. 
+ # The only case of re-entrant call to `compile` is `visit_capture` + def initialize(compiler, sequence:, var:) + @seq = sequence # The node to be compiled + @seq_var = var # Holds the name of the variable holding the AST::Node we are matching + super(compiler) + end + + def compile_sequence + # rubocop:disable Layout/CommentIndentation + compiler.with_temp_variables do |cur_child, cur_index, previous_index| + @cur_child_var = cur_child # To hold the current child node + @cur_index_var = cur_index # To hold the current child index (always >= 0) + @prev_index_var = previous_index # To hold the child index before we enter the + # variadic nodes + @cur_index = :seq_head # Can be any of: + # :seq_head : when the current child is actually the + # sequence head + # :variadic_mode : child index held by @cur_index_var + # >= 0 : when the current child index is known + # (from the begining) + # < 0 : when the index is known from the end, + # where -1 is *past the end*, + # -2 is the last child, etc... + # This shift of 1 from standard Ruby indices + # is stored in DELTA + @in_sync = false # `true` iff `@cur_child_var` and `@cur_index_var` + # correspond to `@cur_index` + # Must be true if `@cur_index` is `:variadic_mode` + compile_terms + end + # rubocop:enable Layout/CommentIndentation + end + + private + + private :compile # Not meant to be called from outside + + attr_reader :cur_child_var + + # Single node patterns are all handled here + def visit_other_type + access = case @cur_index + when :seq_head + { var: @seq_var, + seq_head: true } + when :variadic_mode + { var: @cur_child_var } + else + idx = @cur_index + (@cur_index.negative? ? DELTA : 0) + { access: "#{@seq_var}.children[#{idx}]" } + end + + term = compiler.compile_as_node_pattern(node, **access) + compile_and_advance(term) + end + + def visit_repetition + within_loop do + child_captures = node.child.nb_captures + child_code = compile(node.child) + next compile_loop(child_code) if child_captures.zero? 
+ + compile_captured_repetition(child_code, child_captures) + end + end + + def visit_any_order + within_loop do + compiler.with_temp_variables do |matched| + case_terms = compile_any_order_branches(matched) + else_code, init = compile_any_order_else + term = "#{compile_case(case_terms, else_code)} && (#{compile_loop_advance}; true)" + + all_matched_check = "&&\n#{matched}.size == #{node.term_nodes.size}" if node.rest_node + <<~RUBY + (#{init}#{matched} = {}; true) && + #{compile_loop(term)} #{all_matched_check} \\ + RUBY + end + end + end + + def compile_case(when_branches, else_code) + <<~RUBY + case + #{when_branches.join(' ')} + else #{else_code} + end \\ + RUBY + end + + def compile_any_order_branches(matched_var) + node.term_nodes.map.with_index do |node, i| + code = compiler.compile_as_node_pattern(node, var: cur_child_var, seq_head: false) + var = "#{matched_var}[#{i}]" + "when !#{var} && #{code} then #{var} = true" + end + end + + # @return [Array] Else code, and init code (if any) + def compile_any_order_else + rest = node.rest_node + if !rest + 'false' + elsif rest.capture? 
+ capture_rest = compiler.next_capture + init = "#{capture_rest} = [];" + ["#{capture_rest} << #{cur_child_var}", init] + else + 'true' + end + end + + def visit_capture + return visit_other_type if node.child.arity == 1 + + storage = compiler.next_capture + term = compile(node.child) + capture = "#{@seq_var}.children[#{compile_matched(:range)}]" + "#{term} && (#{storage} = #{capture})" + end + + def visit_rest + empty_loop + end + + # Compilation helpers + + def compile_and_advance(term) + case @cur_index + when :variadic_mode + "#{term} && (#{compile_loop_advance}; true)" + when :seq_head + # @in_sync = false # already the case + @cur_index = 0 + term + else + @in_sync = false + @cur_index += 1 + term + end + end + + def compile_captured_repetition(child_code, child_captures) + captured_range = "#{compiler.captures - child_captures}...#{compiler.captures}" + captured = "captures[#{captured_range}]" + compiler.with_temp_variables do |accumulate| + code = "#{child_code} && #{accumulate}.push(#{captured})" + <<~RUBY + (#{accumulate} = Array.new) && + #{compile_loop(code)} && + (#{captured} = if #{accumulate}.empty? 
+ (#{captured_range}).map{[]} # Transpose hack won't work for empty case + else + #{accumulate}.transpose + end) \\ + RUBY + end + end + + # Assumes `@cur_index` is already updated + def compile_matched(kind) + to = compile_cur_index + from = if @prev_index == :variadic_mode + @prev_index_used = true + @prev_index_var + else + compile_index(@prev_index) + end + case kind + when :range + "#{from}...#{to}" + when :length + "#{to} - #{from}" + end + end + + def handle_prev + @prev_index = @cur_index + @prev_index_used = false + code = yield + if @prev_index_used + @prev_index_used = false + code = "(#{@prev_index_var} = #{@cur_index_var}; true) && #{code}" + end + + code + end + + def compile_terms + arities = remaining_arities + total_arity = arities.shift + terms = @seq.children.map do |child| + @remaining_arity = arities.shift + handle_prev { compile(child) } + end + [ + compile_child_nb_guard(total_arity), + terms + ].join(" &&\n") + end + + # @return [Array] total arities (as Ranges) of remaining children nodes + # E.g. For sequence `(_ _? <_ _>)`, arities are: 1, 0..1, 2 + # and remaining arities are: 3..4, 2..3, 2..2, 0..0 + def remaining_arities + last = 0..0 + arities = @seq.children + .reverse + .map(&:arity_range) + .map { |r| last = last.begin + r.begin..last.max + r.max } + .reverse! + arities.push 0..0 + end + + # @return [String] code that evaluates to `false` if the matched arity is too small + def compile_min_check + return 'false' unless node.variadic? + + unless @remaining_arity.end.infinite? + not_too_much_remaining = "#{compile_remaining} <= #{@remaining_arity.max}" + end + min_to_match = node.arity_range.begin + if min_to_match.positive? 
+ enough_matched = "#{compile_matched(:length)} >= #{min_to_match}" + end + return 'true' unless not_too_much_remaining || enough_matched + + [not_too_much_remaining, enough_matched].compact.join(' && ') + end + + def compile_remaining + "#{@seq_var}.children.size - #{@cur_index_var}" + end + + def compile_max_matched + return node.arity unless node.variadic? + + min_remaining_children = "#{compile_remaining} - #{@remaining_arity.begin}" + return min_remaining_children if node.arity.end.infinite? + + "[#{min_remaining_children}, #{node.arity.max}].min" + end + + def empty_loop + @cur_index = -@remaining_arity.begin - DELTA + @in_sync = false + 'true' + end + + def compile_cur_index + return @cur_index_var if @in_sync + + compile_index + end + + def compile_index(cur = @cur_index) + return cur if cur >= 0 + + "#{@seq_var}.children.size - #{-(cur + DELTA)}" + end + + # Note: assumes `@cur_index != :seq_head`. Node types using `within_loop` must + # have `def in_sequence_head; :raise; end` + def within_loop + return yield if @in_sync + + init = compile_loop_advance("= #{compile_cur_index}") + @in_sync = true + @cur_index = :variadic_mode + "(#{init}; true) && #{yield}" + ensure + if @remaining_arity.begin == @remaining_arity.max + @cur_index = -@remaining_arity.begin - DELTA + end + end + + def compile_loop_advance(to = '+=1') + "#{@cur_child_var} = #{@seq_var}.children[#{@cur_index_var} #{to}]" + end + + def compile_loop(term) + <<~RUBY + (#{compile_max_matched}).times do + break #{compile_min_check} unless #{term} + end \\ + RUBY + end + + def compile_child_nb_guard(arity_range) + # The -1 are because of seq_head + case arity_range.max + when Float::INFINITY + "#{@seq_var}.children.size >= #{arity_range.begin - 1}" + when arity_range.begin + "#{@seq_var}.children.size == #{arity_range.begin - 1}" + else + "(#{arity_range.begin - 1}..#{arity_range.max - 1}).cover?(#{@seq_var}.children.size)" + end + end + end + # rubocop:enable Metrics/ClassLength + end + end + end 
+end diff --git a/lib/rubocop/ast/node_pattern/compiler/subcompiler.rb b/lib/rubocop/ast/node_pattern/compiler/subcompiler.rb new file mode 100644 index 000000000..299d2c5ae --- /dev/null +++ b/lib/rubocop/ast/node_pattern/compiler/subcompiler.rb @@ -0,0 +1,57 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + class Compiler + # Base class for subcompilers + # Implements visitor pattern + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class Subcompiler + attr_reader :compiler + + def initialize(compiler) + @compiler = compiler + @node = nil + end + + def compile(node) + prev = @node + @node = node + do_compile + ensure + @node = prev + end + + # @api private + + private + + attr_reader :node + + def do_compile + send(self.class.registry.fetch(node.type, :visit_other_type)) + end + + @registry = {} + class << self + attr_reader :registry + + def method_added(method) + @registry[Regexp.last_match(1).to_sym] = method if method =~ /^visit_(.*)/ + super + end + + def inherited(base) + us = self + base.class_eval { @registry = us.registry.dup } + super + end + end + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/lexer.rb b/lib/rubocop/ast/node_pattern/lexer.rb new file mode 100644 index 000000000..4d5b981da --- /dev/null +++ b/lib/rubocop/ast/node_pattern/lexer.rb @@ -0,0 +1,48 @@ +# frozen_string_literal: true + +require_relative 'lexer.rex' + +module RuboCop + module AST + class NodePattern + # Lexer class for `NodePattern` + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class Lexer < LexerRex + Error = ScanError + + attr_reader :source_buffer, :comments, :tokens + + def initialize(source) + @tokens = [] + super() + parse(source) + end + + private + + # @return [token] + def emit(type) + value = ss.captures.first || ss.matched + value = yield value if block_given? 
+ token = token(type, value) + @tokens << token + token + end + + def emit_comment + nil + end + + def do_parse + # Called by the generated `parse` method, do nothing here. + end + + def token(type, value) + [type, value] + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/lexer.rex b/lib/rubocop/ast/node_pattern/lexer.rex new file mode 100644 index 000000000..03e2b6261 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/lexer.rex @@ -0,0 +1,36 @@ +# The only difficulty is to distinguish: `fn(argument)` from `fn (sequence)`. +# The presence of the whitespace determines if it is an _argument_ to the +# function call `fn` or if a _sequence_ follows the function call. +# +# If there is the potential for an argument list, the lexer enters the state `:ARG`. +# The rest of the times, the state is `nil`. +# +# In case of an argument list, :tARG_LIST is emitted instead of a '('. +# Therefore, the token '(' always signals the beginning of a sequence. + +class RuboCop::AST::NodePattern::LexerRex + +macros + SYMBOL_NAME /[\w+@*\/?!<>=~|%^-]+|\[\]=?/ + IDENTIFIER /[a-zA-Z_][a-zA-Z0-9_-]*/ +rules + /\s+/ + /:(#{SYMBOL_NAME})/o { emit :tSYMBOL, &:to_sym } + /"(.+?)"/ { emit :tSTRING } + /[-+]?\d+\.\d+/ { emit :tNUMBER, &:to_f } + /[-+]?\d+/ { emit :tNUMBER, &:to_i } + /#{Regexp.union( + %w"( ) { } [ ] < > $ ! ^ ` ... + * ? ," + )}/o { emit ss.matched, &:to_sym } + /%([A-Z:][a-zA-Z_:]+)/ { emit :tPARAM_CONST } + /%([a-z_]+)/ { emit :tPARAM_NAMED } + /%(\d*)/ { emit(:tPARAM_NUMBER) { |s| s.empty? ? 
1 : s.to_i } } # Map `%` to `%1` + /_(#{IDENTIFIER})/o { emit :tUNIFY } + /_/o { emit :tWILDCARD } + /\#(#{IDENTIFIER}[!?]?)/o { @state = :ARG; emit :tFUNCTION_CALL, &:to_sym } + /#{IDENTIFIER}\?/o { @state = :ARG; emit :tPREDICATE, &:to_sym } + /#{IDENTIFIER}/o { emit :tNODE_TYPE, &:to_sym } + :ARG /\(/ { @state = nil; emit :tARG_LIST } + :ARG // { @state = nil } + /\#.*/ { emit_comment } +end diff --git a/lib/rubocop/ast/node_pattern/method_definer.rb b/lib/rubocop/ast/node_pattern/method_definer.rb new file mode 100644 index 000000000..6caac18f8 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/method_definer.rb @@ -0,0 +1,143 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + # Functionality to turn `match_code` into methods/lambda + module MethodDefiner + def def_node_matcher(base, method_name, **defaults) + def_helper(base, method_name, **defaults) do |name| + params = emit_params('param0 = self') + <<~RUBY + def #{name}(#{params}) + #{VAR} = param0 + #{compile_init} + #{emit_method_code} + end + RUBY + end + end + + def def_node_search(base, method_name, **defaults) + def_helper(base, method_name, **defaults) do |name| + emit_node_search(name) + end + end + + def compile_as_lambda + <<~RUBY + ->(#{emit_params('param0')}, block: nil) do + #{VAR} = param0 + #{compile_init} + #{emit_lambda_code} + end + RUBY + end + + def as_lambda + eval(compile_as_lambda) # rubocop:disable Security/Eval + end + + private + + # This method minimizes the closure for our method + def wrapping_block(method_name, **defaults) + proc do |*args, **values| + send method_name, *args, **defaults, **values + end + end + + def def_helper(base, method_name, **defaults) + location = caller_locations(3, 1).first + unless defaults.empty? 
+ call = :"without_defaults_#{method_name}" + base.send :define_method, method_name, &wrapping_block(call, **defaults) + method_name = call + end + src = yield method_name + base.class_eval(src, location.path, location.lineno) + end + + def emit_node_search(method_name) + if method_name.to_s.end_with?('?') + on_match = 'return true' + else + args = emit_params(":#{method_name}", 'param0', forwarding: true) + prelude = "return enum_for(#{args}) unless block_given?\n" + on_match = emit_yield_capture(VAR) + end + emit_node_search_body(method_name, prelude: prelude, on_match: on_match) + end + + def emit_node_search_body(method_name, prelude:, on_match:) + <<~RUBY + def #{method_name}(#{emit_params('param0')}) + #{compile_init} + #{prelude} + param0.each_node do |#{VAR}| + if #{match_code} + #{on_match} + end + end + nil + end + RUBY + end + + def emit_yield_capture(when_no_capture = '', yield_with: 'yield') + yield_val = if captures.zero? + when_no_capture + elsif captures == 1 + 'captures[0]' # Circumvent https://github.com/jruby/jruby/issues/5710 + else + '*captures' + end + "#{yield_with}(#{yield_val})" + end + + def emit_retval + if captures.zero? + 'true' + elsif captures == 1 + 'captures[0]' + else + 'captures' + end + end + + def emit_param_list + (1..positional_parameters).map { |n| "param#{n}" }.join(',') + end + + def emit_keyword_list(forwarding: false) + pattern = "%s: #{'%s' if forwarding}" + named_parameters.map { |k| format(pattern, keyword: k) }.join(',') + end + + def emit_params(*first, forwarding: false) + params = emit_param_list + keywords = emit_keyword_list(forwarding: forwarding) + [*first, params, keywords].reject(&:empty?).join(',') + end + + def emit_method_code + <<~RUBY + return unless #{match_code} + block_given? ? #{emit_yield_capture} : (return #{emit_retval}) + RUBY + end + + def emit_lambda_code + <<~RUBY + return unless #{match_code} + block ? 
#{emit_yield_capture(yield_with: 'block.call')} : (return #{emit_retval}) + RUBY + end + + def compile_init + "captures = Array.new(#{captures})" if captures.positive? + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/node.rb b/lib/rubocop/ast/node_pattern/node.rb new file mode 100644 index 000000000..ccdf68561 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/node.rb @@ -0,0 +1,195 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + # Base class for AST Nodes of a `NodePattern` + class Node < ::Parser::AST::Node + extend Forwardable + include ::RuboCop::AST::Descendence + + ### + # To be overriden by subclasses + ### + + def rest? + false + end + + def capture? + false + end + + # @return [Integer, Range] An Integer for fixed length terms, otherwise a Range. + # Note: `arity.end` may be `Float::INFINITY` + def arity + 1 + end + + # Return [Symbol] either :ok, :raise, or :insert_wildcard + def in_sequence_head + :ok + end + + ### + # Utilities + ### + + # @return [Array] + def children_nodes + children.grep(Node) + end + + # @return [Node] most nodes have only one child + def child + children[0] + end + + # @return [Integer] nb of captures of that node and its descendants + def nb_captures + children_nodes.sum(&:nb_captures) + end + + # @return [Boolean] returns true iff matches variable number of elements + def variadic? + arity.is_a?(Range) + end + + # @return [Range] arity as a Range + def arity_range + a = arity + a.is_a?(Range) ? 
+ a : INT_TO_RANGE[a] + end + + INT_TO_RANGE = Hash.new { |h, k| h[k] = k..k } + private_constant :INT_TO_RANGE + + ### + # Subclasses for specific node types + ### + + # Registry + MAP = Hash.new(Node) + def self.setup_registry + %i[sequence repetition rest capture predicate any_order function_call].each do |type| + MAP[type] = const_get(type.to_s.gsub(/(_|^)(.)/) { Regexp.last_match(2).upcase }) + end + MAP.freeze + end + + # Node class for `$something` + class Capture < Node + # Delegate most introspection methods to its only child + def_delegators :child, :arity, :rest?, :in_sequence_head + + def capture? + true + end + + def nb_captures + 1 + super + end + end + + # Node class for `(type first second ...)` + class Sequence < Node + def initialize(type, children = [], properties = {}) + case children.first.in_sequence_head + when :insert_wildcard + children = [Node.new(:wildcard), *children] + when :raise + raise NodePattern::Invalid, "A sequence can not start with #{children.first}" + end + + super + end + + def in_sequence_head + :raise + end + end + + # Node class for `predicate?(:arg, :list)` + class Predicate < Node + def method_name + children.first + end + + def arg_list + children[1..-1] + end + end + FunctionCall = Predicate + + # Node class for `int+` + class Repetition < Node + def operator + children[1] + end + + ARITIES = { + '*': 0..Float::INFINITY, + '+': 1..Float::INFINITY, + '?': 0..1 + }.freeze + + def arity + ARITIES[operator] + end + + def in_sequence_head + :raise + end + end + + # Node class for `...` + class Rest < Node + ARITY = (0..Float::INFINITY).freeze + private_constant :ARITY + + def rest? + true + end + + def arity + ARITY + end + + def in_sequence_head + :insert_wildcard + end + end + + # Node class for any-order terms (written between angle brackets) + class AnyOrder < Node + ARITIES = Hash.new { |h, k| h[k] = k - 1..Float::INFINITY } + private_constant :ARITIES + + def term_nodes + ends_with_rest? ? children[0...-1] : children + end + + def ends_with_rest? 
+ children.last.rest? + end + + def rest_node + children.last if ends_with_rest? + end + + def arity + return children.size unless ends_with_rest? + + ARITIES[children.size] + end + + def in_sequence_head + :raise + end + end + + setup_registry + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/parser.rb b/lib/rubocop/ast/node_pattern/parser.rb new file mode 100644 index 000000000..96e75632f --- /dev/null +++ b/lib/rubocop/ast/node_pattern/parser.rb @@ -0,0 +1,60 @@ +# frozen_string_literal: true + +require_relative 'parser.racc' + +module RuboCop + module AST + class NodePattern + # Parser for NodePattern + # Note: class reopened in `parser.racc` + # + # Doc on how this fits in the compiling process: + # /doc/modules/ROOT/pages/node_pattern.md + class Parser < Racc::Parser + extend Forwardable + + Builder = NodePattern::Builder + Lexer = NodePattern::Lexer + + def initialize(builder = self.class::Builder.new) + super() + @builder = builder + end + + ## + # (Similar API to `parser` gem) + # Parses a source and returns the AST. + # + # @param [Parser::Source::Buffer, String] source_buffer The source buffer to parse. + # @return [NodePattern::Node] + # + def parse(source) + @lexer = self.class::Lexer.new(source) + ast = do_parse + return ast unless block_given? + + yield ast, @lexer + rescue Lexer::Error => e + raise NodePattern::Invalid, e.message + ensure + @lexer = nil # Don't keep references + end + + def inspect + "<##{self.class}>" + end + + private + + def_delegators :@builder, :emit_list, :emit_unary_op, :emit_atom, :emit_capture, :emit_call + def_delegators :@lexer, :next_token + + # Overrides Racc::Parser's method: + def on_error(token, val, _vstack) + detail = token_to_str(token) || '?' 
+ raise NodePattern::Invalid, "parse error on value #{val.inspect} (#{detail})" + end + end + end + end +end diff --git a/lib/rubocop/ast/node_pattern/parser.y b/lib/rubocop/ast/node_pattern/parser.y new file mode 100644 index 000000000..14387069e --- /dev/null +++ b/lib/rubocop/ast/node_pattern/parser.y @@ -0,0 +1,87 @@ +class RuboCop::AST::NodePattern::Parser +options no_result_var +token tSYMBOL tNUMBER tSTRING tWILDCARD tPARAM_NAMED tPARAM_CONST tPARAM_NUMBER + tFUNCTION_CALL tPREDICATE tNODE_TYPE tARG_LIST tUNIFY +rule + node_pattern # @return Node + : '(' variadic_pattern_list ')' { emit_list :sequence, *val } + | '{' node_pattern_list '}' { emit_list :union, *val } + | '[' node_pattern_list ']' { emit_list :intersection, *val } + | '!' node_pattern { emit_unary_op :negation, *val } + | '^' node_pattern { emit_unary_op :ascend, *val } + | '`' node_pattern { emit_unary_op :descend, *val } + | '$' node_pattern { emit_capture(*val) } + | tFUNCTION_CALL args { emit_call :function_call, *val } + | tPREDICATE args { emit_call :predicate, *val } + | tNODE_TYPE { emit_call :node_type, *val } + | atom + ; + + atom # @return Node + : tSYMBOL { emit_atom :symbol, *val } + | tNUMBER { emit_atom :number, *val } + | tSTRING { emit_atom :string, *val } + | tPARAM_CONST { emit_atom :const, *val } + | tPARAM_NAMED { emit_atom :named_parameter, *val } + | tPARAM_NUMBER { emit_atom :positional_parameter, *val } + | tWILDCARD { emit_atom :wildcard, *val } + | tUNIFY { emit_atom :unify, *val } + ; + + variadic_pattern # @return Node + : node_pattern + | node_pattern repetition + { + main, repeat_t = val + emit_unary_op(:repetition, repeat_t, main, repeat_t) + } + | opt_capture '<' node_pattern_list opt_rest '>' + { + opt_capture, bracket, node_pattern_list, opt_rest, close_bracket = val + node_pattern_list << opt_rest if opt_rest + main = emit_list :any_order, bracket, node_pattern_list, close_bracket + emit_capture(opt_capture, main) + } + | rest + ; + + repetition # @return 
Token + : '?' + | '*' + | '+' + ; + + opt_capture # @return Token | nil + : + | '$' + ; + + rest # @return Node + : opt_capture '...' { emit_capture(val[0], emit_atom(:rest, val[1])) } + ; + + opt_rest # @return Node | nil + : + | rest + ; + + args # @return [Token, Array, Token] | nil + : + | tARG_LIST arg_list ')' { val } + ; + + arg_list # @return Array + : node_pattern { val } + | arg_list ',' node_pattern { val[0] << val[2] } + ; + + node_pattern_list # @return Array + : node_pattern { val } + | node_pattern_list node_pattern { val[0] << val[1] } + ; + + variadic_pattern_list # @return Array + : variadic_pattern { val } + | variadic_pattern_list variadic_pattern { val[0] << val[1] } + ; +end diff --git a/lib/rubocop/ast/node_pattern/with_meta.rb b/lib/rubocop/ast/node_pattern/with_meta.rb new file mode 100644 index 000000000..0416277e8 --- /dev/null +++ b/lib/rubocop/ast/node_pattern/with_meta.rb @@ -0,0 +1,130 @@ +# frozen_string_literal: true + +module RuboCop + module AST + class NodePattern + class Parser + # Overrides Parser to use `WithMeta` variants and provide additional methods + class WithMeta < Parser + # Overrides Lexer to token locations and comments + class Lexer < NodePattern::Lexer + attr_reader :source_buffer + + def initialize(str_or_buffer) + @source_buffer = if str_or_buffer.respond_to?(:source) + str_or_buffer + else + ::Parser::Source::Buffer.new('(string)', source: str_or_buffer) + end + @comments = [] + super(@source_buffer.source) + end + + def token(type, value) + super(type, [value, pos]) + end + + def emit_comment + @comments << Comment.new(pos) + super + end + + # @return [::Parser::Source::Range] last match's position + def pos + ::Parser::Source::Range.new(source_buffer, ss.pos - ss.matched_size, ss.pos) + end + end + + # Overrides Builder to emit nodes with locations + class Builder < NodePattern::Builder + def emit_atom(type, token) + value, loc = token + begin_l = loc.resize(1) + end_l = loc.end.adjust(begin_pos: -1) + begin_l 
= nil if begin_l.source.match?(/\w/) + end_l = nil if end_l.source.match?(/\w/) + n(type, [value], source_map(token, begin_t: begin_l, end_t: end_l)) + end + + def emit_unary_op(type, operator_t = nil, *children) + children[-1] = children[-1].first if children[-1].is_a?(Array) # token? + map = source_map(children.first.loc.expression, operator_t: operator_t) + n(type, children, map) + end + + def emit_list(type, begin_t, children, end_t) + expr = children.first.loc.expression.join(children.last.loc.expression) + map = source_map(expr, begin_t: begin_t, end_t: end_t) + n(type, children, map) + end + + def emit_call(type, selector_t, args = nil) + selector, = selector_t + begin_t, arg_nodes, end_t = args + + map = source_map(selector_t, begin_t: begin_t, end_t: end_t, selector_t: selector_t) + n(type, [selector, *arg_nodes], map) + end + + private + + def n(type, children, source_map) + super(type, children, { location: source_map }) + end + + def loc(token_or_range) + return token_or_range[1] if token_or_range.is_a?(Array) + + token_or_range + end + + def join_exprs(left_expr, right_expr) + left_expr.loc.expression + .join(right_expr.loc.expression) + end + + def source_map(token_or_range, begin_t: nil, end_t: nil, operator_t: nil, selector_t: nil) + expression_l = loc(token_or_range) + expression_l = expression_l.expression if expression_l.respond_to?(:expression) + locs = [begin_t, end_t, operator_t, selector_t].map { |token| loc(token) } + begin_l, end_l, operator_l, selector_l = locs + + expression_l = locs.compact.inject(expression_l, :join) + + ::Parser::Source::Map::Send.new(_dot_l = nil, selector_l, begin_l, end_l, expression_l) + .with_operator(operator_l) + end + end + + ## + # (Similar API to `parser` gem) + # Parses a source and returns the AST and the source code comments. 
+ # + # @see #parse + # @return [Array] + # + def parse_with_comments(_source) + parse(source_buffer) do |ast, lexer| + [ast, lexer.comments, lexer.tokens] + end + end + + ## + # (Similar API to `parser` gem) + # Parses a source and returns the AST, the source code comments, + # and the tokens emitted by the lexer. + # + # @param [Parser::Source::Buffer, String] source + # @param [Boolean] recover If true, recover from syntax errors. False by default. + # @return [Array] + # + def tokenize(source) + parse(source) do |ast, lexer| + [ast, lexer.comments, lexer.tokens] + end + end + end + end + end + end +end diff --git a/rubocop-ast.gemspec b/rubocop-ast.gemspec index d0d84e852..cef4a1d51 100644 --- a/rubocop-ast.gemspec +++ b/rubocop-ast.gemspec @@ -15,7 +15,10 @@ Gem::Specification.new do |s| s.email = 'rubocop@googlegroups.com' s.files = `git ls-files lib LICENSE.txt README.md` - .split($RS) + .split($RS) + %w[ + lib/rubocop/ast/node_pattern/parser.racc.rb + lib/rubocop/ast/node_pattern/lexer.rex.rb + ] s.extra_rdoc_files = ['LICENSE.txt', 'README.md'] s.homepage = 'https://github.com/rubocop-hq/rubocop-ast' s.licenses = ['MIT'] @@ -30,6 +33,7 @@ Gem::Specification.new do |s| } s.add_runtime_dependency('parser', '>= 2.7.1.4') + s.add_runtime_dependency('strscan', '>= 1.0.0') # Ruby 2.4 doesn't provide `captures` s.add_development_dependency('bundler', '>= 1.15.0', '< 3.0') diff --git a/spec/rubocop/ast/node_pattern/helper.rb b/spec/rubocop/ast/node_pattern/helper.rb new file mode 100644 index 000000000..8f58980fb --- /dev/null +++ b/spec/rubocop/ast/node_pattern/helper.rb @@ -0,0 +1,28 @@ +# frozen_string_literal: true + +require_relative 'parse_helper' + +Failure = Struct.new(:expected, :actual) + +module NodePatternHelper + include ParseHelper + + def assert_equal(expected, actual, mess = nil) + expect(actual).to eq(expected), *mess + end + + def assert(test, mess = nil) + expect(test).to eq(true), *mess + end + + def expect_parsing(ast, source, source_maps) + 
version = '-' + try_parsing(ast, source, parser, source_maps, version) + end +end + +RSpec.shared_context 'parser' do + include NodePatternHelper + + let(:parser) { RuboCop::AST::NodePattern::Parser::WithMeta.new } +end diff --git a/spec/rubocop/ast/node_pattern/lexer_spec.rb b/spec/rubocop/ast/node_pattern/lexer_spec.rb new file mode 100644 index 000000000..53ad93443 --- /dev/null +++ b/spec/rubocop/ast/node_pattern/lexer_spec.rb @@ -0,0 +1,39 @@ +# frozen_string_literal: true + +RSpec.describe RuboCop::AST::NodePattern::Lexer do + let(:source) { '(send nil? #func(:foo) #func (bar))' } + let(:lexer) { RuboCop::AST::NodePattern::Parser::WithMeta::Lexer.new(source) } + let(:tokens) do + tokens = [] + while (token = lexer.next_token) + tokens << token + end + tokens + end + + it 'provides tokens via next_token' do # rubocop:disable RSpec/ExampleLength + type, (text, range) = tokens[3] + expect(type).to eq :tFUNCTION_CALL + expect(text).to eq :func + expect(range.to_range).to eq 11...16 + + expect(tokens.map(&:first)).to eq [ + '(', + :tNODE_TYPE, + :tPREDICATE, + :tFUNCTION_CALL, :tARG_LIST, :tSYMBOL, ')', + :tFUNCTION_CALL, + '(', :tNODE_TYPE, ')', + ')' + ] + end + + context 'with $type+' do + let(:source) { '(array sym $int+ x)' } + + it 'works' do + expect(tokens.map(&:last).map(&:first)).to eq \ + %i[( array sym $ int + x )] + end + end +end diff --git a/spec/rubocop/ast/node_pattern/parse_helper.rb b/spec/rubocop/ast/node_pattern/parse_helper.rb new file mode 100644 index 000000000..2adde05a2 --- /dev/null +++ b/spec/rubocop/ast/node_pattern/parse_helper.rb @@ -0,0 +1,329 @@ +# frozen_string_literal: true + +# Copied from Parser, some lines commented out with `# !!!` +module ParseHelper + include AST::Sexp + + # !!! require 'parser/all' + # !!! require 'parser/macruby' + # !!! 
require 'parser/rubymotion' + + ALL_VERSIONS = %w(1.8 1.9 2.0 2.1 2.2 2.3 2.4 2.5 2.6 2.7 2.8 mac ios) + + def setup + @diagnostics = [] + + super if defined?(super) + end + + def parser_for_ruby_version(version) + case version + when '1.8' then parser = Parser::Ruby18.new + when '1.9' then parser = Parser::Ruby19.new + when '2.0' then parser = Parser::Ruby20.new + when '2.1' then parser = Parser::Ruby21.new + when '2.2' then parser = Parser::Ruby22.new + when '2.3' then parser = Parser::Ruby23.new + when '2.4' then parser = Parser::Ruby24.new + when '2.5' then parser = Parser::Ruby25.new + when '2.6' then parser = Parser::Ruby26.new + when '2.7' then parser = Parser::Ruby27.new + when '2.8' then parser = Parser::Ruby28.new + when 'mac' then parser = Parser::MacRuby.new + when 'ios' then parser = Parser::RubyMotion.new + else raise "Unrecognized Ruby version #{version}" + end + + parser.diagnostics.consumer = lambda do |diagnostic| + @diagnostics << diagnostic + end + + parser + end + + def with_versions(versions) + (versions & ALL_VERSIONS).each do |version| + @diagnostics.clear + + parser = parser_for_ruby_version(version) + yield version, parser + end + end + + def assert_source_range(expect_range, range, version, what) + if expect_range == nil + # Avoid "Use assert_nil if expecting nil from .... 
This will fail in Minitest 6."" + assert_nil range, + "(#{version}) range of #{what}" + else + assert range.is_a?(Parser::Source::Range), + "(#{version}) #{range.inspect}.is_a?(Source::Range) for #{what}" + assert_equal expect_range, range.to_range, + "(#{version}) range of #{what}" + end + end + + # Use like this: + # ~~~ + # assert_parses( + # s(:send, s(:lit, 10), :+, s(:lit, 20)) + # %q{10 + 20}, + # %q{~~~~~~~ expression + # | ^ operator + # | ~~ expression (lit) + # }, + # %w(1.8 1.9) # optional + # ) + # ~~~ + def assert_parses(ast, code, source_maps='', versions=ALL_VERSIONS) + with_versions(versions) do |version, parser| + try_parsing(ast, code, parser, source_maps, version) + end + + # Also try parsing with lexer set to use UTF-32LE internally + with_versions(versions) do |version, parser| + parser.instance_eval { @lexer.force_utf32 = true } + try_parsing(ast, code, parser, source_maps, version) + end + end + + def try_parsing(ast, code, parser, source_maps, version) + source_file = Parser::Source::Buffer.new('(assert_parses)', source: code) + + begin + parsed_ast = parser.parse(source_file) + rescue => exc + backtrace = exc.backtrace + Exception.instance_method(:initialize).bind(exc). + call("(#{version}) #{exc.message}") + exc.set_backtrace(backtrace) + raise + end + + if ast.nil? + assert_nil parsed_ast, "(#{version}) AST equality" + return + end + + assert_equal ast, parsed_ast, + "(#{version}) AST equality" + + parse_source_map_descriptions(source_maps) do |range, map_field, ast_path, line| + + astlet = traverse_ast(parsed_ast, ast_path) + + if astlet.nil? + # This is a testsuite bug. + raise "No entity with AST path #{ast_path} in #{parsed_ast.inspect}" + end + + assert astlet.frozen? 
+ + assert astlet.location.respond_to?(map_field), + "(#{version}) #{astlet.location.inspect}.respond_to?(#{map_field.inspect}) for:\n#{parsed_ast.inspect}" + + found_range = astlet.location.send(map_field) + + assert_source_range(range, found_range, version, line.inspect) + end + + # !!! assert parser.instance_eval { @lexer }.cmdarg.empty?, + # !!! "(#{version}) expected cmdarg to be empty after parsing" + + # !!! assert_equal 0, parser.instance_eval { @lexer.instance_eval { @paren_nest } }, + # !!! "(#{version}) expected paren_nest to be 0 after parsing" + end + + # Use like this: + # ~~~ + # assert_diagnoses( + # [:warning, :ambiguous_prefix, { prefix: '*' }], + # %q{foo *bar}, + # %q{ ^ location + # | ~~~ highlights (0)}) + # ~~~ + def assert_diagnoses(diagnostic, code, source_maps='', versions=ALL_VERSIONS) + with_versions(versions) do |version, parser| + source_file = Parser::Source::Buffer.new('(assert_diagnoses)', source: code) + + begin + parser = parser.parse(source_file) + rescue Parser::SyntaxError + # do nothing; the diagnostic was reported + end + + assert_equal 1, @diagnostics.count, + "(#{version}) emits a single diagnostic, not\n" \ + "#{@diagnostics.map(&:render).join("\n")}" + + emitted_diagnostic = @diagnostics.first + + level, reason, arguments = diagnostic + arguments ||= {} + message = Parser::Messages.compile(reason, arguments) + + assert_equal level, emitted_diagnostic.level + assert_equal reason, emitted_diagnostic.reason + assert_equal arguments, emitted_diagnostic.arguments + assert_equal message, emitted_diagnostic.message + + parse_source_map_descriptions(source_maps) do |range, map_field, ast_path, line| + + case map_field + when 'location' + assert_source_range range, + emitted_diagnostic.location, + version, 'location' + + when 'highlights' + index = ast_path.first.to_i + + assert_source_range range, + emitted_diagnostic.highlights[index], + version, "#{index}th highlight" + + else + raise "Unknown diagnostic range #{map_field}" + 
end + end + end + end + + # Use like this: + # ~~~ + # assert_diagnoses_many( + # [ + # [:warning, :ambiguous_literal], + # [:error, :unexpected_token, { :token => :tLCURLY }] + # ], + # %q{m /foo/ {}}, + # SINCE_2_4) + # ~~~ + def assert_diagnoses_many(diagnostics, code, versions=ALL_VERSIONS) + with_versions(versions) do |version, parser| + source_file = Parser::Source::Buffer.new('(assert_diagnoses_many)', source: code) + + begin + parser = parser.parse(source_file) + rescue Parser::SyntaxError + # do nothing; the diagnostic was reported + end + + assert_equal diagnostics.count, @diagnostics.count + + diagnostics.zip(@diagnostics) do |expected_diagnostic, actual_diagnostic| + level, reason, arguments = expected_diagnostic + arguments ||= {} + message = Parser::Messages.compile(reason, arguments) + + assert_equal level, actual_diagnostic.level + assert_equal reason, actual_diagnostic.reason + assert_equal arguments, actual_diagnostic.arguments + assert_equal message, actual_diagnostic.message + end + end + end + + def refute_diagnoses(code, versions=ALL_VERSIONS) + with_versions(versions) do |version, parser| + source_file = Parser::Source::Buffer.new('(refute_diagnoses)', source: code) + + begin + parser = parser.parse(source_file) + rescue Parser::SyntaxError + # do nothing; the diagnostic was reported + end + + assert_empty @diagnostics, + "(#{version}) emits no diagnostics, not\n" \ + "#{@diagnostics.map(&:render).join("\n")}" + end + end + + def assert_context(context, code, versions=ALL_VERSIONS) + with_versions(versions) do |version, parser| + source_file = Parser::Source::Buffer.new('(assert_context)', source: code) + + parsed_ast = parser.parse(source_file) + + nodes = find_matching_nodes(parsed_ast) { |node| node.type == :send && node.children[1] == :get_context } + assert_equal 1, nodes.count, "there must exactly 1 `get_context()` call" + + node = nodes.first + assert_equal context, node.context, "(#{version}) expect parsing context to match" + end + 
end + + SOURCE_MAP_DESCRIPTION_RE = + /(?x) + ^(?# $1 skip) ^(\s*) + (?# $2 highlight) ([~\^]+|\!) + \s+ + (?# $3 source_map_field) ([a-z_]+) + (?# $5 ast_path) (\s+\(([a-z_.\/0-9]+)\))? + $/ + + def parse_source_map_descriptions(descriptions) + unless block_given? + return to_enum(:parse_source_map_descriptions, descriptions) + end + + descriptions.each_line do |line| + # Remove leading " |", if it exists. + line = line.sub(/^\s*\|/, '').rstrip + + next if line.empty? + + if (match = SOURCE_MAP_DESCRIPTION_RE.match(line)) + if match[2] != '!' + begin_pos = match[1].length + end_pos = begin_pos + match[2].length + range = begin_pos...end_pos + end + source_map_field = match[3] + + if match[5] + ast_path = match[5].split('.') + else + ast_path = [] + end + + yield range, source_map_field, ast_path, line + else + raise "Cannot parse source map description line: #{line.inspect}." + end + end + end + + def traverse_ast(ast, path) + path.inject(ast) do |astlet, path_component| + # Split "dstr/2" to :dstr and 1 + type_str, index_str = path_component.split('/') + + type = type_str.to_sym + + if index_str.nil? 
+ index = 0 + else + index = index_str.to_i - 1 + end + + matching_children = \ + astlet.children.select do |child| + AST::Node === child && child.type == type + end + + matching_children[index] + end + end + + def find_matching_nodes(ast, &block) + return [] unless ast.is_a?(AST::Node) + + result = [] + result << ast if block.call(ast) + ast.children.each { |child| result += find_matching_nodes(child, &block) } + + result + end +end diff --git a/spec/rubocop/ast/node_pattern/parser_spec.rb b/spec/rubocop/ast/node_pattern/parser_spec.rb new file mode 100644 index 000000000..28dc6d305 --- /dev/null +++ b/spec/rubocop/ast/node_pattern/parser_spec.rb @@ -0,0 +1,58 @@ +# frozen_string_literal: true + +require_relative 'helper' + +RSpec.describe RuboCop::AST::NodePattern::Parser do + include_context 'parser' + + describe 'sequences' do + it 'parses simple sequences properly' do + expect_parsing( + s(:sequence, s(:node_type, :int), s(:number, 42)), + '(int 42)', + '^ begin + | ^ end + |~~~~~~~~ expression + | ~~~ expression (node_type) + | ~~ expression (number)' + ) + end + + it 'parses capture vs repetition with correct priority ' do + s_int = s(:capture, s(:node_type, :int)) + s_str = s(:capture, s(:node_type, :str)) + expect_parsing( + s(:sequence, + s(:wildcard, '_'), + s(:repetition, s_int, :*), + s(:repetition, s(:sequence, s_str), :+)), + '(_ $int* ($str)+)', + '^ begin + | ^ end + |~~~~~~~~~~~~~~~~~ expression + | ~~~~~ expression (repetition) + | ^ operator (repetition) + | ^ operator (repetition.capture) + | ~~~ expression (repetition.capture.node_type)' + ) + end + + it 'parses function calls' do + expect_parsing( + s(:function_call, :func, s(:number, 1), s(:number, 2), s(:number, 3)), + '#func(1, 2, 3)', + ' ^ begin + | ^ end + |~~~~~ selector + | ^ expression (number)' + ) + end + + it 'generates specialized nodes' do + source_file = Parser::Source::Buffer.new('(spec)', source: '($_)') + ast = parser.parse(source_file) + expect(ast.class).to eq 
::RuboCop::AST::NodePattern::Node::Sequence + expect(ast.child.class).to eq ::RuboCop::AST::NodePattern::Node::Capture + end + end +end diff --git a/spec/rubocop/ast/node_pattern_spec.rb b/spec/rubocop/ast/node_pattern_spec.rb index 3b4131efc..55e0d0205 100644 --- a/spec/rubocop/ast/node_pattern_spec.rb +++ b/spec/rubocop/ast/node_pattern_spec.rb @@ -1631,14 +1631,22 @@ def withargs(foo, bar, qux) context 'with an ellipsis inside and outside' do let(:pattern) { '(array <(str $_) (sym $_) ...> ...)' } + let(:captured_vals) { ['world', :hello] } - it_behaves_like 'invalid' + it_behaves_like 'multiple capture' end context 'doubled with ellipsis' do - let(:pattern) { '(array <(str $_) ...> <(str $_) ...>)' } + let(:pattern) { '(array <(sym $_) ...> <(int $_) ...>)' } + let(:captured_vals) { [:hello, 3] } - it_behaves_like 'invalid' + it_behaves_like 'multiple capture' + end + + context 'doubled with ellipsis in wrong order' do + let(:pattern) { '(array <(int $_) ...> <(sym $_) ...>)' } + + it_behaves_like 'nonmatching' end context 'nested' do @@ -1668,9 +1676,9 @@ def withargs(foo, bar, qux) end context 'with an ellipsis in the same sequence' do - let(:pattern) { "(array int #{symbol} ...)" } + let(:pattern) { "(array sym #{symbol} ...)" } - it_behaves_like 'invalid' + it_behaves_like 'matching' end end @@ -1876,9 +1884,10 @@ def withargs(foo, bar, qux) end context 'with doubled ellipsis' do + let(:ruby) { 'foo' } let(:pattern) { '(send ... 
...)' } - it_behaves_like 'invalid' + it_behaves_like 'matching' # yet silly end context 'with doubled comma in arg list' do diff --git a/tasks/compile.rake b/tasks/compile.rake new file mode 100644 index 000000000..20d97f3a8 --- /dev/null +++ b/tasks/compile.rake @@ -0,0 +1,102 @@ +# frozen_string_literal: true + +Rake.application.rake_require 'oedipus_lex' + +# Patch gem, see https://github.com/seattlerb/oedipus_lex/pull/15 +class OedipusLex + remove_const :RE + RE = %r{(/(?:\\.|[^/])*/[ion]?)}.freeze +end + +def update_file(path) + content = File.read(path) + File.write(path, yield(content)) +end + +ENCODING_COMMENT = '# frozen_string_literal: true' +GENERATED_FILES = %w[ + lib/rubocop/ast/node_pattern/parser.racc.rb + lib/rubocop/ast/node_pattern/lexer.rex.rb +].freeze +desc 'Generate the lexer and parser files.' +task generate: %w[generate:lexer generate:parser] + +files = { + lexer: 'lib/rubocop/ast/node_pattern/lexer.rex.rb', + parser: 'lib/rubocop/ast/node_pattern/parser.racc.rb' +} + +CLEAN.include(files.values) +namespace :generate do + files.each do |kind, filename| + desc "Generate just the #{kind}" + task kind => filename do + update_file(filename) do |content| + content.prepend ENCODING_COMMENT, "\n" unless content.start_with?(ENCODING_COMMENT) + content.gsub 'module NodePattern', 'class NodePattern' + end + end + end +end + +rule '.racc.rb' => '.y' do |t| + cmd = "bundle exec racc -l -v -o #{t.name} #{t.source}" + sh cmd +end + +desc 'Compile pattern to Ruby code for debugging purposes' +task :compile do + require_relative '../lib/rubocop/ast' + if (pattern = ARGV[1]) + puts ::RuboCop::AST::NodePattern.new(pattern).compile_as_lambda + else + puts 'Usage:' + puts " rake compile '(send nil? 
:example...)'" + end + exit(0) +end + +desc 'Parse pattern to AST for debugging purposes' +task :parse do + require_relative '../lib/rubocop/ast' + if (pattern = ARGV[1]) + puts ::RuboCop::AST::NodePattern::Parser.new.parse(pattern) + else + puts 'Usage:' + puts " rake parse '(send nil? :example...)'" + end + exit(0) +end + +desc 'Tokens of pattern for debugging purposes' +task :tokenize do + require_relative '../lib/rubocop/ast' + if (pattern = ARGV[1]) + puts ::RuboCop::AST::NodePattern::Parser::WithMeta.new.tokenize(pattern).last + else + puts 'Usage:' + puts " rake parse '(send nil? :example...)'" + end + exit(0) +end + +desc 'Test pattern against ruby code' +task :test_pattern do + require_relative '../lib/rubocop/ast' + require 'parser/current' + + if (pattern = ARGV[1]) && (ruby = ARGV[2]) + require_relative '../lib/rubocop/ast/node_pattern/compiler/debug' + compiler = ::RuboCop::AST::NodePattern::Compiler::Debug.new + np = ::RuboCop::AST::NodePattern.new(pattern, compiler: compiler) + builder = ::RuboCop::AST::Builder.new + buffer = ::Parser::Source::Buffer.new('(ruby)', source: ruby) + ruby_ast = ::Parser::CurrentRuby.new(builder).parse(buffer) + np.as_lambda.call(ruby_ast, trace: compiler.trace) + puts ::RuboCop::AST::NodePattern::Compiler::Debug::Colorizer.new(compiler).colorize(np.ast) + else + puts 'Usage:' + puts " rake test-pattern '(send nil? :example...)' 'example(42)'" + end + exit(0) +end