diff --git a/changelog/new_add_regexpregexpexpressionloc_and.md b/changelog/new_add_regexpregexpexpressionloc_and.md new file mode 100644 index 00000000000..7dea865a6b1 --- /dev/null +++ b/changelog/new_add_regexpregexpexpressionloc_and.md @@ -0,0 +1 @@ +* [#8960](https://github.com/rubocop-hq/rubocop/pull/8960): Add `Regexp::Expression#loc` and `#expression` to replace `parsed_tree_expr_loc`. ([@marcandre][]) diff --git a/lib/rubocop.rb b/lib/rubocop.rb index daddae89079..fe414eaafe4 100644 --- a/lib/rubocop.rb +++ b/lib/rubocop.rb @@ -15,6 +15,7 @@ require_relative 'rubocop/ast_aliases' require_relative 'rubocop/ext/regexp_node' +require_relative 'rubocop/ext/regexp_parser' require_relative 'rubocop/core_ext/string' require_relative 'rubocop/ext/processed_source' diff --git a/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb index 3358af83fad..0b27bb2b774 100644 --- a/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb +++ b/lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb @@ -43,7 +43,7 @@ def each_repeated_character_class_element_loc(node) child_source = child.to_s - yield node.parsed_tree_expr_loc(child) if seen.include?(child_source) + yield child.expression if seen.include?(child_source) seen << child_source end @@ -56,7 +56,7 @@ def each_repeated_character_class_element_loc(node) # mark every space (except the first) as duplicate if we do not skip regexp_parser nodes # that are within an interpolation. def within_interpolation?(node, child) - parse_tree_child_loc = node.parsed_tree_expr_loc(child) + parse_tree_child_loc = child.expression interpolation_locs(node).any? { |il| il.overlaps?(parse_tree_child_loc) } end diff --git a/lib/rubocop/ext/regexp_node.rb b/lib/rubocop/ext/regexp_node.rb index 2318a0bcfda..adbf67437f5 100644 --- a/lib/rubocop/ext/regexp_node.rb +++ b/lib/rubocop/ext/regexp_node.rb @@ -10,19 +10,22 @@ def ANY.==(_) end private_constant :ANY - class << self - attr_reader :parsed_cache - end - @parsed_cache = {} - # @return [Regexp::Expression::Root, nil] - def parsed_tree + # Note: we extend Regexp nodes to provide `loc` and `expression` + # see `ext/regexp_parser`. + attr_reader :parsed_tree + + def assign_properties(*) + super + str = with_interpolations_blanked - Ext::RegexpNode.parsed_cache[str] ||= begin + @parsed_tree = begin Regexp::Parser.parse(str, options: options) rescue StandardError nil end + origin = loc.begin.end + @parsed_tree&.each_expression(true) { |e| e.origin = origin } end def each_capture(named: ANY) @@ -38,11 +41,6 @@ def each_capture(named: ANY) self end - # @return [Parser::Source::Range] the range of the parse-tree expression - def parsed_tree_expr_loc(expr) - loc.begin.end.adjust(begin_pos: expr.ts, end_pos: expr.ts).resize(expr.full_length) - end - private def with_interpolations_blanked diff --git a/lib/rubocop/ext/regexp_parser.rb b/lib/rubocop/ext/regexp_parser.rb new file mode 100644 index 00000000000..87df5f1832c --- /dev/null +++ b/lib/rubocop/ext/regexp_parser.rb @@ -0,0 +1,77 @@ +# frozen_string_literal: true + +module RuboCop + module Ext + # Extensions for `regexp_parser` gem + module RegexpParser + # Source map for RegexpParser nodes + class Map < ::Parser::Source::Map + attr_reader :body, :quantifier, :begin, :end + + def initialize(expression, body:, quantifier: nil, begin_l: nil, end_l: nil) + @begin = begin_l + @end = end_l + @body = body + @quantifier = quantifier + super(expression) + end + end + + module Expression + # Add `expression` and `loc` to all `regexp_parser` nodes + module Base + attr_accessor :origin + + # Shortcut to `loc.expression` + def expression + @expression ||= origin.adjust(begin_pos: ts, end_pos: ts + full_length) + end + + # @returns a location map like `parser` does, with: + # - expression: complete expression + # - quantifier: for `+`, `{1,2}`, etc. + # - begin/end: for `[` and `]` (only CharacterSet for now) + # + # E.g. + # [a-z]{2,} + # ^^^^^^^^^ expression + # ^^^^ quantifier + # ^^^^^ body + # ^ begin + # ^ end + # + # Please open issue if you need other locations + def loc + @loc ||= begin + Map.new(expression, **build_location) + end + end + + private + + def build_location + return { body: expression } unless (q = quantifier) + + body = expression.adjust(end_pos: -q.text.length) + q_loc = expression.with(begin_pos: body.end_pos) + { body: body, quantifier: q_loc } + end + end + + # Provide `CharacterSet` with `begin` and `end` locations. + module CharacterSet + def build_location + h = super + body = h[:body] + h.merge!( + begin_l: body.with(end_pos: body.begin_pos + 1), + end_l: body.with(begin_pos: body.end_pos - 1) + ) + end + end + end + ::Regexp::Expression::Base.include Expression::Base + ::Regexp::Expression::CharacterSet.include Expression::CharacterSet + end + end +end diff --git a/spec/rubocop/ext/regexp_node_spec.rb b/spec/rubocop/ext/regexp_node_spec.rb index fe4eb11856e..adf2ae0a600 100644 --- a/spec/rubocop/ext/regexp_node_spec.rb +++ b/spec/rubocop/ext/regexp_node_spec.rb @@ -96,29 +96,23 @@ expect(tree.to_s).to eq('foobaz') end end - end - describe '#parsed_node_loc' do - let(:source) { '/([a-z]+)\d*\s?(?:foo)/' } + context 'with a regexp with subexpressions' do + let(:source) { '/([a-z]+)\d*\s?(?:foo)/' } - it 'returns the correct loc for each node in the parsed_tree' do - loc_sources = node.parsed_tree.each_expression.map do |regexp_node| - node.parsed_tree_expr_loc(regexp_node).source - end + it 'has location information' do + nodes = node.parsed_tree.each_expression.map { |exp, _index| exp } + + sources = nodes.map { |n| n.loc.expression.source } - expect(loc_sources).to eq( - [ - '([a-z]+)', - '[a-z]+', - 'a-z', - 'a', - 'z', - '\d*', - '\s?', - '(?:foo)', - 'foo' - ] - ) + expect(sources).to eq %w{ + ([a-z]+) [a-z]+ a-z a z \d* \s? (?:foo) foo + } + + loc = nodes[1].loc + delim = loc.begin, loc.body, loc.end, loc.quantifier + expect(delim.map(&:source)).to eq %w{[ [a-z] ] +} + end end end end