Skip to content

Commit

Permalink
Merge pull request #11562 from rwstauner/rwstauner/duplicate-regexp-c…
Browse files Browse the repository at this point in the history
…har-octals

Fix handling of escaped octals in duplicate regexp cop
  • Loading branch information
koic committed Jun 21, 2023
2 parents d9b431b + 4c11956 commit 9c9c04a
Show file tree
Hide file tree
Showing 3 changed files with 73 additions and 19 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
* [#11562](https://github.com/rubocop/rubocop/pull/11562): Fixed escaped octal handling and detection in `Lint/DuplicateRegexpCharacterClassElement`. ([@rwstauner][])
65 changes: 46 additions & 19 deletions lib/rubocop/cop/lint/duplicate_regexp_character_class_element.rb
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ class DuplicateRegexpCharacterClassElement < Base

MSG_REPEATED_ELEMENT = 'Duplicate element inside regexp character class'

OCTAL_DIGITS_AFTER_ESCAPE = 2

def on_regexp(node)
each_repeated_character_class_element_loc(node) do |loc|
add_offense(loc, message: MSG_REPEATED_ELEMENT) do |corrector|
Expand All @@ -32,35 +34,57 @@ def on_regexp(node)
end
end

# rubocop:disable Metrics/AbcSize, Metrics/MethodLength
def each_repeated_character_class_element_loc(node)
node.parsed_tree&.each_expression do |expr|
next if skip_expression?(expr)

seen = Set.new
enum = expr.expressions.to_enum
expression_count = expr.expressions.count
group_expressions(node, expr.expressions) do |group|
group_source = group.map(&:to_s).join

expression_count.times do |current_number|
current_child = enum.next
next if within_interpolation?(node, current_child)
yield source_range(group) if seen.include?(group_source)

current_child_source = current_child.to_s
next_child = enum.peek if current_number + 1 < expression_count
seen << group_source
end
end
end

if seen.include?(current_child_source)
next if start_with_escaped_zero_number?(current_child_source, next_child.to_s)
private

yield current_child.expression
end
def group_expressions(node, expressions)
# Create a mutable list to simplify state tracking while we iterate.
expressions = expressions.to_a

seen << current_child_source
end
until expressions.empty?
# We may need to compose a group of multiple expressions.
group = [expressions.shift]
next if within_interpolation?(node, group.first)

# With regexp_parser < 2.7 escaped octal sequences may be up to 3
# separate expressions ("\\0", "0", "1").
pop_octal_digits(group, expressions) if escaped_octal?(group.first.to_s)

yield(group)
end
end
# rubocop:enable Metrics/AbcSize, Metrics/MethodLength

private
# Moves the octal digits that directly follow an escaped octal out of
# `expressions` and onto the end of `current_child`.
#
# At most OCTAL_DIGITS_AFTER_ESCAPE entries are moved, and the transfer stops
# at the first entry whose source text is not an octal digit (or when
# `expressions` runs out). Both arguments are mutated in place.
def pop_octal_digits(current_child, expressions)
  OCTAL_DIGITS_AFTER_ESCAPE.times do
    candidate = expressions.first

    if octal?(candidate.to_s)
      current_child << expressions.shift
    else
      break
    end
  end
end

# Builds the source range covered by a group of expressions.
#
# A single-element group simply reuses that element's own range. For a
# multi-element group the range runs from the start of the first element to
# the start of the last element plus the length of the last element's source
# text.
def source_range(children)
  head = children.first
  return head.expression if children.size == 1

  start_pos = head.expression.begin_pos
  tail = children.last
  end_pos = tail.expression.begin_pos + tail.to_s.length

  range_between(start_pos, end_pos)
end

def skip_expression?(expr)
expr.type != :set || expr.token == :intersection
Expand All @@ -75,9 +99,12 @@ def within_interpolation?(node, child)
interpolation_locs(node).any? { |il| il.overlaps?(parse_tree_child_loc) }
end

def start_with_escaped_zero_number?(current_child, next_child)
# Represents escaped code from `"\00"` (`"\u0000"`) to `"\07"` (`"\a"`).
current_child == '\\0' && next_child.match?(/[0-7]/)
# True when `string` is exactly a backslash followed by one octal digit,
# e.g. `\0` or `\7`.
def escaped_octal?(string)
  return false unless string.length == 2

  prefix, digit = string.chars
  prefix == '\\' && octal?(digit)
end

# True only when `char` is exactly one octal digit ("0".."7").
#
# `Range#cover?` on a String range compares lexicographically, so by itself
# it also accepts longer strings such as "12" or "0-5". The explicit length
# check rejects those, so a multi-character expression is never mistaken for
# a trailing octal digit.
def octal?(char)
  char.is_a?(String) && char.length == 1 && ('0'..'7').cover?(char)
end

def interpolation_locs(node)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,32 @@
end
end

# `\07` appears twice in the set. The trailing `8` is not an octal digit, so
# it is a separate literal and only the second `\07` is reported and removed.
# The caret line must sit directly beneath the duplicated `\07` for
# `expect_offense` to match.
context 'with repeated character class elements when `"\07\01\078"` (means `"\u0007\u0001\u00078"`)' do
  it 'registers an offense' do
    expect_offense(<<~'RUBY')
      /[\07\01\078]/
              ^^^ Duplicate element inside regexp character class
    RUBY

    expect_correction(<<~'RUBY')
      /[\07\018]/
    RUBY
  end
end

# `\177` (three octal digits) appears twice in the set. The trailing `8` is
# not an octal digit, so it is a separate literal and only the second `\177`
# is reported and removed. The caret line must sit directly beneath the
# duplicated `\177` for `expect_offense` to match.
context 'with repeated character class elements when `"\177\01\1778"` (means `"\u007f\u0001\u007f8"`)' do
  it 'registers an offense' do
    expect_offense(<<~'RUBY')
      /[\177\01\1778]/
               ^^^^ Duplicate element inside regexp character class
    RUBY

    expect_correction(<<~'RUBY')
      /[\177\018]/
    RUBY
  end
end

context 'with a repeated character class element and %r{} literal' do
it 'registers an offense and corrects' do
expect_offense(<<~RUBY)
Expand Down

0 comments on commit 9c9c04a

Please sign in to comment.