From 1d36045798b2c068fd36081a6d88b1f0900911dc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janosch=20Mu=CC=88ller?= Date: Mon, 28 Sep 2020 17:06:35 +0200 Subject: [PATCH] Fix scanning of comment-like text in normal mode ... Issue #70. The comment scanner was to greedy. In a way this bug always existed. Comment-like patterns with a specific shape have always been scanned incorrectly in normal mode, e.g. ```ruby /foo # \d / ``` This was just very rare. Prior to the fix of issue #66 via PR #67, the comment scanner only fired for a limited, incomplete subset of valid comments like the one above. With the broadening of the scanner, this bug became much easier to hit upon. --- lib/regexp_parser/scanner/scanner.rl | 4 +++- spec/parser/free_space_spec.rb | 29 ++++--------------------- spec/scanner/free_space_spec.rb | 32 ++++++++++++++++++++++++++++ 3 files changed, 39 insertions(+), 26 deletions(-) diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index aef48561..4377ad5e 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -649,7 +649,9 @@ if free_spacing emit(:free_space, :comment, *text(data, ts, te)) else - append_literal(data, ts, te) + # consume only the pound sign (#) and backtrack to do regular scanning + append_literal(data, ts, ts + 1) + fexec ts + 1; end }; diff --git a/spec/parser/free_space_spec.rb b/spec/parser/free_space_spec.rb index c3eb772a..e6841565 100644 --- a/spec/parser/free_space_spec.rb +++ b/spec/parser/free_space_spec.rb @@ -24,34 +24,13 @@ expect(root.first.text).to eq 'a b c d' end - specify('parse single-line free space comments without spaces') do - regexp = /a#b/x - - root = RP.parse(regexp) - expect(root.length).to eq 2 - - expect(root[0]).to be_instance_of(Literal) - expect(root[1]).to be_instance_of(Comment) - end - - specify('parse single-line free space comments with spaces') do - regexp = /a # b/x - - root = RP.parse(regexp) - expect(root.length).to eq 3 - - expect(root[0]).to be_instance_of(Literal) - expect(root[1]).to be_instance_of(WhiteSpace) - expect(root[2]).to be_instance_of(Comment) - end - specify('parse free space comments') do regexp = / a ? # One letter b {2,5} # Another one [c-g] + # A set (h|i|j) | # A group - klm#nospace before or after comment hash + klm * nop + /x @@ -72,11 +51,11 @@ alt_2 = alt.alternatives.last expect(alt_2).to be_instance_of(Alternative) - expect(alt_2.length).to eq 8 + expect(alt_2.length).to eq 7 - [0, 2, 5, 7].each { |i| expect(alt_2[i].class).to eq WhiteSpace } + [0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace } - [1, 4].each { |i| expect(alt_2[i]).to be_instance_of(Comment) } + expect(alt_2[1]).to be_instance_of(Comment) end specify('parse free space nested comments') do diff --git a/spec/scanner/free_space_spec.rb b/spec/scanner/free_space_spec.rb index 41c8ec7b..76ff14a1 100644 --- a/spec/scanner/free_space_spec.rb +++ b/spec/scanner/free_space_spec.rb @@ -39,6 +39,17 @@ 11 => [:free_space, :comment, "# B ? comment\n", 37, 51], 17 => [:free_space, :comment, "# C {2,3} comment\n", 66, 84], 29 => [:free_space, :comment, "# D|E comment\n", 100, 114] + + # single line / no trailing newline (c.f. issue #66) + include_examples 'scan', /a # b/x, + 0 => [:literal, :literal, 'a', 0, 1], + 1 => [:free_space, :whitespace, ' ', 1, 2], + 2 => [:free_space, :comment, "# b", 2, 5] + + # without spaces (c.f. issue #66) + include_examples 'scan', /a#b/x, + 0 => [:literal, :literal, 'a', 0, 1], + 1 => [:free_space, :comment, "#b", 1, 3] end describe('scan free space inlined') do @@ -130,4 +141,25 @@ 26 => [:literal, :literal, 'i j', 35, 38], 27 => [:group, :close, ')', 38, 39] end + + describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70 + include_examples 'scan', /a#bcd/, + 0 => [:literal, :literal, 'a#bcd', 0, 5] + include_examples 'scan', /a # bcd/, + 0 => [:literal, :literal, 'a # bcd', 0, 7] + + include_examples 'scan', /a#\d/, + 0 => [:literal, :literal, 'a#', 0, 2], + 1 => [:type, :digit, '\d', 2, 4] + include_examples 'scan', /a # \d/, + 0 => [:literal, :literal, 'a # ', 0, 4], + 1 => [:type, :digit, '\d', 4, 6] + + include_examples 'scan', /a#()/, + 0 => [:literal, :literal, 'a#', 0, 2], + 1 => [:group, :capture, '(', 2, 3] + include_examples 'scan', /a # ()/, + 0 => [:literal, :literal, 'a # ', 0, 4], + 1 => [:group, :capture, '(', 4, 5] + end end