From 1d36045798b2c068fd36081a6d88b1f0900911dc Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Janosch=20Mu=CC=88ller?= <janosch84@gmail.com>
Date: Mon, 28 Sep 2020 17:06:35 +0200
Subject: [PATCH] Fix scanning of comment-like text in normal mode ...

Issue #70.

The comment scanner was too greedy. In a way, this bug has always existed.

Comment-like patterns with a specific shape have always been scanned incorrectly in normal mode, e.g.

```ruby
/foo # \d
/
```

This was just very rare. Prior to the fix of issue #66 via PR #67, the comment scanner only fired for a limited, incomplete subset of valid comments like the one above. With the broadening of the scanner, this bug became much easier to hit upon.
---
 lib/regexp_parser/scanner/scanner.rl |  4 +++-
 spec/parser/free_space_spec.rb       | 29 ++++---------------------
 spec/scanner/free_space_spec.rb      | 32 ++++++++++++++++++++++++++++
 3 files changed, 39 insertions(+), 26 deletions(-)

diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl
index aef48561..4377ad5e 100644
--- a/lib/regexp_parser/scanner/scanner.rl
+++ b/lib/regexp_parser/scanner/scanner.rl
@@ -649,7 +649,9 @@
       if free_spacing
         emit(:free_space, :comment, *text(data, ts, te))
       else
-        append_literal(data, ts, te)
+        # consume only the pound sign (#) and backtrack to do regular scanning
+        append_literal(data, ts, ts + 1)
+        fexec ts + 1;
       end
     };
 
diff --git a/spec/parser/free_space_spec.rb b/spec/parser/free_space_spec.rb
index c3eb772a..e6841565 100644
--- a/spec/parser/free_space_spec.rb
+++ b/spec/parser/free_space_spec.rb
@@ -24,34 +24,13 @@
     expect(root.first.text).to eq 'a b c d'
   end
 
-  specify('parse single-line free space comments without spaces') do
-    regexp = /a#b/x
-
-    root = RP.parse(regexp)
-    expect(root.length).to eq 2
-
-    expect(root[0]).to be_instance_of(Literal)
-    expect(root[1]).to be_instance_of(Comment)
-  end
-
-  specify('parse single-line free space comments with spaces') do
-    regexp = /a # b/x
-
-    root = RP.parse(regexp)
-    expect(root.length).to eq 3
-
-    expect(root[0]).to be_instance_of(Literal)
-    expect(root[1]).to be_instance_of(WhiteSpace)
-    expect(root[2]).to be_instance_of(Comment)
-  end
-
   specify('parse free space comments') do
     regexp = /
       a   ?     # One letter
       b {2,5}   # Another one
       [c-g]  +  # A set
       (h|i|j) | # A group
-      klm#nospace before or after comment hash
+      klm *
       nop +
     /x
 
@@ -72,11 +51,11 @@
 
     alt_2 = alt.alternatives.last
     expect(alt_2).to be_instance_of(Alternative)
-    expect(alt_2.length).to eq 8
+    expect(alt_2.length).to eq 7
 
-    [0, 2, 5, 7].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
+    [0, 2, 4, 6].each { |i| expect(alt_2[i].class).to eq WhiteSpace }
 
-    [1, 4].each { |i| expect(alt_2[i]).to be_instance_of(Comment) }
+    expect(alt_2[1]).to be_instance_of(Comment)
   end
 
   specify('parse free space nested comments') do
diff --git a/spec/scanner/free_space_spec.rb b/spec/scanner/free_space_spec.rb
index 41c8ec7b..76ff14a1 100644
--- a/spec/scanner/free_space_spec.rb
+++ b/spec/scanner/free_space_spec.rb
@@ -39,6 +39,17 @@
       11 => [:free_space, :comment,  "# B ? comment\n",      37,  51],
       17 => [:free_space, :comment,  "# C {2,3} comment\n",  66,  84],
       29 => [:free_space, :comment,  "# D|E comment\n",     100, 114]
+
+    # single line / no trailing newline (c.f. issue #66)
+    include_examples 'scan', /a # b/x,
+      0 => [:literal,    :literal,    'a',   0,  1],
+      1 => [:free_space, :whitespace, ' ',   1,  2],
+      2 => [:free_space, :comment,    "# b", 2,  5]
+
+    # without spaces (c.f. issue #66)
+    include_examples 'scan', /a#b/x,
+      0 => [:literal,    :literal,  'a',   0,  1],
+      1 => [:free_space, :comment,  "#b", 1,  3]
   end
 
   describe('scan free space inlined') do
@@ -130,4 +141,25 @@
       26 => [:literal,    :literal,        'i j',   35, 38],
       27 => [:group,      :close,          ')',     38, 39]
   end
+
+  describe('scanning `#` in regular (non-x mode)') do # c.f. issue 70
+    include_examples 'scan', /a#bcd/,
+      0 => [:literal, :literal, 'a#bcd', 0, 5]
+    include_examples 'scan', /a # bcd/,
+      0 => [:literal, :literal, 'a # bcd', 0, 7]
+
+    include_examples 'scan', /a#\d/,
+      0 => [:literal, :literal, 'a#', 0, 2],
+      1 => [:type,    :digit,   '\d', 2, 4]
+    include_examples 'scan', /a # \d/,
+      0 => [:literal, :literal, 'a # ', 0, 4],
+      1 => [:type,    :digit,   '\d',   4, 6]
+
+    include_examples 'scan', /a#()/,
+      0 => [:literal, :literal, 'a#', 0, 2],
+      1 => [:group,   :capture, '(',  2, 3]
+    include_examples 'scan', /a # ()/,
+      0 => [:literal, :literal, 'a # ', 0, 4],
+      1 => [:group,   :capture, '(',    4, 5]
+  end
 end