whitequark · iliabylich · Jul 12, 2021 · Jul 10, 2021 · pocke · Jul 10, 2021
diff --git a/lib/parser/lexer.rl b/lib/parser/lexer.rl
@@ -518,7 +518,8 @@ class Parser::Lexer
   c_nl_zlen  = c_nl | zlen;
   c_line     = any - c_nl_zlen;
 
-  c_unicode  = c_any - 0x00..0x7f;
+  c_ascii    = 0x00..0x7f; # the 7-bit range 0x00..0x7f
+  c_unicode  = c_any - c_ascii; # everything in c_any that lies outside c_ascii
   c_upper    = [A-Z];
   c_lower    = [a-z_]  | c_unicode;
   c_alpha    = c_lower | c_upper;
@@ -1406,7 +1407,7 @@ class Parser::Lexer
       ':'
       => { fhold; fgoto expr_beg; };
 
-      '%s' c_any
+      '%s' (c_ascii - [A-Za-z0-9])
       => {
         if version?(23)
           type, delimiter = tok[0..-2], tok[-1].chr
@@ -1758,14 +1759,14 @@ class Parser::Lexer
       };
 
       # %<string>
-      '%' ( any - [A-Za-z] )
+      '%' ( c_ascii - [A-Za-z0-9] ) # delimiter: any c_ascii character except letters and digits
       => {
         type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr
         fgoto *push_literal(type, delimiter, @ts);
       };
 
       # %w(we are the people)
-      '%' [A-Za-z]+ c_any
+      '%' [A-Za-z] (c_ascii - [A-Za-z0-9])
       => {
         type, delimiter = tok[0..-2], tok[-1].chr
         fgoto *push_literal(type, delimiter, @ts);

diff --git a/test/test_lexer.rb b/test/test_lexer.rb
@@ -2246,6 +2246,25 @@ def test_string_pct_pct
                    :tSTRING_END,     '%',    [6, 7])
   end
 
+  def test_string_pct_null
+    assert_scanned("%\0blah\0",
+                   :tSTRING_BEG,     "%\0",  [0, 2],
+                   :tSTRING_CONTENT, "blah", [2, 6],
+                   :tSTRING_END,     "\0",    [6, 7])
+  end
+
+  def test_string_pct_non_ascii # "★" is outside 0x00..0x7f, so it is not accepted as a %-literal delimiter
+    refute_scanned("%★foo★")
+  end
+
+  def test_string_pct_alphabet # "%A" followed by another letter: no alphanumeric character may act as the delimiter
+    refute_scanned("%AfooA")
+  end
+
+  def test_string_pct_number # a digit directly after "%" is not accepted as a delimiter
+    refute_scanned("%1foo1")
+  end
+
   def test_string_pct_w
     assert_scanned("%w[s1 s2 ]",
                    :tQWORDS_BEG,     "%w[", [0, 3],
@@ -2293,6 +2312,26 @@ def test_string_pct_w_tab
                    :tSTRING_END,     ']',   [10, 11])
   end
 
+  def test_string_pct_w_null # a NUL byte delimits a %w literal; a zero-width tSPACE is expected before the closing "\0"
+    assert_scanned("%w\0abc\0",
+                   :tQWORDS_BEG,     "%w\0", [0, 3],
+                   :tSTRING_CONTENT, "abc",  [3, 6],
+                   :tSPACE,          nil,    [6, 6],
+                   :tSTRING_END,     "\0",   [6, 7])
+  end
+
+  def test_string_pct_w_non_ascii # "★" is outside 0x00..0x7f, so "%w★" does not open a literal
+    refute_scanned("%w★foo★")
+  end
+
+  def test_string_pct_w_alphabet # a letter after the "%w" type character is not accepted as a delimiter
+    refute_scanned("%wAfooA")
+  end
+
+  def test_string_pct_w_number # a digit after the "%w" type character is not accepted as a delimiter
+    refute_scanned("%w1foo1")
+  end
+
   def test_string_pct_i
     assert_scanned("%i(s1 s2)",
                    :tQSYMBOLS_BEG,   "%i(", [0, 3],