From a48a8f6068217bc02557f9780681f11377fdbee8 Mon Sep 17 00:00:00 2001 From: Masataka Pocke Kuwabara Date: Mon, 12 Jul 2021 17:59:19 +0900 Subject: [PATCH] - lexer.rl: fix incompatible delimiters on percent literal (#808) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CRuby only accepts ASCII characters except `[A-Za-z0-9]` as the delimiter of a percent literal, but the lexer accepts a different set of characters. For example: * CRuby accepts `%w^Dfoo^D`, but the parser didn't (note: `^D` means 0x04) * CRuby rejects `%w1foo1`, but the parser accepts it * CRuby rejects `%w★foo★`, but the parser accepts it This patch fixes the problems. --- lib/parser/lexer.rl | 9 +++++---- test/test_lexer.rb | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 44 insertions(+), 4 deletions(-) diff --git a/lib/parser/lexer.rl b/lib/parser/lexer.rl index 8574f95cc..718c1d8bf 100644 --- a/lib/parser/lexer.rl +++ b/lib/parser/lexer.rl @@ -518,7 +518,8 @@ class Parser::Lexer c_nl_zlen = c_nl | zlen; c_line = any - c_nl_zlen; - c_unicode = c_any - 0x00..0x7f; + c_ascii = 0x00..0x7f; + c_unicode = c_any - c_ascii; c_upper = [A-Z]; c_lower = [a-z_] | c_unicode; c_alpha = c_lower | c_upper; @@ -1406,7 +1407,7 @@ class Parser::Lexer ':' => { fhold; fgoto expr_beg; }; - '%s' c_any + '%s' (c_ascii - [A-Za-z0-9]) => { if version?(23) type, delimiter = tok[0..-2], tok[-1].chr @@ -1758,14 +1759,14 @@ class Parser::Lexer }; # % - '%' ( any - [A-Za-z] ) + '%' ( c_ascii - [A-Za-z0-9] ) => { type, delimiter = @source_buffer.slice(@ts).chr, tok[-1].chr fgoto *push_literal(type, delimiter, @ts); }; # %w(we are the people) - '%' [A-Za-z]+ c_any + '%' [A-Za-z] (c_ascii - [A-Za-z0-9]) => { type, delimiter = tok[0..-2], tok[-1].chr fgoto *push_literal(type, delimiter, @ts); diff --git a/test/test_lexer.rb b/test/test_lexer.rb index 9074ccc27..192247718 100644 --- a/test/test_lexer.rb +++ b/test/test_lexer.rb @@ -2246,6 +2246,25 @@ def test_string_pct_pct :tSTRING_END, '%', [6, 7]) end + 
def test_string_pct_null + assert_scanned("%\0blah\0", + :tSTRING_BEG, "%\0", [0, 2], + :tSTRING_CONTENT, "blah", [2, 6], + :tSTRING_END, "\0", [6, 7]) + end + + def test_string_pct_non_ascii + refute_scanned("%★foo★") + end + + def test_string_pct_alphabet + refute_scanned("%AfooA") + end + + def test_string_pct_number + refute_scanned("%1foo1") + end + def test_string_pct_w assert_scanned("%w[s1 s2 ]", :tQWORDS_BEG, "%w[", [0, 3], @@ -2293,6 +2312,26 @@ def test_string_pct_w_tab :tSTRING_END, ']', [10, 11]) end + def test_string_pct_w_null + assert_scanned("%w\0abc\0", + :tQWORDS_BEG, "%w\0", [0, 3], + :tSTRING_CONTENT, "abc", [3, 6], + :tSPACE, nil, [6, 6], + :tSTRING_END, "\0", [6, 7]) + end + + def test_string_pct_w_non_ascii + refute_scanned("%w★foo★") + end + + def test_string_pct_w_alphabet + refute_scanned("%wAfooA") + end + + def test_string_pct_w_number + refute_scanned("%w1foo1") + end + def test_string_pct_i assert_scanned("%i(s1 s2)", :tQSYMBOLS_BEG, "%i(", [0, 3],