Skip to content

Commit

Permalink
+ lexer.rl: parse meta-control-hex chars in regexes starting from 3.1 (
Browse files Browse the repository at this point in the history
…#828)

This commit tracks upstream commit ruby/ruby@11ae581.
  • Loading branch information
iliabylich committed Nov 19, 2021
1 parent 547d731 commit 24d2f68
Show file tree
Hide file tree
Showing 3 changed files with 76 additions and 3 deletions.
4 changes: 3 additions & 1 deletion lib/parser/lexer.rl
Expand Up @@ -738,12 +738,14 @@ class Parser::Lexer

# Matches one character that may be written as a backslash escape.
# ('%' attaches a Ragel leaving action to the expression before it.)
# Alternatives, in the order written below:
#   1. a backslash followed by one c_any character; unescape_char runs on
#      leaving.
#   2. '\x' followed by one or two hex digits; @escape is set to
#      encode_escape(tok(p - 2, p).to_i(16)), then slash_c_char runs.
#   3. one c_any character other than a backslash;
#      read_post_meta_or_ctrl_char runs on leaving.
# NOTE(review): tok(p - 2, p) always covers two positions; when only one hex
# digit matched, that slice presumably includes the 'x' as well - confirm that
# the single-digit form (e.g. \xF) yields the intended value.
maybe_escaped_char = (
'\\' c_any %unescape_char
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
);

# Variant of the escaped-character rule in which every alternative except the
# literal '?' finishes with the slash_c_char action.
# ('%' attaches a Ragel leaving action to the expression before it.)
# Alternatives, in the order written below:
#   1. a backslash followed by one c_any character; unescape_char and then
#      slash_c_char run on leaving.
#   2. a literal '?'; @escape is set to "\x7f" directly and no further action
#      is attached.
#   3. '\x' followed by one or two hex digits; @escape is set to
#      encode_escape(tok(p - 2, p).to_i(16)), then slash_c_char runs.
#   4. one c_any character other than a backslash or '?';
#      read_post_meta_or_ctrl_char and then slash_c_char run on leaving.
# NOTE(review): tok(p - 2, p) always covers two positions; when only one hex
# digit matched, that slice presumably includes the 'x' as well - confirm that
# the single-digit form (e.g. \c\xF) yields the intended value.
maybe_escaped_ctrl_char = ( # why?!
'\\' c_any %unescape_char %slash_c_char
| '?' % { @escape = "\x7f" }
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
);

Expand Down Expand Up @@ -935,7 +937,7 @@ class Parser::Lexer
# b"
# must be parsed as "ab"
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
elsif current_literal.regexp?
elsif current_literal.regexp? && @version < 31
# Regular expressions should include escape sequences in their
# escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
Expand Down
25 changes: 25 additions & 0 deletions test/test_lexer.rb
Expand Up @@ -3719,4 +3719,29 @@ def test_meta_escape_slash_u__after_30
refute_scanned_meta_escape_slash_u('"\M-\u0000"')
refute_scanned_meta_escape_slash_u('"\M-\U0000"')
end

# Scans double-quoted strings in which the hex escape \xFF is preceded by each
# combination of the \c, \C- and \M- prefixes, and expects every one of them
# to produce a single :tSTRING token whose value is the byte "\x9F".
def test_meta_control_hex_escaped_char
  setup_lexer(19) # presumably selects the Ruby 1.9 lexer rules - confirm

  # Each row pairs a source snippet with the second element of the expected
  # [0, n] pair handed to assert_scanned; n equals the snippet's length.
  cases = [
    ["\"\\c\\xFF\"",       8],
    ["\"\\c\\M-\\xFF\"",   11],
    ["\"\\C-\\xFF\"",      9],
    ["\"\\C-\\M-\\xFF\"",  12],
    ["\"\\M-\\xFF\"",      9],
    ["\"\\M-\\C-\\xFF\"",  12],
    ["\"\\M-\\c\\xFF\"",   11],
  ]

  cases.each do |source, last_offset|
    assert_scanned(source, :tSTRING, "\x9F", [0, last_offset])
  end
end
end
50 changes: 48 additions & 2 deletions test/test_parser.rb
Expand Up @@ -5608,7 +5608,7 @@ def test_regexp_encoding
s(:str, "")),
%q{/\xa8/n =~ ""}.dup.force_encoding(Encoding::UTF_8),
%{},
SINCE_1_9)
SINCE_3_1 - SINCE_1_9)
end

#
Expand Down Expand Up @@ -6513,7 +6513,7 @@ def test_parser_bug_198
s(:str, "#")),
%q{[/()\\1/, ?#]},
%q{},
SINCE_1_9)
SINCE_3_1 - SINCE_1_9)
end

def test_parser_bug_272
Expand Down Expand Up @@ -10672,4 +10672,50 @@ def test_warn_on_duplicate_hash_key
%q{ ~~~~~ location},
SINCE_3_1)
end

# Parses regexp literals in which the hex escape \xFF is preceded by each
# combination of the \c, \C- and \M- prefixes. Every literal is expected to
# parse to a regexp node holding one str node with the byte 0x9F and an empty
# regopt node, for the versions listed in SINCE_3_1.
def test_control_meta_escape_chars_in_regexp
  # Expected regexp body: the single byte 0x9F tagged as ASCII-8BIT.
  x9f = "\x9F".dup.force_encoding('ascii-8bit')

  regexp_sources = [
    %q{/\c\xFF/},
    %q{/\c\M-\xFF/},
    %q{/\C-\xFF/},
    %q{/\C-\M-\xFF/},
    %q{/\M-\xFF/},
    %q{/\M-\C-\xFF/},
    %q{/\M-\c\xFF/},
  ]

  regexp_sources.each do |literal|
    # The source is handed over as a fresh binary-encoded copy, matching the
    # encoding of the expected str payload.
    binary_source = literal.dup.force_encoding('ascii-8bit')
    expected_ast = s(:regexp, s(:str, x9f), s(:regopt))

    assert_parses(expected_ast, binary_source, %q{}, SINCE_3_1)
  end
end
end

0 comments on commit 24d2f68

Please sign in to comment.