Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

+ lexer.rl: parse meta-control-hex chars in regexes starting from 3.1 #828

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
4 changes: 3 additions & 1 deletion lib/parser/lexer.rl
Expand Up @@ -738,12 +738,14 @@ class Parser::Lexer

# Alternation of three forms, each with actions that run on leaving the match:
#   1. a backslash followed by any character   -> unescape_char
#   2. "\x" followed by one or two hex digits  -> @escape is assigned the result
#      of encode_escape on the hex value, then slash_c_char runs
#   3. any single character except a backslash -> read_post_meta_or_ctrl_char
#
# NOTE(review): the hex branch always reads tok(p - 2, p). Assuming tok(s, e)
# returns the source slice between s and e, a single-digit escape such as "\xF"
# would yield "xF", and "xF".to_i(16) is 0 — confirm whether one-digit hex
# escapes are handled as intended here.
maybe_escaped_char = (
'\\' c_any %unescape_char
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
| ( c_any - [\\] ) %read_post_meta_or_ctrl_char
);

# Alternation of four forms, each with actions that run on leaving the match:
#   1. a backslash followed by any character  -> unescape_char, then slash_c_char
#   2. a literal "?"                          -> @escape is set to "\x7f"
#      (slash_c_char is NOT run for this branch)
#   3. "\x" followed by one or two hex digits -> @escape is assigned the result
#      of encode_escape on the hex value, then slash_c_char runs
#   4. any single character except a backslash or "?"
#                                             -> read_post_meta_or_ctrl_char,
#                                                then slash_c_char
#
# NOTE(review): the hex branch always reads tok(p - 2, p). Assuming tok(s, e)
# returns the source slice between s and e, a single-digit escape such as "\xF"
# would yield "xF", and "xF".to_i(16) is 0 — confirm whether one-digit hex
# escapes are handled as intended here.
maybe_escaped_ctrl_char = ( # why?!
'\\' c_any %unescape_char %slash_c_char
| '?' % { @escape = "\x7f" }
| '\\x' xdigit{1,2} % { @escape = encode_escape(tok(p - 2, p).to_i(16)) } %slash_c_char
| ( c_any - [\\?] ) %read_post_meta_or_ctrl_char %slash_c_char
);

Expand Down Expand Up @@ -935,7 +937,7 @@ class Parser::Lexer
# b"
# must be parsed as "ab"
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
elsif current_literal.regexp?
elsif current_literal.regexp? && @version < 31
# Regular expressions should include escape sequences in their
# escaped form. On the other hand, escaped newlines are removed (in cases like "\\C-\\\n\\M-x")
current_literal.extend_string(tok.gsub("\\\n".freeze, ''.freeze), @ts, @te)
Expand Down
25 changes: 25 additions & 0 deletions test/test_lexer.rb
Expand Up @@ -3719,4 +3719,29 @@ def test_meta_escape_slash_u__after_30
refute_scanned_meta_escape_slash_u('"\M-\u0000"')
refute_scanned_meta_escape_slash_u('"\M-\U0000"')
end

# Scans double-quoted strings that combine \c, \C- and \M- prefixes with a
# trailing \xFF hex escape. Every case expects a single :tSTRING token whose
# value is "\x9F", spanning the whole literal (offset 0 up to the given end).
def test_meta_control_hex_escaped_char
  setup_lexer(19)

  # [source literal, end offset of the expected token range]
  cases = [
    ["\"\\c\\xFF\"",       8],
    ["\"\\c\\M-\\xFF\"",   11],
    ["\"\\C-\\xFF\"",      9],
    ["\"\\C-\\M-\\xFF\"",  12],
    ["\"\\M-\\xFF\"",      9],
    ["\"\\M-\\C-\\xFF\"",  12],
    ["\"\\M-\\c\\xFF\"",   11]
  ]

  cases.each do |source, range_end|
    assert_scanned(source,
                   :tSTRING, "\x9F", [0, range_end])
  end
end
end
50 changes: 48 additions & 2 deletions test/test_parser.rb
Expand Up @@ -5608,7 +5608,7 @@ def test_regexp_encoding
s(:str, "")),
%q{/\xa8/n =~ ""}.dup.force_encoding(Encoding::UTF_8),
%{},
SINCE_1_9)
SINCE_3_1 - SINCE_1_9)
end

#
Expand Down Expand Up @@ -6513,7 +6513,7 @@ def test_parser_bug_198
s(:str, "#")),
%q{[/()\\1/, ?#]},
%q{},
SINCE_1_9)
SINCE_3_1 - SINCE_1_9)
end

def test_parser_bug_272
Expand Down Expand Up @@ -10672,4 +10672,50 @@ def test_warn_on_duplicate_hash_key
%q{ ~~~~~ location},
SINCE_3_1)
end

# Parses regexp literals that combine \c, \C- and \M- prefixes with a trailing
# \xFF hex escape. Each binary-encoded source is expected to produce a :regexp
# node whose only :str child is the single byte 0x9F, with empty options and
# no source-map assertions, for the versions in SINCE_3_1.
def test_control_meta_escape_chars_in_regexp
  x9f = "\x9F".dup.force_encoding('ascii-8bit')

  regexp_sources = [
    %q{/\c\xFF/},
    %q{/\c\M-\xFF/},
    %q{/\C-\xFF/},
    %q{/\C-\M-\xFF/},
    %q{/\M-\xFF/},
    %q{/\M-\C-\xFF/},
    %q{/\M-\c\xFF/}
  ]

  regexp_sources.each do |regexp_source|
    assert_parses(
      s(:regexp, s(:str, x9f), s(:regopt)),
      regexp_source.dup.force_encoding('ascii-8bit'),
      %q{},
      SINCE_3_1)
  end
end
end