diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index d14146fe..82ca9073 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -128,7 +128,7 @@ utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf); non_literal_escape = char_type_char | anchor_char | escaped_ascii | - group_ref | keep_mark | [xucCM]; + keep_mark | [xucCM]; non_set_escape = (anchor_char - 'b') | group_ref | keep_mark | multi_codepoint_char_type | [0-9cCM]; diff --git a/spec/scanner/escapes_spec.rb b/spec/scanner/escapes_spec.rb index 8579c2ca..9f84f4a0 100644 --- a/spec/scanner/escapes_spec.rb +++ b/spec/scanner/escapes_spec.rb @@ -13,6 +13,10 @@ include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3] + # these incomplete ref/call sequences are treated as literal escapes by Ruby + include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3] + include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3] + include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5] include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5] include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]