From 84b5650ab239309f382d5850a78c3fc48f08ade3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Janosch=20Mu=CC=88ller?= Date: Sat, 12 Sep 2020 11:57:19 +0200 Subject: [PATCH] Support \g, \k without group id as literal escapes [#65] --- lib/regexp_parser/scanner/scanner.rl | 2 +- spec/scanner/escapes_spec.rb | 4 ++++ 2 files changed, 5 insertions(+), 1 deletion(-) diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index d14146fe..82ca9073 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -128,7 +128,7 @@ utf8_4_byte = (0xf0..0xf4 0x80..0xbf 0x80..0xbf 0x80..0xbf); non_literal_escape = char_type_char | anchor_char | escaped_ascii | - group_ref | keep_mark | [xucCM]; + keep_mark | [xucCM]; non_set_escape = (anchor_char - 'b') | group_ref | keep_mark | multi_codepoint_char_type | [0-9cCM]; diff --git a/spec/scanner/escapes_spec.rb b/spec/scanner/escapes_spec.rb index 8579c2ca..9f84f4a0 100644 --- a/spec/scanner/escapes_spec.rb +++ b/spec/scanner/escapes_spec.rb @@ -13,6 +13,10 @@ include_examples 'scan', 'c\qt', 1 => [:escape, :literal, '\q', 1, 3] + # these incomplete ref/call sequences are treated as literal escapes by Ruby + include_examples 'scan', 'c\gt', 1 => [:escape, :literal, '\g', 1, 3] + include_examples 'scan', 'c\kt', 1 => [:escape, :literal, '\k', 1, 3] + include_examples 'scan', 'a\012c', 1 => [:escape, :octal, '\012', 1, 5] include_examples 'scan', 'a\0124', 1 => [:escape, :octal, '\012', 1, 5] include_examples 'scan', '\712+7', 0 => [:escape, :octal, '\712', 0, 4]