diff --git a/CHANGELOG.md b/CHANGELOG.md index 755b87f..b74afbf 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0 ## [Unreleased] +### Fixed + +- fixed scanner errors for insignificant leading zeros in numerical group refs + * e.g. `(a)\k<01>`, `(a)\g<-01>`, `(a)?(?(01)b|c)` + * thanks to [Markus Schirp](https://github.com/mbj) for the report + ## [2.8.2] - 2023-10-10 - Janosch Müller ### Fixed diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index fd4163f..e5bce69 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -364,6 +364,7 @@ conditional_expression := |* group_lookup . ')' { text = copy(data, ts, te-1) + text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID') emit(:conditional, :condition, text) emit(:conditional, :condition_close, ')') }; @@ -541,13 +542,13 @@ case text = copy(data, ts, te) when /^\\k(.)[^0-9\-][^+\-]*['>]$/ emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text) - when /^\\k(.)[1-9]\d*['>]$/ + when /^\\k(.)0*[1-9]\d*['>]$/ emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text) - when /^\\k(.)-[1-9]\d*['>]$/ + when /^\\k(.)-0*[1-9]\d*['>]$/ emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text) when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/ emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text) - when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/ + when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/ emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text) else raise ValidationError.for(:backref, 'backreference', 'invalid ref ID') @@ -560,9 +561,9 @@ case text = copy(data, ts, te) when /^\\g(.)[^0-9+\-].*['>]$/ emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text) - when /^\\g(.)\d+['>]$/ + when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/ emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text) - when /^\\g(.)[+-]\d+/ + when /^\\g(.)[+-]0*[1-9]\d*/ emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text) else raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID') diff --git a/spec/parser/conditionals_spec.rb b/spec/parser/conditionals_spec.rb index 8be0142..0330f5c 100644 --- a/spec/parser/conditionals_spec.rb +++ b/spec/parser/conditionals_spec.rb @@ -66,4 +66,8 @@ include_examples 'parse', /(?a)(?()T|)/, [1] => [Conditional::Expression, count: 3, to_s: '(?()T|)'], [1, 2] => [Conditional::Branch, to_s: '', ts: 16] + + # test insignificant leading zeros in the condition's group number ref + include_examples 'parse', /(a)(?(001)T)/, + [1, 0] => [Conditional::Condition, to_s: '(001)', reference: 1] end diff --git a/spec/parser/refcalls_spec.rb b/spec/parser/refcalls_spec.rb index 12b3688..1df8468 100644 --- a/spec/parser/refcalls_spec.rb +++ b/spec/parser/refcalls_spec.rb @@ -6,12 +6,16 @@ include_examples 'parse', /(?abc)\k/, 1 => [Backreference::Name, name: 'X', reference: 'X'] include_examples 'parse', /(?abc)\k'X'/, 1 => [Backreference::Name, name: 'X', reference: 'X'] include_examples 'parse', /(abc)\k<1>/, 1 => [Backreference::Number, number: 1, reference: 1] + include_examples 'parse', /(abc)\k<001>/, 1 => [Backreference::Number, number: 1, reference: 1] include_examples 'parse', /(abc)\k<-1>/, 1 => [Backreference::NumberRelative, number: -1, reference: 1] include_examples 'parse', /(abc)\k'-1'/, 1 => [Backreference::NumberRelative, number: -1, reference: 1] + include_examples 'parse', /(abc)\k'-001'/, 1 => [Backreference::NumberRelative, number: -1, reference: 1] include_examples 'parse', /(?abc)\g/, 1 => [Backreference::NameCall, reference: 'X'] include_examples 'parse', /(abc)\g<1>/, 1 => [Backreference::NumberCall, reference: 1] + include_examples 'parse', '(abc)\g<001>', 1 => [Backreference::NumberCall, reference: 1] include_examples 'parse', '\g<0>', 0 => [Backreference::NumberCall, reference: 0] include_examples 'parse', /(abc)\g<-1>/, 1 => [Backreference::NumberCallRelative, reference: 1] + include_examples 'parse', /(abc)\g<-001>/, 1 => [Backreference::NumberCallRelative, reference: 1] include_examples 'parse', /\g<+1>(abc)/, 0 => [Backreference::NumberCallRelative, reference: 1] include_examples 'parse', /(?abc)\k/, diff --git a/spec/scanner/conditionals_spec.rb b/spec/scanner/conditionals_spec.rb index 6b51252..b129d66 100644 --- a/spec/scanner/conditionals_spec.rb +++ b/spec/scanner/conditionals_spec.rb @@ -14,6 +14,7 @@ include_examples 'scan', /(a)(?(1)TRUE|)11/, 9 => [:conditional, :close, ')', 13, 14] include_examples 'scan', /(?A)(?()T|F)1/, 5 => [:conditional, :condition, '', 10, 13] include_examples 'scan', /(?'N'A)(?('N')T|F)2/, 5 => [:conditional, :condition, "'N'", 10, 13] + include_examples 'scan', /(a)(?(001)T)/, 5 => [:conditional, :condition, '001', 6, 9] include_examples 'scan', /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/, 0 => [:group, :capture, '(', 0, 1], diff --git a/spec/scanner/errors_spec.rb b/spec/scanner/errors_spec.rb index 5b6862f..5eb7359 100644 --- a/spec/scanner/errors_spec.rb +++ b/spec/scanner/errors_spec.rb @@ -66,8 +66,15 @@ include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k\'\'' include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<0>' include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k\'0\'' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<-0>' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<000>' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<-000>' include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<>' include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g\'\'' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<000>' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<-000>' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid condition', '(a)(?(0)b)' + include_examples 'scan error', RS::InvalidBackrefError, 'invalid condition', '(a)(?(000)b)' include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}' include_examples 'scan error', RS::UnknownPosixClassError, 'unknown POSIX class [::]', '[[::]]' include_examples 'scan error', RS::UnknownPosixClassError, 'unknown POSIX class [:^:]', '[[:^:]]' diff --git a/spec/scanner/refcalls_spec.rb b/spec/scanner/refcalls_spec.rb index 578b844..1206df0 100644 --- a/spec/scanner/refcalls_spec.rb +++ b/spec/scanner/refcalls_spec.rb @@ -20,9 +20,11 @@ include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10] include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10] + include_examples 'scan', "(abc)\\k'001'", 3 => [:backref, :number_ref_sq, "\\k'001'", 5, 12] include_examples 'scan', '(abc)\k<-1>', 3 => [:backref, :number_rel_ref_ab, '\k<-1>', 5, 11] include_examples 'scan', "(abc)\\k'-1'", 3 => [:backref, :number_rel_ref_sq, "\\k'-1'", 5, 11] + include_examples 'scan', '(abc)\k<-001>', 3 => [:backref, :number_rel_ref_ab, '\k<-001>', 5, 13] # Sub-expression invocation, named, numbered, and relative include_examples 'scan', '(?abc)\g', 3 => [:backref, :name_call_ab, '\g', 9, 14] @@ -33,12 +35,14 @@ include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10] include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10] + include_examples 'scan', '(abc)\g<001>', 3 => [:backref, :number_call_ab, '\g<001>', 5, 12] include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9] include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9] include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11] include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11] + include_examples 'scan', '(abc)\g<-001>', 3 => [:backref, :number_rel_call_ab, '\g<-001>', 5, 13] include_examples 'scan', '\g<+1>(abc)', 0 => [:backref, :number_rel_call_ab, '\g<+1>', 0, 6] include_examples 'scan', "\\g'+1'(abc)", 0 => [:backref, :number_rel_call_sq, "\\g'+1'", 0, 6]