Skip to content

Commit

Permalink
Handle leading 0s in group refs, fixes #88
Browse files (browse the repository at this point in the history)
  • Loading branch information
jaynetics committed Oct 11, 2023
1 parent 0677c00 commit 69a7f05
Show file tree
Hide file tree
Showing 7 changed files with 32 additions and 5 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Expand Up @@ -7,6 +7,12 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

### Fixed

- fixed scanner errors for insignificant leading zeros in numerical group refs
  * e.g. `(a)\k<01>`, `(a)\g<-01>`, `(a)?(?(01)b|c)`
  * thanks to [Markus Schirp](https://github.com/mbj) for the report

## [2.8.2] - 2023-10-10 - Janosch Müller

### Fixed
Expand Down
11 changes: 6 additions & 5 deletions lib/regexp_parser/scanner/scanner.rl
Expand Up @@ -364,6 +364,7 @@
conditional_expression := |*
group_lookup . ')' {
text = copy(data, ts, te-1)
text =~ /[^0]/ or raise ValidationError.for(:backref, 'condition', 'invalid ref ID')
emit(:conditional, :condition, text)
emit(:conditional, :condition_close, ')')
};
Expand Down Expand Up @@ -541,13 +542,13 @@
case text = copy(data, ts, te)
when /^\\k(.)[^0-9\-][^+\-]*['>]$/
emit(:backref, $1 == '<' ? :name_ref_ab : :name_ref_sq, text)
when /^\\k(.)[1-9]\d*['>]$/
when /^\\k(.)0*[1-9]\d*['>]$/
emit(:backref, $1 == '<' ? :number_ref_ab : :number_ref_sq, text)
when /^\\k(.)-[1-9]\d*['>]$/
when /^\\k(.)-0*[1-9]\d*['>]$/
emit(:backref, $1 == '<' ? :number_rel_ref_ab : :number_rel_ref_sq, text)
when /^\\k(.)[^0-9\-].*[+\-]\d+['>]$/
emit(:backref, $1 == '<' ? :name_recursion_ref_ab : :name_recursion_ref_sq, text)
when /^\\k(.)-?[1-9]\d*[+\-]\d+['>]$/
when /^\\k(.)-?0*[1-9]\d*[+\-]\d+['>]$/
emit(:backref, $1 == '<' ? :number_recursion_ref_ab : :number_recursion_ref_sq, text)
else
raise ValidationError.for(:backref, 'backreference', 'invalid ref ID')
Expand All @@ -560,9 +561,9 @@
case text = copy(data, ts, te)
when /^\\g(.)[^0-9+\-].*['>]$/
emit(:backref, $1 == '<' ? :name_call_ab : :name_call_sq, text)
when /^\\g(.)\d+['>]$/
when /^\\g(.)(?:0|0*[1-9]\d*)['>]$/
emit(:backref, $1 == '<' ? :number_call_ab : :number_call_sq, text)
when /^\\g(.)[+-]\d+/
when /^\\g(.)[+-]0*[1-9]\d*/
emit(:backref, $1 == '<' ? :number_rel_call_ab : :number_rel_call_sq, text)
else
raise ValidationError.for(:backref, 'subexpression call', 'invalid ref ID')
Expand Down
4 changes: 4 additions & 0 deletions spec/parser/conditionals_spec.rb
Expand Up @@ -66,4 +66,8 @@
include_examples 'parse', /(?<A>a)(?(<A>)T|)/,
[1] => [Conditional::Expression, count: 3, to_s: '(?(<A>)T|)'],
[1, 2] => [Conditional::Branch, to_s: '', ts: 16]

# test insignificant leading zeros in the condition's group number ref
include_examples 'parse', /(a)(?(001)T)/,
[1, 0] => [Conditional::Condition, to_s: '(001)', reference: 1]
end
4 changes: 4 additions & 0 deletions spec/parser/refcalls_spec.rb
Expand Up @@ -6,12 +6,16 @@
include_examples 'parse', /(?<X>abc)\k<X>/, 1 => [Backreference::Name, name: 'X', reference: 'X']
include_examples 'parse', /(?<X>abc)\k'X'/, 1 => [Backreference::Name, name: 'X', reference: 'X']
include_examples 'parse', /(abc)\k<1>/, 1 => [Backreference::Number, number: 1, reference: 1]
include_examples 'parse', /(abc)\k<001>/, 1 => [Backreference::Number, number: 1, reference: 1]
include_examples 'parse', /(abc)\k<-1>/, 1 => [Backreference::NumberRelative, number: -1, reference: 1]
include_examples 'parse', /(abc)\k'-1'/, 1 => [Backreference::NumberRelative, number: -1, reference: 1]
include_examples 'parse', /(abc)\k'-001'/, 1 => [Backreference::NumberRelative, number: -1, reference: 1]
include_examples 'parse', /(?<X>abc)\g<X>/, 1 => [Backreference::NameCall, reference: 'X']
include_examples 'parse', /(abc)\g<1>/, 1 => [Backreference::NumberCall, reference: 1]
include_examples 'parse', '(abc)\g<001>', 1 => [Backreference::NumberCall, reference: 1]
include_examples 'parse', '\g<0>', 0 => [Backreference::NumberCall, reference: 0]
include_examples 'parse', /(abc)\g<-1>/, 1 => [Backreference::NumberCallRelative, reference: 1]
include_examples 'parse', /(abc)\g<-001>/, 1 => [Backreference::NumberCallRelative, reference: 1]
include_examples 'parse', /\g<+1>(abc)/, 0 => [Backreference::NumberCallRelative, reference: 1]

include_examples 'parse', /(?<X>abc)\k<X-0>/,
Expand Down
1 change: 1 addition & 0 deletions spec/scanner/conditionals_spec.rb
Expand Up @@ -14,6 +14,7 @@
include_examples 'scan', /(a)(?(1)TRUE|)11/, 9 => [:conditional, :close, ')', 13, 14]
include_examples 'scan', /(?<N>A)(?(<N>)T|F)1/, 5 => [:conditional, :condition, '<N>', 10, 13]
include_examples 'scan', /(?'N'A)(?('N')T|F)2/, 5 => [:conditional, :condition, "'N'", 10, 13]
include_examples 'scan', /(a)(?(001)T)/, 5 => [:conditional, :condition, '001', 6, 9]

include_examples 'scan', /(a(b(c)))(?(1)(?(2)d|(?(3)e|f))|(?(2)(?(1)g|h)))/,
0 => [:group, :capture, '(', 0, 1],
Expand Down
7 changes: 7 additions & 0 deletions spec/scanner/errors_spec.rb
Expand Up @@ -66,8 +66,15 @@
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k\'\''
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<0>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k\'0\''
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<-0>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<000>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid backref', '\k<-000>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g\'\''
include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<000>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid refcall', '\g<-000>'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid condition', '(a)(?(0)b)'
include_examples 'scan error', RS::InvalidBackrefError, 'invalid condition', '(a)(?(000)b)'
include_examples 'scan error', RS::UnknownUnicodePropertyError, 'unknown property', '\p{foobar}'
include_examples 'scan error', RS::UnknownPosixClassError, 'unknown POSIX class [::]', '[[::]]'
include_examples 'scan error', RS::UnknownPosixClassError, 'unknown POSIX class [:^:]', '[[:^:]]'
Expand Down
4 changes: 4 additions & 0 deletions spec/scanner/refcalls_spec.rb
Expand Up @@ -20,9 +20,11 @@

include_examples 'scan', '(abc)\k<1>', 3 => [:backref, :number_ref_ab, '\k<1>', 5, 10]
include_examples 'scan', "(abc)\\k'1'", 3 => [:backref, :number_ref_sq, "\\k'1'", 5, 10]
include_examples 'scan', "(abc)\\k'001'", 3 => [:backref, :number_ref_sq, "\\k'001'", 5, 12]

include_examples 'scan', '(abc)\k<-1>', 3 => [:backref, :number_rel_ref_ab, '\k<-1>', 5, 11]
include_examples 'scan', "(abc)\\k'-1'", 3 => [:backref, :number_rel_ref_sq, "\\k'-1'", 5, 11]
include_examples 'scan', '(abc)\k<-001>', 3 => [:backref, :number_rel_ref_ab, '\k<-001>', 5, 13]

# Sub-expression invocation, named, numbered, and relative
include_examples 'scan', '(?<X>abc)\g<X>', 3 => [:backref, :name_call_ab, '\g<X>', 9, 14]
Expand All @@ -33,12 +35,14 @@

include_examples 'scan', '(abc)\g<1>', 3 => [:backref, :number_call_ab, '\g<1>', 5, 10]
include_examples 'scan', "(abc)\\g'1'", 3 => [:backref, :number_call_sq, "\\g'1'", 5, 10]
include_examples 'scan', '(abc)\g<001>', 3 => [:backref, :number_call_ab, '\g<001>', 5, 12]

include_examples 'scan', 'a(b|\g<0>)', 4 => [:backref, :number_call_ab, '\g<0>', 4, 9]
include_examples 'scan', "a(b|\\g'0')", 4 => [:backref, :number_call_sq, "\\g'0'", 4, 9]

include_examples 'scan', '(abc)\g<-1>', 3 => [:backref, :number_rel_call_ab, '\g<-1>', 5, 11]
include_examples 'scan', "(abc)\\g'-1'", 3 => [:backref, :number_rel_call_sq, "\\g'-1'", 5, 11]
include_examples 'scan', '(abc)\g<-001>', 3 => [:backref, :number_rel_call_ab, '\g<-001>', 5, 13]

include_examples 'scan', '\g<+1>(abc)', 0 => [:backref, :number_rel_call_ab, '\g<+1>', 0, 6]
include_examples 'scan', "\\g'+1'(abc)", 0 => [:backref, :number_rel_call_sq, "\\g'+1'", 0, 6]
Expand Down

0 comments on commit 69a7f05

Please sign in to comment.