From 7c4d2685a0190a4e944f3b53816692f17ff6361f Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sat, 12 Sep 2020 21:43:19 +0100 Subject: [PATCH 1/2] Allow options to be passed when processing a String pattern --- README.md | 11 ++++++++ lib/regexp_parser/lexer.rb | 8 +++--- lib/regexp_parser/parser.rb | 26 +++++++++--------- lib/regexp_parser/scanner/scanner.rl | 23 +++++++++------- spec/parser/options_spec.rb | 27 +++++++++++++++++++ spec/scanner/options_spec.rb | 40 ++++++++++++++++++++++++++++ 6 files changed, 109 insertions(+), 26 deletions(-) create mode 100644 spec/parser/options_spec.rb create mode 100644 spec/scanner/options_spec.rb diff --git a/README.md b/README.md index 54852575..bc7ad849 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,17 @@ called with the results as follows: * **Parser**: after completion, the block gets passed the root expression. _The result of the block is returned._ +All three methods accept either a `Regexp` or `String` (containing the pattern) +- if a String is passed, `options` can be supplied: + +```ruby +require 'regexp_parser' + +Regexp::Parser.parse( + "a+ # Recognises a and A...", + options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE +) +``` --- ## Components diff --git a/lib/regexp_parser/lexer.rb b/lib/regexp_parser/lexer.rb index 29468042..68772501 100644 --- a/lib/regexp_parser/lexer.rb +++ b/lib/regexp_parser/lexer.rb @@ -11,11 +11,11 @@ class Regexp::Lexer CLOSING_TOKENS = [:close].freeze - def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) - new.lex(input, syntax, &block) + def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) + new.lex(input, syntax, options: options, &block) end - def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) + def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) syntax = Regexp::Syntax.new(syntax) self.tokens = [] @@ -25,7 +25,7 @@ def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) self.shift = 0 last = nil - 
Regexp::Scanner.scan(input) do |type, token, text, ts, te| + Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te| type, token = *syntax.normalize(type, token) syntax.check! type, token diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb index 2ac42f12..9176a789 100644 --- a/lib/regexp_parser/parser.rb +++ b/lib/regexp_parser/parser.rb @@ -18,12 +18,12 @@ def initialize(type, token) end end - def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) - new.parse(input, syntax, &block) + def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) + new.parse(input, syntax, options: options, &block) end - def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) - root = Root.build(options_from_input(input)) + def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) + root = Root.build(extract_options(input, options)) self.root = root self.node = root @@ -35,7 +35,7 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) self.captured_group_counts = Hash.new(0) - Regexp::Lexer.scan(input, syntax) do |token| + Regexp::Lexer.scan(input, syntax, options: options) do |token| parse_token(token) end @@ -54,14 +54,16 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) :options_stack, :switching_options, :conditional_nesting, :captured_group_counts - def options_from_input(input) - return {} unless input.is_a?(::Regexp) + def extract_options(input, options) + options = input.options if input.is_a?(::Regexp) - options = {} - options[:i] = true if input.options & ::Regexp::IGNORECASE != 0 - options[:m] = true if input.options & ::Regexp::MULTILINE != 0 - options[:x] = true if input.options & ::Regexp::EXTENDED != 0 - options + return {} unless options + + enabled_options = {} + enabled_options[:i] = true if options & ::Regexp::IGNORECASE != 0 + enabled_options[:m] = true if options & ::Regexp::MULTILINE != 0 + enabled_options[:x] = true if options & ::Regexp::EXTENDED != 0 + 
enabled_options end def nest(exp) diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index 82ca9073..607047c3 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -737,21 +737,16 @@ class Regexp::Scanner # # This method may raise errors if a syntax error is encountered. # -------------------------------------------------------------------------- - def self.scan(input_object, &block) - new.scan(input_object, &block) + def self.scan(input_object, options: nil, &block) + new.scan(input_object, options: options, &block) end - def scan(input_object, &block) + def scan(input_object, options: nil, &block) self.literal = nil stack = [] - if input_object.is_a?(Regexp) - input = input_object.source - self.free_spacing = (input_object.options & Regexp::EXTENDED != 0) - else - input = input_object - self.free_spacing = false - end + input = input_object.is_a?(Regexp) ? input_object.source : input_object + self.free_spacing = free_spacing?(input_object, options) self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}] data = input.unpack("c*") if input.is_a?(String) @@ -817,6 +812,14 @@ class Regexp::Scanner attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack, :group_depth, :set_depth, :conditional_stack + def free_spacing?(input_object, options) + options = input_object.options if input_object.is_a?(::Regexp) + + return false unless options + + options & Regexp::EXTENDED != 0 + end + def in_group? 
group_depth > 0 end diff --git a/spec/parser/options_spec.rb b/spec/parser/options_spec.rb new file mode 100644 index 00000000..3c2c2ccb --- /dev/null +++ b/spec/parser/options_spec.rb @@ -0,0 +1,27 @@ +require 'spec_helper' + +RSpec.describe('passing options to parse') do + it 'ignores options if parsing from a Regexp' do + root = RP.parse(/a+/ix, options: ::Regexp::MULTILINE) + + expect(root.options).to eq(i: true, x: true) + end + + it 'sets options if parsing from a String' do + root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED) + + expect(root.options).to eq(m: true, x: true) + end + + it 'allows options to not be supplied when parsing from a Regexp' do + root = RP.parse(/a+/ix) + + expect(root.options).to eq(i: true, x: true) + end + + it 'has an empty option-hash when parsing from a String and passing no options' do + root = RP.parse('a+') + + expect(root.options).to be_empty + end +end diff --git a/spec/scanner/options_spec.rb b/spec/scanner/options_spec.rb new file mode 100644 index 00000000..9b4737ae --- /dev/null +++ b/spec/scanner/options_spec.rb @@ -0,0 +1,40 @@ +require 'spec_helper' + +RSpec.describe('passing options to scan') do + def expect_type_tokens(tokens, type_tokens) + expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens) + end + + it 'ignores options if parsing from a Regexp' do + expect_type_tokens( + RS.scan(/a+#c/im, options: ::Regexp::EXTENDED), + [ + %i[literal literal], + %i[quantifier one_or_more], + %i[literal literal] + ] + ) + end + + it 'sets free_spacing based on options if parsing from a String' do + expect_type_tokens( + RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED), + [ + %i[literal literal], + %i[quantifier one_or_more], + %i[free_space comment] + ] + ) + end + + it 'does not set free_spacing if parsing from a String and passing no options' do + expect_type_tokens( + RS.scan('a+#c'), + [ + %i[literal literal], + %i[quantifier one_or_more], + %i[literal literal] + ] 
+ ) + end +end From eea67dd9f4c8572e0e2adc1bfbe2aef60fce2a7a Mon Sep 17 00:00:00 2001 From: Owen Stephens Date: Sun, 13 Sep 2020 10:14:32 +0100 Subject: [PATCH 2/2] fixup! Allow options to be passed when processing a String pattern --- lib/regexp_parser/parser.rb | 4 ++++ lib/regexp_parser/scanner/scanner.rl | 4 ++++ spec/parser/options_spec.rb | 9 +++++---- spec/scanner/options_spec.rb | 16 ++++++---------- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb index 9176a789..aa14a99f 100644 --- a/lib/regexp_parser/parser.rb +++ b/lib/regexp_parser/parser.rb @@ -55,6 +55,10 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) :captured_group_counts def extract_options(input, options) + if options && !input.is_a?(String) + raise ArgumentError, 'options cannot be supplied unless parsing a String' + end + options = input.options if input.is_a?(::Regexp) return {} unless options diff --git a/lib/regexp_parser/scanner/scanner.rl b/lib/regexp_parser/scanner/scanner.rl index 607047c3..aef48561 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -813,6 +813,10 @@ class Regexp::Scanner :group_depth, :set_depth, :conditional_stack def free_spacing?(input_object, options) + if options && !input_object.is_a?(String) + raise ArgumentError, 'options cannot be supplied unless scanning a String' + end + options = input_object.options if input_object.is_a?(::Regexp) return false unless options diff --git a/spec/parser/options_spec.rb b/spec/parser/options_spec.rb index 3c2c2ccb..fb9e669f 100644 --- a/spec/parser/options_spec.rb +++ b/spec/parser/options_spec.rb @@ -1,10 +1,11 @@ require 'spec_helper' RSpec.describe('passing options to parse') do - it 'ignores options if parsing from a Regexp' do - root = RP.parse(/a+/ix, options: ::Regexp::MULTILINE) - - expect(root.options).to eq(i: true, x: true) + it 'raises if parsing from a Regexp and 
options are passed' do + expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error( + ArgumentError, + 'options cannot be supplied unless parsing a String' + ) end it 'sets options if parsing from a String' do diff --git a/spec/scanner/options_spec.rb b/spec/scanner/options_spec.rb index 9b4737ae..d7fd98df 100644 --- a/spec/scanner/options_spec.rb +++ b/spec/scanner/options_spec.rb @@ -5,18 +5,14 @@ def expect_type_tokens(tokens, type_tokens) expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens) end - it 'ignores options if parsing from a Regexp' do - expect_type_tokens( - RS.scan(/a+#c/im, options: ::Regexp::EXTENDED), - [ - %i[literal literal], - %i[quantifier one_or_more], - %i[literal literal] - ] + it 'raises if scanning from a Regexp and options are passed' do + expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error( + ArgumentError, + 'options cannot be supplied unless scanning a String' ) end - it 'sets free_spacing based on options if parsing from a String' do + it 'sets free_spacing based on options if scanning from a String' do expect_type_tokens( RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED), [ @@ -27,7 +23,7 @@ def expect_type_tokens(tokens, type_tokens) ) end - it 'does not set free_spacing if parsing from a String and passing no options' do + it 'does not set free_spacing if scanning from a String and passing no options' do expect_type_tokens( RS.scan('a+#c'), [