diff --git a/README.md b/README.md index 5485257..bc7ad84 100644 --- a/README.md +++ b/README.md @@ -72,6 +72,17 @@ called with the results as follows: * **Parser**: after completion, the block gets passed the root expression. _The result of the block is returned._ +All three methods accept either a `Regexp` or `String` (containing the pattern). +If a String is passed, `options` can be supplied: + +```ruby +require 'regexp_parser' + +Regexp::Parser.parse( + "a+ # Recognises a and A...", + options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE +) +``` --- ## Components diff --git a/lib/regexp_parser/lexer.rb b/lib/regexp_parser/lexer.rb index 2946804..6877250 100644 --- a/lib/regexp_parser/lexer.rb +++ b/lib/regexp_parser/lexer.rb @@ -11,11 +11,11 @@ class Regexp::Lexer CLOSING_TOKENS = [:close].freeze - def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) - new.lex(input, syntax, &block) + def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) + new.lex(input, syntax, options: options, &block) end - def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) + def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) syntax = Regexp::Syntax.new(syntax) self.tokens = [] @@ -25,7 +25,7 @@ def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block) self.shift = 0 last = nil - Regexp::Scanner.scan(input) do |type, token, text, ts, te| + Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te| type, token = *syntax.normalize(type, token) syntax.check! 
type, token diff --git a/lib/regexp_parser/parser.rb b/lib/regexp_parser/parser.rb index 2ac42f1..aa14a99 100644 --- a/lib/regexp_parser/parser.rb +++ b/lib/regexp_parser/parser.rb @@ -18,12 +18,12 @@ def initialize(type, token) end end - def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) - new.parse(input, syntax, &block) + def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) + new.parse(input, syntax, options: options, &block) end - def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) - root = Root.build(options_from_input(input)) + def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block) + root = Root.build(extract_options(input, options)) self.root = root self.node = root @@ -35,7 +35,7 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) self.captured_group_counts = Hash.new(0) - Regexp::Lexer.scan(input, syntax) do |token| + Regexp::Lexer.scan(input, syntax, options: options) do |token| parse_token(token) end @@ -54,14 +54,20 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block) :options_stack, :switching_options, :conditional_nesting, :captured_group_counts - def options_from_input(input) - return {} unless input.is_a?(::Regexp) + def extract_options(input, options) + if options && !input.is_a?(String) + raise ArgumentError, 'options cannot be supplied unless parsing a String' + end + + options = input.options if input.is_a?(::Regexp) + + return {} unless options - options = {} - options[:i] = true if input.options & ::Regexp::IGNORECASE != 0 - options[:m] = true if input.options & ::Regexp::MULTILINE != 0 - options[:x] = true if input.options & ::Regexp::EXTENDED != 0 - options + enabled_options = {} + enabled_options[:i] = true if options & ::Regexp::IGNORECASE != 0 + enabled_options[:m] = true if options & ::Regexp::MULTILINE != 0 + enabled_options[:x] = true if options & ::Regexp::EXTENDED != 0 + enabled_options end def nest(exp) diff --git a/lib/regexp_parser/scanner/scanner.rl 
b/lib/regexp_parser/scanner/scanner.rl index 82ca907..aef4856 100644 --- a/lib/regexp_parser/scanner/scanner.rl +++ b/lib/regexp_parser/scanner/scanner.rl @@ -737,21 +737,16 @@ class Regexp::Scanner # # This method may raise errors if a syntax error is encountered. # -------------------------------------------------------------------------- - def self.scan(input_object, &block) - new.scan(input_object, &block) + def self.scan(input_object, options: nil, &block) + new.scan(input_object, options: options, &block) end - def scan(input_object, &block) + def scan(input_object, options: nil, &block) self.literal = nil stack = [] - if input_object.is_a?(Regexp) - input = input_object.source - self.free_spacing = (input_object.options & Regexp::EXTENDED != 0) - else - input = input_object - self.free_spacing = false - end + input = input_object.is_a?(Regexp) ? input_object.source : input_object + self.free_spacing = free_spacing?(input_object, options) self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}] data = input.unpack("c*") if input.is_a?(String) @@ -817,6 +812,18 @@ class Regexp::Scanner attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack, :group_depth, :set_depth, :conditional_stack + def free_spacing?(input_object, options) + if options && !input_object.is_a?(String) + raise ArgumentError, 'options cannot be supplied unless scanning a String' + end + + options = input_object.options if input_object.is_a?(::Regexp) + + return false unless options + + options & Regexp::EXTENDED != 0 + end + def in_group? 
group_depth > 0 end diff --git a/spec/parser/options_spec.rb b/spec/parser/options_spec.rb new file mode 100644 index 0000000..fb9e669 --- /dev/null +++ b/spec/parser/options_spec.rb @@ -0,0 +1,28 @@ +require 'spec_helper' + +RSpec.describe('passing options to parse') do + it 'raises if parsing from a Regexp and options are passed' do + expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error( + ArgumentError, + 'options cannot be supplied unless parsing a String' + ) + end + + it 'sets options if parsing from a String' do + root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED) + + expect(root.options).to eq(m: true, x: true) + end + + it 'allows options to not be supplied when parsing from a Regexp' do + root = RP.parse(/a+/ix) + + expect(root.options).to eq(i: true, x: true) + end + + it 'has an empty option-hash when parsing from a String and passing no options' do + root = RP.parse('a+') + + expect(root.options).to be_empty + end +end diff --git a/spec/scanner/options_spec.rb b/spec/scanner/options_spec.rb new file mode 100644 index 0000000..d7fd98d --- /dev/null +++ b/spec/scanner/options_spec.rb @@ -0,0 +1,36 @@ +require 'spec_helper' + +RSpec.describe('passing options to scan') do + def expect_type_tokens(tokens, type_tokens) + expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens) + end + + it 'raises if scanning from a Regexp and options are passed' do + expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error( + ArgumentError, + 'options cannot be supplied unless scanning a String' + ) + end + + it 'sets free_spacing based on options if scanning from a String' do + expect_type_tokens( + RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED), + [ + %i[literal literal], + %i[quantifier one_or_more], + %i[free_space comment] + ] + ) + end + + it 'does not set free_spacing if scanning from a String and passing no options' do + expect_type_tokens( + RS.scan('a+#c'), + [ 
%i[literal literal], + %i[quantifier one_or_more], + %i[literal literal] + ] + ) + end +end