Skip to content

Commit

Permalink
Merge pull request #68 from owst/allow_options_when_parsing_strings
Browse files Browse the repository at this point in the history
Allow options to be passed when processing a String pattern
  • Loading branch information
jaynetics committed Sep 20, 2020
2 parents 84b5650 + eea67dd commit d109b56
Show file tree
Hide file tree
Showing 6 changed files with 114 additions and 26 deletions.
11 changes: 11 additions & 0 deletions README.md
Expand Up @@ -72,6 +72,17 @@ called with the results as follows:
* **Parser**: after completion, the block gets passed the root expression.
_The result of the block is returned._

All three methods accept either a `Regexp` or a `String` containing the pattern.
If a `String` is passed, `options` can be supplied:

```ruby
require 'regexp_parser'

Regexp::Parser.parse(
"a+ # Recognises a and A...",
options: ::Regexp::EXTENDED | ::Regexp::IGNORECASE
)
```

---
## Components
Expand Down
8 changes: 4 additions & 4 deletions lib/regexp_parser/lexer.rb
Expand Up @@ -11,11 +11,11 @@ class Regexp::Lexer

CLOSING_TOKENS = [:close].freeze

# Class-level shortcut: instantiates a lexer and delegates to the instance
# method #lex. The revised signature additionally accepts an `options:`
# keyword (default nil) and forwards it unchanged.
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
new.lex(input, syntax, &block)
def self.lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
new.lex(input, syntax, options: options, &block)
end

def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
def lex(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
syntax = Regexp::Syntax.new(syntax)

self.tokens = []
Expand All @@ -25,7 +25,7 @@ def lex(input, syntax = "ruby/#{RUBY_VERSION}", &block)
self.shift = 0

last = nil
Regexp::Scanner.scan(input) do |type, token, text, ts, te|
Regexp::Scanner.scan(input, options: options) do |type, token, text, ts, te|
type, token = *syntax.normalize(type, token)
syntax.check! type, token

Expand Down
30 changes: 18 additions & 12 deletions lib/regexp_parser/parser.rb
Expand Up @@ -18,12 +18,12 @@ def initialize(type, token)
end
end

# Class-level shortcut: instantiates a parser and delegates to the instance
# method #parse. The revised signature additionally accepts an `options:`
# keyword (default nil) and forwards it unchanged.
def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
new.parse(input, syntax, &block)
def self.parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
new.parse(input, syntax, options: options, &block)
end

def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
root = Root.build(options_from_input(input))
def parse(input, syntax = "ruby/#{RUBY_VERSION}", options: nil, &block)
root = Root.build(extract_options(input, options))

self.root = root
self.node = root
Expand All @@ -35,7 +35,7 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)

self.captured_group_counts = Hash.new(0)

Regexp::Lexer.scan(input, syntax) do |token|
Regexp::Lexer.scan(input, syntax, options: options) do |token|
parse_token(token)
end

Expand All @@ -54,14 +54,20 @@ def parse(input, syntax = "ruby/#{RUBY_VERSION}", &block)
:options_stack, :switching_options, :conditional_nesting,
:captured_group_counts

# Builds the option hash handed to Root.build.
#
# The previous version (options_from_input) only read the flags of a Regexp
# input. The revised version (extract_options) also accepts an explicit
# `options` bitmask, but only when the input is a String; otherwise it
# raises ArgumentError. For a Regexp input the flags come from
# Regexp#options. Returns {} when no flags are available, else a hash with
# :i, :m and/or :x set to true for each enabled flag.
def options_from_input(input)
return {} unless input.is_a?(::Regexp)
def extract_options(input, options)
if options && !input.is_a?(String)
raise ArgumentError, 'options cannot be supplied unless parsing a String'
end

# A Regexp carries its own flags; use them in place of the (nil) argument.
options = input.options if input.is_a?(::Regexp)

return {} unless options

options = {}
options[:i] = true if input.options & ::Regexp::IGNORECASE != 0
options[:m] = true if input.options & ::Regexp::MULTILINE != 0
options[:x] = true if input.options & ::Regexp::EXTENDED != 0
options
# Translate the integer bitmask into symbol keys, one per enabled flag.
enabled_options = {}
enabled_options[:i] = true if options & ::Regexp::IGNORECASE != 0
enabled_options[:m] = true if options & ::Regexp::MULTILINE != 0
enabled_options[:x] = true if options & ::Regexp::EXTENDED != 0
enabled_options
end

def nest(exp)
Expand Down
27 changes: 17 additions & 10 deletions lib/regexp_parser/scanner/scanner.rl
Expand Up @@ -737,21 +737,16 @@ class Regexp::Scanner
#
# This method may raise errors if a syntax error is encountered.
# --------------------------------------------------------------------------
# Class-level shortcut: instantiates a scanner and delegates to the instance
# method #scan. The revised signature additionally accepts an `options:`
# keyword (default nil) and forwards it unchanged.
def self.scan(input_object, &block)
new.scan(input_object, &block)
def self.scan(input_object, options: nil, &block)
new.scan(input_object, options: options, &block)
end

def scan(input_object, &block)
def scan(input_object, options: nil, &block)
self.literal = nil
stack = []

if input_object.is_a?(Regexp)
input = input_object.source
self.free_spacing = (input_object.options & Regexp::EXTENDED != 0)
else
input = input_object
self.free_spacing = false
end
input = input_object.is_a?(Regexp) ? input_object.source : input_object
self.free_spacing = free_spacing?(input_object, options)
self.spacing_stack = [{:free_spacing => free_spacing, :depth => 0}]

data = input.unpack("c*") if input.is_a?(String)
Expand Down Expand Up @@ -817,6 +812,18 @@ class Regexp::Scanner
attr_accessor :tokens, :literal, :block, :free_spacing, :spacing_stack,
:group_depth, :set_depth, :conditional_stack

# Decides whether scanning starts in free-spacing (extended) mode.
#
# input_object - the pattern to scan; a Regexp contributes its own flags.
# options      - optional Regexp flag bitmask; may only be given together
#                with a String input.
#
# Returns true when the effective flags include Regexp::EXTENDED, false
# otherwise (including when no flags are available at all).
# Raises ArgumentError if options are passed for a non-String input.
def free_spacing?(input_object, options)
  string_input = input_object.is_a?(String)
  unless string_input || !options
    raise ArgumentError, 'options cannot be supplied unless scanning a String'
  end

  # A Regexp's own flags replace the (necessarily absent) explicit options.
  flags = input_object.is_a?(::Regexp) ? input_object.options : options

  flags ? (flags & Regexp::EXTENDED) != 0 : false
end

# Reports whether the current group nesting depth is greater than zero.
def in_group?
  depth = group_depth
  depth > 0
end
Expand Down
28 changes: 28 additions & 0 deletions spec/parser/options_spec.rb
@@ -0,0 +1,28 @@
require 'spec_helper'

# Exercises the `options:` keyword of RP.parse: it is rejected for Regexp
# input, and otherwise determines the option hash of the returned root.
RSpec.describe('passing options to parse') do
  # Options combined with a Regexp input must be refused.
  it 'raises if parsing from a Regexp and options are passed' do
    expect { RP.parse(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
      ArgumentError,
      'options cannot be supplied unless parsing a String'
    )
  end

  # Flags passed alongside a String show up as symbol keys on the root.
  it 'sets options if parsing from a String' do
    root = RP.parse('a+', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED)

    expect(root.options).to eq(m: true, x: true)
  end

  # Without explicit options, a Regexp's own flags are used.
  it 'allows options to not be supplied when parsing from a Regexp' do
    root = RP.parse(/a+/ix)

    expect(root.options).to eq(i: true, x: true)
  end

  it 'has an empty option-hash when parsing from a String and passing no options' do
    root = RP.parse('a+')

    expect(root.options).to be_empty
  end
end
36 changes: 36 additions & 0 deletions spec/scanner/options_spec.rb
@@ -0,0 +1,36 @@
require 'spec_helper'

# Exercises the `options:` keyword of RS.scan: it is rejected for Regexp
# input, and for String input it controls whether `#c` is scanned as a
# free-space comment or as a literal.
RSpec.describe('passing options to scan') do
  # Compares only the [type, token] pair of each scanned token, ignoring
  # the remaining elements.
  def expect_type_tokens(tokens, type_tokens)
    expect(tokens.map { |type, token, *| [type, token] }).to eq(type_tokens)
  end

  # Options combined with a Regexp input must be refused.
  it 'raises if scanning from a Regexp and options are passed' do
    expect { RS.scan(/a+/, options: ::Regexp::EXTENDED) }.to raise_error(
      ArgumentError,
      'options cannot be supplied unless scanning a String'
    )
  end

  # With EXTENDED set, the trailing `#c` is expected as a comment token.
  it 'sets free_spacing based on options if scanning from a String' do
    expect_type_tokens(
      RS.scan('a+#c', options: ::Regexp::MULTILINE | ::Regexp::EXTENDED),
      [
        %i[literal literal],
        %i[quantifier one_or_more],
        %i[free_space comment]
      ]
    )
  end

  # Without options, the same `#c` is expected as a plain literal.
  it 'does not set free_spacing if scanning from a String and passing no options' do
    expect_type_tokens(
      RS.scan('a+#c'),
      [
        %i[literal literal],
        %i[quantifier one_or_more],
        %i[literal literal]
      ]
    )
  end
end

0 comments on commit d109b56

Please sign in to comment.