Skip to content

Commit

Permalink
[GR-19220] Avoid excessive splitting of nodes caused by a unique proc…
Browse files Browse the repository at this point in the history
… being created per call when a method reference would suffice (#3529)

PullRequest: truffleruby/4237
  • Loading branch information
andrykonchin committed Apr 12, 2024
2 parents b4fe248 + 2d0171e commit dc120e7
Show file tree
Hide file tree
Showing 2 changed files with 29 additions and 27 deletions.
55 changes: 28 additions & 27 deletions src/main/ruby/truffleruby/core/splitter.rb
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def split(string, pattern, limit, &orig_block)
# To simplify the code, we present a single block
# that either calls user (origin) block or adds a substring to the resulting array
# See motivation: https://github.com/oracle/truffleruby/pull/2052#issuecomment-663449395
block = orig_block || result.method(:<<).to_proc
callable = orig_block || result.method(:<<)

return (orig_block ? string : result) if string.empty?

Expand All @@ -57,7 +57,7 @@ def split(string, pattern, limit, &orig_block)
if limit == 1
dup_string = string.dup

block.call(dup_string)
callable.call(dup_string)
return orig_block ? dup_string : result
end

Expand All @@ -72,47 +72,48 @@ def split(string, pattern, limit, &orig_block)
# See motivation: https://github.com/oracle/truffleruby/pull/2052#issuecomment-663494235
return Primitive.string_awk_split string, awk_limit, orig_block
elsif Primitive.is_a?(pattern, Regexp)
split_type_regexp(string, pattern, limit, block)
split_type_regexp(string, pattern, limit, callable)
else
pattern = StringValue(pattern)

valid_encoding?(string)
valid_encoding?(pattern)

if pattern.empty?
split_type_chars(string, limit, block)
split_type_chars(string, limit, callable)
else
split_type_string(string, pattern, limit, block)
split_type_string(string, pattern, limit, callable)
end
end

orig_block ? string : result
end
Truffle::Graal.always_split(instance_method(:split))

private

def valid_encoding?(string)
raise ArgumentError, "invalid byte sequence in #{string.encoding.name}" unless string.valid_encoding?
end

def split_type_chars(string, limit, block)
def split_type_chars(string, limit, callable)
if limit > 0
last = string.size > (limit - 1) ? string[(limit - 1)..-1] : empty_string(string)

string.each_char.each_with_index do |char, index|
break if index == limit - 1
block.call(char)
callable.call(char)
end

block.call(last)
callable.call(last)
else
string.each_char(&block)
string.each_char { |c| callable.call(c) }

block.call(empty_string(string)) if tail_empty?(limit)
callable.call(empty_string(string)) if tail_empty?(limit)
end
end

def split_type_string(string, pattern, limit, block)
def split_type_string(string, pattern, limit, callable)
pos = 0
empty_count = 0
limited = limit > 0
Expand All @@ -128,24 +129,24 @@ def split_type_string(string, pattern, limit, block)
break unless nxt

match_size = nxt - pos
empty_count = add_substring(string, ret, string.byteslice(pos, match_size), empty_count, block)
empty_count = add_substring(string, ret, string.byteslice(pos, match_size), empty_count, callable)

pos = nxt + pat_size
count += 1
end

# No more separators, but we need to grab the last part still.
empty_count = add_substring(string, ret, string.byteslice(pos, str_size - pos), empty_count, block)
empty_count = add_substring(string, ret, string.byteslice(pos, str_size - pos), empty_count, callable)

if tail_empty?(limit)
add_empty(string, ret, empty_count, block)
add_empty(string, ret, empty_count, callable)
end
end

def split_type_regexp(string, pattern, limit, block)
def split_type_regexp(string, pattern, limit, callable)
# Handle // as a special case.
if pattern.source.empty?
return split_type_chars(string, limit, block)
return split_type_chars(string, limit, callable)
end

start = 0
Expand All @@ -164,12 +165,12 @@ def split_type_regexp(string, pattern, limit, block)

unless collapsed && (Primitive.match_data_byte_begin(match, 0) == last_match_end)
substring = Truffle::RegexpOperations.pre_match_from(match, last_match_end)
empty_count = add_substring(string, ret, substring, empty_count, block)
empty_count = add_substring(string, ret, substring, empty_count, callable)

# length > 1 means there are captures
if match.length > 1
match.captures.compact.each do |capture|
empty_count = add_substring(string, ret, capture, empty_count, block)
empty_count = add_substring(string, ret, capture, empty_count, callable)
end
end

Expand All @@ -186,31 +187,31 @@ def split_type_regexp(string, pattern, limit, block)
end

if last_match
empty_count = add_substring(string, ret, last_match.post_match, empty_count, block)
empty_count = add_substring(string, ret, last_match.post_match, empty_count, callable)
elsif ret.empty?
empty_count = add_substring(string, ret, string.dup, empty_count, block)
empty_count = add_substring(string, ret, string.dup, empty_count, callable)
end

if tail_empty?(limit)
add_empty(string, ret, empty_count, block)
add_empty(string, ret, empty_count, callable)
end

block ? string : ret
callable ? string : ret
end


def add_substring(string, array, substring, empty_count, block)
def add_substring(string, array, substring, empty_count, callable)
return empty_count + 1 if substring.length == 0 # remember another one empty match

add_empty(string, array, empty_count, block)
add_empty(string, array, empty_count, callable)

block.call(substring)
callable.call(substring)

0 # always release all empties when we get non empty substring
end

def add_empty(string, array, count, block)
count.times { block.call(empty_string(string)) }
def add_empty(string, array, count, callable)
count.times { callable.call(empty_string(string)) }
end

def empty_string(original)
Expand Down
1 change: 1 addition & 0 deletions src/main/ruby/truffleruby/core/string.rb
Original file line number Diff line number Diff line change
Expand Up @@ -320,6 +320,7 @@ def scan(pattern, &block)
def split(pattern = nil, limit = undefined, &block)
Truffle::Splitter.split(Primitive.dup_as_string_instance(self), pattern, limit, &block)
end
Truffle::Graal.always_split(instance_method(:split))

def squeeze(*strings)
str = Primitive.dup_as_string_instance(self)
Expand Down

0 comments on commit dc120e7

Please sign in to comment.