From 6d3c15cdc29dc139c8bebd1913c6c273aa673d16 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 15 Mar 2016 21:56:53 +0900 Subject: [PATCH 1/4] Avoid re-parsing by pre-computing a method-body table RDoc::Parser::C#find_body parsed file_content every time to find the method body. Now it creates a method-body table first, which avoids multiple parsing. --- lib/rdoc/parser/c.rb | 58 ++++++++++++++++++++++++++++++++------------ 1 file changed, 43 insertions(+), 15 deletions(-) diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index fd336f5f5b..5ab84a8866 100644 --- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -605,18 +605,46 @@ def find_attr_comment var_name, attr_name, read = nil, write = nil RDoc::Comment.new comment, @top_level end + ## + # Generate a Ruby-method table + + def gen_body_table file_content + table = {} + file_content.scan(%r{ + ((?>/\*.*?\*/\s*)?) + ((?:(?:\w+)\s+)? + (?:intern\s+)?VALUE\s+(\w+) + \s*(?:\([^)]*\))(?:[^;]|$)) + | ((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+(\w+)\s+(\w+)) + | ^\s*\#\s*define\s+(\w+)\s+(\w+) + }xm) do + case + when $1 + table[$3] = [:func_def, $1, $2, $~.offset(2)] if !table[$3] || table[$3][0] != :func_def + when $4 + table[$6] = [:macro_def, $4, $5, $~.offset(5), $7] if !table[$6] || table[$6][0] == :macro_alias + when $8 + table[$8] ||= [:macro_alias, $9] + end + end + table + end + ## # Find the C code corresponding to a Ruby method def find_body class_name, meth_name, meth_obj, file_content, quiet = false - case file_content - when %r%((?>/\*.*?\*/\s*)?) - ((?:(?:\w+)\s+)? 
- (?:intern\s+)?VALUE\s+#{meth_name} - \s*(\([^)]*\))([^;]|$))%xm then - comment = RDoc::Comment.new $1, @top_level - body = $2 - offset, = $~.offset(2) + if file_content + @body_table ||= {} + @body_table[file_content] ||= gen_body_table file_content + type, *args = @body_table[file_content][meth_name] + end + + case type + when :func_def + comment = RDoc::Comment.new args[0], @top_level + body = args[1] + offset, = args[2] comment.remove_private if comment @@ -645,12 +673,12 @@ def find_body class_name, meth_name, meth_obj, file_content, quiet = false meth_obj.line = file_content[0, offset].count("\n") + 1 body - when %r%((?>/\*.*?\*/\s*))^\s*(\#\s*define\s+#{meth_name}\s+(\w+))%m then - comment = RDoc::Comment.new $1, @top_level - body = $2 - offset = $~.offset(2).first + when :macro_def + comment = RDoc::Comment.new args[0], @top_level + body = args[1] + offset, = args[2] - find_body class_name, $3, meth_obj, file_content, true + find_body class_name, args[3], meth_obj, file_content, true comment.normalize find_modifiers comment, meth_obj @@ -664,11 +692,11 @@ def find_body class_name, meth_name, meth_obj, file_content, quiet = false meth_obj.line = file_content[0, offset].count("\n") + 1 body - when %r%^\s*\#\s*define\s+#{meth_name}\s+(\w+)%m then + when :macro_alias # with no comment we hope the aliased definition has it and use it's # definition - body = find_body(class_name, $1, meth_obj, file_content, true) + body = find_body(class_name, args[0], meth_obj, file_content, true) return body if body From bd698a44b17d97f42e1b1173c070bd1337f18507 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 15 Mar 2016 22:03:00 +0900 Subject: [PATCH 2/4] Avoid re-parsing by pre-computing a const table Similar to 6d3c15cdc29dc139c8bebd1913c6c273aa673d16 --- lib/rdoc/parser/c.rb | 48 ++++++++++++++++++++++++++++---------------- 1 file changed, 31 insertions(+), 17 deletions(-) diff --git a/lib/rdoc/parser/c.rb b/lib/rdoc/parser/c.rb index 5ab84a8866..6c2a47fff0 100644 
--- a/lib/rdoc/parser/c.rb +++ b/lib/rdoc/parser/c.rb @@ -791,28 +791,42 @@ def find_class_comment class_name, class_mod class_mod.add_comment comment, @top_level end + ## + # Generate a const table + + def gen_const_table file_content + table = {} + @content.scan(%r{ + ((?>^\s*/\*.*?\*/\s+)) + rb_define_(\w+)\((?:\s*(?:\w+),)?\s* + "(\w+)"\s*, + .*?\)\s*; + | Document-(?:const|global|variable):\s + ((?:\w+::)*\w+) + \s*?\n((?>.*?\*/)) + }mxi) do + case + when $1 then table[[$2, $3]] = $1 + when $4 then table[$4] = "/*\n" + $5 + end + end + table + end + ## # Finds a comment matching +type+ and +const_name+ either above the # comment or in the matching Document- section. def find_const_comment(type, const_name, class_name = nil) - comment = if @content =~ %r%((?>^\s*/\*.*?\*/\s+)) - rb_define_#{type}\((?:\s*(\w+),)?\s* - "#{const_name}"\s*, - .*?\)\s*;%xmi then - $1 - elsif class_name and - @content =~ %r%Document-(?:const|global|variable):\s - #{class_name}::#{const_name} - \s*?\n((?>.*?\*/))%xm then - "/*\n#{$1}" - elsif @content =~ %r%Document-(?:const|global|variable): - \s#{const_name} - \s*?\n((?>.*?\*/))%xm then - "/*\n#{$1}" - else - '' - end + @const_table ||= {} + @const_table[@content] ||= gen_const_table @content + table = @const_table[@content] + + comment = + table[[type, const_name]] || + (class_name && table[class_name + "::" + const_name]) || + table[const_name] || + '' RDoc::Comment.new comment, @top_level end From 869202ffecd6c4d033124a486703ee494c1dd169 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 15 Mar 2016 22:04:01 +0900 Subject: [PATCH 3/4] Cache the result of Time.parse Time.parse is painfully slow. 
--- lib/rdoc/parser/changelog.rb | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/lib/rdoc/parser/changelog.rb b/lib/rdoc/parser/changelog.rb index a3567c1f81..60adeb88b9 100644 --- a/lib/rdoc/parser/changelog.rb +++ b/lib/rdoc/parser/changelog.rb @@ -101,9 +101,11 @@ def create_items items # Groups +entries+ by date. def group_entries entries + @time_cache ||= {} entries.group_by do |title, _| begin - Time.parse(title).strftime '%Y-%m-%d' + time = @time_cache[title] + (time || Time.parse(title)).strftime '%Y-%m-%d' rescue NoMethodError, ArgumentError time, = title.split ' ', 2 Time.parse(time).strftime '%Y-%m-%d' @@ -127,6 +129,7 @@ def group_entries entries # 'README.EXT.ja: ditto']] def parse_entries + @time_cache ||= {} entries = [] entry_name = nil entry_body = [] @@ -142,6 +145,7 @@ def parse_entries begin time = Time.parse entry_name + @time_cache[entry_name] = time # HACK Ruby 1.8 does not raise ArgumentError for Time.parse "Other" entry_name = nil unless entry_name =~ /#{time.year}/ rescue NoMethodError @@ -184,6 +188,7 @@ def parse_entries # Converts the ChangeLog into an RDoc::Markup::Document def scan + @time_cache = {} entries = parse_entries grouped_entries = group_entries entries From 72f14d827c861fb764f30bd5bd909fe5c6c66873 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Tue, 15 Mar 2016 22:06:22 +0900 Subject: [PATCH 4/4] Remove an extra branch from the bottleneck RDoc::RubyLex#getc is the bottleneck in RDoc. It determined which ungetc buffer to use by branching upon `@here_header`. 
We can remove the branch by replacing the `@here_header` flag with `@current_readed`, which represents the current buffer directly. --- lib/rdoc/ruby_lex.rb | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/lib/rdoc/ruby_lex.rb b/lib/rdoc/ruby_lex.rb index 91b90ab2cf..d56e693a3d 100644 --- a/lib/rdoc/ruby_lex.rb +++ b/lib/rdoc/ruby_lex.rb @@ -101,10 +101,10 @@ def initialize(content, options) @exp_line_no = @line_no = 1 @here_readed = [] @readed = [] + @current_readed = @readed @rests = [] @seek = 0 - @here_header = false @indent = 0 @indent_stack = [] @lex_state = :EXPR_BEG @@ -160,7 +160,7 @@ def get_readed end readed = @readed.join("") - @readed = [] + @readed.clear readed end @@ -170,13 +170,9 @@ def getc @rests.push nil unless buf_input end c = @rests.shift - if @here_header - @here_readed.push c - else - @readed.push c - end + @current_readed.push c @seek += 1 - if c == "\n" + if c == "\n".freeze @line_no += 1 @char_no = 0 else @@ -282,7 +278,7 @@ def initialize_input @indent_stack = [] @lex_state = :EXPR_BEG @space_seen = false - @here_header = false + @current_readed = @readed @continue = false prompt @@ -461,8 +457,8 @@ def lex_init() @indent_stack.pop end end - @here_header = false - @here_readed = [] + @current_readed = @readed + @here_readed.clear Token(TkNL) end @@ -1020,7 +1016,7 @@ def identify_here_document doc = '"' end - @here_header = false + @current_readed = @readed while l = gets l = l.sub(/(:?\r)?\n\z/, "\n") if (indent ? l.strip : l.chomp) == quoted @@ -1037,7 +1033,7 @@ def identify_here_document doc << '"' end - @here_header = true + @current_readed = @here_readed @here_readed.concat reserve while ch = reserve.pop ungetc ch