From 86132a429ca4b3d140b74afeff9b37102dc03bb5 Mon Sep 17 00:00:00 2001
From: tleish
Date: Tue, 29 Sep 2020 03:10:00 -0600
Subject: [PATCH] [Fix #8646] Optimize the process of finding of all files on RuboCop startup (#8806)

That's achieved by getting all directories first and then apply Exclude rules on directories before finding files.
---
 CHANGELOG.md                 |  4 ++++
 lib/rubocop/target_finder.rb | 44 ++++++++++++++++++++++++++----------
 2 files changed, 36 insertions(+), 12 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 0a05e7ff2fc..bb66c9fec52 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,10 @@
 
 * [#8796](https://github.com/rubocop-hq/rubocop/pull/8796): Add new `Lint/HashCompareByIdentity` cop. ([@fatkodima][])
 
+### Changes
+
+* [#8646](https://github.com/rubocop-hq/rubocop/issues/8646): Faster find of all files in `TargetFinder` class which improves rubocop initial startup speed. ([@tleish][])
+
 ## 0.92.0 (2020-09-25)
 
 ### New features
diff --git a/lib/rubocop/target_finder.rb b/lib/rubocop/target_finder.rb
index 76294783611..05907643f0f 100644
--- a/lib/rubocop/target_finder.rb
+++ b/lib/rubocop/target_finder.rb
@@ -81,20 +81,40 @@ def to_inspect?(file, hidden_files, base_dir_config)
     # the top level directories that are excluded in configuration in the
     # normal way (dir/**/*).
     def find_files(base_dir, flags)
+      # Get all wanted directories first to improve speed of finding all files.
+      # wanted_dir_patterns always returns at least "#{base_dir}/*", so no
+      # fallback pattern is needed for an empty list.
+      patterns = wanted_dir_patterns(base_dir, flags)
+
+      Dir.glob(patterns, flags).select { |path| FileTest.file?(path) }
+    end
+
+    # Returns a "<dir>/*" glob pattern for base_dir and for each directory
+    # found under the non-excluded top level directories that does not match
+    # the combined Exclude pattern. Hidden directories are only traversed
+    # when the caller passes File::FNM_DOTMATCH in flags.
+    def wanted_dir_patterns(base_dir, flags)
+      exclude_pattern = combined_exclude_glob_patterns(base_dir)
+      # FNM_PATHNAME makes "**/" match directories recursively and FNM_EXTGLOB
+      # enables the {a,b} alternatives when matching exclude_pattern.
+      flags |= File::FNM_PATHNAME | File::FNM_EXTGLOB
       wanted_toplevel_dirs = toplevel_dirs(base_dir, flags) -
                              excluded_dirs(base_dir)
-      wanted_toplevel_dirs.map! { |dir| dir << '/**/*' }
-
-      pattern = if wanted_toplevel_dirs.empty?
-                  # We need this special case to avoid creating the pattern
-                  # /**/* which searches the entire file system.
-                  ["#{base_dir}/**/*"]
-                else
-                  # Search the non-excluded top directories, but also add files
-                  # on the top level, which would otherwise not be found.
-                  wanted_toplevel_dirs.unshift("#{base_dir}/*")
-                end
-      Dir.glob(pattern, flags).select { |path| FileTest.file?(path) }
+      wanted_toplevel_dirs.map! { |dir| "#{dir}/**/" }
+
+      Dir.glob(wanted_toplevel_dirs, flags)
+         .map { |dir| "#{dir}*" } # add file glob pattern to end of each dir
+         .reject { |dir| File.fnmatch?(exclude_pattern, dir, flags) }
+         .unshift("#{base_dir}/*")
+    end
+
+    # Combines the String entries of AllCops/Exclude, with the base_dir
+    # prefix removed, into a single "<base_dir>/{a,b,...}" glob pattern.
+    def combined_exclude_glob_patterns(base_dir)
+      exclude = @config_store.for(base_dir).for_all_cops['Exclude']
+      patterns = exclude.grep(String)
+                        .map { |pattern| pattern.sub("#{base_dir}/", '') }
+      "#{base_dir}/{#{patterns.join(',')}}"
     end
 
     def toplevel_dirs(base_dir, flags)