forked from rubocop/rubocop
-
Notifications
You must be signed in to change notification settings - Fork 0
/
result_cache.rb
227 lines (193 loc) · 7.79 KB
/
result_cache.rb
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
# frozen_string_literal: true
require 'digest/sha1'
require 'find'
require 'etc'
require 'zlib'
module RuboCop
  # Provides functionality for caching rubocop runs.
  # @api private
  class ResultCache
    # Invocation options that change only presentation or process behavior
    # (output color/format, fail-fast, parallelism, ...), never which
    # offenses are detected. They are excluded from the cache key in
    # relevant_options_digest.
    NON_CHANGING = %i[color format formatters out debug fail_level auto_correct
                      cache fail_fast stdin parallel].freeze
# Remove old files so that the cache doesn't grow too big. When the
# threshold MaxFilesInCache has been exceeded, the oldest 50% of all the
# files in the cache are removed. The reason for removing so much is that
# cleaning should be done relatively seldom, since there is a slight risk
# that some other RuboCop process was just about to read the file, when
# there's parallel execution and the cache is shared.
def self.cleanup(config_store, verbose, cache_root = nil)
  return if inhibit_cleanup # OPTIMIZE: For faster testing

  # Fall back to the configured cache root when the caller didn't pass one.
  root = cache_root || cache_root(config_store)
  return unless File.exist?(root)

  files, dirs = Find.find(root).partition { |entry| File.file?(entry) }
  return unless requires_file_removal?(files.length, config_store)

  remove_oldest_files(files, dirs, root, verbose)
end
class << self
  # @api private
  # Files that RuboCop itself has `require`d; they are subtracted from the
  # cache-key inputs in rubocop_extra_features because gem versions already
  # account for their content.
  attr_accessor :rubocop_required_features
  ResultCache.rubocop_required_features = []

  private

  # True when the cache holds more files than the MaxFilesInCache setting
  # allows. The `> 1` guard means a single cached file never triggers
  # cleanup regardless of configuration.
  def requires_file_removal?(file_count, config_store)
    file_count > 1 &&
      file_count > config_store.for_pwd.for_all_cops['MaxFilesInCache']
  end

  # Delete the oldest ~half of the cached files (by mtime), then prune
  # directories that became empty.
  def remove_oldest_files(files, dirs, cache_root, verbose)
    # Add 1 to half the number of files, so that we remove the file if
    # there's only 1 left.
    remove_count = 1 + files.length / 2
    puts "Removing the #{remove_count} oldest files from #{cache_root}" if verbose
    sorted = files.sort_by { |path| File.mtime(path) }
    remove_files(sorted, dirs, remove_count)
  rescue Errno::ENOENT
    # This can happen if parallel RuboCop invocations try to remove the
    # same files. No problem.
    # NOTE(review): $ERROR_INFO is the stdlib `English` alias for $!; this
    # file does not `require 'English'` itself — presumably loaded elsewhere
    # in RuboCop. Verify.
    puts $ERROR_INFO if verbose
  end

  # Delete the first remove_count entries of files (already sorted oldest
  # first by the caller), then remove any directory left empty.
  def remove_files(files, dirs, remove_count)
    # Batch file deletions, deleting over 130,000+ files will crash
    # File.delete.
    files[0, remove_count].each_slice(10_000).each do |files_slice|
      File.delete(*files_slice)
    end
    dirs.each { |dir| Dir.rmdir(dir) if Dir["#{dir}/*"].empty? }
  end
end
# Resolve the directory holding all cached results, by priority:
# 1. the RUBOCOP_CACHE_ROOT environment variable,
# 2. the CacheRootDirectory configuration setting,
# 3. $XDG_CACHE_HOME/<uid> (user ID appended so the user has write access),
# 4. ~/.cache.
# A "rubocop_cache" subdirectory of the winner is always appended.
# Note: later alternatives are only evaluated when earlier ones are nil,
# so config_store is untouched when the environment variable is set.
def self.cache_root(config_store)
  root = ENV['RUBOCOP_CACHE_ROOT'] ||
         config_store.for_pwd.for_all_cops['CacheRootDirectory'] ||
         (if ENV.key?('XDG_CACHE_HOME')
            # Include user ID in the path to make sure the user has write
            # access.
            File.join(ENV['XDG_CACHE_HOME'], Process.uid.to_s)
          else
            File.join(ENV['HOME'], '.cache')
          end)
  File.join(root, 'rubocop_cache')
end
# @return the AllowSymlinksInCacheRootDirectory setting for the current
#   working directory's configuration (used by save's symlink protection).
def self.allow_symlinks_in_cache_location?(config_store)
  all_cops_config = config_store.for_pwd.for_all_cops
  all_cops_config['AllowSymlinksInCacheRootDirectory']
end
attr :path
# @param file [String] path of the file whose inspection results are cached
# @param team the cop team for this run; must respond to
#   #external_dependency_checksum (consumed via #team_checksum)
# @param options [Hash] invocation options (feeds #context_checksum;
#   options[:cache_root] overrides the cache location)
# @param config_store used to resolve the cache root, symlink policy, and
#   the per-file config signature
# @param cache_root [String, nil] explicit cache root; takes precedence
#   over options and configuration when given
def initialize(file, team, options, config_store, cache_root = nil)
  cache_root ||= options[:cache_root]
  cache_root ||= ResultCache.cache_root(config_store)
  @allow_symlinks_in_cache_location =
    ResultCache.allow_symlinks_in_cache_location?(config_store)
  # Cache file path is <root>/<program checksum>/<context checksum>/<file
  # checksum>, so a change to RuboCop itself, to the run's team/options,
  # or to the inspected file each invalidates the entry.
  @path = File.join(cache_root,
                    rubocop_checksum,
                    context_checksum(team, options),
                    file_checksum(file, config_store))
  @cached_data = CachedData.new(file)
end
# @return [Boolean] whether a cached result file exists for this exact
#   (program, context, inspected-file) combination.
def valid?
  File.exist?(@path)
end
# Read the cached JSON for this entry and deserialize it via CachedData.
#
# Uses File.read rather than IO.read: IO.read spawns a subprocess when the
# path starts with "|" (flagged by RuboCop's own Security/IoMethods cop),
# and a cache path must never be interpreted that way. For ordinary paths
# the two are identical.
def load
  @cached_data.from_json(File.read(@path, encoding: Encoding::UTF_8))
end
# Persist +offenses+ for this entry. Caching is best-effort: an unwritable
# cache directory degrades to running without a cache rather than aborting.
# The write is made atomic by writing to a randomly suffixed temporary
# file first and then renaming it onto the final path.
def save(offenses)
  dir = File.dirname(@path)
  begin
    FileUtils.mkdir_p(dir)
  rescue Errno::EACCES, Errno::EROFS => e
    # Read-only or permission-restricted cache location: warn and skip.
    warn "Couldn't create cache directory. Continuing without cache."\
         "\n #{e.message}"
    return
  end
  # Random suffix keeps concurrent writers from sharing one temp file.
  preliminary_path = "#{@path}_#{rand(1_000_000_000)}"
  # RuboCop must be in control of where its cached data is stored. A
  # symbolic link anywhere in the cache directory tree can be an
  # indication that a symlink attack is being waged.
  return if symlink_protection_triggered?(dir)
  File.open(preliminary_path, 'w', encoding: Encoding::UTF_8) do |f|
    f.write(@cached_data.to_json(offenses))
  end
  # The preliminary path is used so that if there are multiple RuboCop
  # processes trying to save data for the same inspected file
  # simultaneously, the only problem we run in to is a competition who gets
  # to write to the final file. The contents are the same, so no corruption
  # of data should occur.
  # NOTE(review): FileUtils is not required in this file — presumably
  # loaded elsewhere in RuboCop; verify.
  FileUtils.mv(preliminary_path, @path)
end

private
# A symlink anywhere in the cache directory tree is treated as a potential
# symlink attack, unless the configuration explicitly allows symlinks in
# the cache location.
def symlink_protection_triggered?(path)
  return false if @allow_symlinks_in_cache_location

  any_symlink?(path)
end
# Walk from +path+ up toward the filesystem root, reporting whether any
# component on the way is a symbolic link. The root itself is never
# examined: the walk stops once File.dirname returns its input unchanged.
def any_symlink?(path)
  current = path
  loop do
    parent = File.dirname(current)
    break if parent == current

    if File.symlink?(current)
      warn "Warning: #{current} is a symlink, which is not allowed."
      return true
    end
    current = parent
  end
  false
end
# SHA1 over the file's path, permission bits, per-file configuration
# signature, and contents — changing any of these must invalidate the
# cached offenses for the file.
def file_checksum(file, config_store)
  digest = Digest::SHA1.new
  mode = File.stat(file).mode
  config_signature = config_store.for_file(file).signature
  digest.update("#{file}#{mode}#{config_signature}")
  digest.file(file)
  digest.hexdigest
rescue Errno::ENOENT
  # Spurious files that come and go should not cause a crash, at least not
  # here.
  '_'
end
class << self
  # Process-global state: source_checksum memoizes the checksum of the
  # running RuboCop program (see #rubocop_checksum); inhibit_cleanup is a
  # switch used to skip self.cleanup (per its OPTIMIZE note, for tests).
  attr_accessor :source_checksum, :inhibit_cleanup
end
# The checksum of the rubocop program running the inspection, computed
# once per process and memoized in ResultCache.source_checksum. It folds
# in a CRC of each relevant source file plus the RuboCop and rubocop-ast
# version strings.
def rubocop_checksum
  ResultCache.source_checksum ||=
    begin
      digest = Digest::SHA1.new
      tracked_files = rubocop_extra_features.select { |feature| File.file?(feature) }
      tracked_files.sort.each do |feature|
        # Hash a CRC of the file's bytes; mtime is not a reliable signal.
        digest << Zlib.crc32(File.binread(feature)).to_s
      end
      digest << RuboCop::Version::STRING << RuboCop::AST::Version::STRING
      digest.hexdigest
    end
end
# All Ruby sources that can influence cop output: every file loaded via
# `require` ($LOADED_FEATURES) plus everything under the gem's exe
# directory. Files registered in ResultCache.rubocop_required_features are
# excluded — their gem versions stand in for their content.
def rubocop_extra_features
  lib_root = File.join(File.dirname(__FILE__), '..')
  exe_root = File.join(lib_root, '..', 'exe')
  executables = Find.find(exe_root).to_a
  ($LOADED_FEATURES + executables) - ResultCache.rubocop_required_features
end
# Return a hash of the options given at invocation, minus the ones that have
# no effect on which offenses and disabled line ranges are found, and thus
# don't affect caching.
def relevant_options_digest(options)
  significant = options.reject { |key, _| NON_CHANGING.include?(key) }
  # Collapse every run of non-letter characters to "_" so the result is a
  # compact, filesystem-safe token.
  significant.to_s.gsub(/[^a-z]+/i, '_')
end
# The external dependency checksums are cached per RuboCop team so that
# the checksums don't need to be recomputed for each file.
def team_checksum(team)
  # Keyed by object identity: equal-but-distinct team objects are treated
  # as separate runs.
  @team_checksums ||= {}.compare_by_identity
  @team_checksums[team] ||= team.external_dependency_checksum
end
# We combine team and options into a single "context" checksum to avoid
# making file names that are too long for some filesystems to handle.
# This context is for anything that's not (1) the RuboCop executable
# checksum or (2) the inspected file checksum.
def context_checksum(team, options)
  context_parts = [team_checksum(team), relevant_options_digest(options)]
  Digest::SHA1.hexdigest(context_parts.join)
end
end
end