Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle YAML.load_file on Psych 4 #392

Merged
merged 2 commits into from Jan 14, 2022
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
3 changes: 3 additions & 0 deletions .github/workflows/ci.yaml
Expand Up @@ -13,6 +13,7 @@ on:
jobs:
platforms:
strategy:
fail-fast: false
matrix:
os: [ubuntu, macos, windows]
ruby: ['2.5']
Expand Down Expand Up @@ -40,6 +41,7 @@ jobs:

psych4:
strategy:
fail-fast: false
matrix:
os: [ubuntu]
ruby: ['3.0']
Expand All @@ -57,6 +59,7 @@ jobs:

minimal:
strategy:
fail-fast: false
matrix:
os: [ubuntu]
ruby: ['jruby', 'truffleruby']
Expand Down
8 changes: 8 additions & 0 deletions CHANGELOG.md
@@ -1,5 +1,13 @@
# Unreleased

* Improve support of Psych 4. (#392)
Since `1.8.0`, `YAML.load_file` was no longer cached when Psych 4 was used. This is because `load_file` loads
in safe mode by default, so the Bootsnap cache could defeat that safety.
Now when precompiling YAML files, Bootsnap first try to parse them in safe mode, and if it can't fallback to unsafe mode,
and the cache contains a flag that records wether it was generated in safe mode or not.
`YAML.unsafe_load_file` will use safe caches just fine, but `YAML.load_file` will fallback to uncached YAML parsing
if the cache was generated using unsafe parsing.

* Minimize the Kernel.require extra stack frames. (#393)
This should reduce the noise generated by bootsnap on `LoadError`.

Expand Down
66 changes: 32 additions & 34 deletions ext/bootsnap/bootsnap.c
Expand Up @@ -75,7 +75,7 @@ struct bs_cache_key {
STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);

/* Effectively a schema version. Bumping invalidates all previous caches */
static const uint32_t current_version = 3;
static const uint32_t current_version = 4;

/* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
* new OS ABI, etc. */
Expand All @@ -91,8 +91,7 @@ static mode_t current_umask;
static VALUE rb_mBootsnap;
static VALUE rb_mBootsnap_CompileCache;
static VALUE rb_mBootsnap_CompileCache_Native;
static VALUE rb_eBootsnap_CompileCache_Uncompilable;
static ID uncompilable;
static VALUE rb_cBootsnap_CompileCache_UNCOMPILABLE;
static ID instrumentation_method;
static VALUE sym_miss;
static VALUE sym_stale;
Expand Down Expand Up @@ -120,10 +119,8 @@ static uint32_t get_ruby_platform(void);
* exception.
*/
static int bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data);
static VALUE prot_storage_to_output(VALUE arg);
static VALUE prot_input_to_output(VALUE arg);
static void bs_input_to_output(VALUE handler, VALUE args, VALUE input_data, VALUE * output_data, int * exception_tag);
static VALUE prot_input_to_storage(VALUE arg);
static int bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data);
struct s2o_data;
struct i2o_data;
Expand Down Expand Up @@ -151,12 +148,12 @@ Init_bootsnap(void)
rb_mBootsnap = rb_define_module("Bootsnap");
rb_mBootsnap_CompileCache = rb_define_module_under(rb_mBootsnap, "CompileCache");
rb_mBootsnap_CompileCache_Native = rb_define_module_under(rb_mBootsnap_CompileCache, "Native");
rb_eBootsnap_CompileCache_Uncompilable = rb_define_class_under(rb_mBootsnap_CompileCache, "Uncompilable", rb_eStandardError);
rb_cBootsnap_CompileCache_UNCOMPILABLE = rb_const_get(rb_mBootsnap_CompileCache, rb_intern("UNCOMPILABLE"));
rb_global_variable(&rb_cBootsnap_CompileCache_UNCOMPILABLE);

current_ruby_revision = get_ruby_revision();
current_ruby_platform = get_ruby_platform();

uncompilable = rb_intern("__bootsnap_uncompilable__");
instrumentation_method = rb_intern("_instrument");

sym_miss = ID2SYM(rb_intern("miss"));
Expand Down Expand Up @@ -426,6 +423,7 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr
#define ERROR_WITH_ERRNO -1
#define CACHE_MISS -2
#define CACHE_STALE -3
#define CACHE_UNCOMPILABLE -4

/*
* Read the cache key from the given fd, which must have position 0 (e.g.
Expand Down Expand Up @@ -507,14 +505,14 @@ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE *
if (data_size > 100000000000) {
*errno_provenance = "bs_fetch:fetch_cached_data:datasize";
errno = EINVAL; /* because wtf? */
ret = -1;
ret = ERROR_WITH_ERRNO;
goto done;
}
data = ALLOC_N(char, data_size);
nread = read(fd, data, data_size);
if (nread < 0) {
*errno_provenance = "bs_fetch:fetch_cached_data:read";
ret = -1;
ret = ERROR_WITH_ERRNO;
goto done;
}
if (nread != data_size) {
Expand All @@ -525,6 +523,10 @@ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE *
storage_data = rb_str_new(data, data_size);

*exception_tag = bs_storage_to_output(handler, args, storage_data, output_data);
if (*output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
ret = CACHE_UNCOMPILABLE;
goto done;
}
ret = 0;
done:
if (data != NULL) xfree(data);
Expand Down Expand Up @@ -737,7 +739,15 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
&output_data, &exception_tag, &errno_provenance
);
if (exception_tag != 0) goto raise;
else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0;
else if (res == CACHE_UNCOMPILABLE) {
/* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output`
This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */
if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno;
input_data = rb_str_new(contents, current_key.size);
bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
if (exception_tag != 0) goto raise;
goto succeed;
} else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0;
else if (res == ERROR_WITH_ERRNO) goto fail_errno;
else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */
}
Expand All @@ -754,7 +764,7 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
if (exception_tag != 0) goto raise;
/* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
* to cache anything; just return input_to_output(input_data) */
if (storage_data == uncompilable) {
if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
if (exception_tag != 0) goto raise;
goto succeed;
Expand All @@ -772,9 +782,13 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args
exception_tag = bs_storage_to_output(handler, args, storage_data, &output_data);
if (exception_tag != 0) goto raise;

/* If output_data is nil, delete the cache entry and generate the output
* using input_to_output */
if (NIL_P(output_data)) {
if (output_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
/* If storage_to_output returned `Uncompilable` we fallback to `input_to_output` */
bs_input_to_output(handler, args, input_data, &output_data, &exception_tag);
if (exception_tag != 0) goto raise;
} else if (NIL_P(output_data)) {
/* If output_data is nil, delete the cache entry and generate the output
* using input_to_output */
if (unlink(cache_path) < 0) {
errno_provenance = "bs_fetch:unlink";
goto fail_errno;
Expand Down Expand Up @@ -856,7 +870,7 @@ bs_precompile(char * path, VALUE path_v, char * cache_path, VALUE handler)

/* If input_to_storage raised Bootsnap::CompileCache::Uncompilable, don't try
* to cache anything; just return false */
if (storage_data == uncompilable) {
if (storage_data == rb_cBootsnap_CompileCache_UNCOMPILABLE) {
goto fail;
}
/* If storage_data isn't a string, we can't cache it */
Expand Down Expand Up @@ -919,7 +933,7 @@ struct i2s_data {
};

static VALUE
prot_storage_to_output(VALUE arg)
try_storage_to_output(VALUE arg)
{
struct s2o_data * data = (struct s2o_data *)arg;
return rb_funcall(data->handler, rb_intern("storage_to_output"), 2, data->storage_data, data->args);
Expand All @@ -934,7 +948,7 @@ bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * outp
.args = args,
.storage_data = storage_data,
};
*output_data = rb_protect(prot_storage_to_output, (VALUE)&s2o_data, &state);
*output_data = rb_protect(try_storage_to_output, (VALUE)&s2o_data, &state);
return state;
}

Expand Down Expand Up @@ -963,22 +977,6 @@ try_input_to_storage(VALUE arg)
return rb_funcall(data->handler, rb_intern("input_to_storage"), 2, data->input_data, data->pathval);
}

static VALUE
rescue_input_to_storage(VALUE arg, VALUE e)
{
return uncompilable;
}

static VALUE
prot_input_to_storage(VALUE arg)
{
struct i2s_data * data = (struct i2s_data *)arg;
return rb_rescue2(
try_input_to_storage, (VALUE)data,
rescue_input_to_storage, Qnil,
rb_eBootsnap_CompileCache_Uncompilable, 0);
}

static int
bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval, VALUE * storage_data)
{
Expand All @@ -988,6 +986,6 @@ bs_input_to_storage(VALUE handler, VALUE args, VALUE input_data, VALUE pathval,
.input_data = input_data,
.pathval = pathval,
};
*storage_data = rb_protect(prot_input_to_storage, (VALUE)&i2s_data, &state);
*storage_data = rb_protect(try_input_to_storage, (VALUE)&i2s_data, &state);
return state;
}
6 changes: 3 additions & 3 deletions lib/bootsnap/cli.rb
Expand Up @@ -138,7 +138,7 @@ def precompile_yaml_files(load_paths, exclude: self.exclude)

def precompile_yaml(*yaml_files)
Array(yaml_files).each do |yaml_file|
if CompileCache::YAML.precompile(yaml_file, cache_dir: cache_dir)
if CompileCache::YAML.precompile(yaml_file)
STDERR.puts(yaml_file) if verbose
end
end
Expand All @@ -161,7 +161,7 @@ def precompile_json_files(load_paths, exclude: self.exclude)

def precompile_json(*json_files)
Array(json_files).each do |json_file|
if CompileCache::JSON.precompile(json_file, cache_dir: cache_dir)
if CompileCache::JSON.precompile(json_file)
STDERR.puts(json_file) if verbose
end
end
Expand All @@ -183,7 +183,7 @@ def precompile_ruby_files(load_paths, exclude: self.exclude)

def precompile_ruby(*ruby_files)
Array(ruby_files).each do |ruby_file|
if CompileCache::ISeq.precompile(ruby_file, cache_dir: cache_dir)
if CompileCache::ISeq.precompile(ruby_file)
STDERR.puts(ruby_file) if verbose
end
end
Expand Down
4 changes: 3 additions & 1 deletion lib/bootsnap/compile_cache.rb
Expand Up @@ -2,7 +2,9 @@

module Bootsnap
module CompileCache
Error = Class.new(StandardError)
UNCOMPILABLE = BasicObject.new

Error = Class.new(StandardError)
PermissionError = Class.new(Error)

def self.setup(cache_dir:, iseq:, yaml:, json:)
Expand Down
14 changes: 9 additions & 5 deletions lib/bootsnap/compile_cache/iseq.rb
Expand Up @@ -7,7 +7,11 @@ module Bootsnap
module CompileCache
module ISeq
class << self
attr_accessor(:cache_dir)
attr_reader(:cache_dir)

def cache_dir=(cache_dir)
@cache_dir = cache_dir.end_with?("/") ? "#{cache_dir}iseq" : "#{cache_dir}-iseq"
end
end

has_ruby_bug_18250 = begin # https://bugs.ruby-lang.org/issues/18250
Expand All @@ -24,20 +28,20 @@ def self.input_to_storage(_, path)
iseq = begin
RubyVM::InstructionSequence.compile_file(path)
rescue SyntaxError
raise(Uncompilable, "syntax error")
return UNCOMPILABLE # syntax error
end

begin
iseq.to_binary
rescue TypeError
raise(Uncompilable, "ruby bug #18250")
return UNCOMPILABLE # ruby bug #18250
end
end
else
def self.input_to_storage(_, path)
RubyVM::InstructionSequence.compile_file(path).to_binary
rescue SyntaxError
raise(Uncompilable, "syntax error")
return UNCOMPILABLE # syntax error
end
end

Expand All @@ -61,7 +65,7 @@ def self.fetch(path, cache_dir: ISeq.cache_dir)
)
end

def self.precompile(path, cache_dir: ISeq.cache_dir)
def self.precompile(path)
Bootsnap::CompileCache::Native.precompile(
cache_dir,
path.to_s,
Expand Down
9 changes: 7 additions & 2 deletions lib/bootsnap/compile_cache/json.rb
Expand Up @@ -6,7 +6,12 @@ module Bootsnap
module CompileCache
module JSON
class << self
attr_accessor(:msgpack_factory, :cache_dir, :supported_options)
attr_accessor(:msgpack_factory, :supported_options)
attr_reader(:cache_dir)

def cache_dir=(cache_dir)
@cache_dir = cache_dir.end_with?("/") ? "#{cache_dir}json" : "#{cache_dir}-json"
end

def input_to_storage(payload, _)
obj = ::JSON.parse(payload)
Expand All @@ -24,7 +29,7 @@ def input_to_output(data, kwargs)
::JSON.parse(data, **(kwargs || {}))
end

def precompile(path, cache_dir: self.cache_dir)
def precompile(path)
Bootsnap::CompileCache::Native.precompile(
cache_dir,
path.to_s,
Expand Down