From 89158d4d1c08bfa45d174c07eb75c0faa24dd7e8 Mon Sep 17 00:00:00 2001 From: Jean Boussier Date: Thu, 13 Jan 2022 12:51:39 +0100 Subject: [PATCH] Handle YAML.load_file on Psych 4 --- ext/bootsnap/bootsnap.c | 55 ++++-- lib/bootsnap/compile_cache/yaml.rb | 266 +++++++++++++++++++------- test/compile_cache/yaml_test.rb | 76 ++++++-- test/compile_cache_key_format_test.rb | 2 +- 4 files changed, 296 insertions(+), 103 deletions(-) diff --git a/ext/bootsnap/bootsnap.c b/ext/bootsnap/bootsnap.c index fc2279c4..3bf2bc22 100644 --- a/ext/bootsnap/bootsnap.c +++ b/ext/bootsnap/bootsnap.c @@ -75,7 +75,7 @@ struct bs_cache_key { STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE); /* Effectively a schema version. Bumping invalidates all previous caches */ -static const uint32_t current_version = 3; +static const uint32_t current_version = 4; /* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a * new OS ABI, etc. */ @@ -426,6 +426,7 @@ open_current_file(char * path, struct bs_cache_key * key, const char ** errno_pr #define ERROR_WITH_ERRNO -1 #define CACHE_MISS -2 #define CACHE_STALE -3 +#define CACHE_UNCOMPILABLE -4 /* * Read the cache key from the given fd, which must have position 0 (e.g. @@ -507,14 +508,14 @@ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * if (data_size > 100000000000) { *errno_provenance = "bs_fetch:fetch_cached_data:datasize"; errno = EINVAL; /* because wtf? */ - ret = -1; + ret = ERROR_WITH_ERRNO; goto done; } data = ALLOC_N(char, data_size); nread = read(fd, data, data_size); if (nread < 0) { *errno_provenance = "bs_fetch:fetch_cached_data:read"; - ret = -1; + ret = ERROR_WITH_ERRNO; goto done; } if (nread != data_size) { @@ -525,6 +526,10 @@ fetch_cached_data(int fd, ssize_t data_size, VALUE handler, VALUE args, VALUE * storage_data = rb_str_new(data, data_size); *exception_tag = bs_storage_to_output(handler, args, storage_data, output_data); + if (*output_data == uncompilable) { + ret = CACHE_UNCOMPILABLE; + goto done; + } ret = 0; done: if (data != NULL) xfree(data); @@ -737,7 +742,14 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args &output_data, &exception_tag, &errno_provenance ); if (exception_tag != 0) goto raise; - else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0; + else if (res == CACHE_UNCOMPILABLE) { + /* If fetch_cached_data returned `Uncompilable` we fallback to `input_to_output` + This happens if we have say, an unsafe YAML cache, but try to load it in safe mode */ + if (bs_read_contents(current_fd, current_key.size, &contents, &errno_provenance) < 0) goto fail_errno; + input_data = rb_str_new(contents, current_key.size); + bs_input_to_output(handler, args, input_data, &output_data, &exception_tag); + if (exception_tag != 0) goto raise; + } else if (res == CACHE_MISS || res == CACHE_STALE) valid_cache = 0; else if (res == ERROR_WITH_ERRNO) goto fail_errno; else if (!NIL_P(output_data)) goto succeed; /* fast-path, goal */ } @@ -772,9 +784,13 @@ bs_fetch(char * path, VALUE path_v, char * cache_path, VALUE handler, VALUE args exception_tag = bs_storage_to_output(handler, args, storage_data, &output_data); if (exception_tag != 0) goto raise; - /* If output_data is nil, delete the cache entry and generate the output - * using input_to_output */ - if (NIL_P(output_data)) { + if (output_data == uncompilable) { + /* If storage_to_output returned `Uncompilable` we fallback to `input_to_output` */ + bs_input_to_output(handler, args, input_data, &output_data, &exception_tag); + if (exception_tag != 0) goto raise; + } else if (NIL_P(output_data)) { + /* If output_data is nil, delete the cache entry and generate the output + * using input_to_output */ if (unlink(cache_path) < 0) { errno_provenance = "bs_fetch:unlink"; goto fail_errno; @@ -919,12 +935,27 @@ struct i2s_data { }; static VALUE -prot_storage_to_output(VALUE arg) +rescue_uncompilable(VALUE arg, VALUE e) +{ + return uncompilable; +} + +static VALUE +try_storage_to_output(VALUE arg) { struct s2o_data * data = (struct s2o_data *)arg; return rb_funcall(data->handler, rb_intern("storage_to_output"), 2, data->storage_data, data->args); } +static VALUE +prot_storage_to_output(VALUE arg) +{ + return rb_rescue2( + try_storage_to_output, arg, + rescue_uncompilable, Qnil, + rb_eBootsnap_CompileCache_Uncompilable, 0); +} + static int bs_storage_to_output(VALUE handler, VALUE args, VALUE storage_data, VALUE * output_data) { @@ -963,19 +994,13 @@ try_input_to_storage(VALUE arg) return rb_funcall(data->handler, rb_intern("input_to_storage"), 2, data->input_data, data->pathval); } -static VALUE -rescue_input_to_storage(VALUE arg, VALUE e) -{ - return uncompilable; -} - static VALUE prot_input_to_storage(VALUE arg) { struct i2s_data * data = (struct i2s_data *)arg; return rb_rescue2( try_input_to_storage, (VALUE)data, - rescue_input_to_storage, Qnil, + rescue_uncompilable, Qnil, rb_eBootsnap_CompileCache_Uncompilable, 0); } diff --git a/lib/bootsnap/compile_cache/yaml.rb b/lib/bootsnap/compile_cache/yaml.rb index 1088e924..71fb588f 100644 --- a/lib/bootsnap/compile_cache/yaml.rb +++ b/lib/bootsnap/compile_cache/yaml.rb @@ -7,50 +7,20 @@ module CompileCache module YAML class << self attr_accessor(:msgpack_factory, :cache_dir, :supported_options) - - def input_to_storage(contents, _) - obj = strict_load(contents) - msgpack_factory.dump(obj) - rescue NoMethodError, RangeError - # The object included things that we can't serialize - raise(Uncompilable) - end - - def storage_to_output(data, kwargs) - if kwargs&.key?(:symbolize_names) - kwargs[:symbolize_keys] = kwargs.delete(:symbolize_names) - end - msgpack_factory.load(data, kwargs) - end - - def input_to_output(data, kwargs) - if ::YAML.respond_to?(:unsafe_load) - ::YAML.unsafe_load(data, **(kwargs || {})) - else - ::YAML.load(data, **(kwargs || {})) - end - end - - def strict_load(payload, *args) - ast = ::YAML.parse(payload) - return ast unless ast - - strict_visitor.create(*args).visit(ast) - end - ruby2_keywords :strict_load if respond_to?(:ruby2_keywords, true) + attr_reader(:implementation) def precompile(path, cache_dir: YAML.cache_dir) Bootsnap::CompileCache::Native.precompile( cache_dir, path.to_s, - Bootsnap::CompileCache::YAML, + @implementation, ) end def install!(cache_dir) self.cache_dir = cache_dir init! - ::YAML.singleton_class.prepend(Patch) + ::YAML.singleton_class.prepend(@implementation::Patch) end def init! @@ -58,11 +28,9 @@ def init! require("msgpack") require("date") - if Patch.method_defined?(:unsafe_load_file) && !::YAML.respond_to?(:unsafe_load_file) - Patch.send(:remove_method, :unsafe_load_file) - end - if Patch.method_defined?(:load_file) && ::YAML::VERSION >= "4" - Patch.send(:remove_method, :load_file) + @implementation = ::YAML::VERSION >= "4" ? Psych4 : Psych3 + if @implementation::Patch.method_defined?(:unsafe_load_file) && !::YAML.respond_to?(:unsafe_load_file) + @implementation::Patch.send(:remove_method, :unsafe_load_file) end # MessagePack serializes symbols as strings by default. @@ -106,6 +74,17 @@ def init! supported_options.freeze end + def patch + @implementation::Patch + end + + def strict_load(payload) + ast = ::YAML.parse(payload) + return ast unless ast + + strict_visitor.create.visit(ast) + end + def strict_visitor self::NoTagsVisitor ||= Class.new(Psych::Visitors::ToRuby) do def visit(target) @@ -119,50 +98,199 @@ def visit(target) end end - module Patch - def load_file(path, *args) - return super if args.size > 1 + module Psych4 + extend self + + def input_to_storage(contents, _) + SafeLoad.input_to_storage(contents, nil) + rescue Uncompilable + UnsafeLoad.input_to_storage(contents, nil) + end + + module UnsafeLoad + extend self - if (kwargs = args.first) - return super unless kwargs.is_a?(Hash) - return super unless (kwargs.keys - ::Bootsnap::CompileCache::YAML.supported_options).empty? + def input_to_storage(contents, _) + obj = CompileCache::YAML.strict_load(contents) + packer = CompileCache::YAML.msgpack_factory.packer + packer.pack(false) # not safe loaded + packer.pack(obj) + packer.to_s + rescue NoMethodError, RangeError + # The object included things that we can't serialize + raise(Uncompilable) end - begin - ::Bootsnap::CompileCache::Native.fetch( - Bootsnap::CompileCache::YAML.cache_dir, - File.realpath(path), - ::Bootsnap::CompileCache::YAML, - kwargs, - ) - rescue Errno::EACCES - ::Bootsnap::CompileCache.permission_error(path) + def storage_to_output(data, kwargs) + if kwargs&.key?(:symbolize_names) + kwargs[:symbolize_keys] = kwargs.delete(:symbolize_names) + end + + unpacker = CompileCache::YAML.msgpack_factory.unpacker(kwargs) + unpacker.feed(data) + _safe_loaded = unpacker.unpack + unpacker.unpack + end + + def input_to_output(data, kwargs) + ::YAML.unsafe_load(data, **(kwargs || {})) end end - ruby2_keywords :load_file if respond_to?(:ruby2_keywords, true) + module SafeLoad + extend self - def unsafe_load_file(path, *args) - return super if args.size > 1 + def input_to_storage(contents, _) + obj = ::YAML.load(contents) + packer = CompileCache::YAML.msgpack_factory.packer + packer.pack(true) # safe loaded + packer.pack(obj) + packer.to_s + rescue NoMethodError, RangeError, Psych::DisallowedClass, Psych::BadAlias + # The object included things that we can't serialize + raise(Uncompilable) + end - if (kwargs = args.first) - return super unless kwargs.is_a?(Hash) - return super unless (kwargs.keys - ::Bootsnap::CompileCache::YAML.supported_options).empty? + def storage_to_output(data, kwargs) + if kwargs&.key?(:symbolize_names) + kwargs[:symbolize_keys] = kwargs.delete(:symbolize_names) + end + + unpacker = CompileCache::YAML.msgpack_factory.unpacker(kwargs) + unpacker.feed(data) + safe_loaded = unpacker.unpack + if safe_loaded + unpacker.unpack + else + raise(Uncompilable) + end end - begin - ::Bootsnap::CompileCache::Native.fetch( - Bootsnap::CompileCache::YAML.cache_dir, - File.realpath(path), - ::Bootsnap::CompileCache::YAML, - kwargs, - ) - rescue Errno::EACCES - ::Bootsnap::CompileCache.permission_error(path) + def input_to_output(data, kwargs) + ::YAML.load(data, **(kwargs || {})) end end - ruby2_keywords :unsafe_load_file if respond_to?(:ruby2_keywords, true) + module Patch + def load_file(path, *args) + return super if args.size > 1 + + if (kwargs = args.first) + return super unless kwargs.is_a?(Hash) + return super unless (kwargs.keys - ::Bootsnap::CompileCache::YAML.supported_options).empty? + end + + begin + ::Bootsnap::CompileCache::Native.fetch( + Bootsnap::CompileCache::YAML.cache_dir, + File.realpath(path), + ::Bootsnap::CompileCache::YAML::Psych4::SafeLoad, + kwargs, + ) + rescue Errno::EACCES + ::Bootsnap::CompileCache.permission_error(path) + end + end + + ruby2_keywords :load_file if respond_to?(:ruby2_keywords, true) + + def unsafe_load_file(path, *args) + return super if args.size > 1 + + if (kwargs = args.first) + return super unless kwargs.is_a?(Hash) + return super unless (kwargs.keys - ::Bootsnap::CompileCache::YAML.supported_options).empty? + end + + begin + ::Bootsnap::CompileCache::Native.fetch( + Bootsnap::CompileCache::YAML.cache_dir, + File.realpath(path), + ::Bootsnap::CompileCache::YAML::Psych4::UnsafeLoad, + kwargs, + ) + rescue Errno::EACCES + ::Bootsnap::CompileCache.permission_error(path) + end + end + + ruby2_keywords :unsafe_load_file if respond_to?(:ruby2_keywords, true) + end + end + + module Psych3 + extend self + + def input_to_storage(contents, _) + obj = CompileCache::YAML.strict_load(contents) + packer = CompileCache::YAML.msgpack_factory.packer + packer.pack(false) # not safe loaded + packer.pack(obj) + packer.to_s + rescue NoMethodError, RangeError + # The object included things that we can't serialize + raise(Uncompilable) + end + + def storage_to_output(data, kwargs) + if kwargs&.key?(:symbolize_names) + kwargs[:symbolize_keys] = kwargs.delete(:symbolize_names) + end + unpacker = CompileCache::YAML.msgpack_factory.unpacker(kwargs) + unpacker.feed(data) + _safe_loaded = unpacker.unpack + unpacker.unpack + end + + def input_to_output(data, kwargs) + ::YAML.load(data, **(kwargs || {})) + end + + module Patch + def load_file(path, *args) + return super if args.size > 1 + + if (kwargs = args.first) + return super unless kwargs.is_a?(Hash) + return super unless (kwargs.keys - ::Bootsnap::CompileCache::YAML.supported_options).empty? + end + + begin + ::Bootsnap::CompileCache::Native.fetch( + Bootsnap::CompileCache::YAML.cache_dir, + File.realpath(path), + ::Bootsnap::CompileCache::YAML::Psych3, + kwargs, + ) + rescue Errno::EACCES + ::Bootsnap::CompileCache.permission_error(path) + end + end + + ruby2_keywords :load_file if respond_to?(:ruby2_keywords, true) + + def unsafe_load_file(path, *args) + return super if args.size > 1 + + if (kwargs = args.first) + return super unless kwargs.is_a?(Hash) + return super unless (kwargs.keys - ::Bootsnap::CompileCache::YAML.supported_options).empty? + end + + begin + ::Bootsnap::CompileCache::Native.fetch( + Bootsnap::CompileCache::YAML.cache_dir, + File.realpath(path), + ::Bootsnap::CompileCache::YAML::Psych3, + kwargs, + ) + rescue Errno::EACCES + ::Bootsnap::CompileCache.permission_error(path) + end + end + + ruby2_keywords :unsafe_load_file if respond_to?(:ruby2_keywords, true) + end end end end diff --git a/test/compile_cache/yaml_test.rb b/test/compile_cache/yaml_test.rb index f9b6d6fd..734f1c45 100644 --- a/test/compile_cache/yaml_test.rb +++ b/test/compile_cache/yaml_test.rb @@ -21,7 +21,7 @@ def unsafe_load_file(_path, symbolize_names: false, freeze: false, fallback: nil def setup super Bootsnap::CompileCache::YAML.init! - FakeYaml.singleton_class.prepend(Bootsnap::CompileCache::YAML::Patch) + FakeYaml.singleton_class.prepend(Bootsnap::CompileCache::YAML.patch) end def test_yaml_strict_load @@ -37,19 +37,6 @@ def test_yaml_strict_load assert_equal expected, document end - def test_yaml_input_to_output - document = ::Bootsnap::CompileCache::YAML.input_to_output(<<~YAML, {}) - --- - :foo: 42 - bar: [1] - YAML - expected = { - foo: 42, - "bar" => [1], - } - assert_equal expected, document - end - def test_yaml_tags error = assert_raises Bootsnap::CompileCache::Uncompilable do ::Bootsnap::CompileCache::YAML.strict_load("!many Boolean") @@ -63,15 +50,68 @@ def test_yaml_tags end if YAML::VERSION >= "4" - def test_load_psych_4 - # Until we figure out a proper strategy, only `YAML.unsafe_load_file` - # is cached with Psych >= 4 + def test_load_psych_4_with_alias Help.set_file("a.yml", "foo: &foo\n bar: 42\nplop:\n <<: *foo", 100) - assert_raises FakeYaml::Fallback do + + foo = {"bar" => 42} + expected = {"foo" => foo, "plop" => foo} + assert_equal(expected, FakeYaml.unsafe_load_file("a.yml")) + + assert_raises Psych::BadAlias do FakeYaml.load_file("a.yml") end end + + def test_load_psych_4_with_unsafe_class + Help.set_file("a.yml", "---\nfoo: !ruby/regexp /bar/\n", 100) + + expected = {"foo" => /bar/} + assert_equal(expected, FakeYaml.unsafe_load_file("a.yml")) + + assert_raises Psych::DisallowedClass do + FakeYaml.load_file("a.yml") + end + end + + def test_yaml_input_to_output_safe + document = ::Bootsnap::CompileCache::YAML::Psych4::SafeLoad.input_to_output(<<~YAML, {}) + --- + :foo: 42 + bar: [1] + YAML + expected = { + foo: 42, + "bar" => [1], + } + assert_equal expected, document + end + + def test_yaml_input_to_output_unsafe + document = ::Bootsnap::CompileCache::YAML::Psych4::UnsafeLoad.input_to_output(<<~YAML, {}) + --- + :foo: 42 + bar: [1] + YAML + expected = { + foo: 42, + "bar" => [1], + } + assert_equal expected, document + end else + def test_yaml_input_to_output + document = ::Bootsnap::CompileCache::YAML::Psych3.input_to_output(<<~YAML, {}) + --- + :foo: 42 + bar: [1] + YAML + expected = { + foo: 42, + "bar" => [1], + } + assert_equal expected, document + end + def test_load_file Help.set_file("a.yml", "---\nfoo: bar", 100) assert_equal({"foo" => "bar"}, FakeYaml.load_file("a.yml")) diff --git a/test/compile_cache_key_format_test.rb b/test/compile_cache_key_format_test.rb index 522475ee..bf7f2e3c 100644 --- a/test/compile_cache_key_format_test.rb +++ b/test/compile_cache_key_format_test.rb @@ -21,7 +21,7 @@ class CompileCacheKeyFormatTest < Minitest::Test def test_key_version key = cache_key_for_file(FILE) - exp = [3].pack("L") + exp = [4].pack("L") assert_equal(exp, key[R[:version]]) end