Skip to content

Commit

Permalink
YAML compile cache: encoding aware symbols
Browse files Browse the repository at this point in the history
Ref: msgpack/msgpack-ruby#211

The default msgpack Symbol packer/unpacker is not encoding
aware which cause all non-ASCII symbols to be unpacked with
ASCII-8BIT encoding aka BINARY.

So we define a custom packer that prefix the symbol name with the encoding
index.

An alternative could be to simply assume non-ASCII symbols are UTF-8,
but it wouldn't work for people with non UTF-8 source files.
  • Loading branch information
byroot committed Jan 28, 2022
1 parent e3ef615 commit 70a2fa2
Show file tree
Hide file tree
Showing 4 changed files with 41 additions and 3 deletions.
2 changes: 1 addition & 1 deletion ext/bootsnap/bootsnap.c
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,7 @@ struct bs_cache_key {
STATIC_ASSERT(sizeof(struct bs_cache_key) == KEY_SIZE);

/* Effectively a schema version. Bumping invalidates all previous caches */
static const uint32_t current_version = 4;
static const uint32_t current_version = 5;

/* hash of e.g. "x86_64-darwin17", invalidating when ruby is recompiled on a
* new OS ABI, etc. */
Expand Down
34 changes: 33 additions & 1 deletion lib/bootsnap/compile_cache/yaml.rb
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,33 @@ def install!(cache_dir)
::YAML.singleton_class.prepend(@implementation::Patch)
end

module EncodingAwareSymbols
extend self

ENCODINGS = Encoding.list.freeze
ENCODINGS_INDEX = {}.compare_by_identity
ENCODINGS.each_with_index do |enc, index|
ENCODINGS_INDEX[enc] = -(index.chr.force_encoding(Encoding::BINARY))
end

if Symbol.method_defined?(:name)
def pack(symbol)
ENCODINGS_INDEX[symbol.encoding].dup << symbol.name
end
else
def pack(symbol)
ENCODINGS_INDEX[symbol.encoding].dup << symbol.to_s
end
end

def unpack(payload)
payload.freeze
string = payload.byteslice(1..-1)
string.force_encoding(ENCODINGS[payload.ord])
string.to_sym
end
end

def init!
require("yaml")
require("msgpack")
Expand All @@ -43,7 +70,12 @@ def init!
# We want them to roundtrip cleanly, so we use a custom factory.
# see: https://github.com/msgpack/msgpack-ruby/pull/122
factory = MessagePack::Factory.new
factory.register_type(0x00, Symbol)
factory.register_type(
0x00,
Symbol,
packer: EncodingAwareSymbols.method(:pack).to_proc,
unpacker: EncodingAwareSymbols.method(:unpack).to_proc,
)

if defined? MessagePack::Timestamp
factory.register_type(
Expand Down
6 changes: 6 additions & 0 deletions test/compile_cache/yaml_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,12 @@ def test_yaml_tags
assert_equal "YAML tags are not supported: !ruby/object", error.message
end

def test_symbols_encoding
symbols = [:ascii, :utf8_fée]
Help.set_file("a.yml", YAML.dump(symbols), 100)
assert_equal(symbols, FakeYaml.load_file("a.yml"))
end

if YAML::VERSION >= "4"
def test_load_psych_4_with_alias
Help.set_file("a.yml", "foo: &foo\n bar: 42\nplop:\n <<: *foo", 100)
Expand Down
2 changes: 1 addition & 1 deletion test/compile_cache_key_format_test.rb
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ class CompileCacheKeyFormatTest < Minitest::Test

def test_key_version
key = cache_key_for_file(FILE)
exp = [4].pack("L")
exp = [5].pack("L")
assert_equal(exp, key[R[:version]])
end

Expand Down

0 comments on commit 70a2fa2

Please sign in to comment.