Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Externalize compilation cache by compilers #747

Merged
merged 18 commits into from Aug 18, 2022
31 changes: 31 additions & 0 deletions experimental/compilation_cache.go
@@ -0,0 +1,31 @@
package experimental

import (
"context"

"github.com/tetratelabs/wazero/internal/compilationcache"
)

// WithCompilationCacheDirName returns a context that carries dirname as the
// destination directory of the persistent compilation cache.
//
// Compiled functions are always cached in memory regardless of this setting,
// but that in-memory cache lives only as long as the wazero.Runtime or
// wazero.CompiledModule it belongs to.
//
// When dirname is non-empty, wazero additionally persists the cache into that
// directory, and the persisted entries are used as long as the running wazero
// version matches the version that compiled them. When dirname is empty, ctx
// is returned unchanged.
//
// A cache is only valid for use in one wazero.Runtime at a time. Concurrent use
// of a wazero.Runtime is supported, but multiple runtimes must not share the
// same directory.
//
// Note: The embedder must safeguard this directory from external changes.
//
// Usage:
//
//	ctx := experimental.WithCompilationCacheDirName(context.Background(), "/home/me/.cache/wazero")
//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
func WithCompilationCacheDirName(ctx context.Context, dirname string) context.Context {
	// An empty name means "no persistent cache": leave the context untouched.
	if dirname == "" {
		return ctx
	}
	return context.WithValue(ctx, compilationcache.FileCachePathKey{}, dirname)
}
42 changes: 42 additions & 0 deletions internal/compilationcache/compilationcache.go
@@ -0,0 +1,42 @@
package compilationcache

import (
"crypto/sha256"
"io"
)

// Cache allows the compiler engine to skip compilation of wasm to machine code
// where doing so is redundant for the same wasm binary and version of wazero.
//
// This augments the default in-memory cache of compiled functions, by
// decoupling it from a wazero.Runtime instance. Concretely, a runtime loses
// its cache once closed. This cache allows the runtime to rebuild its
// in-memory cache quicker, significantly reducing first-hit penalty on a hit.
//
// See NewFileCache for the example implementation.
type Cache interface {
// Get is called when the runtime is trying to get the cached compiled functions.
// Implementations are supposed to return compiled function in io.Reader with ok=true
// if the key exists on the cache. In the case of not-found, this should return
// ok=false with err=nil. content.Close() is automatically called by
// the caller of this Get.
//
// Note: the returned content won't go through the validation pass of Wasm binary
// which is applied when the binary is compiled from scratch without cache hit.
Get(key Key) (content io.ReadCloser, ok bool, err error)
//
// Add is called when the runtime is trying to add the new cache entry.
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// The given `content` must be un-modified, and returned as-is in Get method.
//
// Note: the `content` is ensured to be safe through the validation phase applied on the Wasm binary.
Add(key Key, content io.Reader) (err error)
//
// Delete is called when the cache on the `key` returned by Get is no longer usable, and
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// must be purged. Specifically, this is called happens when the wazero's version has been changed.
// For example, that is when there's a difference between the version of compiling wazero and the
// version of the currently used wazero.
Delete(key Key) (err error)
}

// Key represents the 256-bit unique identifier assigned to each cache entry.
//
// It is an alias of a sha256.Size-byte array, so any value of that array type
// can be passed where a Key is expected.
type Key = [sha256.Size]byte
99 changes: 99 additions & 0 deletions internal/compilationcache/file_cache.go
@@ -0,0 +1,99 @@
package compilationcache

import (
"context"
"encoding/hex"
"errors"
"io"
"os"
"path"
"sync"
)

// FileCachePathKey is a context.Context Value key. Its value is a string
// representing the compilation cache directory.
//
// NewFileCache looks this key up to decide whether a file-backed Cache is created.
type FileCachePathKey struct{}

// NewFileCache returns a Cache backed by fileCache when ctx carries a value
// under FileCachePathKey, using that value as the cache directory.
// It returns nil when ctx has no such value.
func NewFileCache(ctx context.Context) Cache {
	dir := ctx.Value(FileCachePathKey{})
	if dir == nil {
		return nil
	}
	// The value stored under FileCachePathKey is expected to be a string.
	return newFileCache(dir.(string))
}

// newFileCache returns a fileCache that stores its entries under dir.
func newFileCache(dir string) *fileCache {
	fc := new(fileCache)
	fc.dirPath = dir
	return fc
}

// fileCache persists compiled functions into dirPath.
//
// Each entry is stored as one file whose name is the hex encoding of its Key.
//
// Note: this can be expanded to do binary signing/verification, set TTL on each entry, etc.
type fileCache struct {
	// dirPath is the directory under which the entry files are created.
	dirPath string
	// mux guards access to the files under dirPath: Get takes the read lock and
	// keeps it until the returned content is closed, while Add and Delete take
	// the write lock for the duration of the call.
	mux sync.RWMutex
}

// fileReadCloser is the content returned by fileCache.Get on a cache hit.
// It reads from the embedded file, and its Close also releases the read lock
// that Get acquired on the owning fileCache.
type fileReadCloser struct {
	// File is the opened cache entry.
	*os.File
	// fc is the cache whose read lock is released on Close.
	fc *fileCache
}

// path returns the location of the file that stores the entry for key:
// the hex encoding of key joined onto dirPath.
func (fc *fileCache) path(key Key) string {
	fileName := hex.EncodeToString(key[:])
	return path.Join(fc.dirPath, fileName)
}

// Get implements Cache.Get by opening the file that corresponds to key.
//
// A missing file is reported as ok=false with a nil error; any other failure to
// open the file is returned as err. On a hit, the read lock taken here stays
// held until the returned content is closed.
func (fc *fileCache) Get(key Key) (content io.ReadCloser, ok bool, err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.RLock()

	f, err := os.Open(fc.path(key))
	if err != nil {
		// Nothing is handed to the caller, so the lock is released right here.
		fc.mux.RUnlock()
		if errors.Is(err, os.ErrNotExist) {
			return nil, false, nil
		}
		return nil, false, err
	}

	// Unlock is done inside the content.Close() at the call site.
	return &fileReadCloser{File: f, fc: fc}, true, nil
}

// Close closes the underlying os.File and then releases the read lock that
// fileCache.Get acquired for this content. The error of the file close, if any,
// is returned.
func (f *fileReadCloser) Close() (err error) {
	// Deferred so the lock is released even when closing the file fails.
	defer f.fc.mux.RUnlock()
	return f.File.Close()
}

// Add implements Cache.Add by streaming content into the file that corresponds
// to key, creating the file or truncating a pre-existing one.
//
// The write lock is held for the whole call. If copying the content or closing
// the file fails, the partially written file is removed so that a later Get
// cannot return truncated content, and the first error encountered is returned.
func (fc *fileCache) Add(key Key, content io.Reader) (err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.Lock()
	defer fc.mux.Unlock()

	p := fc.path(key)
	file, err := os.Create(p)
	if err != nil {
		return err
	}
	defer func() {
		// A failed Close can mean the data never fully reached the file, so it
		// must be reported unless an earlier error already is.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best-effort cleanup: the write error is more useful to the caller
			// than a failure to remove the incomplete file.
			_ = os.Remove(p)
		}
	}()

	_, err = io.Copy(file, content)
	return err
}

// Delete implements Cache.Delete by removing the file that corresponds to key
// while holding the write lock. A file that does not exist is not an error.
func (fc *fileCache) Delete(key Key) (err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.Lock()
	defer fc.mux.Unlock()

	if rmErr := os.Remove(fc.path(key)); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
		return rmErr
	}
	return nil
}
135 changes: 135 additions & 0 deletions internal/compilationcache/file_cache_test.go
@@ -0,0 +1,135 @@
package compilationcache

import (
"bytes"
"io"
"os"
"testing"

"github.com/tetratelabs/wazero/internal/testing/require"
)

// TestFileReadCloser_Close verifies that the content returned by Get keeps the
// cache's read lock until it is closed.
func TestFileReadCloser_Close(t *testing.T) {
	cache := newFileCache(t.TempDir())
	k := Key{1, 2, 3}

	// Seed the cache so that Get has an entry to open.
	require.NoError(t, cache.Add(k, bytes.NewReader([]byte{1, 2, 3, 4})))

	rc, found, err := cache.Get(k)
	require.NoError(t, err)
	require.True(t, found)

	// The content is still open and holds the read lock, so the write lock is unavailable.
	locked := cache.mux.TryLock()
	require.False(t, locked)

	// Closing releases the read lock, which makes the write lock available.
	require.NoError(t, rc.Close())
	locked = cache.mux.TryLock()
	require.True(t, locked)
}

// TestFileCache_Add verifies that Add stores the given content in the file for
// the key, both when no file exists yet and when one already does.
func TestFileCache_Add(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("not exist", func(t *testing.T) {
		k := Key{1, 2, 3, 4, 5, 6, 7}
		want := []byte{1, 2, 3, 4, 5}

		require.NoError(t, cache.Add(k, bytes.NewReader(want)))

		// The entry file must exist and hold exactly what was added.
		got, err := os.ReadFile(cache.path(k))
		require.NoError(t, err)
		require.Equal(t, want, got)
	})

	t.Run("already exists", func(t *testing.T) {
		k := Key{1, 2, 3}
		want := []byte{1, 2, 3, 4, 5}

		// Put a file in place for the same key before calling Add.
		require.NoError(t, os.WriteFile(cache.path(k), want, 0o666))

		require.NoError(t, cache.Add(k, bytes.NewReader(want)))

		// The entry file must exist and hold exactly what was added.
		got, err := os.ReadFile(cache.path(k))
		require.NoError(t, err)
		require.Equal(t, want, got)
	})
}

// TestFileCache_Delete verifies that Delete removes an existing entry file and
// succeeds when there is no file for the key.
func TestFileCache_Delete(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("non-exist", func(t *testing.T) {
		// Deleting a key that was never added must not fail.
		require.NoError(t, cache.Delete(Key{0}))
	})

	t.Run("exist", func(t *testing.T) {
		k := Key{1, 2, 3}
		target := cache.path(k)

		// Create an empty entry file and confirm it is really there.
		require.NoError(t, os.WriteFile(target, nil, 0o666))
		_, err := os.Stat(target)
		require.NoError(t, err)

		// Delete the cache.
		require.NoError(t, cache.Delete(k))

		// The entry file must be gone afterwards.
		_, err = os.Stat(target)
		require.ErrorIs(t, err, os.ErrNotExist)
	})
}

// TestFileCache_Get verifies that Get returns the stored content on a hit and
// reports a miss as ok=false without an error.
func TestFileCache_Get(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("exist", func(t *testing.T) {
		k := Key{1, 2, 3}
		want := []byte{1, 2, 3, 4, 5}

		// Put the entry file in place directly, bypassing Add.
		require.NoError(t, os.WriteFile(cache.path(k), want, 0o666))

		rc, found, err := cache.Get(k)
		require.NoError(t, err)
		require.True(t, found)
		defer func() {
			require.NoError(t, rc.Close())
		}()

		got, err := io.ReadAll(rc)
		require.NoError(t, err)
		require.Equal(t, want, got)
	})

	t.Run("not exist", func(t *testing.T) {
		_, found, err := cache.Get(Key{0xf})
		// Non-exist should not be error.
		require.NoError(t, err)
		require.False(t, found)
	})
}