Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Externalize compilation cache by compilers #747

Merged
merged 18 commits into from
Aug 18, 2022
31 changes: 31 additions & 0 deletions experimental/compilation_cache.go
@@ -0,0 +1,31 @@
package experimental

import (
"context"

"github.com/tetratelabs/wazero/internal/compilationcache"
)

// WithCompilationCacheDirName returns a context that carries the directory in
// which the compilation cache is persisted.
//
// Compiled functions are always cached in memory, whether or not this is used,
// but that in-memory cache only lives as long as the owning wazero.Runtime or
// wazero.CompiledModule.
//
// When dirname is non-empty, wazero also persists the cache into that
// directory, and the persisted cache is used as long as the running wazero
// version matches the version of wazero that compiled it. When dirname is
// empty, ctx is returned unchanged.
//
// A cache is only valid for use in one wazero.Runtime at a time. Concurrent
// use of a wazero.Runtime is supported, but multiple runtimes must not share
// the same directory.
//
// Note: The embedder must safeguard this directory from external changes.
//
// Usage:
//
//	ctx := experimental.WithCompilationCacheDirName(context.Background(), "/home/me/.cache/wazero")
//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
func WithCompilationCacheDirName(ctx context.Context, dirname string) context.Context {
	// An empty name means "no persistent cache": leave the context untouched.
	if dirname == "" {
		return ctx
	}
	return context.WithValue(ctx, compilationcache.FileCachePathKey{}, dirname)
}
46 changes: 46 additions & 0 deletions internal/compilationcache/compilationcache.go
@@ -0,0 +1,46 @@
package compilationcache

import (
"crypto/sha256"
"io"
)

// Cache is the interface for compilation caches. Internally, the cache
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// here means that the compiled binary cache by compilers. Regardless of the usage of
// ExternCache, the compiled functions are cached in memory, but its lifetime is
// bound to the lifetime of wazero.Runtime or wazero.CompiledModule.
// Usually, the compilation of Wasm binary is time-consuming. Therefore, you might
// want to cache the compilation result across the processes of wazero users.
//
// See NewFileCache for the example implementation.
type Cache interface {
// Get is called when the runtime is trying to get the cached content.
// Implementations are supposed to return `content` which can be used to
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// read the content passed by Add as-is. Returns ok=true if the
// content was found on the cache. That means the content is not empty
// if and only if ok=true. In the case of not-found, this should return
// ok=false with err=nil. content.Close() is automatically called by
// the caller of this Get.
//
// Note: the returned content won't go through the validation pass of Wasm binary
// which is applied when the binary is compiled from scratch without cache hit.
// Its implication is that the implementors of ExternCache might want to have
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// their own validation phases. For example, sign the binary passed to Add, and
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// verify the signature of the stored cache before returning it via Get, etc.
Get(key Key) (content io.ReadCloser, ok bool, err error)
//
// Add is called when the runtime is trying to add the new cache entry.
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// The given `content` must be un-modified, and returned as-is in Get method.
//
// Note: the `content` is ensured to be safe through the validation phase applied on the Wasm binary.
Add(key Key, content io.Reader) (err error)
//
// Delete is called when the cache on the `key` returned by Get is no longer usable, and
mathetake marked this conversation as resolved.
Show resolved Hide resolved
// must be purged. Specifically, this is called happens when the wazero's version has been changed.
// For example, that is when there's a difference between the version of compiling wazero and the
// version of the currently used wazero.
Delete(key Key) (err error)
}

// Key represents the 256-bit unique identifier assigned to each cache content.
// It is an alias of a SHA-256 sized byte array, so a sha256 checksum can be
// used as a Key directly.
type Key = [sha256.Size]byte
92 changes: 92 additions & 0 deletions internal/compilationcache/file_cache.go
@@ -0,0 +1,92 @@
package compilationcache

import (
"context"
"encoding/hex"
"errors"
"io"
"os"
"path"
"sync"
)

// FileCachePathKey is a context.Context Value key. The associated value is the
// path (a string) of the directory in which the file-based compilation cache
// is stored.
type FileCachePathKey struct{}

// NewFileCache returns a new Cache implemented by fileCache, rooted at the
// directory stored in ctx under FileCachePathKey.
//
// It returns nil when ctx carries no value for FileCachePathKey, or when the
// stored value is not a string, so a malformed context value disables the
// file cache instead of panicking.
func NewFileCache(ctx context.Context) Cache {
	dir, ok := ctx.Value(FileCachePathKey{}).(string)
	if !ok {
		return nil
	}
	return newFileCache(dir)
}

// newFileCache returns a fileCache that stores its entries under dir.
func newFileCache(dir string) *fileCache {
	fc := new(fileCache)
	fc.dirPath = dir
	return fc
}

// fileCache is an example implementation of Cache which writes/reads cache
// into/from the fileCache.dirPath.
//
// Note: this can be expanded to do binary signing/verification, set TTL on each entry, etc.
type fileCache struct {
	// dirPath is the directory under which each entry is stored as a file
	// named after the hex encoding of its Key.
	dirPath string
	// mux serializes access to the files: Get holds the read lock until the
	// returned content is closed, while Add and Delete hold the write lock.
	mux sync.RWMutex
}

// fileReadCloser is the content handed out by fileCache.Get. It reads from the
// embedded file and keeps a reference to the owning fileCache so that Close
// can release the read lock taken by Get.
type fileReadCloser struct {
	// File is the opened cache entry.
	*os.File
	// fc is the cache whose read lock is held while this content is open.
	fc *fileCache
}

// path returns the location of the entry for key: the hex encoding of the key
// joined onto fc.dirPath.
//
// NOTE(review): path.Join always joins with forward slashes; consider
// path/filepath if OS-native separators are required.
func (fc *fileCache) path(key Key) string {
	name := hex.EncodeToString(key[:])
	return path.Join(fc.dirPath, name)
}

// Get opens the entry for key and returns it as content with ok=true. A
// missing entry is reported as ok=false with a nil error; any other failure
// to open the file is returned as err.
//
// On success the read lock on fc stays held until the returned content is
// closed, so the caller must call content.Close().
func (fc *fileCache) Get(key Key) (content io.ReadCloser, ok bool, err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.RLock()

	file, openErr := os.Open(fc.path(key))
	switch {
	case openErr == nil:
		// The read lock is released by fileReadCloser.Close at the call site.
		return &fileReadCloser{File: file, fc: fc}, true, nil
	case errors.Is(openErr, os.ErrNotExist):
		// Not found is not an error.
		fc.mux.RUnlock()
		return nil, false, nil
	default:
		fc.mux.RUnlock()
		return nil, false, openErr
	}
}

// Close closes the underlying os.File and then releases the read lock on the
// owning fileCache that was taken by fileCache.Get. The error, if any, is the
// one returned by os.File.Close.
func (f *fileReadCloser) Close() (err error) {
	// Deferred so the lock is released after the file has been closed.
	defer f.fc.mux.RUnlock()
	return f.File.Close()
}

// Add stores content as the entry for key, replacing any existing entry.
//
// The write lock on fc is held for the duration of the call. If creating,
// writing, or closing the file fails, the error is returned and the partially
// written file is removed on a best-effort basis, so that a later Get does not
// serve truncated content as a valid entry.
func (fc *fileCache) Add(key Key, content io.Reader) (err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.Lock()
	defer fc.mux.Unlock()

	entryPath := fc.path(key)
	file, err := os.Create(entryPath)
	if err != nil {
		return err
	}
	defer func() {
		// A failing Close can mean the data never reached the disk, so it
		// must be reported unless an earlier error is already being returned.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best effort: the original error is more useful to the caller
			// than a failure to clean up.
			_ = os.Remove(entryPath)
		}
	}()

	_, err = io.Copy(file, content)
	return err
}

// Delete removes the entry for key while holding the write lock on fc.
// Deleting an entry that does not exist is not an error.
func (fc *fileCache) Delete(key Key) (err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.Lock()
	defer fc.mux.Unlock()

	if rmErr := os.Remove(fc.path(key)); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
		return rmErr
	}
	return nil
}
135 changes: 135 additions & 0 deletions internal/compilationcache/file_cache_test.go
@@ -0,0 +1,135 @@
package compilationcache

import (
"bytes"
"io"
"os"
"testing"

"github.com/tetratelabs/wazero/internal/testing/require"
)

// TestFileReadCloser_Close verifies that the content returned by Get keeps the
// cache's read lock held until it is closed.
func TestFileReadCloser_Close(t *testing.T) {
	cache := newFileCache(t.TempDir())
	id := Key{1, 2, 3}

	require.NoError(t, cache.Add(id, bytes.NewReader([]byte{1, 2, 3, 4})))

	rc, found, err := cache.Get(id)
	require.NoError(t, err)
	require.True(t, found)

	// The content is still open, so the read lock is held and the write lock
	// cannot be acquired.
	require.False(t, cache.mux.TryLock())

	// Closing the content releases the read lock, so the write lock becomes
	// available.
	require.NoError(t, rc.Close())
	require.True(t, cache.mux.TryLock())
}

// TestFileCache_Add verifies that Add writes the given content to the file for
// the key, both when no entry exists yet and when one must be replaced.
func TestFileCache_Add(t *testing.T) {
	fc := newFileCache(t.TempDir())

	t.Run("not exist", func(t *testing.T) {
		content := []byte{1, 2, 3, 4, 5}
		id := Key{1, 2, 3, 4, 5, 6, 7}
		require.NoError(t, fc.Add(id, bytes.NewReader(content)))

		// Ensures that file exists.
		cached, err := os.ReadFile(fc.path(id))
		require.NoError(t, err)

		// Check if the saved content is the same as the given one.
		require.Equal(t, content, cached)
	})

	t.Run("already exists", func(t *testing.T) {
		// The stale entry differs from, and is longer than, the new content so
		// the test fails unless Add really replaces and truncates the file.
		stale := []byte{0xff, 0xfe, 0xfd, 0xfc, 0xfb, 0xfa, 0xf9}
		content := []byte{1, 2, 3, 4, 5}

		id := Key{1, 2, 3}

		// Writes the pre-existing file for the same ID.
		p := fc.path(id)
		f, err := os.Create(p)
		require.NoError(t, err)
		_, err = f.Write(stale)
		require.NoError(t, err)
		require.NoError(t, f.Close())

		require.NoError(t, fc.Add(id, bytes.NewReader(content)))

		// Ensures that file exists.
		cached, err := os.ReadFile(p)
		require.NoError(t, err)

		// Check that the stale content was fully replaced by the given one.
		require.Equal(t, content, cached)
	})
}

// TestFileCache_Delete verifies that Delete removes an existing entry and
// treats a missing entry as success.
func TestFileCache_Delete(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("non-exist", func(t *testing.T) {
		require.NoError(t, cache.Delete(Key{0}))
	})

	t.Run("exist", func(t *testing.T) {
		key := Key{1, 2, 3}
		entryPath := cache.path(key)

		// Create an empty entry by hand.
		created, err := os.Create(entryPath)
		require.NoError(t, err)
		require.NoError(t, created.Close())

		// Ensures that file exists now.
		opened, err := os.Open(entryPath)
		require.NoError(t, err)
		require.NoError(t, opened.Close())

		// Delete the cache.
		require.NoError(t, cache.Delete(key))

		// Ensures that file no longer exists.
		_, err = os.Open(entryPath)
		require.ErrorIs(t, err, os.ErrNotExist)
	})
}

// TestFileCache_Get verifies that Get returns the stored bytes for an existing
// entry and reports a missing entry as ok=false without an error.
func TestFileCache_Get(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("exist", func(t *testing.T) {
		want := []byte{1, 2, 3, 4, 5}
		key := Key{1, 2, 3}

		// Writes the pre-existing file for the ID.
		entry, err := os.Create(cache.path(key))
		require.NoError(t, err)
		_, err = entry.Write(want)
		require.NoError(t, err)
		require.NoError(t, entry.Close())

		rc, found, err := cache.Get(key)
		require.NoError(t, err)
		require.True(t, found)
		defer func() {
			require.NoError(t, rc.Close())
		}()

		got, err := io.ReadAll(rc)
		require.NoError(t, err)

		require.Equal(t, want, got)
	})

	t.Run("not exist", func(t *testing.T) {
		_, found, err := cache.Get(Key{0xf})
		// Non-exist should not be error.
		require.NoError(t, err)
		require.False(t, found)
	})
}