Skip to content

Commit

Permalink
Externalize compilation cache by compilers (#747)
Browse files Browse the repository at this point in the history
This adds experimental support for a file system compilation cache.
Notably, experimental.WithCompilationCacheDirName allows users to configure
the directory into which the compiler writes the cache.

Versioning/validation of binary compatibility is done via the release tag
(release tags will be created starting at the end of this month). More specifically, each cache
file starts with a header containing the hardcoded wazero version.


Fixes #618

Signed-off-by: Takeshi Yoneda <takeshi@tetrate.io>
Co-authored-by: Crypt Keeper <64215+codefromthecrypt@users.noreply.github.com>
  • Loading branch information
mathetake and codefromthecrypt committed Aug 18, 2022
1 parent 076d324 commit 3b32c20
Show file tree
Hide file tree
Showing 20 changed files with 1,015 additions and 89 deletions.
31 changes: 31 additions & 0 deletions experimental/compilation_cache.go
@@ -0,0 +1,31 @@
package experimental

import (
"context"

"github.com/tetratelabs/wazero/internal/compilationcache"
)

// WithCompilationCacheDirName returns a context that tells wazero where to
// persist its compilation cache on the file system.
//
// Compiled functions are always cached in memory, whether or not this is
// used, but that in-memory cache only lives as long as the owning
// wazero.Runtime or wazero.CompiledModule.
//
// When dirname is non-empty, wazero additionally persists the cache into that
// directory, and a persisted entry is used as long as the running wazero
// version matches the version of wazero that compiled it. When dirname is
// empty, ctx is returned unchanged.
//
// A cache is only valid for use in one wazero.Runtime at a time. Concurrent
// use of a single wazero.Runtime is supported, but multiple runtimes must not
// share the same directory.
//
// Note: The embedder must safeguard this directory from external changes.
//
// Usage:
//
//	ctx := experimental.WithCompilationCacheDirName(context.Background(), "/home/me/.cache/wazero")
//	r := wazero.NewRuntimeWithConfig(ctx, wazero.NewRuntimeConfigCompiler())
func WithCompilationCacheDirName(ctx context.Context, dirname string) context.Context {
	// Nothing to record for an empty directory name.
	if dirname == "" {
		return ctx
	}
	return context.WithValue(ctx, compilationcache.FileCachePathKey{}, dirname)
}
42 changes: 42 additions & 0 deletions internal/compilationcache/compilationcache.go
@@ -0,0 +1,42 @@
package compilationcache

import (
"crypto/sha256"
"io"
)

// Cache allows the compiler engine to skip compilation of wasm to machine code
// where doing so is redundant for the same wasm binary and version of wazero.
//
// This augments the default in-memory cache of compiled functions, by
// decoupling it from a wazero.Runtime instance. Concretely, a runtime loses
// its cache once closed. This cache allows the runtime to rebuild its
// in-memory cache quicker, significantly reducing the first-hit penalty when
// the entry is found in this cache.
//
// See NewFileCache for the example implementation.
type Cache interface {
// Get is called when the runtime is trying to get the cached compiled functions.
// Implementations are supposed to return the compiled functions as an io.ReadCloser
// with ok=true if the key exists in the cache. In the not-found case, this should
// return ok=false with err=nil. content.Close() is automatically called by
// the caller of this Get.
//
// Note: the returned content won't go through the validation pass of the Wasm binary
// which is applied when the binary is compiled from scratch without a cache hit.
Get(key Key) (content io.ReadCloser, ok bool, err error)

// Add is called when the runtime is trying to add a new cache entry.
// The given `content` must be stored unmodified, and returned as-is by the Get method.
//
// Note: the `content` is ensured to be safe through the validation phase applied on the Wasm binary.
Add(key Key, content io.Reader) (err error)

// Delete is called when the cache entry for the `key` returned by Get is no longer usable, and
// must be purged. Specifically, this happens when wazero's version has changed.
// For example, that is when there's a difference between the version of wazero that compiled
// the entry and the version of the currently used wazero.
Delete(key Key) (err error)
}

// Key represents the 256-bit unique identifier assigned to each cache entry.
// It is an alias of [sha256.Size]byte, so a SHA-256 digest can be used as a
// Key without conversion.
type Key = [sha256.Size]byte
99 changes: 99 additions & 0 deletions internal/compilationcache/file_cache.go
@@ -0,0 +1,99 @@
package compilationcache

import (
"context"
"encoding/hex"
"errors"
"io"
"os"
"path"
"sync"
)

// FileCachePathKey is a context.Context Value key. Its value is a string
// representing the compilation cache directory. NewFileCache reads the value
// stored under this key to decide which directory to use.
type FileCachePathKey struct{}

// NewFileCache returns a file-backed Cache rooted at the directory stored in
// ctx under FileCachePathKey, or nil when ctx carries no such value.
func NewFileCache(ctx context.Context) Cache {
	dir := ctx.Value(FileCachePathKey{})
	if dir == nil {
		return nil
	}
	// The stored value is expected to be a string; any other type panics here.
	return newFileCache(dir.(string))
}

// newFileCache returns a fileCache that keeps its entries under dir.
func newFileCache(dir string) *fileCache {
	fc := new(fileCache)
	fc.dirPath = dir
	return fc
}

// fileCache persists compiled functions into dirPath.
//
// Note: this can be expanded to do binary signing/verification, set TTL on each entry, etc.
type fileCache struct {
// dirPath is the directory under which each entry is stored as one file.
dirPath string
// mux serializes access to the directory: Get holds the read lock until the
// returned content is closed, while Add and Delete take the write lock.
mux sync.RWMutex
}

// fileReadCloser is the content returned by fileCache.Get. It embeds the
// opened cache file and overrides Close so that closing the content also
// releases the read lock on the owning fileCache.
type fileReadCloser struct {
*os.File
// fc is the cache whose read lock is held on behalf of this reader.
fc *fileCache
}

// path returns the file location of the entry for key: the hex encoding of
// the key joined onto dirPath.
func (fc *fileCache) path(key Key) string {
	name := hex.EncodeToString(key[:])
	return path.Join(fc.dirPath, name)
}

// Get opens the cache file for key and returns it as content with ok=true.
// A missing file is not an error: it yields ok=false with a nil err. Any
// other failure to open the file is returned as err.
//
// On success the read lock stays held and is only released when the returned
// content is closed; on every other path it is released before returning.
func (fc *fileCache) Get(key Key) (content io.ReadCloser, ok bool, err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.RLock()
	handedOff := false
	defer func() {
		// Keep the lock only when its release was delegated to content.Close().
		if !handedOff {
			fc.mux.RUnlock()
		}
	}()

	file, openErr := os.Open(fc.path(key))
	switch {
	case openErr == nil:
		// Unlock is done inside the content.Close() at the call site.
		handedOff = true
		return &fileReadCloser{File: file, fc: fc}, true, nil
	case errors.Is(openErr, os.ErrNotExist):
		return nil, false, nil
	default:
		return nil, false, openErr
	}
}

// Close closes the underlying os.File and releases the read lock on the
// fileCache that was taken when this reader was handed out.
//
// The lock is released at most once. Calling RUnlock on an RWMutex that is
// not read-locked is a fatal runtime error which cannot be recovered, so a
// repeated Close only returns the error from the already-closed file instead
// of crashing the process.
//
// Close is not safe to call concurrently from multiple goroutines.
func (f *fileReadCloser) Close() (err error) {
	if fc := f.fc; fc != nil {
		// Forget the cache first so that a later Close cannot unlock again.
		f.fc = nil
		defer fc.mux.RUnlock()
	}
	return f.File.Close()
}

// Add writes content to the cache file for key, replacing any existing entry.
//
// The write lock is held for the whole operation. If the file cannot be
// created, the error is returned as-is. If copying the content or closing the
// file fails, that error is returned and the partially written file is
// removed (best effort), so that a later Get cannot serve a truncated entry.
func (fc *fileCache) Add(key Key, content io.Reader) (err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.Lock()
	defer fc.mux.Unlock()

	entryPath := fc.path(key)
	file, err := os.Create(entryPath)
	if err != nil {
		return
	}
	defer func() {
		// A failed Close can mean the data never reached the disk, so report
		// it unless an earlier error is already being returned.
		if closeErr := file.Close(); err == nil {
			err = closeErr
		}
		if err != nil {
			// Best effort: the original error is more useful to the caller
			// than a failure to clean up, so the Remove error is dropped.
			_ = os.Remove(entryPath)
		}
	}()
	_, err = io.Copy(file, content)
	return
}

// Delete removes the cache file for key while holding the write lock.
// Deleting an entry that does not exist is not an error.
func (fc *fileCache) Delete(key Key) (err error) {
	// TODO: take lock per key for more efficiency vs the complexity of impl.
	fc.mux.Lock()
	defer fc.mux.Unlock()

	if rmErr := os.Remove(fc.path(key)); rmErr != nil && !errors.Is(rmErr, os.ErrNotExist) {
		return rmErr
	}
	return nil
}
135 changes: 135 additions & 0 deletions internal/compilationcache/file_cache_test.go
@@ -0,0 +1,135 @@
package compilationcache

import (
"bytes"
"io"
"os"
"testing"

"github.com/tetratelabs/wazero/internal/testing/require"
)

// TestFileReadCloser_Close verifies that the read lock taken by Get is held
// until the returned content is closed.
func TestFileReadCloser_Close(t *testing.T) {
	cache := newFileCache(t.TempDir())
	k := Key{1, 2, 3}

	require.NoError(t, cache.Add(k, bytes.NewReader([]byte{1, 2, 3, 4})))

	content, found, err := cache.Get(k)
	require.NoError(t, err)
	require.True(t, found)

	// The content is still open, so the read lock is held and the write lock
	// cannot be acquired.
	locked := cache.mux.TryLock()
	require.False(t, locked)

	// Closing the content releases the read lock, so the write lock is now
	// available.
	require.NoError(t, content.Close())
	locked = cache.mux.TryLock()
	require.True(t, locked)
}

// TestFileCache_Add verifies that Add stores the given content under the
// key's path, both for a fresh key and for a key that already has an entry.
func TestFileCache_Add(t *testing.T) {
	fc := newFileCache(t.TempDir())

	t.Run("not exist", func(t *testing.T) {
		content := []byte{1, 2, 3, 4, 5}
		id := Key{1, 2, 3, 4, 5, 6, 7}
		require.NoError(t, fc.Add(id, bytes.NewReader(content)))

		// Ensures that file exists.
		cached, err := os.ReadFile(fc.path(id))
		require.NoError(t, err)

		// Check if the saved content is the same as the given one.
		require.Equal(t, content, cached)
	})

	t.Run("already exists", func(t *testing.T) {
		// The stale entry is different from, and longer than, the new content
		// so that the assertion below fails unless Add really replaces the
		// old bytes and truncates the leftover tail.
		stale := []byte{9, 9, 9, 9, 9, 9, 9, 9}
		content := []byte{1, 2, 3, 4, 5}

		id := Key{1, 2, 3}

		// Writes the pre-existing file for the same ID.
		require.NoError(t, os.WriteFile(fc.path(id), stale, 0o600))

		require.NoError(t, fc.Add(id, bytes.NewReader(content)))

		// Ensures that file exists.
		cached, err := os.ReadFile(fc.path(id))
		require.NoError(t, err)

		// Check if the saved content is the same as the given one.
		require.Equal(t, content, cached)
	})
}

// TestFileCache_Delete verifies that Delete removes an existing entry and
// treats a missing entry as success.
func TestFileCache_Delete(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("non-exist", func(t *testing.T) {
		require.NoError(t, cache.Delete(Key{0}))
	})

	t.Run("exist", func(t *testing.T) {
		k := Key{1, 2, 3}
		entry := cache.path(k)

		created, err := os.Create(entry)
		require.NoError(t, err)
		require.NoError(t, created.Close())

		// Sanity check: the entry can be opened before deletion.
		opened, err := os.Open(entry)
		require.NoError(t, err)
		require.NoError(t, opened.Close())

		// Delete the cache.
		require.NoError(t, cache.Delete(k))

		// The entry must be gone afterwards.
		_, err = os.Open(entry)
		require.ErrorIs(t, err, os.ErrNotExist)
	})
}

// TestFileCache_Get verifies that Get returns the stored bytes for an
// existing entry and reports ok=false without an error for a missing one.
func TestFileCache_Get(t *testing.T) {
	cache := newFileCache(t.TempDir())

	t.Run("exist", func(t *testing.T) {
		want := []byte{1, 2, 3, 4, 5}
		k := Key{1, 2, 3}

		// Seed the entry directly on disk, bypassing Add.
		created, err := os.Create(cache.path(k))
		require.NoError(t, err)
		_, err = created.Write(want)
		require.NoError(t, err)
		require.NoError(t, created.Close())

		reader, found, err := cache.Get(k)
		require.NoError(t, err)
		require.True(t, found)
		defer func() {
			require.NoError(t, reader.Close())
		}()

		got, err := io.ReadAll(reader)
		require.NoError(t, err)

		require.Equal(t, want, got)
	})

	t.Run("not exist", func(t *testing.T) {
		_, found, err := cache.Get(Key{0xf})
		// Non-exist should not be error.
		require.NoError(t, err)
		require.False(t, found)
	})
}

0 comments on commit 3b32c20

Please sign in to comment.