Skip to content

Commit

Permalink
cache: update to Go tip as of April 2023
Browse files Browse the repository at this point in the history
As of commit 0fd6ae548f550bdbee4a434285ff052fb9dc7417.

Besides rewriting import paths, we swapped base.Fatalf with log.Fatalf,
and replaced cfg.Getenv with os.Getenv, adding a note about the
difference in behavior. The old code already had this limitation.

We hadn't updated this package since it was first copied in 2018,
so quite a few changes have taken place.
Of note, upstream now supports mmap; we leave that out for now to keep
this commit simple, deferring the addition of the mmap package to another patch.

A minor API change is that Trim now returns an error.
While technically a breaking change, the vast majority of users will be
simply calling the API without expecting a result, and that will
continue to work like it did before.
Checking for errors on trim is useful, which is why upstream added it.

Finally, the cache now uses lockedfile, which we already copied over.
  • Loading branch information
mvdan committed May 15, 2023
1 parent eeed7e8 commit 5821053
Show file tree
Hide file tree
Showing 6 changed files with 231 additions and 217 deletions.
179 changes: 138 additions & 41 deletions cache/cache.go
Expand Up @@ -12,12 +12,14 @@ import (
"errors"
"fmt"
"io"
"io/ioutil"
"io/fs"
"os"
"path/filepath"
"strconv"
"strings"
"time"

"github.com/rogpeppe/go-internal/lockedfile"
)

// An ActionID is a cache action key, the hash of a complete description of a
Expand All @@ -31,7 +33,6 @@ type OutputID [HashSize]byte
// A Cache is a package cache, backed by a file system directory tree.
type Cache struct {
// dir is the root directory of the cache; entry paths are built by joining onto it.
dir string
// log is the handle for dir/log.txt, which Open opens in append mode.
// NOTE(review): this page is a diff and the Open hunk also shows a Cache
// literal without this field — confirm whether the field is being removed.
log *os.File
// now supplies the current time; Open sets it to time.Now.
now func() time.Time
}

Expand All @@ -52,21 +53,16 @@ func Open(dir string) (*Cache, error) {
return nil, err
}
if !info.IsDir() {
return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
}
for i := 0; i < 256; i++ {
name := filepath.Join(dir, fmt.Sprintf("%02x", i))
if err := os.MkdirAll(name, 0o777); err != nil {
if err := os.MkdirAll(name, 0777); err != nil {
return nil, err
}
}
f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666)
if err != nil {
return nil, err
}
c := &Cache{
dir: dir,
log: f,
now: time.Now,
}
return c, nil
Expand All @@ -77,7 +73,22 @@ func (c *Cache) fileName(id [HashSize]byte, key string) string {
return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
}

var errMissing = errors.New("cache entry not found")
// entryNotFoundError is the error reported for a cache miss. Err, if non-nil,
// carries the underlying reason the entry could not be found.
type entryNotFoundError struct {
	Err error
}

// Error returns the fixed miss message, followed by the underlying reason
// when one was recorded.
func (e *entryNotFoundError) Error() string {
	if reason := e.Err; reason != nil {
		return fmt.Sprintf("cache entry not found: %v", reason)
	}
	return "cache entry not found"
}

// Unwrap exposes the underlying reason (possibly nil) so that callers can
// inspect it with errors.Is and errors.As.
func (e *entryNotFoundError) Unwrap() error { return e.Err }

const (
// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
Expand All @@ -96,6 +107,8 @@ const (
// GODEBUG=gocacheverify=1.
var verify = false

// errVerifyMode is the reason Get attaches to the miss it reports when verify is set.
var errVerifyMode = errors.New("gocacheverify=1")

// DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
var DebugTest = false

Expand Down Expand Up @@ -124,7 +137,7 @@ func initEnv() {
// saved file for that output ID is still available.
func (c *Cache) Get(id ActionID) (Entry, error) {
// When verify is set, report a miss without consulting the cache at all.
if verify {
// NOTE(review): two consecutive returns appear here; they look like the
// before/after lines of the diff — confirm only the second one remains.
return Entry{}, errMissing
return Entry{}, &entryNotFoundError{Err: errVerifyMode}
}
// Otherwise perform the real lookup.
return c.get(id)
}
Expand All @@ -137,52 +150,62 @@ type Entry struct {

// get is Get but does not respect verify mode, so that Put can use it.
func (c *Cache) get(id ActionID) (Entry, error) {
missing := func() (Entry, error) {
fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id)
return Entry{}, errMissing
missing := func(reason error) (Entry, error) {
return Entry{}, &entryNotFoundError{Err: reason}
}
f, err := os.Open(c.fileName(id, "a"))
if err != nil {
return missing()
return missing(err)
}
defer f.Close()
entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
return missing()
if n, err := io.ReadFull(f, entry); n > entrySize {
return missing(errors.New("too long"))
} else if err != io.ErrUnexpectedEOF {
if err == io.EOF {
return missing(errors.New("file is empty"))
}
return missing(err)
} else if n < entrySize {
return missing(errors.New("entry file incomplete"))
}
if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
return missing()
return missing(errors.New("invalid header"))
}
eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
esize, entry := entry[1:1+20], entry[1+20:]
etime, entry := entry[1:1+20], entry[1+20:]
var buf [HashSize]byte
if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
return missing()
if _, err := hex.Decode(buf[:], eid); err != nil {
return missing(fmt.Errorf("decoding ID: %v", err))
} else if buf != id {
return missing(errors.New("mismatched ID"))
}
if _, err := hex.Decode(buf[:], eout); err != nil {
return missing()
return missing(fmt.Errorf("decoding output ID: %v", err))
}
i := 0
for i < len(esize) && esize[i] == ' ' {
i++
}
size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
if err != nil || size < 0 {
return missing()
if err != nil {
return missing(fmt.Errorf("parsing size: %v", err))
} else if size < 0 {
return missing(errors.New("negative size"))
}
i = 0
for i < len(etime) && etime[i] == ' ' {
i++
}
tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
if err != nil || size < 0 {
return missing()
if err != nil {
return missing(fmt.Errorf("parsing timestamp: %v", err))
} else if tm < 0 {
return missing(errors.New("negative timestamp"))
}

fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id)

c.used(c.fileName(id, "a"))

return Entry{buf, size, time.Unix(0, tm)}, nil
Expand All @@ -197,8 +220,11 @@ func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
}
file = c.OutputFile(entry.OutputID)
info, err := os.Stat(file)
if err != nil || info.Size() != entry.Size {
return "", Entry{}, errMissing
if err != nil {
return "", Entry{}, &entryNotFoundError{Err: err}
}
if info.Size() != entry.Size {
return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
}
return file, entry, nil
}
Expand All @@ -211,13 +237,35 @@ func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
if err != nil {
return nil, entry, err
}
data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
if sha256.Sum256(data) != entry.OutputID {
return nil, entry, errMissing
return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
}
return data, entry, nil
}

/*
TODO: consider copying cmd/go/internal/mmap over for this method
// GetMmap looks up the action ID in the cache and returns
// the corresponding output bytes.
// GetMmap should only be used for data that can be expected to fit in memory.
func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
entry, err := c.Get(id)
if err != nil {
return nil, entry, err
}
md, err := mmap.Mmap(c.OutputFile(entry.OutputID))
if err != nil {
return nil, Entry{}, err
}
if int64(len(md.Data)) != entry.Size {
return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
}
return md.Data, entry, nil
}
*/

// OutputFile returns the name of the cache file storing output with the given OutputID.
func (c *Cache) OutputFile(out OutputID) string {
file := c.fileName(out, "d")
Expand Down Expand Up @@ -261,16 +309,23 @@ func (c *Cache) used(file string) {
}

// Trim removes old cache entries that are likely not to be reused.
func (c *Cache) Trim() {
func (c *Cache) Trim() error {
now := c.now()

// We maintain in dir/trim.txt the time of the last completed cache trim.
// If the cache has been trimmed recently enough, do nothing.
// This is the common case.
data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
return
// If the trim file is corrupt, detected if the file can't be parsed, or the
// trim time is too far in the future, attempt the trim anyway. It's possible that
// the cache was full when the corruption happened. Attempting a trim on
// an empty cache is cheap, so there wouldn't be a big performance hit in that case.
if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
lastTrim := time.Unix(t, 0)
if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
return nil
}
}
}

// Trim each of the 256 subdirectories.
Expand All @@ -282,7 +337,15 @@ func (c *Cache) Trim() {
c.trimSubdir(subdir, cutoff)
}

ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0o666)
// Report a failed write to the caller. Note that if we don't write the complete
// timestamp, the cache will appear older than it is, and we'll trim it again next time.
var b bytes.Buffer
fmt.Fprintf(&b, "%d", now.Unix())
if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil {
return err
}

return nil
}

// trimSubdir trims a single cache subdirectory.
Expand Down Expand Up @@ -326,7 +389,7 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
// in verify mode we are double-checking that the cache entries
// are entirely reproducible. As just noted, this may be unrealistic
// in some cases but the check is also useful for shaking out real bugs.
entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
if verify && allowVerify {
old, err := c.get(id)
if err == nil && (old.OutputID != out || old.Size != size) {
Expand All @@ -336,13 +399,35 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
}
}
file := c.fileName(id, "a")
if err := ioutil.WriteFile(file, entry, 0o666); err != nil {

// Copy file to cache directory.
mode := os.O_WRONLY | os.O_CREATE
f, err := os.OpenFile(file, mode, 0666)
if err != nil {
return err
}
_, err = f.WriteString(entry)
if err == nil {
// Truncate the file only *after* writing it.
// (This should be a no-op, but truncate just in case of previous corruption.)
//
// This differs from os.WriteFile, which truncates to 0 *before* writing
// via os.O_TRUNC. Truncating only after writing ensures that a second write
// of the same content to the same file is idempotent, and does not — even
// temporarily! — undo the effect of the first write.
err = f.Truncate(int64(len(entry)))
}
if closeErr := f.Close(); err == nil {
err = closeErr
}
if err != nil {
// TODO(bcmills): This Remove potentially races with another go command writing to file.
// Can we eliminate it?
os.Remove(file)
return err
}
os.Chtimes(file, c.now(), c.now()) // mainly for tests

fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size)
return nil
}

Expand Down Expand Up @@ -413,7 +498,7 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
if err == nil && info.Size() > size { // shouldn't happen but fix in case
mode |= os.O_TRUNC
}
f, err := os.OpenFile(name, mode, 0o666)
f, err := os.OpenFile(name, mode, 0666)
if err != nil {
return err
}
Expand Down Expand Up @@ -471,3 +556,15 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {

return nil
}

// FuzzDir reports the path of the subdirectory within the cache that holds
// fuzzing data. The path is only computed here, so the subdirectory may not
// exist.
//
// The internal/fuzz package manages this directory. Neither Trim nor
// 'go clean -cache' removes files from it; 'go clean -fuzzcache' may.
//
// TODO(#48526): make Trim remove unused files from this directory.
func (c *Cache) FuzzDir() string {
	const subdir = "fuzz"
	return filepath.Join(c.dir, subdir)
}

0 comments on commit 5821053

Please sign in to comment.