cache: update to Go tip as of April 2023

As of commit 0fd6ae548f550bdbee4a434285ff052fb9dc7417. Besides rewriting import paths, we replaced base.Fatalf with log.Fatalf and cfg.Getenv with os.Getenv, adding a note about the difference in behavior. The old code already had this limitation. We hadn't updated this package since it was first copied in 2018, so quite a few changes have taken place. Of note, upstream now supports mmap; we leave that out for now, both to keep this commit simple and to defer adding the mmap package to another patch. A minor API change is that Trim now returns an error. While technically a breaking change, the vast majority of users simply call the API without expecting a result, and that will continue to work as it did before. Checking for errors on trim is useful, which is why upstream added it. Finally, the cache now uses lockedfile, which we already copied over.
rogpeppe · May 15, 2023 · 5821053 · 5821053
1 parent eeed7e8
commit 5821053
Show file tree

Hide file tree

Showing 6 changed files with 231 additions and 217 deletions.
diff --git a/cache/cache.go b/cache/cache.go
@@ -12,12 +12,14 @@ import (
 	"errors"
 	"fmt"
 	"io"
-	"io/ioutil"
+	"io/fs"
 	"os"
 	"path/filepath"
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/rogpeppe/go-internal/lockedfile"
 )
 
 // An ActionID is a cache action key, the hash of a complete description of a
@@ -31,7 +33,6 @@ type OutputID [HashSize]byte
 // A Cache is a package cache, backed by a file system directory tree.
 type Cache struct {
 	dir string
-	log *os.File
 	now func() time.Time
 }
 
@@ -52,21 +53,16 @@ func Open(dir string) (*Cache, error) {
 		return nil, err
 	}
 	if !info.IsDir() {
-		return nil, &os.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
+		return nil, &fs.PathError{Op: "open", Path: dir, Err: fmt.Errorf("not a directory")}
 	}
 	for i := 0; i < 256; i++ {
 		name := filepath.Join(dir, fmt.Sprintf("%02x", i))
-		if err := os.MkdirAll(name, 0o777); err != nil {
+		if err := os.MkdirAll(name, 0777); err != nil {
 			return nil, err
 		}
 	}
-	f, err := os.OpenFile(filepath.Join(dir, "log.txt"), os.O_WRONLY|os.O_APPEND|os.O_CREATE, 0o666)
-	if err != nil {
-		return nil, err
-	}
 	c := &Cache{
 		dir: dir,
-		log: f,
 		now: time.Now,
 	}
 	return c, nil
@@ -77,7 +73,22 @@ func (c *Cache) fileName(id [HashSize]byte, key string) string {
 	return filepath.Join(c.dir, fmt.Sprintf("%02x", id[0]), fmt.Sprintf("%x", id)+"-"+key)
 }
 
-var errMissing = errors.New("cache entry not found")
+// An entryNotFoundError indicates that a cache entry was not found, with an
+// optional underlying reason.
+type entryNotFoundError struct {
+	Err error
+}
+
+func (e *entryNotFoundError) Error() string {
+	if e.Err == nil {
+		return "cache entry not found"
+	}
+	return fmt.Sprintf("cache entry not found: %v", e.Err)
+}
+
+func (e *entryNotFoundError) Unwrap() error {
+	return e.Err
+}
 
 const (
 	// action entry file is "v1 <hex id> <hex out> <decimal size space-padded to 20 bytes> <unixnano space-padded to 20 bytes>\n"
@@ -96,6 +107,8 @@ const (
 // GODEBUG=gocacheverify=1.
 var verify = false
 
+var errVerifyMode = errors.New("gocacheverify=1")
+
 // DebugTest is set when GODEBUG=gocachetest=1 is in the environment.
 var DebugTest = false
 
@@ -124,7 +137,7 @@ func initEnv() {
 // saved file for that output ID is still available.
 func (c *Cache) Get(id ActionID) (Entry, error) {
 	if verify {
-		return Entry{}, errMissing
+		return Entry{}, &entryNotFoundError{Err: errVerifyMode}
 	}
 	return c.get(id)
 }
@@ -137,52 +150,62 @@ type Entry struct {
 
 // get is Get but does not respect verify mode, so that Put can use it.
 func (c *Cache) get(id ActionID) (Entry, error) {
-	missing := func() (Entry, error) {
-		fmt.Fprintf(c.log, "%d miss %x\n", c.now().Unix(), id)
-		return Entry{}, errMissing
+	missing := func(reason error) (Entry, error) {
+		return Entry{}, &entryNotFoundError{Err: reason}
 	}
 	f, err := os.Open(c.fileName(id, "a"))
 	if err != nil {
-		return missing()
+		return missing(err)
 	}
 	defer f.Close()
 	entry := make([]byte, entrySize+1) // +1 to detect whether f is too long
-	if n, err := io.ReadFull(f, entry); n != entrySize || err != io.ErrUnexpectedEOF {
-		return missing()
+	if n, err := io.ReadFull(f, entry); n > entrySize {
+		return missing(errors.New("too long"))
+	} else if err != io.ErrUnexpectedEOF {
+		if err == io.EOF {
+			return missing(errors.New("file is empty"))
+		}
+		return missing(err)
+	} else if n < entrySize {
+		return missing(errors.New("entry file incomplete"))
 	}
 	if entry[0] != 'v' || entry[1] != '1' || entry[2] != ' ' || entry[3+hexSize] != ' ' || entry[3+hexSize+1+hexSize] != ' ' || entry[3+hexSize+1+hexSize+1+20] != ' ' || entry[entrySize-1] != '\n' {
-		return missing()
+		return missing(errors.New("invalid header"))
 	}
 	eid, entry := entry[3:3+hexSize], entry[3+hexSize:]
 	eout, entry := entry[1:1+hexSize], entry[1+hexSize:]
 	esize, entry := entry[1:1+20], entry[1+20:]
 	etime, entry := entry[1:1+20], entry[1+20:]
 	var buf [HashSize]byte
-	if _, err := hex.Decode(buf[:], eid); err != nil || buf != id {
-		return missing()
+	if _, err := hex.Decode(buf[:], eid); err != nil {
+		return missing(fmt.Errorf("decoding ID: %v", err))
+	} else if buf != id {
+		return missing(errors.New("mismatched ID"))
 	}
 	if _, err := hex.Decode(buf[:], eout); err != nil {
-		return missing()
+		return missing(fmt.Errorf("decoding output ID: %v", err))
 	}
 	i := 0
 	for i < len(esize) && esize[i] == ' ' {
 		i++
 	}
 	size, err := strconv.ParseInt(string(esize[i:]), 10, 64)
-	if err != nil || size < 0 {
-		return missing()
+	if err != nil {
+		return missing(fmt.Errorf("parsing size: %v", err))
+	} else if size < 0 {
+		return missing(errors.New("negative size"))
 	}
 	i = 0
 	for i < len(etime) && etime[i] == ' ' {
 		i++
 	}
 	tm, err := strconv.ParseInt(string(etime[i:]), 10, 64)
-	if err != nil || size < 0 {
-		return missing()
+	if err != nil {
+		return missing(fmt.Errorf("parsing timestamp: %v", err))
+	} else if tm < 0 {
+		return missing(errors.New("negative timestamp"))
 	}
 
-	fmt.Fprintf(c.log, "%d get %x\n", c.now().Unix(), id)
-
 	c.used(c.fileName(id, "a"))
 
 	return Entry{buf, size, time.Unix(0, tm)}, nil
@@ -197,8 +220,11 @@ func (c *Cache) GetFile(id ActionID) (file string, entry Entry, err error) {
 	}
 	file = c.OutputFile(entry.OutputID)
 	info, err := os.Stat(file)
-	if err != nil || info.Size() != entry.Size {
-		return "", Entry{}, errMissing
+	if err != nil {
+		return "", Entry{}, &entryNotFoundError{Err: err}
+	}
+	if info.Size() != entry.Size {
+		return "", Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
 	}
 	return file, entry, nil
 }
@@ -211,13 +237,35 @@ func (c *Cache) GetBytes(id ActionID) ([]byte, Entry, error) {
 	if err != nil {
 		return nil, entry, err
 	}
-	data, _ := ioutil.ReadFile(c.OutputFile(entry.OutputID))
+	data, _ := os.ReadFile(c.OutputFile(entry.OutputID))
 	if sha256.Sum256(data) != entry.OutputID {
-		return nil, entry, errMissing
+		return nil, entry, &entryNotFoundError{Err: errors.New("bad checksum")}
 	}
 	return data, entry, nil
 }
 
+/*
+TODO: consider copying cmd/go/internal/mmap over for this method
+
+// GetMmap looks up the action ID in the cache and returns
+// the corresponding output bytes.
+// GetMmap should only be used for data that can be expected to fit in memory.
+func (c *Cache) GetMmap(id ActionID) ([]byte, Entry, error) {
+	entry, err := c.Get(id)
+	if err != nil {
+		return nil, entry, err
+	}
+	md, err := mmap.Mmap(c.OutputFile(entry.OutputID))
+	if err != nil {
+		return nil, Entry{}, err
+	}
+	if int64(len(md.Data)) != entry.Size {
+		return nil, Entry{}, &entryNotFoundError{Err: errors.New("file incomplete")}
+	}
+	return md.Data, entry, nil
+}
+*/
+
 // OutputFile returns the name of the cache file storing output with the given OutputID.
 func (c *Cache) OutputFile(out OutputID) string {
 	file := c.fileName(out, "d")
@@ -261,16 +309,23 @@ func (c *Cache) used(file string) {
 }
 
 // Trim removes old cache entries that are likely not to be reused.
-func (c *Cache) Trim() {
+func (c *Cache) Trim() error {
 	now := c.now()
 
 	// We maintain in dir/trim.txt the time of the last completed cache trim.
 	// If the cache has been trimmed recently enough, do nothing.
 	// This is the common case.
-	data, _ := ioutil.ReadFile(filepath.Join(c.dir, "trim.txt"))
-	t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64)
-	if err == nil && now.Sub(time.Unix(t, 0)) < trimInterval {
-		return
+	// If the trim file is corrupt, detected if the file can't be parsed, or the
+	// trim time is too far in the future, attempt the trim anyway. It's possible that
+	// the cache was full when the corruption happened. Attempting a trim on
+	// an empty cache is cheap, so there wouldn't be a big performance hit in that case.
+	if data, err := lockedfile.Read(filepath.Join(c.dir, "trim.txt")); err == nil {
+		if t, err := strconv.ParseInt(strings.TrimSpace(string(data)), 10, 64); err == nil {
+			lastTrim := time.Unix(t, 0)
+			if d := now.Sub(lastTrim); d < trimInterval && d > -mtimeInterval {
+				return nil
+			}
+		}
 	}
 
 	// Trim each of the 256 subdirectories.
@@ -282,7 +337,15 @@ func (c *Cache) Trim() {
 		c.trimSubdir(subdir, cutoff)
 	}
 
-	ioutil.WriteFile(filepath.Join(c.dir, "trim.txt"), []byte(fmt.Sprintf("%d", now.Unix())), 0o666)
+	// A failed write is reported to the caller but is otherwise harmless: without
+	// a complete timestamp the cache appears older than it is, and we trim it again next time.
+	var b bytes.Buffer
+	fmt.Fprintf(&b, "%d", now.Unix())
+	if err := lockedfile.Write(filepath.Join(c.dir, "trim.txt"), &b, 0666); err != nil {
+		return err
+	}
+
+	return nil
 }
 
 // trimSubdir trims a single cache subdirectory.
@@ -326,7 +389,7 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
 	// in verify mode we are double-checking that the cache entries
 	// are entirely reproducible. As just noted, this may be unrealistic
 	// in some cases but the check is also useful for shaking out real bugs.
-	entry := []byte(fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano()))
+	entry := fmt.Sprintf("v1 %x %x %20d %20d\n", id, out, size, time.Now().UnixNano())
 	if verify && allowVerify {
 		old, err := c.get(id)
 		if err == nil && (old.OutputID != out || old.Size != size) {
@@ -336,13 +399,35 @@ func (c *Cache) putIndexEntry(id ActionID, out OutputID, size int64, allowVerify
 		}
 	}
 	file := c.fileName(id, "a")
-	if err := ioutil.WriteFile(file, entry, 0o666); err != nil {
+
+	// Copy file to cache directory.
+	mode := os.O_WRONLY | os.O_CREATE
+	f, err := os.OpenFile(file, mode, 0666)
+	if err != nil {
+		return err
+	}
+	_, err = f.WriteString(entry)
+	if err == nil {
+		// Truncate the file only *after* writing it.
+		// (This should be a no-op, but truncate just in case of previous corruption.)
+		//
+		// This differs from os.WriteFile, which truncates to 0 *before* writing
+		// via os.O_TRUNC. Truncating only after writing ensures that a second write
+		// of the same content to the same file is idempotent, and does not — even
+		// temporarily! — undo the effect of the first write.
+		err = f.Truncate(int64(len(entry)))
+	}
+	if closeErr := f.Close(); err == nil {
+		err = closeErr
+	}
+	if err != nil {
+		// TODO(bcmills): This Remove potentially races with another go command writing to file.
+		// Can we eliminate it?
 		os.Remove(file)
 		return err
 	}
 	os.Chtimes(file, c.now(), c.now()) // mainly for tests
 
-	fmt.Fprintf(c.log, "%d put %x %x %d\n", c.now().Unix(), id, out, size)
 	return nil
 }
 
@@ -413,7 +498,7 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
 	if err == nil && info.Size() > size { // shouldn't happen but fix in case
 		mode |= os.O_TRUNC
 	}
-	f, err := os.OpenFile(name, mode, 0o666)
+	f, err := os.OpenFile(name, mode, 0666)
 	if err != nil {
 		return err
 	}
@@ -471,3 +556,15 @@ func (c *Cache) copyFile(file io.ReadSeeker, out OutputID, size int64) error {
 
 	return nil
 }
+
+// FuzzDir returns a subdirectory within the cache for storing fuzzing data.
+// The subdirectory may not exist.
+//
+// This directory is managed by the internal/fuzz package. Files in this
+// directory aren't removed by the 'go clean -cache' command or by Trim.
+// They may be removed with 'go clean -fuzzcache'.
+//
+// TODO(#48526): make Trim remove unused files from this directory.
+func (c *Cache) FuzzDir() string {
+	return filepath.Join(c.dir, "fuzz")
+}