diff --git a/copy/compression.go b/copy/compression.go
index 52556304f..c8feac343 100644
--- a/copy/compression.go
+++ b/copy/compression.go
@@ -11,6 +11,7 @@ import (
 	"github.com/containers/image/v5/pkg/compression"
 	compressiontypes "github.com/containers/image/v5/pkg/compression/types"
 	"github.com/containers/image/v5/types"
+	chunkedToc "github.com/containers/storage/pkg/chunked/toc"
 	imgspecv1 "github.com/opencontainers/image-spec/specs-go/v1"
 	"github.com/sirupsen/logrus"
 )
@@ -308,6 +309,15 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
 		// No useful information
 	case bpcOpCompressUncompressed:
 		c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
+		if d.uploadedAnnotations != nil {
+			tocDigest, err := chunkedToc.GetTOCDigest(d.uploadedAnnotations)
+			if err != nil {
+				return fmt.Errorf("parsing just-created compression annotations: %w", err)
+			}
+			if tocDigest != nil {
+				c.blobInfoCache.RecordTOCUncompressedPair(*tocDigest, srcInfo.Digest)
+			}
+		}
 	case bpcOpDecompressCompressed:
 		c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
 	case bpcOpRecompressCompressed, bpcOpPreserveCompressed:
diff --git a/internal/blobinfocache/blobinfocache.go b/internal/blobinfocache/blobinfocache.go
index 893aa959d..60f8e6a02 100644
--- a/internal/blobinfocache/blobinfocache.go
+++ b/internal/blobinfocache/blobinfocache.go
@@ -27,6 +27,13 @@ func (bic *v1OnlyBlobInfoCache) Open() {
 func (bic *v1OnlyBlobInfoCache) Close() {
 }
 
+func (bic *v1OnlyBlobInfoCache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest {
+	return ""
+}
+
+func (bic *v1OnlyBlobInfoCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
+}
+
 func (bic *v1OnlyBlobInfoCache) RecordDigestCompressorName(anyDigest digest.Digest, compressorName string) {
 }
 
diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go
index c9e4aaa48..5298bdc3e 100644
--- a/internal/blobinfocache/types.go
+++ b/internal/blobinfocache/types.go
@@ -26,6 +26,15 @@ type BlobInfoCache2 interface {
 	// Close destroys state created by Open().
 	Close()
 
+	// UncompressedDigestForTOC returns an uncompressed digest corresponding to tocDigest.
+	// Returns "" if the uncompressed digest is unknown.
+	UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest
+	// RecordTOCUncompressedPair records that tocDigest corresponds to uncompressed.
+	// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
+	// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
+	// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
+	RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest)
+
 	// RecordDigestCompressorName records a compressor for the blob with the specified digest,
 	// or Uncompressed or UnknownCompression.
 	// WARNING: Only call this with LOCALLY VERIFIED data; don’t record a compressor for a
diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go
index 07230f873..dfda9d19c 100644
--- a/pkg/blobinfocache/boltdb/boltdb.go
+++ b/pkg/blobinfocache/boltdb/boltdb.go
@@ -25,6 +25,8 @@ var (
 
 	// uncompressedDigestBucket stores a mapping from any digest to an uncompressed digest.
 	uncompressedDigestBucket = []byte("uncompressedDigest")
+	// uncompressedDigestByTOCBucket stores a mapping from a TOC digest to an uncompressed digest.
+	uncompressedDigestByTOCBucket = []byte("uncompressedDigestByTOC")
 	// digestCompressorBucket stores a mapping from any digest to a compressor, or blobinfocache.Uncompressed (not blobinfocache.UnknownCompression).
 	// It may not exist in caches created by older versions, even if uncompressedDigestBucket is present.
 	digestCompressorBucket = []byte("digestCompressor")
@@ -243,6 +245,56 @@ func (bdc *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre
 	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 }
 
+// UncompressedDigestForTOC returns an uncompressed digest corresponding to tocDigest.
+// Returns "" if the uncompressed digest is unknown.
+func (bdc *cache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest {
+	var res digest.Digest
+	if err := bdc.view(func(tx *bolt.Tx) error {
+		if b := tx.Bucket(uncompressedDigestByTOCBucket); b != nil {
+			if uncompressedBytes := b.Get([]byte(tocDigest.String())); uncompressedBytes != nil {
+				d, err := digest.Parse(string(uncompressedBytes))
+				if err == nil {
+					res = d
+					return nil
+				}
+				// FIXME? Log err (but throttle the log volume on repeated accesses)?
+			}
+		}
+		res = ""
+		return nil
+	}); err != nil { // Including os.IsNotExist(err)
+		return "" // FIXME? Log err (but throttle the log volume on repeated accesses)?
+	}
+	return res
+}
+
+// RecordTOCUncompressedPair records that tocDigest corresponds to uncompressed.
+// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
+// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
+// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
+func (bdc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
+	_ = bdc.update(func(tx *bolt.Tx) error {
+		b, err := tx.CreateBucketIfNotExists(uncompressedDigestByTOCBucket)
+		if err != nil {
+			return err
+		}
+		key := []byte(tocDigest.String())
+		if previousBytes := b.Get(key); previousBytes != nil {
+			previous, err := digest.Parse(string(previousBytes))
+			if err != nil {
+				return err
+			}
+			if previous != uncompressed {
+				logrus.Warnf("Uncompressed digest for blob with TOC %q previously recorded as %q, now %q", tocDigest, previous, uncompressed)
+			}
+		}
+		if err := b.Put(key, []byte(uncompressed.String())); err != nil {
+			return err
+		}
+		return nil
+	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
+}
+
 // RecordDigestCompressorName records that the blob with digest anyDigest was compressed with the specified
 // compressor, or is blobinfocache.Uncompressed.
 // WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
diff --git a/pkg/blobinfocache/internal/test/test.go b/pkg/blobinfocache/internal/test/test.go index 66d6d0c4a..9d932f51c 100644 --- a/pkg/blobinfocache/internal/test/test.go +++ b/pkg/blobinfocache/internal/test/test.go @@ -43,6 +43,8 @@ func GenericCache(t *testing.T, newTestCache func(t *testing.T) blobinfocache.Bl }{ {"UncompressedDigest", testGenericUncompressedDigest}, {"RecordDigestUncompressedPair", testGenericRecordDigestUncompressedPair}, + {"UncompressedDigestForTOC", testGenericUncompressedDigestForTOC}, + {"RecordTOCUncompressedPair", testGenericRecordTOCUncompressedPair}, {"RecordKnownLocations", testGenericRecordKnownLocations}, {"CandidateLocations", testGenericCandidateLocations}, {"CandidateLocations2", testGenericCandidateLocations2}, @@ -99,6 +101,28 @@ func testGenericRecordDigestUncompressedPair(t *testing.T, cache blobinfocache.B } } +func testGenericUncompressedDigestForTOC(t *testing.T, cache blobinfocache.BlobInfoCache2) { + // Nothing is known. + assert.Equal(t, digest.Digest(""), cache.UncompressedDigestForTOC(digestUnknown)) + + cache.RecordTOCUncompressedPair(digestCompressedA, digestUncompressed) + cache.RecordTOCUncompressedPair(digestCompressedB, digestUncompressed) + // Known TOC→uncompressed mapping + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedA)) + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedB)) +} + +func testGenericRecordTOCUncompressedPair(t *testing.T, cache blobinfocache.BlobInfoCache2) { + for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. + // Known TOC→uncompressed mapping + cache.RecordTOCUncompressedPair(digestCompressedA, digestUncompressed) + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedA)) + // Two mappings to the same uncompressed digest + cache.RecordTOCUncompressedPair(digestCompressedB, digestUncompressed) + assert.Equal(t, digestUncompressed, cache.UncompressedDigestForTOC(digestCompressedB)) + } +} + func testGenericRecordKnownLocations(t *testing.T, cache blobinfocache.BlobInfoCache2) { transport := mocks.NameImageTransport("==BlobInfocache transport mock") for i := 0; i < 2; i++ { // Record the same data twice to ensure redundant writes don’t break things. diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go index 1185e9d45..d853f340a 100644 --- a/pkg/blobinfocache/memory/memory.go +++ b/pkg/blobinfocache/memory/memory.go @@ -24,10 +24,11 @@ type locationKey struct { type cache struct { mutex sync.Mutex // The following fields can only be accessed with mutex held. 
-	uncompressedDigests   map[digest.Digest]digest.Digest
-	digestsByUncompressed map[digest.Digest]*set.Set[digest.Digest]                // stores a set of digests for each uncompressed digest
-	knownLocations        map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for each location reference
-	compressors           map[digest.Digest]string                                 // stores a compressor name, or blobinfocache.Unknown (not blobinfocache.UnknownCompression), for each digest
+	uncompressedDigests      map[digest.Digest]digest.Digest
+	uncompressedDigestsByTOC map[digest.Digest]digest.Digest
+	digestsByUncompressed    map[digest.Digest]*set.Set[digest.Digest]                // stores a set of digests for each uncompressed digest
+	knownLocations           map[locationKey]map[types.BICLocationReference]time.Time // stores last known existence time for each location reference
+	compressors              map[digest.Digest]string                                 // stores a compressor name, or blobinfocache.Unknown (not blobinfocache.UnknownCompression), for each digest
 }
 
 // New returns a BlobInfoCache implementation which is in-memory only.
@@ -44,10 +45,11 @@ func New() types.BlobInfoCache {
 
 func new2() *cache {
 	return &cache{
-		uncompressedDigests:   map[digest.Digest]digest.Digest{},
-		digestsByUncompressed: map[digest.Digest]*set.Set[digest.Digest]{},
-		knownLocations:        map[locationKey]map[types.BICLocationReference]time.Time{},
-		compressors:           map[digest.Digest]string{},
+		uncompressedDigests:      map[digest.Digest]digest.Digest{},
+		uncompressedDigestsByTOC: map[digest.Digest]digest.Digest{},
+		digestsByUncompressed:    map[digest.Digest]*set.Set[digest.Digest]{},
+		knownLocations:           map[locationKey]map[types.BICLocationReference]time.Time{},
+		compressors:              map[digest.Digest]string{},
 	}
 }
 
@@ -104,6 +106,30 @@ func (mem *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre
 	anyDigestSet.Add(anyDigest)
 }
 
+// UncompressedDigestForTOC returns an uncompressed digest corresponding to tocDigest.
+// Returns "" if the uncompressed digest is unknown.
+func (mem *cache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest {
+	mem.mutex.Lock()
+	defer mem.mutex.Unlock()
+	if d, ok := mem.uncompressedDigestsByTOC[tocDigest]; ok {
+		return d
+	}
+	return ""
+}
+
+// RecordTOCUncompressedPair records that tocDigest corresponds to uncompressed.
+// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
+// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
+// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
+func (mem *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
+	mem.mutex.Lock()
+	defer mem.mutex.Unlock()
+	if previous, ok := mem.uncompressedDigestsByTOC[tocDigest]; ok && previous != uncompressed {
+		logrus.Warnf("Uncompressed digest for blob with TOC %q previously recorded as %q, now %q", tocDigest, previous, uncompressed)
+	}
+	mem.uncompressedDigestsByTOC[tocDigest] = uncompressed
+}
+
 // RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope,
 // and can be reused given the opaque location data.
 func (mem *cache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) {
diff --git a/pkg/blobinfocache/none/none.go b/pkg/blobinfocache/none/none.go
index 4b7122f92..d1ba71f62 100644
--- a/pkg/blobinfocache/none/none.go
+++ b/pkg/blobinfocache/none/none.go
@@ -34,6 +34,19 @@ func (noCache) UncompressedDigest(anyDigest digest.Digest) digest.Digest {
 func (noCache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompressed digest.Digest) {
 }
 
+// UncompressedDigestForTOC returns an uncompressed digest corresponding to tocDigest.
+// Returns "" if the uncompressed digest is unknown.
+func (noCache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest {
+	return ""
+}
+
+// RecordTOCUncompressedPair records that tocDigest corresponds to uncompressed.
+// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
+// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
+// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
+func (noCache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
+}
+
 // RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope,
 // and can be reused given the opaque location data.
 func (noCache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, blobDigest digest.Digest, location types.BICLocationReference) {
diff --git a/pkg/blobinfocache/sqlite/sqlite.go b/pkg/blobinfocache/sqlite/sqlite.go
index a5be85a65..081fd611d 100644
--- a/pkg/blobinfocache/sqlite/sqlite.go
+++ b/pkg/blobinfocache/sqlite/sqlite.go
@@ -295,6 +295,14 @@ func ensureDBHasCurrentSchema(db *sql.DB) error {
 			`PRIMARY KEY (transport, scope, digest, location)
 			)`,
 		},
+		{
+			"DigestTOCUncompressedPairs",
+			`CREATE TABLE IF NOT EXISTS DigestTOCUncompressedPairs(` +
+				// index implied by PRIMARY KEY
+				`tocDigest	TEXT PRIMARY KEY NOT NULL,` +
+				`uncompressedDigest	TEXT NOT NULL
+			)`,
+		},
 	}
 
 	_, err := dbTransaction(db, func(tx *sql.Tx) (void, error) {
@@ -385,6 +393,57 @@ func (sqc *cache) RecordDigestUncompressedPair(anyDigest digest.Digest, uncompre
 	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 }
 
+// UncompressedDigestForTOC returns an uncompressed digest corresponding to tocDigest.
+// Returns "" if the uncompressed digest is unknown.
+func (sqc *cache) UncompressedDigestForTOC(tocDigest digest.Digest) digest.Digest {
+	res, err := transaction(sqc, func(tx *sql.Tx) (digest.Digest, error) {
+		uncompressedString, found, err := querySingleValue[string](tx, "SELECT uncompressedDigest FROM DigestTOCUncompressedPairs WHERE tocDigest = ?", tocDigest.String())
+		if err != nil {
+			return "", err
+		}
+		if found {
+			d, err := digest.Parse(uncompressedString)
+			if err != nil {
+				return "", err
+			}
+			return d, nil
+
+		}
+		return "", nil
+	})
+	if err != nil {
+		return "" // FIXME? Log err (but throttle the log volume on repeated accesses)?
+	}
+	return res
+}
+
+// RecordTOCUncompressedPair records that tocDigest corresponds to uncompressed.
+// WARNING: Only call this for LOCALLY VERIFIED data; don’t record a digest pair just because some remote author claims so (e.g.
+// because a manifest/config pair exists); otherwise the cache could be poisoned and allow substituting unexpected blobs.
+// (Eventually, the DiffIDs in image config could detect the substitution, but that may be too late, and not all image formats contain that data.)
+func (sqc *cache) RecordTOCUncompressedPair(tocDigest digest.Digest, uncompressed digest.Digest) {
+	_, _ = transaction(sqc, func(tx *sql.Tx) (void, error) {
+		previousString, gotPrevious, err := querySingleValue[string](tx, "SELECT uncompressedDigest FROM DigestTOCUncompressedPairs WHERE tocDigest = ?", tocDigest.String())
+		if err != nil {
+			return void{}, fmt.Errorf("looking for uncompressed digest for blob with TOC %q: %w", tocDigest, err)
+		}
+		if gotPrevious {
+			previous, err := digest.Parse(previousString)
+			if err != nil {
+				return void{}, err
+			}
+			if previous != uncompressed {
+				logrus.Warnf("Uncompressed digest for blob with TOC %q previously recorded as %q, now %q", tocDigest, previous, uncompressed)
+			}
+		}
+		if _, err := tx.Exec("INSERT OR REPLACE INTO DigestTOCUncompressedPairs(tocDigest, uncompressedDigest) VALUES (?, ?)",
+			tocDigest.String(), uncompressed.String()); err != nil {
+			return void{}, fmt.Errorf("recording uncompressed digest %q for blob with TOC %q: %w", uncompressed, tocDigest, err)
+		}
+		return void{}, nil
+	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
+}
+
 // RecordKnownLocation records that a blob with the specified digest exists within the specified (transport, scope) scope,
 // and can be reused given the opaque location data.
 func (sqc *cache) RecordKnownLocation(transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, location types.BICLocationReference) {
diff --git a/storage/storage_dest.go b/storage/storage_dest.go
index a0b347410..01b8ea652 100644
--- a/storage/storage_dest.go
+++ b/storage/storage_dest.go
@@ -84,18 +84,25 @@ type storageImageDestinationLockProtected struct {
 	currentIndex          int                    // The index of the layer to be committed (i.e., lower indices have already been committed)
 	indexToAddedLayerInfo map[int]addedLayerInfo // Mapping from layer (by index) to blob to add to the image
 
-	// In general, a layer is identified either by (compressed) digest, or by TOC digest.
+	// In general, a layer is identified either by (compressed) digest, or by TOC digest
+	// (and we assume the TOC digest also uniquely identifies the contents, i.e. there aren’t two
+	// different formats/ways to parse a single TOC).
 	// When creating a layer, the c/storage layer metadata and image IDs must _only_ be based on trusted values
 	// we have computed ourselves. (Layer reuse can then look up against such trusted values, but it might not
-	// recompute those values for incomding layers — the point of the reuse is that we don’t need to consume the incoming layer.)
-
-	// Layer identification: For a layer, at least one of indexToTOCDigest and blobDiffIDs must be available before commitLayer is called.
-	// The presence of an indexToTOCDigest is what decides how the layer is identified, i.e. which fields must be trusted.
+	// recompute those values for incoming layers — the point of the reuse is that we don’t need to consume the incoming layer.)
+	//
+	// Layer identification: For a layer, at least one of (indexToDiffID, indexToTOCDigest, blobDiffIDs) must be available
+	// before commitLayer is called.
+	// A layer is identified by the first of the three fields that is set, in that order (and the value must be trusted).
+	//
+	// Ideally we wouldn’t have blobDiffIDs, and just keep records by index, but the public API does not require the caller to provide layer
+	// indices; and configs don’t have layer indices. blobDiffIDs needs to exist for those cases.
+	indexToDiffID    map[int]digest.Digest // Mapping from layer index to DiffID; exclusive with indexToTOCDigest
+	indexToTOCDigest map[int]digest.Digest // Mapping from layer index to a TOC Digest, IFF the layer was created/found/reused by TOC digest; exclusive with indexToDiffID
 	blobDiffIDs      map[digest.Digest]digest.Digest // Mapping from layer blobsums to their corresponding DiffIDs
-	indexToTOCDigest map[int]digest.Digest           // Mapping from layer index to a TOC Digest, IFF the layer was created/found/reused by TOC digest
 
 	// Layer data: Before commitLayer is called, either at least one of (diffOutputs, indexToAdditionalLayer, filenames)
-	// should be available; or indexToTOCDigest/blobDiffIDs should be enough to locate an existing c/storage layer.
+	// should be available; or indexToDiffID/indexToTOCDigest/blobDiffIDs should be enough to locate an existing c/storage layer.
 	// They are looked up in the order they are mentioned above.
 	diffOutputs            map[int]*graphdriver.DriverWithDifferOutput // Mapping from layer index to a partially-pulled layer intermediate data
 	indexToAdditionalLayer map[int]storage.AdditionalLayer             // Mapping from layer index to their corresponding additional layer
@@ -145,9 +152,12 @@ func newImageDestination(sys *types.SystemContext, imageRef storageReference) (*
 		},
 		indexToStorageID: make(map[int]string),
 		lockProtected: storageImageDestinationLockProtected{
-			indexToAddedLayerInfo: make(map[int]addedLayerInfo),
-			blobDiffIDs:           make(map[digest.Digest]digest.Digest),
-			indexToTOCDigest:      make(map[int]digest.Digest),
+			indexToAddedLayerInfo: make(map[int]addedLayerInfo),
+
+			indexToDiffID:    make(map[int]digest.Digest),
+			indexToTOCDigest: make(map[int]digest.Digest),
+			blobDiffIDs:      make(map[digest.Digest]digest.Digest),
+
 			diffOutputs:            make(map[int]*graphdriver.DriverWithDifferOutput),
 			indexToAdditionalLayer: make(map[int]storage.AdditionalLayer),
 			filenames:              make(map[digest.Digest]string),
@@ -322,15 +332,26 @@ func (s *storageImageDestination) PutBlobPartial(ctx context.Context, chunkAcces
 	blobDigest := srcInfo.Digest
 
 	s.lock.Lock()
-	if out.UncompressedDigest != "" {
+	if out.UncompressedDigest != "" { // UncompressedDigest was computed by ApplyDiffWithDiffer, i.e. it has consumed _all_ of the blob (probably converting it)
+		s.lockProtected.indexToDiffID[options.LayerIndex] = out.UncompressedDigest
+		if out.TOCDigest != "" {
+			options.Cache.RecordTOCUncompressedPair(out.TOCDigest, out.UncompressedDigest)
+		}
+		// Don’t set indexToTOCDigest on this path:
+		// - Using UncompressedDigest allows image reuse with non-partially-pulled layers
+		// - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch.
+		//   That TOC is quite unlikely to match with any other TOC value.
+		//
 		// The computation of UncompressedDigest means the whole layer has been consumed; while doing that, chunked.GetDiffer is
 		// responsible for ensuring blobDigest has been validated.
+		// So, also record information about blobDigest; that might benefit reuse.
 		s.lockProtected.blobDiffIDs[blobDigest] = out.UncompressedDigest
+		options.Cache.RecordDigestUncompressedPair(blobDigest, out.UncompressedDigest)
 	} else {
-		// Don’t identify layers by TOC if UncompressedDigest is available.
-		// - Using UncompressedDigest allows image reuse with non-partially-pulled layers
-		// - If UncompressedDigest has been computed, that means the layer was read completely, and the TOC has been created from scratch.
-		//   That TOC is quite unlikely to match with any other TOC value.
+		// Use diffID for layer identity if it is known.
+		if uncompressedDigest := options.Cache.UncompressedDigestForTOC(out.TOCDigest); uncompressedDigest != "" {
+			s.lockProtected.indexToDiffID[options.LayerIndex] = uncompressedDigest
+		}
 		s.lockProtected.indexToTOCDigest[options.LayerIndex] = out.TOCDigest
 	}
 	s.lockProtected.diffOutputs[options.LayerIndex] = out
@@ -459,49 +480,40 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige
 		if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
 			return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err)
 		}
-		if len(layers) > 0 {
-			if size != -1 {
-				s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest
-				return true, private.ReusedBlob{
-					Digest: blobDigest,
-					Size:   size,
-				}, nil
-			}
-			if !options.CanSubstitute {
-				return false, private.ReusedBlob{}, fmt.Errorf("Internal error: options.CanSubstitute was expected to be true for blob with digest %s", blobDigest)
-			}
-			s.lockProtected.blobDiffIDs[uncompressedDigest] = uncompressedDigest
-			return true, private.ReusedBlob{
-				Digest: uncompressedDigest,
-				Size:   layers[0].UncompressedSize,
-			}, nil
+		if found, reused := reusedBlobFromLayerLookupLocked(layers, blobDigest, size, options); found {
+			s.lockProtected.blobDiffIDs[blobDigest] = uncompressedDigest
+			return true, reused, nil
 		}
 	}
 
 	if options.TOCDigest != "" && options.LayerIndex != nil {
+		// Check if we know which UncompressedDigest the TOC digest resolves to, and whether we have a match for that.
+		// Prefer this over LayersByTOCDigest because we can identify the layer using UncompressedDigest, maximizing reuse.
+		uncompressedDigest := options.Cache.UncompressedDigestForTOC(options.TOCDigest)
+		if uncompressedDigest != "" {
+			layers, err := s.imageRef.transport.store.LayersByUncompressedDigest(uncompressedDigest)
+			if err != nil && !errors.Is(err, storage.ErrLayerUnknown) {
+				return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with digest %q: %w`, uncompressedDigest, err)
+			}
+			if found, reused := reusedBlobFromLayerLookupLocked(layers, blobDigest, size, options); found {
+				s.lockProtected.indexToDiffID[*options.LayerIndex] = uncompressedDigest
+				reused.MatchedByTOCDigest = true
+				return true, reused, nil
+			}
+		}
 		// Check if we have a chunked layer in storage with the same TOC digest.
layers, err := s.imageRef.transport.store.LayersByTOCDigest(options.TOCDigest) - if err != nil && !errors.Is(err, storage.ErrLayerUnknown) { return false, private.ReusedBlob{}, fmt.Errorf(`looking for layers with TOC digest %q: %w`, options.TOCDigest, err) } - if len(layers) > 0 { - if size != -1 { - s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest - return true, private.ReusedBlob{ - Digest: blobDigest, - Size: size, - MatchedByTOCDigest: true, - }, nil - } else if options.CanSubstitute && layers[0].UncompressedDigest != "" { - s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest - return true, private.ReusedBlob{ - Digest: layers[0].UncompressedDigest, - Size: layers[0].UncompressedSize, - MatchedByTOCDigest: true, - }, nil + if found, reused := reusedBlobFromLayerLookupLocked(layers, blobDigest, size, options); found { + if uncompressedDigest != "" { + s.lockProtected.indexToDiffID[*options.LayerIndex] = uncompressedDigest } + s.lockProtected.indexToTOCDigest[*options.LayerIndex] = options.TOCDigest + reused.MatchedByTOCDigest = true + return true, reused, nil } } @@ -509,6 +521,26 @@ func (s *storageImageDestination) tryReusingBlobAsPending(blobDigest digest.Dige return false, private.ReusedBlob{}, nil } +// reusedBlobFromLayerLookupLocked returns (true, ReusedBlob) if layers contain a usable match; or (false, ...) if not. +// The caller is still responsible for setting the layer identification fields, to allow the layer to be found again. +// The caller must hold s.lock. +func reusedBlobFromLayerLookupLocked(layers []storage.Layer, blobDigest digest.Digest, blobSize int64, options *private.TryReusingBlobOptions) (bool, private.ReusedBlob) { + if len(layers) > 0 { + if blobSize != -1 { + return true, private.ReusedBlob{ + Digest: blobDigest, + Size: blobSize, + } + } else if options.CanSubstitute && layers[0].UncompressedDigest != "" { + return true, private.ReusedBlob{ + Digest: layers[0].UncompressedDigest, + Size: layers[0].UncompressedSize, + } + } + } + return false, private.ReusedBlob{} +} + // computeID computes a recommended image ID based on information we have so far. If // the manifest is not of a type that we recognize, we return an empty value, indicating // that since we don't have a recommendation, a random ID should be used if one needs @@ -566,7 +598,7 @@ func (s *storageImageDestination) computeID(m manifest.Manifest) string { hasLayerPulledByTOC := false for i := range m.LayerInfos() { layerValue := "" // An empty string is not a valid digest, so this is unambiguous with the TOC case. - tocDigest, ok := s.lockProtected.indexToTOCDigest[i] // "" if not a TOC + tocDigest, ok := s.lockProtected.indexToTOCDigest[i] // "" if not a TOC, incl. the case where indexToDiffID is set if ok { hasLayerPulledByTOC = true layerValue = tocDigest.String() @@ -662,15 +694,21 @@ func (s *storageImageDestination) queueOrCommit(index int, info addedLayerInfo) // and an indication whether the input already has the shape of a layer ID. // It returns ("", false) if the layer is not found at all (which should never happen) func (s *storageImageDestination) singleLayerIDComponent(layerIndex int, blobDigest digest.Digest) (string, bool) { + diffIDcomponent := func(d digest.Digest) string { + return d.Encoded() // This looks like chain IDs, and it uses the traditional value. 
+	}
+
 	s.lock.Lock()
 	defer s.lock.Unlock()
+	if d, found := s.lockProtected.indexToDiffID[layerIndex]; found {
+		return diffIDcomponent(d), true
+	}
 	if d, found := s.lockProtected.indexToTOCDigest[layerIndex]; found {
 		return "@TOC=" + d.Encoded(), false // "@" is not a valid start of a digest.Digest, so this is unambiguous.
 	}
 	if d, found := s.lockProtected.blobDiffIDs[blobDigest]; found {
-		return d.Encoded(), true // This looks like chain IDs, and it uses the traditional value.
+		return diffIDcomponent(d), true
 	}
 	return "", false
 }
@@ -772,6 +810,13 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
 	diffOutput, ok := s.lockProtected.diffOutputs[index]
 	s.lock.Unlock()
 	if ok {
+		s.lock.Lock()
+		trustedUncompressedDigest, ok := s.lockProtected.indexToDiffID[index]
+		s.lock.Unlock()
+		if ok {
+			diffOutput.UncompressedDigest = trustedUncompressedDigest
+		}
+
 		var untrustedUncompressedDigest digest.Digest
 		if diffOutput.UncompressedDigest == "" {
 			d, err := s.untrustedLayerDiffID(index)
@@ -828,8 +873,13 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
 	// then we need to read the desired contents from a layer.
 	var trustedUncompressedDigest, trustedOriginalDigest digest.Digest // For storage.LayerOptions
 	s.lock.Lock()
-	tocDigest := s.lockProtected.indexToTOCDigest[index]       // "" if not set
-	optionalDiffID := s.lockProtected.blobDiffIDs[layerDigest] // "" if not set
+	tocDigest := s.lockProtected.indexToTOCDigest[index] // "" if not set
+	// We look up optionalDiffID in blobDiffIDs first, unlike in singleLayerIDComponent, to bias towards
+	// setting layerDigestIsTrusted to true.
+	optionalDiffID, layerDigestIsTrusted := s.lockProtected.blobDiffIDs[layerDigest]
+	if !layerDigestIsTrusted {
+		optionalDiffID = s.lockProtected.indexToDiffID[index] // "" if not set
+	}
 	filename, gotFilename := s.lockProtected.filenames[layerDigest]
 	s.lock.Unlock()
 	if gotFilename && tocDigest == "" {
@@ -845,27 +895,31 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
 	} else {
 		// Try to find the layer with contents matching the data we use.
 		var layer *storage.Layer // = nil
-		if tocDigest != "" {
-			layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(tocDigest)
-			if err2 == nil && len(layers) > 0 {
+		if optionalDiffID != "" {
+			if layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(optionalDiffID); err2 == nil && len(layers) > 0 {
 				layer = &layers[0]
-			} else {
-				return nil, fmt.Errorf("locating layer for TOC digest %q: %w", tocDigest, err2)
 			}
-		} else {
-			// Because tocDigest == "", optionaldiffID must have been set
-			layers, err2 := s.imageRef.transport.store.LayersByUncompressedDigest(optionalDiffID)
-			if err2 == nil && len(layers) > 0 {
-				layer = &layers[0]
+		}
+		if layer == nil {
+			if tocDigest != "" {
+				if layers, err2 := s.imageRef.transport.store.LayersByTOCDigest(tocDigest); err2 == nil && len(layers) > 0 {
+					layer = &layers[0]
+				} else {
+					return nil, fmt.Errorf("locating layer for TOC digest %q (diffID %q): %w", tocDigest, optionalDiffID, err2)
+				}
 			} else {
-				layers, err2 = s.imageRef.transport.store.LayersByCompressedDigest(layerDigest)
-				if err2 == nil && len(layers) > 0 {
+				// Because tocDigest == "", optionalDiffID must have been set.
+				if !layerDigestIsTrusted {
+					// We can get here if we got optionalDiffID from a TOC lookup in BlobInfoCache, and we originally found a layer
+					// … and that layer is no longer available.
+					return nil, fmt.Errorf("layer for diffID %q is no longer available", optionalDiffID)
+				}
+				if layers, err2 := s.imageRef.transport.store.LayersByCompressedDigest(layerDigest); err2 == nil && len(layers) > 0 {
 					layer = &layers[0]
+				} else {
+					return nil, fmt.Errorf("locating layer for blob %q (diffID %q): %w", layerDigest, optionalDiffID, err2)
 				}
 			}
-			if layer == nil {
-				return nil, fmt.Errorf("locating layer for blob %q: %w", layerDigest, err2)
-			}
 		}
 		// Read the layer's contents.
 		noCompression := archive.Uncompressed
@@ -874,7 +928,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
 		}
 		diff, err2 := s.imageRef.transport.store.Diff("", layer.ID, diffOptions)
 		if err2 != nil {
-			return nil, fmt.Errorf("reading layer %q for blob %q: %w", layer.ID, layerDigest, err2)
+			return nil, fmt.Errorf("reading layer %q for blob %q (TOC %q, diffID %q): %w", layer.ID, layerDigest, tocDigest, optionalDiffID, err2)
 		}
 		// Copy the layer diff to a file. Diff() takes a lock that it holds
 		// until the ReadCloser that it returns is closed, and PutLayer() wants
@@ -902,7 +956,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
 		// The stream we have is uncompressed, this matches contents of the stream.
 		// If tocDigest != "", trustedUncompressedDigest might still be ""; in that case PutLayer will compute the value from the stream.
 		trustedUncompressedDigest = optionalDiffID
-		// FIXME? trustedOriginalDigest could be set to layerDigest IF tocDigest == "" (otherwise layerDigest is untrusted).
+		// FIXME? trustedOriginalDigest could be set to layerDigest IF layerDigestIsTrusted.
 		// But for c/storage to reasonably use it (as a CompressedDigest value), we should also ensure the CompressedSize of the created
 		// layer is correct, and the API does not currently make it possible (.CompressedSize is set from the input stream).
 		//
@@ -938,7 +992,7 @@ func (s *storageImageDestination) createNewLayer(index int, layerDigest digest.D
 		UncompressedDigest: trustedUncompressedDigest,
 	}, file)
 	if err != nil && !errors.Is(err, storage.ErrDuplicateID) {
-		return nil, fmt.Errorf("adding layer with blob %q: %w", layerDigest, err)
+		return nil, fmt.Errorf("adding layer with blob %q (TOC %q, diffID %q): %w", layerDigest, tocDigest, optionalDiffID, err)
 	}
 	return layer, nil
 }
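A minimal usage sketch of the new TOC mapping (not part of the diff above), in the spirit of the generic tests added in pkg/blobinfocache/internal/test. It assumes it is built inside the containers/image module (internal/blobinfocache is not importable from other modules) and that the existing blobinfocache.FromBlobInfoCache helper is used to obtain a BlobInfoCache2; the package layout and digest values are hypothetical, chosen only for illustration.

package main

import (
	"fmt"

	"github.com/containers/image/v5/internal/blobinfocache"
	"github.com/containers/image/v5/pkg/blobinfocache/memory"
	"github.com/opencontainers/go-digest"
)

func main() {
	// FromBlobInfoCache upgrades a types.BlobInfoCache to the internal BlobInfoCache2 interface;
	// the in-memory cache implements the new TOC methods directly.
	cache := blobinfocache.FromBlobInfoCache(memory.New())

	tocDigest := digest.FromString("hypothetical TOC")             // stands in for a locally computed TOC digest
	uncompressed := digest.FromString("hypothetical uncompressed") // stands in for a locally verified DiffID

	// Before anything is recorded, the lookup returns "".
	fmt.Printf("before: %q\n", cache.UncompressedDigestForTOC(tocDigest))

	// Record the pair only for locally verified data (e.g. after ApplyDiffWithDiffer has consumed
	// the whole blob and reported both digests); later pulls can then resolve a manifest's TOC
	// digest to a DiffID and reuse an existing layer found via LayersByUncompressedDigest.
	cache.RecordTOCUncompressedPair(tocDigest, uncompressed)
	fmt.Printf("after:  %q\n", cache.UncompressedDigestForTOC(tocDigest))
}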