Skip to content

Commit

Permalink
Merge pull request #1533 from mtrmac/uncompressed-reuse
Browse files Browse the repository at this point in the history
Fix unwanted reuse of encrypted layers
  • Loading branch information
rhatdan committed May 1, 2022
2 parents f01b06f + d1d16eb commit 7f8c6e0
Showing 1 changed file with 42 additions and 24 deletions.
66 changes: 42 additions & 24 deletions copy/copy.go
Expand Up @@ -711,8 +711,6 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli

// If src.UpdatedImageNeedsLayerDiffIDs(ic.manifestUpdates) will be true, it needs to be true by the time we get here.
ic.diffIDsAreNeeded = src.UpdatedImageNeedsLayerDiffIDs(*ic.manifestUpdates)
// If encrypted and decryption keys provided, we should try to decrypt
ic.diffIDsAreNeeded = ic.diffIDsAreNeeded || (isEncrypted(src) && ic.c.ociDecryptConfig != nil) || ic.c.ociEncryptConfig != nil

// If enabled, fetch and compare the destination's manifest. And as an optimization skip updating the destination iff equal
if options.OptimizeDestinationImageAlreadyExists {
Expand Down Expand Up @@ -1139,11 +1137,15 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
}

cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be ""
// Diffs are needed if we are encrypting an image or trying to decrypt an image
diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" || toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil)

// If we already have the blob, and we don't need to compute the diffID, then we don't need to read it from the source.
if !diffIDIsNeeded {
diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == ""
// When encrypting or decrypting, only use the simple code path. We might be able to optimize more
// (e.g. if we know the DiffID of an encrypted compressed layer, it might not be necessary to pull, decrypt and decompress again),
// but it’s not trivially safe to do such things, so until someone takes the effort to make a comprehensive argument, let’s not.
encryptingOrDecrypting := toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil)
canAvoidProcessingCompleteLayer := !diffIDIsNeeded && !encryptingOrDecrypting

// Don’t read the layer from the source if we already have the blob, and optimizations are acceptable.
if canAvoidProcessingCompleteLayer {
// TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm
// that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing
// a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause
Expand Down Expand Up @@ -1196,7 +1198,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
// of the source file are not known yet and must be fetched.
// Attempt a partial only when the source allows to retrieve a blob partially and
// the destination has support for it.
if ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() && !diffIDIsNeeded {
if canAvoidProcessingCompleteLayer && ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() {
if reused, blobInfo := func() (bool, types.BlobInfo) { // A scope for defer
bar := ic.c.createProgressBar(pool, true, srcInfo, "blob", "done")
hideProgressBar := true
Expand Down Expand Up @@ -1251,9 +1253,17 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to
return types.BlobInfo{}, "", errors.Wrap(diffIDResult.err, "computing layer DiffID")
}
logrus.Debugf("Computed DiffID %s for layer %s", diffIDResult.digest, srcInfo.Digest)
// This is safe because we have just computed diffIDResult.Digest ourselves, and in the process
// we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader.
ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest)
// Don’t record any associations that involve encrypted data. This is a bit crude,
// some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes)
// might be safe, but it’s not trivially obvious, so let’s be conservative for now.
// This crude approach also means we don’t need to record whether a blob is encrypted
// in the blob info cache (which would probably be necessary for any more complex logic),
// and the simplicity is attractive.
if !encryptingOrDecrypting {
// This is safe because we have just computed diffIDResult.digest ourselves, and in the process
// we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader.
ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest)
}
diffID = diffIDResult.digest
}
}
Expand Down Expand Up @@ -1602,19 +1612,27 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr
return types.BlobInfo{}, errors.Errorf("Internal error writing blob %s, blob with digest %s saved with digest %s", srcInfo.Digest, inputInfo.Digest, uploadedInfo.Digest)
}
if digestingReader.validationSucceeded {
// If compressionOperation != types.PreserveOriginal, we now have two reliable digest values:
// srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader
// uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob
// (because inputInfo.Digest == "", this must have been computed afresh).
switch compressionOperation {
case types.PreserveOriginal:
break // Do nothing, we have only one digest and we might not have even verified it.
case types.Compress:
c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
case types.Decompress:
c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
default:
return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation)
// Don’t record any associations that involve encrypted data. This is a bit crude,
// some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes)
// might be safe, but it’s not trivially obvious, so let’s be conservative for now.
// This crude approach also means we don’t need to record whether a blob is encrypted
// in the blob info cache (which would probably be necessary for any more complex logic),
// and the simplicity is attractive.
if !encrypted && !decrypted {
// If compressionOperation != types.PreserveOriginal, we now have two reliable digest values:
// srcInfo.Digest describes the pre-compressionOperation input, verified by digestingReader
// uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob
// (because inputInfo.Digest == "", this must have been computed afresh).
switch compressionOperation {
case types.PreserveOriginal:
break // Do nothing, we have only one digest and we might not have even verified it.
case types.Compress:
c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest)
case types.Decompress:
c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest)
default:
return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation)
}
}
if uploadCompressorName != "" && uploadCompressorName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, uploadCompressorName)
Expand Down

0 comments on commit 7f8c6e0

Please sign in to comment.