From 1cb66535b0e968098c9c29068272dbc6a8831590 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Apr 2022 22:34:01 +0200 Subject: [PATCH 1/2] Don't require DiffID computation when encryption is involved MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AFAICS encryption doesn't _actually_ need diffIDs, at least I can't see any encryption-related use of the computed value; the boolean seems to only be set to avoid reuse (which risks unwanted reuse of plaintext data, or revealing relationships between ciphertext/plaintext to network observers). So, use two extra variables to be clear about the logic of the code. (Also reorder one condition to check a local variable first before calling methods, a microoptimizaion.) Should not change outcome of the operation (but the time and CPU usage no longer spent to unnecessarily compute DiffID values might be noticeable). Signed-off-by: Miloslav Trmač --- copy/copy.go | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/copy/copy.go b/copy/copy.go index 74da68eb3..74ab9db38 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -711,8 +711,6 @@ func (c *copier) copyOneImage(ctx context.Context, policyContext *signature.Poli // If src.UpdatedImageNeedsLayerDiffIDs(ic.manifestUpdates) will be true, it needs to be true by the time we get here. ic.diffIDsAreNeeded = src.UpdatedImageNeedsLayerDiffIDs(*ic.manifestUpdates) - // If encrypted and decryption keys provided, we should try to decrypt - ic.diffIDsAreNeeded = ic.diffIDsAreNeeded || (isEncrypted(src) && ic.c.ociDecryptConfig != nil) || ic.c.ociEncryptConfig != nil // If enabled, fetch and compare the destination's manifest. And as an optimization skip updating the destination iff equal if options.OptimizeDestinationImageAlreadyExists { @@ -1139,11 +1137,15 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to } cachedDiffID := ic.c.blobInfoCache.UncompressedDigest(srcInfo.Digest) // May be "" - // Diffs are needed if we are encrypting an image or trying to decrypt an image - diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" || toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil) - - // If we already have the blob, and we don't need to compute the diffID, then we don't need to read it from the source. - if !diffIDIsNeeded { + diffIDIsNeeded := ic.diffIDsAreNeeded && cachedDiffID == "" + // When encrypting to decrypting, only use the simple code path. We might be able to optimize more + // (e.g. if we know the DiffID of an encrypted compressed layer, it might not be necessary to pull, decrypt and decompress again), + // but it’s not trivially safe to do such things, so until someone takes the effort to make a comprehensive argument, let’s not. + encryptingOrDecrypting := toEncrypt || (isOciEncrypted(srcInfo.MediaType) && ic.c.ociDecryptConfig != nil) + canAvoidProcessingCompleteLayer := !diffIDIsNeeded && !encryptingOrDecrypting + + // Don’t read the layer from the source if we already have the blob, and optimizations are acceptable. + if canAvoidProcessingCompleteLayer { // TODO: at this point we don't know whether or not a blob we end up reusing is compressed using an algorithm // that is acceptable for use on layers in the manifest that we'll be writing later, so if we end up reusing // a blob that's compressed with e.g. zstd, but we're only allowed to write a v2s2 manifest, this will cause @@ -1196,7 +1198,7 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to // of the source file are not known yet and must be fetched. // Attempt a partial only when the source allows to retrieve a blob partially and // the destination has support for it. - if ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() && !diffIDIsNeeded { + if canAvoidProcessingCompleteLayer && ic.c.rawSource.SupportsGetBlobAt() && ic.c.dest.SupportsPutBlobPartial() { if reused, blobInfo := func() (bool, types.BlobInfo) { // A scope for defer bar := ic.c.createProgressBar(pool, true, srcInfo, "blob", "done") hideProgressBar := true From d1d16ebe4929dc9b8dffe69c73c269709f88f511 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Miloslav=20Trma=C4=8D?= Date: Wed, 27 Apr 2022 22:45:01 +0200 Subject: [PATCH 2/2] Avoid calls to RecordDigestUncompressedPair that involve encrypted data MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Operations that involve encryption/decryption are already restricted e.g. not to use TryReusingBlob; but operations that don't themselves involve encryption could still find encrypted blobs in the blob info cache, and potentially use them in other contexts. To avoid that, use a somewhat big hammer of just not calling RecordDigestUncompressedPair on that. Note that this does not help if the blob info cache has already added such entries before this change; it only makes a difference for the future. We continue to call RecordKnownLocation with encrypted data; simple copies of encrypted images from one registry to another (which don't encrypt/decrypt as part of the copy) can benefit from e.g. cross-repo blob reuse just fine. It seems likely that a more precise logic which records more data and allows more blob reuse could be built, but it's not trivially obvious to me that it would be safe, so this change only does the conservative thing to avoid known breakage. There is another RecordDigestUncompressedPair call in c/image/storage; that one is safe, because it only works on a pair of unencrypted digests (for a compressed layer, PutBlobWithOptions receives an empty digest value, and a necessarily decrypted data stream; using that, it computes is own digests of the decrypted possibly-compressed and unencrypted uncommpressed data streams). Signed-off-by: Miloslav Trmač --- copy/copy.go | 48 ++++++++++++++++++++++++++++++++---------------- 1 file changed, 32 insertions(+), 16 deletions(-) diff --git a/copy/copy.go b/copy/copy.go index 74ab9db38..d28cc4a3f 100644 --- a/copy/copy.go +++ b/copy/copy.go @@ -1253,9 +1253,17 @@ func (ic *imageCopier) copyLayer(ctx context.Context, srcInfo types.BlobInfo, to return types.BlobInfo{}, "", errors.Wrap(diffIDResult.err, "computing layer DiffID") } logrus.Debugf("Computed DiffID %s for layer %s", diffIDResult.digest, srcInfo.Digest) - // This is safe because we have just computed diffIDResult.Digest ourselves, and in the process - // we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader. - ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest) + // Don’t record any associations that involve encrypted data. This is a bit crude, + // some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes) + // might be safe, but it’s not trivially obvious, so let’s be conservative for now. + // This crude approach also means we don’t need to record whether a blob is encrypted + // in the blob info cache (which would probably be necessary for any more complex logic), + // and the simplicity is attractive. + if !encryptingOrDecrypting { + // This is safe because we have just computed diffIDResult.Digest ourselves, and in the process + // we have read all of the input blob, so srcInfo.Digest must have been validated by digestingReader. + ic.c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, diffIDResult.digest) + } diffID = diffIDResult.digest } } @@ -1604,19 +1612,27 @@ func (c *copier) copyBlobFromStream(ctx context.Context, srcStream io.Reader, sr return types.BlobInfo{}, errors.Errorf("Internal error writing blob %s, blob with digest %s saved with digest %s", srcInfo.Digest, inputInfo.Digest, uploadedInfo.Digest) } if digestingReader.validationSucceeded { - // If compressionOperation != types.PreserveOriginal, we now have two reliable digest values: - // srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader - // uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob - // (because inputInfo.Digest == "", this must have been computed afresh). - switch compressionOperation { - case types.PreserveOriginal: - break // Do nothing, we have only one digest and we might not have even verified it. - case types.Compress: - c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest) - case types.Decompress: - c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) - default: - return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation) + // Don’t record any associations that involve encrypted data. This is a bit crude, + // some blob substitutions (replacing pulls of encrypted data with local reuse of known decryption outcomes) + // might be safe, but it’s not trivially obvious, so let’s be conservative for now. + // This crude approach also means we don’t need to record whether a blob is encrypted + // in the blob info cache (which would probably be necessary for any more complex logic), + // and the simplicity is attractive. + if !encrypted && !decrypted { + // If compressionOperation != types.PreserveOriginal, we now have two reliable digest values: + // srcinfo.Digest describes the pre-compressionOperation input, verified by digestingReader + // uploadedInfo.Digest describes the post-compressionOperation output, computed by PutBlob + // (because inputInfo.Digest == "", this must have been computed afresh). + switch compressionOperation { + case types.PreserveOriginal: + break // Do nothing, we have only one digest and we might not have even verified it. + case types.Compress: + c.blobInfoCache.RecordDigestUncompressedPair(uploadedInfo.Digest, srcInfo.Digest) + case types.Decompress: + c.blobInfoCache.RecordDigestUncompressedPair(srcInfo.Digest, uploadedInfo.Digest) + default: + return types.BlobInfo{}, errors.Errorf("Internal error: Unexpected compressionOperation value %#v", compressionOperation) + } } if uploadCompressorName != "" && uploadCompressorName != internalblobinfocache.UnknownCompression { c.blobInfoCache.RecordDigestCompressorName(uploadedInfo.Digest, uploadCompressorName)