From 53183a7b73008d5eb89825f8aa167f5c0a82b588 Mon Sep 17 00:00:00 2001 From: Aditya R Date: Thu, 1 Sep 2022 11:11:47 +0530 Subject: [PATCH] docker, BlobInfoCache: try to reuse compressed blobs when pushing across registries It seems we try to reuse blobs only for the specified registry; however, we can have valid known compressed digests across registries as well. The following PR attempts to use them through the following steps. * `CandidateLocations2` now processes all known blobs and appends them to the returned candidates at the lowest priority. As a result, when `TryReusingBlob` processes these candidates and the blobs filtered by the `Opaque` set by the `transport` fail to match, an attempt is made against all known blobs (ones which do not belong to the current registry). * Increase the sample set of potential blob reuse to all known compressed digests, including the ones which do not belong to the current registry. * If a blob is found, match it against the registry where we are attempting to push. If the blob is already there, consider it a `CACHE HIT!` and reply by skipping the blob, since it's already there. How to verify this? 
* Remove all images `buildah rmi --all` // needed so all new blobs can be tagged again in the common bucket * Remove any previous `blob-info-cache` by ```console rm /home/<user>/.local/share/containers/cache/blob-info-cache-v1.boltdb ``` ```console $ skopeo copy docker://registry.fedoraproject.org/fedora-minimal docker://quay.io/fl/test:some-tag $ buildah pull registry.fedoraproject.org/fedora-minimal $ buildah tag registry.fedoraproject.org/fedora-minimal quay.io/fl/test $ buildah push quay.io/fl/test ``` ```console Getting image source signatures Copying blob a3497ca15bbf skipped: already exists Copying config f7e02de757 done Writing manifest to image destination Storing signatures ``` Signed-off-by: Aditya R --- docker/docker_image_dest.go | 4 +- pkg/blobinfocache/boltdb/boltdb.go | 62 +++++++++++++++++++++++++++++- 2 files changed, 64 insertions(+), 2 deletions(-) diff --git a/docker/docker_image_dest.go b/docker/docker_image_dest.go index 44b45c472c..044feb6eed 100644 --- a/docker/docker_image_dest.go +++ b/docker/docker_image_dest.go @@ -346,7 +346,9 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context, // Sanity checks: if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) { logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref)) - continue + // found following blob in different registry but we need to check blob presence against the registry + // where we are planning to push, hence switch back the candidate repo to the one where we are planning to push + candidateRepo, _ = parseBICLocationReference(types.BICLocationReference{Opaque: string(d.ref.ref.Name())}) } if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest { logrus.Debug("... 
Already tried the primary destination") diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go index a472efd95b..5c85502ce9 100644 --- a/pkg/blobinfocache/boltdb/boltdb.go +++ b/pkg/blobinfocache/boltdb/boltdb.go @@ -325,7 +325,67 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW // data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same // uncompressed digest. func (bdc *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 { - return bdc.candidateLocations(transport, scope, primaryDigest, canSubstitute, true) + resAllBlobs := []prioritize.CandidateWithTime{} + var uncompressedDigestValue digest.Digest // = "" + requireCompressionInfo := true + resultPrioritized := bdc.candidateLocations(transport, scope, primaryDigest, canSubstitute, requireCompressionInfo) + // Reprocess all blobs and append them to resultPrioritized in lower priority + if err := bdc.view(func(tx *bolt.Tx) error { + scopeBucket := tx.Bucket(knownLocationsBucket) + if scopeBucket == nil { + return nil + } + scopeBucket = scopeBucket.Bucket([]byte(transport.Name())) + if scopeBucket == nil { + return nil + } + if err := scopeBucket.ForEach(func(name []byte, v []byte) error { + bucket := scopeBucket.Bucket(name) + // compressionBucket won't have been created if previous writers never recorded info about compression, + // and we don't want to fail just because of that + compressionBucket := tx.Bucket(digestCompressorBucket) + + resAllBlobs = append(resAllBlobs, bdc.appendReplacementCandidates(resAllBlobs, bucket, compressionBucket, primaryDigest, requireCompressionInfo)...) 
+ if canSubstitute { + if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" { + b := tx.Bucket(digestByUncompressedBucket) + if b != nil { + b = b.Bucket([]byte(uncompressedDigestValue.String())) + if b != nil { + if err := b.ForEach(func(k, _ []byte) error { + d, err := digest.Parse(string(k)) + if err != nil { + return err + } + if d != primaryDigest && d != uncompressedDigestValue { + resAllBlobs = append(resAllBlobs, bdc.appendReplacementCandidates(resAllBlobs, bucket, compressionBucket, d, requireCompressionInfo)...) + } + return nil + }); err != nil { + return err + } + } + } + if uncompressedDigestValue != primaryDigest { + resAllBlobs = append(resAllBlobs, bdc.appendReplacementCandidates(resAllBlobs, bucket, compressionBucket, uncompressedDigestValue, requireCompressionInfo)...) + } + } + } + return nil + }); err != nil { + return err + } + return nil + }); err != nil { + return []blobinfocache.BICReplacementCandidate2{} + } + + resultPrioritizedAllBlobs := prioritize.DestructivelyPrioritizeReplacementCandidates(resAllBlobs, primaryDigest, uncompressedDigestValue) + // Append `resultPrioritizedAllBlobs` after blobs + // which are generated from bucket with provided `scope` + // as a result `resultPrioritizedAllBlobs` will always + // get lower priority while processing candidates. + return append(resultPrioritized, resultPrioritizedAllBlobs...) } func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, requireCompressionInfo bool) []blobinfocache.BICReplacementCandidate2 {