From 0fcbf47c4ef838206fcd8271844944cc0d97ba15 Mon Sep 17 00:00:00 2001
From: Aditya R
Date: Thu, 1 Sep 2022 11:11:47 +0530
Subject: [PATCH] docker, BlobInfoCache: try to reuse compressed blobs when
 pushing across registries

We currently try to reuse blobs only from the specified registry, but we
may also have valid known compressed digests from other registries. This
PR attempts to use them by taking the following steps:

* `CandidateLocations2` now processes all known blobs and appends them
  to the returned candidates at the lowest priority. As a result, when
  `TryReusingBlob` processes these candidates and the blobs filtered by
  the `Opaque` set by the `transport` fail to match, an attempt is made
  against all known blobs (including ones which do not belong to the
  current registry).

* Increase the sample set of potential blob reuse to all known
  compressed digests, including the ones which do not belong to the
  current registry.

* If a matching blob is found, check for it in the registry we are
  attempting to push to. If the blob is already there, consider it a
  `CACHE HIT!` and reply that the blob can be skipped, since it is
  already present.

How to verify this?

* Remove all images: `buildah rmi --all` (needed so all the new blobs
  can be tagged again in a common bucket)
* Remove any previous `blob-info-cache`:

```console
rm /home//.local/share/containers/cache/blob-info-cache-v1.boltdb
```

```console
$ skopeo copy docker://registry.fedoraproject.org/fedora-minimal docker://quay.io/fl/test:some-tag
$ buildah pull registry.fedoraproject.org/fedora-minimal
$ buildah tag registry.fedoraproject.org/fedora-minimal quay.io/fl/test
$ buildah push quay.io/fl/test
```

```console
Getting image source signatures
Copying blob a3497ca15bbf skipped: already exists
Copying config f7e02de757 done
Writing manifest to image destination
Storing signatures
```

Signed-off-by: Aditya R
---
 docker/docker_image_dest.go        |  31 +++++---
 internal/blobinfocache/types.go    |   7 +-
 pkg/blobinfocache/boltdb/boltdb.go | 121 +++++++++++++++++++----------
 pkg/blobinfocache/memory/memory.go |  67 +++++++++++-----
 4 files changed, 152 insertions(+), 74 deletions(-)

diff --git a/docker/docker_image_dest.go b/docker/docker_image_dest.go
index 44b45c472c..3e65c4c972 100644
--- a/docker/docker_image_dest.go
+++ b/docker/docker_image_dest.go
@@ -332,21 +332,28 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
 	// Then try reusing blobs from other locations.
 	candidates := options.Cache.CandidateLocations2(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, options.CanSubstitute)
 	for _, candidate := range candidates {
-		candidateRepo, err := parseBICLocationReference(candidate.Location)
-		if err != nil {
-			logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
-			continue
-		}
+		var candidateRepo reference.Named
 		if candidate.CompressorName != blobinfocache.Uncompressed {
-			logrus.Debugf("Trying to reuse cached location %s compressed with %s in %s", candidate.Digest.String(), candidate.CompressorName, candidateRepo.Name())
+			logrus.Debugf("Trying to reuse cached location %s compressed with %s", candidate.Digest.String(), candidate.CompressorName)
 		} else {
-			logrus.Debugf("Trying to reuse cached location %s with no compression in %s", candidate.Digest.String(), candidateRepo.Name())
+			logrus.Debugf("Trying to reuse cached location %s with no compression", candidate.Digest.String())
 		}
-
-		// Sanity checks:
-		if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
-			logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref))
-			continue
+		if !candidate.UnknownLocation {
+			candidateRepo, err = parseBICLocationReference(candidate.Location)
+			if err != nil {
+				logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
+				continue
+			}
+			// Sanity checks:
+			if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
+				logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref))
+				continue
+			}
+		} else {
+			// This digest is a known variant of this blob but we don’t
+			// have a recorded location in this registry; let’s try looking
+			// for it in the current repo.
+			candidateRepo = reference.TrimNamed(d.ref.ref)
 		}
 		if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
 			logrus.Debug("... Already tried the primary destination")
diff --git a/internal/blobinfocache/types.go b/internal/blobinfocache/types.go
index 3c2be57f32..d28bbfdd19 100644
--- a/internal/blobinfocache/types.go
+++ b/internal/blobinfocache/types.go
@@ -39,7 +39,8 @@ type BlobInfoCache2 interface {

 // BICReplacementCandidate2 is an item returned by BlobInfoCache2.CandidateLocations2.
 type BICReplacementCandidate2 struct {
-	Digest         digest.Digest
-	CompressorName string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
-	Location       types.BICLocationReference
+	Digest          digest.Digest
+	CompressorName  string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
+	UnknownLocation bool   // is true when `Location` for this blob is not set
+	Location        types.BICLocationReference // not set if UnknownLocation is set to `true`
 }
diff --git a/pkg/blobinfocache/boltdb/boltdb.go b/pkg/blobinfocache/boltdb/boltdb.go
index a472efd95b..dfc01e3281 100644
--- a/pkg/blobinfocache/boltdb/boltdb.go
+++ b/pkg/blobinfocache/boltdb/boltdb.go
@@ -282,12 +282,12 @@ func (bdc *cache) RecordKnownLocation(transport types.ImageTransport, scope type
 	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
 }

-// appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket with corresponding compression info from compressionBucket (if compressionBucket is not nil), and returns the result of appending them to candidates.
+// appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket with corresponding compression info from compressionBucket (if compressionBucket is not nil), and returns the result of appending them to candidates. If scopeBucket is nil, the final result also contains a lowest-priority candidate with no known location that could possibly be reused.
 func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket, compressionBucket *bolt.Bucket, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
 	digestKey := []byte(digest.String())
-	b := scopeBucket.Bucket(digestKey)
-	if b == nil {
-		return candidates
+	var b *bolt.Bucket
+	if scopeBucket != nil {
+		b = scopeBucket.Bucket(digestKey)
 	}
 	compressorName := blobinfocache.UnknownCompression
 	if compressionBucket != nil {
@@ -300,21 +300,33 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
 	if compressorName == blobinfocache.UnknownCompression && requireCompressionInfo {
 		return candidates
 	}
-	_ = b.ForEach(func(k, v []byte) error {
-		t := time.Time{}
-		if err := t.UnmarshalBinary(v); err != nil {
-			return err
-		}
+	if b != nil {
+		_ = b.ForEach(func(k, v []byte) error {
+			t := time.Time{}
+			if err := t.UnmarshalBinary(v); err != nil {
+				return err
+			}
+			candidates = append(candidates, prioritize.CandidateWithTime{
+				Candidate: blobinfocache.BICReplacementCandidate2{
+					Digest:         digest,
+					CompressorName: compressorName,
+					Location:       types.BICLocationReference{Opaque: string(k)},
+				},
+				LastSeen: t,
+			})
+			return nil
+		}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
+	} else {
 		candidates = append(candidates, prioritize.CandidateWithTime{
 			Candidate: blobinfocache.BICReplacementCandidate2{
-				Digest:         digest,
-				CompressorName: compressorName,
-				Location:       types.BICLocationReference{Opaque: string(k)},
+				Digest:          digest,
+				CompressorName:  compressorName,
+				UnknownLocation: true,
+				Location:        types.BICLocationReference{Opaque: ""},
 			},
-			LastSeen: t,
+			LastSeen: time.Time{},
 		})
-		return nil
-	}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
+	}
 	return candidates
 }

@@ -328,9 +340,39 @@ func (bdc *cache) CandidateLocations2(transport types.ImageTransport, scope type
 	return bdc.candidateLocations(transport, scope, primaryDigest, canSubstitute, true)
 }

-func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, requireCompressionInfo bool) []blobinfocache.BICReplacementCandidate2 {
+func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, v2Output bool) []blobinfocache.BICReplacementCandidate2 {
+	var err error
 	res := []prioritize.CandidateWithTime{}
+	resAllBlobs := []prioritize.CandidateWithTime{}
 	var uncompressedDigestValue digest.Digest // = ""
+
+	getCandidatesForUncompressedDigest := func(tx *bolt.Tx, res []prioritize.CandidateWithTime, scopeBucket, compressionBucket *bolt.Bucket, primaryDigest digest.Digest, v2Output bool) ([]prioritize.CandidateWithTime, error) {
+		if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" {
+			b := tx.Bucket(digestByUncompressedBucket)
+			if b != nil {
+				b = b.Bucket([]byte(uncompressedDigestValue.String()))
+				if b != nil {
+					if err := b.ForEach(func(k, _ []byte) error {
+						d, err := digest.Parse(string(k))
+						if err != nil {
+							return err
+						}
+						if d != primaryDigest && d != uncompressedDigestValue {
+							res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, d, v2Output)
+						}
+						return nil
+					}); err != nil {
+						return nil, err
+					}
+				}
+			}
+			if uncompressedDigestValue != primaryDigest {
+				res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, uncompressedDigestValue, v2Output)
+			}
+		}
+		return res, nil
+	}
+
 	if err := bdc.view(func(tx *bolt.Tx) error {
 		scopeBucket := tx.Bucket(knownLocationsBucket)
 		if scopeBucket == nil {
@@ -348,38 +390,37 @@ func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types
 		// and we don't want to fail just because of that
 		compressionBucket := tx.Bucket(digestCompressorBucket)

-		res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, primaryDigest, requireCompressionInfo)
+		res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, primaryDigest, v2Output)
 		if canSubstitute {
-			if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" {
-				b := tx.Bucket(digestByUncompressedBucket)
-				if b != nil {
-					b = b.Bucket([]byte(uncompressedDigestValue.String()))
-					if b != nil {
-						if err := b.ForEach(func(k, _ []byte) error {
-							d, err := digest.Parse(string(k))
-							if err != nil {
-								return err
-							}
-							if d != primaryDigest && d != uncompressedDigestValue {
-								res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, d, requireCompressionInfo)
-							}
-							return nil
-						}); err != nil {
-							return err
-						}
-					}
-				}
-				if uncompressedDigestValue != primaryDigest {
-					res = bdc.appendReplacementCandidates(res, scopeBucket, compressionBucket, uncompressedDigestValue, requireCompressionInfo)
-				}
+			res, err = getCandidatesForUncompressedDigest(tx, res, scopeBucket, compressionBucket, primaryDigest, v2Output)
+			if err != nil {
+				return err
 			}
 		}
 		return nil
 	}); err != nil { // Including os.IsNotExist(err)
 		return []blobinfocache.BICReplacementCandidate2{} // FIXME? Log err (but throttle the log volume on repeated accesses)?
 	}
+	// prioritize the result
+	resultPrioritized := prioritize.DestructivelyPrioritizeReplacementCandidates(res, primaryDigest, uncompressedDigestValue)

-	return prioritize.DestructivelyPrioritizeReplacementCandidates(res, primaryDigest, uncompressedDigestValue)
+	// Reprocess all blobs and append them to resultPrioritized at a lower priority
+	if err := bdc.view(func(tx *bolt.Tx) error {
+		compressionBucket := tx.Bucket(digestCompressorBucket)
+		resAllBlobs, err = getCandidatesForUncompressedDigest(tx, resAllBlobs, nil, compressionBucket, primaryDigest, v2Output)
+		if err != nil {
+			return err
+		}
+		return nil
+	}); err != nil { // Including os.IsNotExist(err)
+		return []blobinfocache.BICReplacementCandidate2{} // FIXME? Log err (but throttle the log volume on repeated accesses)?
+	}
+	resultPrioritizedAllBlobs := prioritize.DestructivelyPrioritizeReplacementCandidates(resAllBlobs, primaryDigest, uncompressedDigestValue)
+	// Append `resultPrioritizedAllBlobs` after the candidates
+	// generated from the bucket with the provided `scope`;
+	// as a result, `resultPrioritizedAllBlobs` always gets
+	// lower priority while candidates are processed.
+	return append(resultPrioritized, resultPrioritizedAllBlobs...)
 }

 // CandidateLocations returns a prioritized, limited, number of blobs and their locations that could possibly be reused
diff --git a/pkg/blobinfocache/memory/memory.go b/pkg/blobinfocache/memory/memory.go
index 426640366f..9b33ee4ce8 100644
--- a/pkg/blobinfocache/memory/memory.go
+++ b/pkg/blobinfocache/memory/memory.go
@@ -121,23 +121,37 @@ func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compresso
 }

 // appendReplacementCandidates creates prioritize.CandidateWithTime values for (transport, scope, digest), and returns the result of appending them to candidates.
-func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
-	locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present
-	for l, t := range locations {
-		compressorName, compressorKnown := mem.compressors[digest]
-		if !compressorKnown {
-			if requireCompressionInfo {
-				continue
-			}
-			compressorName = blobinfocache.UnknownCompression
+// If scope is nil, the final result also contains lowest-priority candidates with no known location that could possibly be reused.
+func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope *types.BICTransportScope, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
+	compressorName := blobinfocache.UnknownCompression
+	compressorNameValue, compressorKnown := mem.compressors[digest]
+	if compressorKnown {
+		compressorName = compressorNameValue
+	}
+	if compressorName == blobinfocache.UnknownCompression && requireCompressionInfo {
+		return candidates
+	}
+	if scope != nil {
+		locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: *scope, blobDigest: digest}] // nil if not present
+		for l, t := range locations {
+			candidates = append(candidates, prioritize.CandidateWithTime{
+				Candidate: blobinfocache.BICReplacementCandidate2{
+					Digest:         digest,
+					CompressorName: compressorName,
+					Location:       l,
+				},
+				LastSeen: t,
+			})
 		}
+	} else {
 		candidates = append(candidates, prioritize.CandidateWithTime{
 			Candidate: blobinfocache.BICReplacementCandidate2{
-				Digest:         digest,
-				CompressorName: compressorName,
-				Location:       l,
+				Digest:          digest,
+				CompressorName:  compressorName,
+				UnknownLocation: true,
+				Location:        types.BICLocationReference{Opaque: ""},
 			},
-			LastSeen: t,
+			LastSeen: time.Time{},
 		})
 	}
 	return candidates
@@ -159,28 +173,43 @@ func (mem *cache) CandidateLocations(transport types.ImageTransport, scope types
 // If !canSubstitute, the returned candidates will match the submitted digest exactly; if canSubstitute,
 // data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same
 // uncompressed digest.
+//
+// The result can also contain lowest-priority candidates with no known location that could possibly be reused.
 func (mem *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 {
 	return mem.candidateLocations(transport, scope, primaryDigest, canSubstitute, true)
 }

-func (mem *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, requireCompressionInfo bool) []blobinfocache.BICReplacementCandidate2 {
+func (mem *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, v2Output bool) []blobinfocache.BICReplacementCandidate2 {
 	mem.mutex.Lock()
 	defer mem.mutex.Unlock()
 	res := []prioritize.CandidateWithTime{}
-	res = mem.appendReplacementCandidates(res, transport, scope, primaryDigest, requireCompressionInfo)
+	resAllBlobs := []prioritize.CandidateWithTime{}
 	var uncompressedDigest digest.Digest // = ""
-	if canSubstitute {
+	getCandidatesForUncompressedDigest := func(res []prioritize.CandidateWithTime, scope *types.BICTransportScope, digest digest.Digest, v2Output bool) []prioritize.CandidateWithTime {
 		if uncompressedDigest = mem.uncompressedDigestLocked(primaryDigest); uncompressedDigest != "" {
 			otherDigests := mem.digestsByUncompressed[uncompressedDigest] // nil if not present in the map
 			for d := range otherDigests {
 				if d != primaryDigest && d != uncompressedDigest {
-					res = mem.appendReplacementCandidates(res, transport, scope, d, requireCompressionInfo)
+					res = mem.appendReplacementCandidates(res, transport, scope, d, v2Output)
 				}
 			}
 			if uncompressedDigest != primaryDigest {
-				res = mem.appendReplacementCandidates(res, transport, scope, uncompressedDigest, requireCompressionInfo)
+				res = mem.appendReplacementCandidates(res, transport, scope, uncompressedDigest, v2Output)
 			}
 		}
+		return res
+	}
+	res = mem.appendReplacementCandidates(res, transport, &scope, primaryDigest, v2Output)
+	if canSubstitute {
+		res = getCandidatesForUncompressedDigest(res, &scope, primaryDigest, v2Output)
 	}
-	return prioritize.DestructivelyPrioritizeReplacementCandidates(res, primaryDigest, uncompressedDigest)
+	resultPrioritized := prioritize.DestructivelyPrioritizeReplacementCandidates(res, primaryDigest, uncompressedDigest)
+	// Gather all blobs, including ones with unknown locations
+	resAllBlobs = getCandidatesForUncompressedDigest(resAllBlobs, nil, primaryDigest, v2Output)
+	resultPrioritizedAllBlobs := prioritize.DestructivelyPrioritizeReplacementCandidates(resAllBlobs, primaryDigest, uncompressedDigest)
+	// Append `resultPrioritizedAllBlobs` after the candidates
+	// generated with the provided `scope`; as a result,
+	// `resultPrioritizedAllBlobs` always gets lower priority
+	// while candidates are processed.
+	return append(resultPrioritized, resultPrioritizedAllBlobs...)
 }
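
Reviewer note: below is a minimal, self-contained sketch of the candidate-processing behavior this patch adds, for readers who don't want to trace the full diff. It is not code from the patch: the `Candidate` struct merely mirrors the shape of `BICReplacementCandidate2` (compression fields elided), and `blobExistsIn` is a hypothetical stand-in for the per-candidate registry existence check that `dockerImageDestination` performs.

```go
package main

import "fmt"

// Candidate mirrors the shape of blobinfocache.BICReplacementCandidate2
// after this patch (compression fields elided for brevity).
type Candidate struct {
	Digest          string
	UnknownLocation bool   // true when the cache has no recorded location
	Location        string // repository; empty when UnknownLocation is true
}

// blobExistsIn is a hypothetical stand-in for the existence check against a
// repository; here it pretends the blob was already pushed to the
// destination by the earlier `skopeo copy`.
func blobExistsIn(repo, digest string) bool {
	return repo == "quay.io/fl/test"
}

func main() {
	destRepo := "quay.io/fl/test"
	// Candidates with known locations come first; CandidateLocations2 now
	// appends location-less candidates at the lowest priority.
	candidates := []Candidate{
		{Digest: "sha256:aaa", Location: "quay.io/fl/other"},
		{Digest: "sha256:aaa", UnknownLocation: true},
	}
	for _, c := range candidates {
		repo := c.Location
		if c.UnknownLocation {
			// No recorded location in this registry: probe the destination
			// repository itself, as docker_image_dest.go now does via
			// reference.TrimNamed(d.ref.ref).
			repo = destRepo
		}
		if blobExistsIn(repo, c.Digest) {
			fmt.Printf("Copying blob %s skipped: already exists (in %s)\n", c.Digest, repo)
			return
		}
	}
	fmt.Println("no reusable candidate found; uploading blob")
}
```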