Skip to content

Commit

Permalink
docker, BlobInfoCache: try to reuse compressed blobs when pushing acr…
Browse files Browse the repository at this point in the history
…oss registries

It seems we try to reuse blobs only for the specified registry, however
we can have valid known compressed digests across registry as well
following pr attempts to use that by doing following steps.

* `CandidateLocations2` now processes all known blobs and appends them
  to returned candidates at the lowest priority. As a result when
`TryReusingBlob` tries to process these candidates and if the blobs
filtered by the `Opaque` set by the `transport` fail to match then
attempt is made against all known blobs (ones which do not belong to the
current registry).

* Increase the sample set of potential blob reuse to all known
  compressed digests , also involving the one which do not belong to
current registry.

* If a blob is found match it against the registry where we are
  attempting to push. If blob is already there consider it a `CACHE
HIT!` and reply skipping blob, since its already there.

How to verify this ?

* Remove all images `buildah rmi --all` // needed so all new blobs can
  be tagged again in common bucket
* Remove any previous `blob-info-cache` by

```console
rm /home/<user>/.local/share/containers/cache/blob-info-cache-v1.boltdb
```

```console
$ skopeo copy docker://registry.fedoraproject.org/fedora-minimal docker://quay.io/fl/test:some-tag
$ buildah pull registry.fedoraproject.org/fedora-minimal
$ buildah tag registry.fedoraproject.org/fedora-minimal quay.io/fl/test
$ buildah push quay.io/fl/test
```

```console
Getting image source signatures
Copying blob a3497ca15bbf skipped: already exists
Copying config f7e02de757 done
Writing manifest to image destination
Storing signatures
```

Signed-off-by: Aditya R <arajan@redhat.com>
  • Loading branch information
flouthoc committed Sep 2, 2022
1 parent c5c37ef commit 0b5a3be
Show file tree
Hide file tree
Showing 4 changed files with 139 additions and 43 deletions.
28 changes: 19 additions & 9 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,21 +332,31 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
// Then try reusing blobs from other locations.
candidates := options.Cache.CandidateLocations2(d.ref.Transport(), bicTransportScope(d.ref), info.Digest, options.CanSubstitute)
for _, candidate := range candidates {
candidateRepo, err := parseBICLocationReference(candidate.Location)
if err != nil {
logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
continue
var candidateRepo reference.Named
if !candidate.UnknownLocation {
candidateRepo, err = parseBICLocationReference(candidate.Location)
if err != nil {
logrus.Debugf("Error parsing BlobInfoCache location reference: %s", err)
continue
}
}
if candidate.CompressorName != blobinfocache.Uncompressed {
logrus.Debugf("Trying to reuse cached location %s compressed with %s in %s", candidate.Digest.String(), candidate.CompressorName, candidateRepo.Name())
logrus.Debugf("Trying to reuse cached location %s compressed with %s", candidate.Digest.String(), candidate.CompressorName)
} else {
logrus.Debugf("Trying to reuse cached location %s with no compression in %s", candidate.Digest.String(), candidateRepo.Name())
logrus.Debugf("Trying to reuse cached location %s with no compression", candidate.Digest.String())
}

// Sanity checks:
if reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref))
continue
if candidate.UnknownLocation || reference.Domain(candidateRepo) != reference.Domain(d.ref.ref) {
if !candidate.UnknownLocation {
logrus.Debugf("... Internal error: domain %s does not match destination %s", reference.Domain(candidateRepo), reference.Domain(d.ref.ref))
continue
}
// Found following blob in different registry but
// we need to check blob presence against the registry
// where we are planning to push, hence switch back the
// candidate repo to the one where we are planning to push.
candidateRepo = d.ref.ref
}
if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
logrus.Debug("... Already tried the primary destination")
Expand Down
7 changes: 4 additions & 3 deletions internal/blobinfocache/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,8 @@ type BlobInfoCache2 interface {

// BICReplacementCandidate2 is an item returned by BlobInfoCache2.CandidateLocations2.
type BICReplacementCandidate2 struct {
Digest digest.Digest
CompressorName string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
Location types.BICLocationReference
Digest digest.Digest
CompressorName string // either the Name() of a known pkg/compression.Algorithm, or Uncompressed or UnknownCompression
Location types.BICLocationReference
UnknownLocation bool // is true when `Location` for this blob is not set
}
84 changes: 69 additions & 15 deletions pkg/blobinfocache/boltdb/boltdb.go
Original file line number Diff line number Diff line change
Expand Up @@ -285,9 +285,9 @@ func (bdc *cache) RecordKnownLocation(transport types.ImageTransport, scope type
// appendReplacementCandidates creates prioritize.CandidateWithTime values for digest in scopeBucket with corresponding compression info from compressionBucket (if compressionBucket is not nil), and returns the result of appending them to candidates.
func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, scopeBucket, compressionBucket *bolt.Bucket, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
digestKey := []byte(digest.String())
b := scopeBucket.Bucket(digestKey)
if b == nil {
return candidates
var b *bolt.Bucket
if scopeBucket != nil {
b = scopeBucket.Bucket(digestKey)
}
compressorName := blobinfocache.UnknownCompression
if compressionBucket != nil {
Expand All @@ -300,21 +300,33 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
if compressorName == blobinfocache.UnknownCompression && requireCompressionInfo {
return candidates
}
_ = b.ForEach(func(k, v []byte) error {
t := time.Time{}
if err := t.UnmarshalBinary(v); err != nil {
return err
}
if b != nil {
_ = b.ForEach(func(k, v []byte) error {
t := time.Time{}
if err := t.UnmarshalBinary(v); err != nil {
return err
}
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: types.BICLocationReference{Opaque: string(k)},
},
LastSeen: t,
})
return nil
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
} else {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: types.BICLocationReference{Opaque: string(k)},
Digest: digest,
CompressorName: compressorName,
Location: types.BICLocationReference{Opaque: ""},
UnknownLocation: true,
},
LastSeen: t,
LastSeen: time.Time{},
})
return nil
}) // FIXME? Log error (but throttle the log volume on repeated accesses)?
}
return candidates
}

Expand All @@ -325,7 +337,49 @@ func (bdc *cache) appendReplacementCandidates(candidates []prioritize.CandidateW
// data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same
// uncompressed digest.
func (bdc *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 {
return bdc.candidateLocations(transport, scope, primaryDigest, canSubstitute, true)
resAllBlobs := []prioritize.CandidateWithTime{}
var uncompressedDigestValue digest.Digest // = ""
requireCompressionInfo := true
resultPrioritized := bdc.candidateLocations(transport, scope, primaryDigest, canSubstitute, requireCompressionInfo)
// Reprocess all blobs and append them to resultPrioritized in lower priority
if err := bdc.view(func(tx *bolt.Tx) error {
// compressionBucket won't have been created if previous writers never recorded info about compression,
// and we don't want to fail just because of that
compressionBucket := tx.Bucket(digestCompressorBucket)
if uncompressedDigestValue = bdc.uncompressedDigest(tx, primaryDigest); uncompressedDigestValue != "" {
b := tx.Bucket(digestByUncompressedBucket)
if b != nil {
b = b.Bucket([]byte(uncompressedDigestValue.String()))
if b != nil {
if err := b.ForEach(func(k, _ []byte) error {
d, err := digest.Parse(string(k))
if err != nil {
return err
}
if d != primaryDigest && d != uncompressedDigestValue {
resAllBlobs = append(resAllBlobs, bdc.appendReplacementCandidates(resAllBlobs, nil, compressionBucket, d, requireCompressionInfo)...)
}
return nil
}); err != nil {
return err
}
}
}
if uncompressedDigestValue != primaryDigest {
resAllBlobs = append(resAllBlobs, bdc.appendReplacementCandidates(resAllBlobs, nil, compressionBucket, uncompressedDigestValue, requireCompressionInfo)...)
}
}
return nil
}); err != nil {
return []blobinfocache.BICReplacementCandidate2{}
}

resultPrioritizedAllBlobs := prioritize.DestructivelyPrioritizeReplacementCandidates(resAllBlobs, primaryDigest, uncompressedDigestValue)
// Append `resultPrioritizedAllBlobs` after blobs
// which are generated from bucket with provided `scope`
// as a result `resultPrioritizedAllBlobs` will always
// get lower priority while processing candidates.
return append(resultPrioritized, resultPrioritizedAllBlobs...)
}

func (bdc *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, requireCompressionInfo bool) []blobinfocache.BICReplacementCandidate2 {
Expand Down
63 changes: 47 additions & 16 deletions pkg/blobinfocache/memory/memory.go
Original file line number Diff line number Diff line change
Expand Up @@ -121,23 +121,34 @@ func (mem *cache) RecordDigestCompressorName(blobDigest digest.Digest, compresso
}

// appendReplacementCandidates creates prioritize.CandidateWithTime values for (transport, scope, digest), and returns the result of appending them to candidates.
func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope types.BICTransportScope, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: scope, blobDigest: digest}] // nil if not present
for l, t := range locations {
compressorName, compressorKnown := mem.compressors[digest]
if !compressorKnown {
if requireCompressionInfo {
continue
}
func (mem *cache) appendReplacementCandidates(candidates []prioritize.CandidateWithTime, transport types.ImageTransport, scope *types.BICTransportScope, digest digest.Digest, requireCompressionInfo bool) []prioritize.CandidateWithTime {
compressorName, compressorKnown := mem.compressors[digest]
if !compressorKnown {
if !requireCompressionInfo {
compressorName = blobinfocache.UnknownCompression
}
}
if scope != nil {
locations := mem.knownLocations[locationKey{transport: transport.Name(), scope: *scope, blobDigest: digest}] // nil if not present
for l, t := range locations {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: l,
},
LastSeen: t,
})
}
} else {
candidates = append(candidates, prioritize.CandidateWithTime{
Candidate: blobinfocache.BICReplacementCandidate2{
Digest: digest,
CompressorName: compressorName,
Location: l,
Digest: digest,
CompressorName: compressorName,
Location: types.BICLocationReference{Opaque: ""},
UnknownLocation: true,
},
LastSeen: t,
LastSeen: time.Time{},
})
}
return candidates
Expand All @@ -160,25 +171,45 @@ func (mem *cache) CandidateLocations(transport types.ImageTransport, scope types
// data from previous RecordDigestUncompressedPair calls is used to also look up variants of the blob which have the same
// uncompressed digest.
func (mem *cache) CandidateLocations2(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute bool) []blobinfocache.BICReplacementCandidate2 {
return mem.candidateLocations(transport, scope, primaryDigest, canSubstitute, true)
resAllBlobs := []prioritize.CandidateWithTime{}
requireCompressionInfo := true
resultPrioritized := mem.candidateLocations(transport, scope, primaryDigest, canSubstitute, requireCompressionInfo)
var uncompressedDigest digest.Digest // = ""
if uncompressedDigest = mem.uncompressedDigestLocked(primaryDigest); uncompressedDigest != "" {
otherDigests := mem.digestsByUncompressed[uncompressedDigest] // nil if not present in the map
for d := range otherDigests {
if d != primaryDigest && d != uncompressedDigest {
resAllBlobs = append(resAllBlobs, mem.appendReplacementCandidates(resAllBlobs, transport, nil, d, requireCompressionInfo)...)
}
}
if uncompressedDigest != primaryDigest {
resAllBlobs = append(resAllBlobs, mem.appendReplacementCandidates(resAllBlobs, transport, nil, uncompressedDigest, requireCompressionInfo)...)
}
}
resultPrioritizedAllBlobs := prioritize.DestructivelyPrioritizeReplacementCandidates(resAllBlobs, primaryDigest, uncompressedDigest)
// Append `resultPrioritizedAllBlobs` after blobs
// which are generated from bucket with provided `scope`
// as a result `resultPrioritizedAllBlobs` will always
// get lower priority while processing candidates.
return append(resultPrioritized, resultPrioritizedAllBlobs...)
}

func (mem *cache) candidateLocations(transport types.ImageTransport, scope types.BICTransportScope, primaryDigest digest.Digest, canSubstitute, requireCompressionInfo bool) []blobinfocache.BICReplacementCandidate2 {
mem.mutex.Lock()
defer mem.mutex.Unlock()
res := []prioritize.CandidateWithTime{}
res = mem.appendReplacementCandidates(res, transport, scope, primaryDigest, requireCompressionInfo)
res = mem.appendReplacementCandidates(res, transport, &scope, primaryDigest, requireCompressionInfo)
var uncompressedDigest digest.Digest // = ""
if canSubstitute {
if uncompressedDigest = mem.uncompressedDigestLocked(primaryDigest); uncompressedDigest != "" {
otherDigests := mem.digestsByUncompressed[uncompressedDigest] // nil if not present in the map
for d := range otherDigests {
if d != primaryDigest && d != uncompressedDigest {
res = mem.appendReplacementCandidates(res, transport, scope, d, requireCompressionInfo)
res = mem.appendReplacementCandidates(res, transport, &scope, d, requireCompressionInfo)
}
}
if uncompressedDigest != primaryDigest {
res = mem.appendReplacementCandidates(res, transport, scope, uncompressedDigest, requireCompressionInfo)
res = mem.appendReplacementCandidates(res, transport, &scope, uncompressedDigest, requireCompressionInfo)
}
}
}
Expand Down

0 comments on commit 0b5a3be

Please sign in to comment.