Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Record zstd:chunked format, and annotations, in BlobInfoCache #2487

Merged
merged 7 commits into from
Aug 7, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
120 changes: 69 additions & 51 deletions copy/compression.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,16 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI
}
stream.reader = reader

if decompressor != nil && format.Name() == compressiontypes.ZstdAlgorithmName {
tocDigest, err := chunkedToc.GetTOCDigest(srcInfo.Annotations)
if err != nil {
return bpDetectCompressionStepData{}, err
}
if tocDigest != nil {
format = compression.ZstdChunked
}

}
res := bpDetectCompressionStepData{
isCompressed: decompressor != nil,
format: format,
Expand All @@ -71,13 +81,14 @@ func blobPipelineDetectCompressionStep(stream *sourceStream, srcInfo types.BlobI

// bpCompressionStepData contains data that the copy pipeline needs about the compression step.
type bpCompressionStepData struct {
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorName string // Compressor name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
operation bpcOperation // What we are actually doing
uploadedOperation types.LayerCompression // Operation to use for updating the blob metadata (matching the end state, not necessarily what we do)
uploadedAlgorithm *compressiontypes.Algorithm // An algorithm parameter for the compressionOperation edits.
uploadedAnnotations map[string]string // Compression-related annotations that should be set on the uploaded blob. WARNING: This is only set after the srcStream.reader is fully consumed.
srcCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the source blob.
uploadedCompressorBaseVariantName string // Compressor base variant name to record in the blob info cache for the uploaded blob.
uploadedCompressorSpecificVariantName string // Compressor specific variant name to record in the blob info cache for the uploaded blob.
closers []io.Closer // Objects to close after the upload is done, if any.
}

type bpcOperation int
Expand Down Expand Up @@ -129,11 +140,12 @@ func (ic *imageCopier) bpcPreserveEncrypted(stream *sourceStream, _ bpDetectComp
// We can’t do anything with an encrypted blob unless decrypted.
logrus.Debugf("Using original blob without modification for encrypted blob")
return &bpCompressionStepData{
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorName: internalblobinfocache.UnknownCompression,
operation: bpcOpPreserveOpaque,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorBaseVariantName: internalblobinfocache.UnknownCompression,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
}, nil
}
return nil, nil
Expand All @@ -157,14 +169,19 @@ func (ic *imageCopier) bpcCompressUncompressed(stream *sourceStream, detected bp
Digest: "",
Size: -1,
}
specificVariantName := uploadedAlgorithm.Name()
if specificVariantName == uploadedAlgorithm.BaseVariantName() {
specificVariantName = internalblobinfocache.UnknownCompression
}
return &bpCompressionStepData{
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: uploadedAlgorithm.Name(),
closers: []io.Closer{reader},
operation: bpcOpCompressUncompressed,
uploadedOperation: types.Compress,
uploadedAlgorithm: uploadedAlgorithm,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: uploadedAlgorithm.BaseVariantName(),
uploadedCompressorSpecificVariantName: specificVariantName,
closers: []io.Closer{reader},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -197,15 +214,20 @@ func (ic *imageCopier) bpcRecompressCompressed(stream *sourceStream, detected bp
Digest: "",
Size: -1,
}
specificVariantName := ic.compressionFormat.Name()
if specificVariantName == ic.compressionFormat.BaseVariantName() {
specificVariantName = internalblobinfocache.UnknownCompression
}
succeeded = true
return &bpCompressionStepData{
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: ic.compressionFormat.Name(),
closers: []io.Closer{decompressed, recompressed},
operation: bpcOpRecompressCompressed,
uploadedOperation: types.PreserveOriginal,
uploadedAlgorithm: ic.compressionFormat,
uploadedAnnotations: annotations,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: ic.compressionFormat.BaseVariantName(),
uploadedCompressorSpecificVariantName: specificVariantName,
closers: []io.Closer{decompressed, recompressed},
}, nil
}
return nil, nil
Expand All @@ -226,12 +248,13 @@ func (ic *imageCopier) bpcDecompressCompressed(stream *sourceStream, detected bp
Size: -1,
}
return &bpCompressionStepData{
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorName: internalblobinfocache.Uncompressed,
closers: []io.Closer{s},
operation: bpcOpDecompressCompressed,
uploadedOperation: types.Decompress,
uploadedAlgorithm: nil,
srcCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: internalblobinfocache.Uncompressed,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
closers: []io.Closer{s},
}, nil
}
return nil, nil
Expand Down Expand Up @@ -276,7 +299,8 @@ func (ic *imageCopier) bpcPreserveOriginal(_ *sourceStream, detected bpDetectCom
// We only record the base variant of the format on upload; we didn’t do anything with
// the TOC, we don’t know whether it matches the blob digest, so we don’t want to trigger
// reuse of any kind between the blob digest and the TOC digest.
uploadedCompressorName: detected.srcCompressorBaseVariantName,
uploadedCompressorBaseVariantName: detected.srcCompressorBaseVariantName,
uploadedCompressorSpecificVariantName: internalblobinfocache.UnknownCompression,
}
}

Expand Down Expand Up @@ -336,32 +360,26 @@ func (d *bpCompressionStepData) recordValidatedDigestData(c *copier, uploadedInf
return fmt.Errorf("Internal error: Unexpected d.operation value %#v", d.operation)
}
}
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorName)
if d.srcCompressorBaseVariantName == "" || d.uploadedCompressorBaseVariantName == "" || d.uploadedCompressorSpecificVariantName == "" {
return fmt.Errorf("internal error: missing compressor names (src base: %q, uploaded base: %q, uploaded specific: %q)",
d.srcCompressorBaseVariantName, d.uploadedCompressorBaseVariantName, d.uploadedCompressorSpecificVariantName)
}
if d.uploadedCompressorName != internalblobinfocache.UnknownCompression {
if d.uploadedCompressorName != compressiontypes.ZstdChunkedAlgorithmName {
// HACK: Don’t record zstd:chunked algorithms.
// There is already a similar hack in internal/imagedestination/impl/helpers.CandidateMatchesTryReusingBlobOptions,
// and that one prevents reusing zstd:chunked blobs, so recording the algorithm here would be mostly harmless.
//
// We skip that here anyway to work around the inability of blobPipelineDetectCompressionStep to differentiate
// between zstd and zstd:chunked; so we could, in varying situations over time, call RecordDigestCompressorName
// with the same digest and both ZstdAlgorithmName and ZstdChunkedAlgorithmName , which causes warnings about
// inconsistent data to be logged.
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorName,
})
}
if d.uploadedCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
c.blobInfoCache.RecordDigestCompressorData(uploadedInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.uploadedCompressorBaseVariantName,
SpecificVariantCompressor: d.uploadedCompressorSpecificVariantName,
SpecificVariantAnnotations: d.uploadedAnnotations,
})
}
if srcInfo.Digest != "" && srcInfo.Digest != uploadedInfo.Digest &&
d.srcCompressorBaseVariantName != internalblobinfocache.UnknownCompression {
// If the source is already using some TOC-dependent variant, we either copied the
// blob as is, or perhaps decompressed it; either way we don’t trust the TOC digest,
// so record neither the variant name, nor the TOC digest.
c.blobInfoCache.RecordDigestCompressorData(srcInfo.Digest, internalblobinfocache.DigestCompressorData{
BaseVariantCompressor: d.srcCompressorBaseVariantName,
BaseVariantCompressor: d.srcCompressorBaseVariantName,
SpecificVariantCompressor: internalblobinfocache.UnknownCompression,
SpecificVariantAnnotations: nil,
})
}
return nil
Expand Down
31 changes: 22 additions & 9 deletions copy/single.go
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ import (
"errors"
"fmt"
"io"
"maps"
"reflect"
"slices"
"strings"
Expand Down Expand Up @@ -162,7 +163,7 @@ func (c *copier) copySingleImage(ctx context.Context, unparsedImage *image.Unpar
if format == nil {
format = defaultCompressionFormat
}
if format.Name() == compression.ZstdChunked.Name() {
if format.Name() == compressiontypes.ZstdChunkedAlgorithmName {
if ic.requireCompressionFormatMatch {
return copySingleImageResult{}, errors.New("explicitly requested to combine zstd:chunked with encryption, which is not beneficial; use plain zstd instead")
}
Expand Down Expand Up @@ -888,21 +889,33 @@ func updatedBlobInfoFromReuse(inputInfo types.BlobInfo, reusedBlob private.Reuse
// Handling of compression, encryption, and the related MIME types and the like are all the responsibility
// of the generic code in this package.
res := types.BlobInfo{
Digest: reusedBlob.Digest,
Size: reusedBlob.Size,
URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior.
Annotations: inputInfo.Annotations, // FIXME: This should remove zstd:chunked annotations (but those annotations being left with incorrect values should not break pulls)
MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation.
Digest: reusedBlob.Digest,
Size: reusedBlob.Size,
URLs: nil, // This _must_ be cleared if Digest changes; clear it in other cases as well, to preserve previous behavior.
// FIXME: This should remove zstd:chunked annotations IF the original was chunked and the new one isn’t
// (but those annotations being left with incorrect values should not break pulls).
Annotations: maps.Clone(inputInfo.Annotations),
MediaType: inputInfo.MediaType, // Mostly irrelevant, MediaType is updated based on Compression*/CryptoOperation.
CompressionOperation: reusedBlob.CompressionOperation,
CompressionAlgorithm: reusedBlob.CompressionAlgorithm,
CryptoOperation: inputInfo.CryptoOperation, // Expected to be unset anyway.
}
// The transport is only expected to fill CompressionOperation and CompressionAlgorithm
// if the blob was substituted; otherwise, fill it in based
// if the blob was substituted; otherwise, it is optional, and if not set, fill it in based
// on what we know from the srcInfos we were given.
if reusedBlob.Digest == inputInfo.Digest {
res.CompressionOperation = inputInfo.CompressionOperation
res.CompressionAlgorithm = inputInfo.CompressionAlgorithm
if res.CompressionOperation == types.PreserveOriginal {
res.CompressionOperation = inputInfo.CompressionOperation
}
if res.CompressionAlgorithm == nil {
res.CompressionAlgorithm = inputInfo.CompressionAlgorithm
}
}
if len(reusedBlob.CompressionAnnotations) != 0 {
if res.Annotations == nil {
res.Annotations = map[string]string{}
}
maps.Copy(res.Annotations, reusedBlob.CompressionAnnotations)
}
return res
}
Expand Down
30 changes: 25 additions & 5 deletions copy/single_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -55,22 +55,42 @@ func TestUpdatedBlobInfoFromReuse(t *testing.T) {
},
{ // Reuse with substitution
reused: private.ReusedBlob{
Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Size: 513543640,
CompressionOperation: types.Decompress,
CompressionAlgorithm: nil,
Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Size: 513543640,
CompressionOperation: types.Decompress,
CompressionAlgorithm: nil,
CompressionAnnotations: map[string]string{"decompressed": "value"},
},
expected: types.BlobInfo{
Digest: "sha256:aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
Size: 513543640,
URLs: nil,
Annotations: map[string]string{"test-annotation-2": "two"},
Annotations: map[string]string{"test-annotation-2": "two", "decompressed": "value"},
MediaType: imgspecv1.MediaTypeImageLayerGzip,
CompressionOperation: types.Decompress,
CompressionAlgorithm: nil,
// CryptoOperation is set to the zero value
},
},
{ // Reuse turning zstd into zstd:chunked
reused: private.ReusedBlob{
Digest: "sha256:6a5a5368e0c2d3e5909184fa28ddfd56072e7ff3ee9a945876f7eee5896ef5bb",
Size: 51354364,
CompressionOperation: types.Compress,
CompressionAlgorithm: &compression.ZstdChunked,
CompressionAnnotations: map[string]string{"zstd-toc": "value"},
},
expected: types.BlobInfo{
Digest: "sha256:6a5a5368e0c2d3e5909184fa28ddfd56072e7ff3ee9a945876f7eee5896ef5bb",
Size: 51354364,
URLs: nil,
Annotations: map[string]string{"test-annotation-2": "two", "zstd-toc": "value"},
MediaType: imgspecv1.MediaTypeImageLayerGzip,
CompressionOperation: types.Compress,
CompressionAlgorithm: &compression.ZstdChunked,
// CryptoOperation is set to the zero value
},
},
} {
res := updatedBlobInfoFromReuse(srcInfo, c.reused)
assert.Equal(t, c.expected, res, fmt.Sprintf("%#v", c.reused))
Expand Down
22 changes: 17 additions & 5 deletions docker/docker_image_dest.go
Original file line number Diff line number Diff line change
Expand Up @@ -332,6 +332,7 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
return false, private.ReusedBlob{}, errors.New("Can not check for a blob with unknown digest")
}

originalCandidateKnownToBeMissing := false
if impl.OriginalCandidateMatchesTryReusingBlobOptions(options) {
// First, check whether the blob happens to already exist at the destination.
haveBlob, reusedInfo, err := d.tryReusingExactBlob(ctx, info, options.Cache)
Expand All @@ -341,9 +342,17 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
if haveBlob {
return true, reusedInfo, nil
}
originalCandidateKnownToBeMissing = true
} else {
logrus.Debugf("Ignoring exact blob match, compression %s does not match required %s or MIME types %#v",
optionalCompressionName(options.OriginalCompression), optionalCompressionName(options.RequiredCompression), options.PossibleManifestFormats)
// We can get here with a blob detected to be zstd when the user wants a zstd:chunked.
// In that case we keep originalCandiateKnownToBeMissing = false, so that if we find
// a BIC entry for this blob, we do use that entry and return a zstd:chunked entry
// with the BIC’s annotations.
// This is not quite correct, it only works if the BIC also contains an acceptable _location_.
// Ideally, we could look up just the compression algorithm/annotations for info.digest,
// and use it even if no location candidate exists and the original dandidate is present.
}

// Then try reusing blobs from other locations.
Expand Down Expand Up @@ -387,7 +396,8 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
// for it in the current repo.
candidateRepo = reference.TrimNamed(d.ref.ref)
}
if candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
if originalCandidateKnownToBeMissing &&
candidateRepo.Name() == d.ref.ref.Name() && candidate.Digest == info.Digest {
logrus.Debug("... Already tried the primary destination")
continue
}
Expand Down Expand Up @@ -427,10 +437,12 @@ func (d *dockerImageDestination) TryReusingBlobWithOptions(ctx context.Context,
options.Cache.RecordKnownLocation(d.ref.Transport(), bicTransportScope(d.ref), candidate.Digest, newBICLocationReference(d.ref))

return true, private.ReusedBlob{
Digest: candidate.Digest,
Size: size,
CompressionOperation: candidate.CompressionOperation,
CompressionAlgorithm: candidate.CompressionAlgorithm}, nil
Digest: candidate.Digest,
Size: size,
CompressionOperation: candidate.CompressionOperation,
CompressionAlgorithm: candidate.CompressionAlgorithm,
CompressionAnnotations: candidate.CompressionAnnotations,
}, nil
}

return false, private.ReusedBlob{}, nil
Expand Down
Loading