fix(machinepool): clear VirtualMachineProfile if no model/custom data update

Scaling a MachinePool up or down previously always updated the VMSS VM image model. This change sets VirtualMachineProfile to nil when no change is necessary, resulting in less churn on scale up/down operations, which previously triggered model updates that could require manual fixing.

Fixes some linting errors related to unnecessary params, too.
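In essence (a condensed sketch of the gating this commit adds to existingParameters in azure/services/scalesets/spec.go; see the full diff below for the real context):

	// When neither the VMSS model nor the bootstrap custom data changed,
	// omit the VirtualMachineProfile from the parameters sent to Azure so
	// the scale set model is left untouched.
	if !hasModelChanges && !s.ShouldPatchCustomData {
		vmss.Properties.VirtualMachineProfile = nil
	}

Since properties omitted from an ARM PATCH are generally left unchanged, dropping the profile means a plain scale up/down no longer rewrites the VM image model.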
mweibel authored and k8s-infra-cherrypick-robot committed Oct 23, 2024
1 parent db56f50 commit 8a0d7ea
Showing 5 changed files with 254 additions and 40 deletions.
26 changes: 13 additions & 13 deletions azure/scope/machinepool.go
@@ -80,6 +80,7 @@ type (
capiMachinePoolPatchHelper *patch.Helper
vmssState *azure.VMSS
cache *MachinePoolCache
skuCache *resourceskus.Cache
}

// NodeStatus represents the status of a Kubernetes node.
@@ -163,12 +164,15 @@ func (m *MachinePoolScope) InitMachinePoolCache(ctx context.Context) error {
return err
}

-	skuCache, err := resourceskus.GetCache(m, m.Location())
-	if err != nil {
-		return err
+	if m.skuCache == nil {
+		skuCache, err := resourceskus.GetCache(m, m.Location())
+		if err != nil {
+			return errors.Wrap(err, "failed to init resourceskus cache")
+		}
+		m.skuCache = skuCache
 	}

-	m.cache.VMSKU, err = skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines)
+	m.cache.VMSKU, err = m.skuCache.Get(ctx, m.AzureMachinePool.Spec.Template.VMSize, resourceskus.VirtualMachines)
if err != nil {
return errors.Wrapf(err, "failed to get VM SKU %s in compute api", m.AzureMachinePool.Spec.Template.VMSize)
}
@@ -221,10 +225,8 @@ func (m *MachinePoolScope) ScaleSetSpec(ctx context.Context) azure.ResourceSpecG
}

if m.cache != nil {
-		if m.HasReplicasExternallyManaged(ctx) {
-			spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges
-			log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData)
-		}
+		spec.ShouldPatchCustomData = m.cache.HasBootstrapDataChanges
+		log.V(4).Info("has bootstrap data changed?", "shouldPatchCustomData", spec.ShouldPatchCustomData)
spec.VMSSExtensionSpecs = m.VMSSExtensionSpecs()
spec.SKU = m.cache.VMSKU
spec.VMImage = m.cache.VMImage
@@ -705,11 +707,9 @@ func (m *MachinePoolScope) Close(ctx context.Context) error {
if err := m.updateReplicasAndProviderIDs(ctx); err != nil {
return errors.Wrap(err, "failed to update replicas and providerIDs")
}
-		if m.HasReplicasExternallyManaged(ctx) {
-			if err := m.updateCustomDataHash(ctx); err != nil {
-				// ignore errors to calculating the custom data hash since it's not absolutely crucial.
-				log.V(4).Error(err, "unable to update custom data hash, ignoring.")
-			}
+		if err := m.updateCustomDataHash(ctx); err != nil {
+			// ignore errors to calculating the custom data hash since it's not absolutely crucial.
+			log.V(4).Error(err, "unable to update custom data hash, ignoring.")
 		}
 	}

149 changes: 149 additions & 0 deletions azure/scope/machinepool_test.go
@@ -18,13 +18,17 @@ package scope

import (
"context"
"crypto/sha256"
"encoding/base64"
"fmt"
"io"
"reflect"
"testing"
"time"

"github.com/Azure/azure-sdk-for-go/sdk/azidentity"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/authorization/armauthorization/v2"
"github.com/Azure/azure-sdk-for-go/sdk/resourcemanager/compute/armcompute/v5"
azureautorest "github.com/Azure/go-autorest/autorest/azure"
"github.com/Azure/go-autorest/autorest/azure/auth"
. "github.com/onsi/gomega"
@@ -1576,3 +1580,148 @@ func TestMachinePoolScope_applyAzureMachinePoolMachines(t *testing.T) {
})
}
}

func TestBootstrapDataChanges(t *testing.T) {
ctx, cancel := context.WithCancel(context.Background())
defer cancel()
scheme := runtime.NewScheme()
_ = clusterv1.AddToScheme(scheme)
_ = infrav1.AddToScheme(scheme)
_ = infrav1exp.AddToScheme(scheme)
_ = corev1.AddToScheme(scheme)

var (
g = NewWithT(t)
mockCtrl = gomock.NewController(t)
cb = fake.NewClientBuilder().WithScheme(scheme)
cluster = &clusterv1.Cluster{
ObjectMeta: metav1.ObjectMeta{
Name: "cluster1",
Namespace: "default",
},
Spec: clusterv1.ClusterSpec{
InfrastructureRef: &corev1.ObjectReference{
Name: "azCluster1",
},
},
Status: clusterv1.ClusterStatus{
InfrastructureReady: true,
},
}
azureCluster = &infrav1.AzureCluster{
ObjectMeta: metav1.ObjectMeta{
Name: "azCluster1",
Namespace: "default",
},
Spec: infrav1.AzureClusterSpec{
AzureClusterClassSpec: infrav1.AzureClusterClassSpec{
Location: "test",
},
},
}
mp = &expv1.MachinePool{
ObjectMeta: metav1.ObjectMeta{
Name: "mp1",
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{
Name: "cluster1",
Kind: "Cluster",
APIVersion: clusterv1.GroupVersion.String(),
},
},
},
Spec: expv1.MachinePoolSpec{
Template: clusterv1.MachineTemplateSpec{
Spec: clusterv1.MachineSpec{
Bootstrap: clusterv1.Bootstrap{
DataSecretName: ptr.To("mp-secret"),
},
Version: ptr.To("v1.31.0"),
},
},
},
}
bootstrapData = "test"
bootstrapDataHash = sha256Hash(base64.StdEncoding.EncodeToString([]byte(bootstrapData)))
bootstrapSecret = corev1.Secret{
ObjectMeta: metav1.ObjectMeta{
Name: "mp-secret",
Namespace: "default",
},
Data: map[string][]byte{"value": []byte(bootstrapData)},
}
amp = &infrav1exp.AzureMachinePool{
ObjectMeta: metav1.ObjectMeta{
Name: "amp1",
Namespace: "default",
OwnerReferences: []metav1.OwnerReference{
{
Name: "mp1",
Kind: "MachinePool",
APIVersion: expv1.GroupVersion.String(),
},
},
Annotations: map[string]string{
azure.CustomDataHashAnnotation: fmt.Sprintf("%x", bootstrapDataHash),
},
},
Spec: infrav1exp.AzureMachinePoolSpec{
Template: infrav1exp.AzureMachinePoolMachineTemplate{
Image: &infrav1.Image{},
NetworkInterfaces: []infrav1.NetworkInterface{
{
SubnetName: "test",
},
},
VMSize: "VM_SIZE",
},
},
}
vmssState = &azure.VMSS{}
)
defer mockCtrl.Finish()

s := &MachinePoolScope{
client: cb.
WithObjects(&bootstrapSecret).
Build(),
ClusterScoper: &ClusterScope{
Cluster: cluster,
AzureCluster: azureCluster,
},
skuCache: resourceskus.NewStaticCache([]armcompute.ResourceSKU{
{
Name: ptr.To("VM_SIZE"),
},
}, "test"),
MachinePool: mp,
AzureMachinePool: amp,
vmssState: vmssState,
}

g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred())

spec := s.ScaleSetSpec(ctx)
sSpec := spec.(*scalesets.ScaleSetSpec)
g.Expect(sSpec.ShouldPatchCustomData).To(Equal(false))

amp.Annotations[azure.CustomDataHashAnnotation] = "old"

// reset cache to be able to build up the cache again
s.cache = nil
g.Expect(s.InitMachinePoolCache(ctx)).NotTo(HaveOccurred())

spec = s.ScaleSetSpec(ctx)
sSpec = spec.(*scalesets.ScaleSetSpec)
g.Expect(sSpec.ShouldPatchCustomData).To(Equal(true))
}

func sha256Hash(text string) []byte {
h := sha256.New()
_, err := io.WriteString(h, text)
if err != nil {
panic(err)
}
return h.Sum(nil)
}
4 changes: 2 additions & 2 deletions azure/services/scalesets/scalesets_test.go
@@ -742,8 +742,8 @@ func newWindowsVMSSSpec() ScaleSetSpec {
return vmss
}

-func newDefaultExistingVMSS(vmSize string) armcompute.VirtualMachineScaleSet {
-	vmss := newDefaultVMSS(vmSize)
+func newDefaultExistingVMSS() armcompute.VirtualMachineScaleSet {
+	vmss := newDefaultVMSS("VM_SIZE")
vmss.ID = ptr.To("subscriptions/1234/resourceGroups/my_resource_group/providers/Microsoft.Compute/virtualMachines/my-vm")
return vmss
}
12 changes: 12 additions & 0 deletions azure/services/scalesets/spec.go
@@ -92,6 +92,9 @@ func (s *ScaleSetSpec) OwnerResourceName() string {
}

func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interface{}) (parameters interface{}, err error) {
ctx, log, done := tele.StartSpanWithLogger(ctx, "scalesets.ScaleSetSpec.existingParameters")
defer done()

existingVMSS, ok := existing.(armcompute.VirtualMachineScaleSet)
if !ok {
return nil, errors.Errorf("%T is not an armcompute.VirtualMachineScaleSet", existing)
@@ -131,6 +134,15 @@ func (s *ScaleSetSpec) existingParameters(ctx context.Context, existing interfac
return nil, nil
}

// if there are no model changes and no change in custom data, remove VirtualMachineProfile to avoid unnecessary VMSS model
// updates.
if !hasModelChanges && !s.ShouldPatchCustomData {
log.V(4).Info("removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData)
vmss.Properties.VirtualMachineProfile = nil
} else {
log.V(4).Info("has changes, not removing virtual machine profile from parameters", "hasModelChanges", hasModelChanges, "shouldPatchCustomData", s.ShouldPatchCustomData)
}

return vmss, nil
}
