From d3d386ec4d751fa405bf9f36ed95f2123ac72b23 Mon Sep 17 00:00:00 2001
From: Alexey Makhov
Date: Tue, 14 May 2024 14:11:25 +0300
Subject: [PATCH] K0sControlPlane update strategies

Signed-off-by: Alexey Makhov
---
 api/controlplane/v1beta1/k0s_types.go         |   3 +-
 .../controlplane_bootstrap_controller.go      | 111 +++---
 .../k0s_controlplane_controller.go            | 161 ++++++---
 .../remote_machine_controller.go              |   2 +-
 inttest/Makefile                              |   1 +
 inttest/Makefile.variables                    |   1 +
 ...er_machine_template_update_rollout_test.go | 315 ++++++++++++++++++
 ...api_remote_machine_template_update_test.go |   8 +-
 .../capi_remote_machine_template_test.go      |  35 +-
 9 files changed, 544 insertions(+), 93 deletions(-)
 create mode 100644 inttest/capi-docker-machine-template-update-rollout/capi_docker_machine_template_update_rollout_test.go

diff --git a/api/controlplane/v1beta1/k0s_types.go b/api/controlplane/v1beta1/k0s_types.go
index b73d5fe2..eb131f62 100644
--- a/api/controlplane/v1beta1/k0s_types.go
+++ b/api/controlplane/v1beta1/k0s_types.go
@@ -32,6 +32,7 @@ type UpdateStrategy string

 const (
 	UpdateInPlace  UpdateStrategy = "InPlace"
+	UpdateRecreate UpdateStrategy = "Recreate"
 )

 // +kubebuilder:object:root=true
@@ -55,7 +56,7 @@ type K0sControlPlaneSpec struct {
 	Replicas int32 `json:"replicas,omitempty"`
-	// UpdateStrategy defines the strategy to use when updating the control plane. Currently only InPlace is supported.
+	// UpdateStrategy defines the strategy to use when updating the control plane. Valid values are InPlace and Recreate.
 	//+kubebuilder:validation:Optional
-	//+kubebuilder:validation:Enum:InPlace
+	//+kubebuilder:validation:Enum=InPlace;Recreate
 	//+kubebuilder:default=InPlace
 	UpdateStrategy UpdateStrategy `json:"updateStrategy,omitempty"`
 	// Version defines the k0s version to be deployed. You can use a specific k0s version (e.g. v1.27.1+k0s.0) or
diff --git a/internal/controller/bootstrap/controlplane_bootstrap_controller.go b/internal/controller/bootstrap/controlplane_bootstrap_controller.go
index 36f8546b..70cf34b3 100644
--- a/internal/controller/bootstrap/controlplane_bootstrap_controller.go
+++ b/internal/controller/bootstrap/controlplane_bootstrap_controller.go
@@ -20,6 +20,7 @@ import (
 	"context"
 	"errors"
 	"fmt"
+	"sort"
 	"strings"
 	"time"

@@ -39,6 +40,7 @@ import (
 	"sigs.k8s.io/cluster-api/controllers/remote"
 	capiutil "sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
+	"sigs.k8s.io/cluster-api/util/collections"
 	"sigs.k8s.io/cluster-api/util/secret"
 	ctrl "sigs.k8s.io/controller-runtime"
 	"sigs.k8s.io/controller-runtime/pkg/client"
@@ -213,14 +215,23 @@ func (c *ControlPlaneController) Reconcile(ctx context.Context, req ctrl.Request
 		return ctrl.Result{}, fmt.Errorf("control plane endpoint is not set")
 	}

-	if strings.HasSuffix(config.Name, "-0") {
+	machines, err := collections.GetFilteredMachinesForCluster(ctx, c.Client, cluster, collections.ControlPlaneMachines(cluster.Name), collections.ActiveMachines)
+	if err != nil {
+		return ctrl.Result{}, fmt.Errorf("error collecting machines: %w", err)
+	}
+
+	oldestMachine := machines.Oldest()
+	if oldestMachine == nil {
+		return ctrl.Result{}, fmt.Errorf("no active control plane machines found")
+	}
+
+	if oldestMachine.Name == config.Name {
 		files, err = c.genInitialControlPlaneFiles(ctx, scope, files)
 		if err != nil {
 			return ctrl.Result{}, fmt.Errorf("error generating initial control plane files: %v", err)
 		}
 		installCmd = createCPInstallCmd(scope)
 	} else {
-		files, err = c.genControlPlaneJoinFiles(ctx, scope, files)
+		firstController := getFirstRunningMachineWithLatestVersion(machines)
+		if firstController == nil {
+			return ctrl.Result{}, fmt.Errorf("waiting for initial control plane provisioning")
+		}
+		files, err = c.genControlPlaneJoinFiles(ctx, scope, files, firstController)
 		if err != nil {
 			return ctrl.Result{}, fmt.Errorf("error generating control plane join files: %v", err)
 		}
@@ -323,7 +334,7 @@ func (c *ControlPlaneController) genInitialControlPlaneFiles(ctx context.Context
 	return files, nil
 }

-func (c *ControlPlaneController) genControlPlaneJoinFiles(ctx context.Context, scope *ControllerScope, files []cloudinit.File) ([]cloudinit.File, error) {
+func (c *ControlPlaneController) genControlPlaneJoinFiles(ctx context.Context, scope *ControllerScope, files []cloudinit.File, firstControllerMachine *clusterv1.Machine) ([]cloudinit.File, error) {
 	log := log.FromContext(ctx).WithValues("K0sControllerConfig cluster", scope.Cluster.Name)

 	_, ca, err := c.getCerts(ctx, scope)
@@ -350,7 +361,7 @@ func (c *ControlPlaneController) genControlPlaneJoinFiles(ctx context.Context, s
 		return nil, err
 	}

-	host, err := c.findFirstControllerIP(ctx, scope.Config)
+	host, err := c.findFirstControllerIP(ctx, firstControllerMachine)
 	if err != nil {
 		log.Error(err, "Failed to get controller IP")
 		return nil, err
 	}
@@ -576,22 +587,9 @@ func createCPInstallCmdWithJoinToken(scope *ControllerScope, tokenPath string) s
 	return strings.Join(installCmd, " ")
 }

-func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, config *bootstrapv1.K0sControllerConfig) (string, error) {
-	// Dirty first controller name generation
-	nameParts := strings.Split(config.Name, "-")
-	nameParts[len(nameParts)-1] = "0"
-	name := strings.Join(nameParts, "-")
-	machine, machineImpl, err := c.getMachineImplementation(ctx, name, config)
-	if err != nil {
-		return "", fmt.Errorf("error getting machine implementation: %w", err)
-	}
-	addresses, found, err := unstructured.NestedSlice(machineImpl.UnstructuredContent(), "status", "addresses")
-	if err != nil {
-		return "", err
-	}
-
+func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, firstControllerMachine *clusterv1.Machine) (string, error) {
 	extAddr, intAddr := "", ""
-	for _, addr := range machine.Status.Addresses {
+	for _, addr := range firstControllerMachine.Status.Addresses {
 		if addr.Type == clusterv1.MachineExternalIP {
 			extAddr = addr.Address
 			break
 		}
@@ -602,16 +600,29 @@ func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, conf
 		}
 	}

-	if found {
-		for _, addr := range addresses {
-			addrMap, _ := addr.(map[string]interface{})
-			if addrMap["type"] == string(v1.NodeExternalIP) {
-				extAddr = addrMap["address"].(string)
-				break
-			}
-			if addrMap["type"] == string(v1.NodeInternalIP) {
-				intAddr = addrMap["address"].(string)
-				break
+	name := firstControllerMachine.Name
+
+	if extAddr == "" && intAddr == "" {
+		machineImpl, err := c.getMachineImplementation(ctx, firstControllerMachine)
+		if err != nil {
+			return "", fmt.Errorf("error getting machine implementation: %w", err)
+		}
+		addresses, found, err := unstructured.NestedSlice(machineImpl.UnstructuredContent(), "status", "addresses")
+		if err != nil {
+			return "", err
+		}
+
+		if found {
+			for _, addr := range addresses {
+				addrMap, _ := addr.(map[string]interface{})
+				if addrMap["type"] == string(v1.NodeExternalIP) {
+					extAddr = addrMap["address"].(string)
+					break
+				}
+				if addrMap["type"] == string(v1.NodeInternalIP) {
+					intAddr = addrMap["address"].(string)
+					break
+				}
 			}
 		}
 	}
@@ -627,13 +638,7 @@ func (c *ControlPlaneController) findFirstControllerIP(ctx context.Context, conf
 	return "", fmt.Errorf("no address found for machine %s", name)
 }

-func (c *ControlPlaneController) getMachineImplementation(ctx context.Context, name string, config *bootstrapv1.K0sControllerConfig) (*clusterv1.Machine, *unstructured.Unstructured, error) {
-	var machine clusterv1.Machine
-	err := c.Get(ctx, client.ObjectKey{Name: name, Namespace: config.Namespace}, &machine)
-	if err != nil {
-		return nil, nil, fmt.Errorf("error getting machine object: %w", err)
-	}
-
+func (c *ControlPlaneController) getMachineImplementation(ctx context.Context, machine *clusterv1.Machine) (*unstructured.Unstructured, error) {
 	infRef := machine.Spec.InfrastructureRef

 	machineImpl := new(unstructured.Unstructured)
@@ -643,11 +648,11 @@ func (c *ControlPlaneController) getMachineImplementation(ctx context.Context, n
 	key := client.ObjectKey{Name: infRef.Name, Namespace: infRef.Namespace}

-	err = c.Get(ctx, key, machineImpl)
+	err := c.Get(ctx, key, machineImpl)
 	if err != nil {
-		return nil, nil, fmt.Errorf("error getting machine implementation object: %w", err)
+		return nil, fmt.Errorf("error getting machine implementation object: %w", err)
 	}
-	return &machine, machineImpl, nil
+	return machineImpl, nil
 }

 func genShutdownServiceFiles() []cloudinit.File {
@@ -700,3 +705,31 @@ command="/etc/bin/k0sleave.sh"
 		},
 	}
 }
+
+func getFirstRunningMachineWithLatestVersion(machines collections.Machines) *clusterv1.Machine {
+	res := make(machinesByVersionAndCreationTimestamp, 0, len(machines))
+	for _, value := range machines {
+		if value.Status.Phase == string(clusterv1.MachinePhasePending) {
+			continue
+		}
+		res = append(res, value)
+	}
+	if len(res) == 0 {
+		return nil
+	}
+	sort.Sort(res)
+	return res[0]
+}
+
+// machinesByVersionAndCreationTimestamp sorts machines so that the machine with
+// the latest version comes first; machines with equal versions are ordered by
+// creation timestamp (oldest first), using their names as a tie breaker.
+type machinesByVersionAndCreationTimestamp []*clusterv1.Machine
+
+func (o machinesByVersionAndCreationTimestamp) Len() int      { return len(o) }
+func (o machinesByVersionAndCreationTimestamp) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
+func (o machinesByVersionAndCreationTimestamp) Less(i, j int) bool {
+	if *o[i].Spec.Version != *o[j].Spec.Version {
+		return *o[i].Spec.Version > *o[j].Spec.Version
+	}
+	if !o[i].CreationTimestamp.Equal(&o[j].CreationTimestamp) {
+		return o[i].CreationTimestamp.Before(&o[j].CreationTimestamp)
+	}
+	return o[i].Name < o[j].Name
+}
diff --git a/internal/controller/controlplane/k0s_controlplane_controller.go b/internal/controller/controlplane/k0s_controlplane_controller.go
index 1dedceb8..6d8dd1a1 100644
--- a/internal/controller/controlplane/k0s_controlplane_controller.go
+++ b/internal/controller/controlplane/k0s_controlplane_controller.go
@@ -21,14 +21,18 @@ import (
 	"errors"
 	"fmt"
 	"strings"
+	"time"

+	"github.com/Masterminds/semver"
 	"github.com/google/uuid"
+	autopilot "github.com/k0sproject/k0s/pkg/apis/autopilot/v1beta2"
 	appsv1 "k8s.io/api/apps/v1"
 	corev1 "k8s.io/api/core/v1"
 	apierrors "k8s.io/apimachinery/pkg/api/errors"
 	metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
 	"k8s.io/apimachinery/pkg/runtime"
 	"k8s.io/apimachinery/pkg/util/intstr"
+	"k8s.io/apiserver/pkg/storage/names"
 	"k8s.io/client-go/kubernetes"
 	"k8s.io/client-go/rest"
 	"k8s.io/utils/ptr"
@@ -36,6 +40,7 @@ import (
 	kubeadmbootstrapv1 "sigs.k8s.io/cluster-api/bootstrap/kubeadm/api/v1beta1"
 	capiutil "sigs.k8s.io/cluster-api/util"
 	"sigs.k8s.io/cluster-api/util/annotations"
+	"sigs.k8s.io/cluster-api/util/collections"
 	"sigs.k8s.io/cluster-api/util/kubeconfig"
 	"sigs.k8s.io/cluster-api/util/secret"
 	ctrl "sigs.k8s.io/controller-runtime"
@@ -211,54 +216,32 @@ func (c *K0sController) reconcile(ctx context.Context, cluster *clusterv1.Cluste

 func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv1.Cluster, kcp *cpv1beta1.K0sControlPlane) (int32, error) {
 	replicasToReport := kcp.Spec.Replicas

-	// TODO: Scale down machines if needed
-	if kcp.Status.Replicas > kcp.Spec.Replicas {
-		kubeClient, err := c.getKubeClient(ctx, cluster)
-		if err != nil {
-			return replicasToReport, fmt.Errorf("error getting cluster client set for deletion: %w", err)
-		}
-		// Remove the last machine and report the new number of replicas to status
-		// On the next reconcile, the next machine will be removed
-		if kcp.Status.Replicas > kcp.Spec.Replicas {
-			// Wait for the previous machine to be deleted to avoid etcd issues
-			previousMachineName := machineName(kcp.Name, int(kcp.Status.Replicas))
-			exist, err := c.machineExist(ctx, previousMachineName, kcp)
-			if err != nil {
-				return kcp.Status.Replicas, fmt.Errorf("error checking machine existance: %w", err)
-			}
-			if exist {
-				return kcp.Status.Replicas, fmt.Errorf("waiting for previous machine to be deleted")
-			}
-
-			replicasToReport = kcp.Status.Replicas - 1
-			name := machineName(kcp.Name, int(kcp.Status.Replicas-1))
-
-			if err := c.markChildControlNodeToLeave(ctx, name, kubeClient); err != nil {
-				return replicasToReport, fmt.Errorf("error marking controlnode to leave: %w", err)
-			}
-
-			if err := c.deleteBootstrapConfig(ctx, name, kcp); err != nil {
-				return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
-			}
-
-			if err := c.deleteMachineFromTemplate(ctx, name, cluster, kcp); err != nil {
-				return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
-			}
-
-			if err := c.deleteMachine(ctx, name, kcp); err != nil {
-				return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
-			}
-
-			return replicasToReport, nil
-		}
-	}
+	machines, err := collections.GetFilteredMachinesForCluster(ctx, c, cluster, collections.ControlPlaneMachines(cluster.Name), collections.ActiveMachines)
+	if err != nil {
+		return replicasToReport, fmt.Errorf("error collecting machines: %w", err)
+	}
+
+	currentReplicas := machines.Len()
+	desiredReplicas := kcp.Spec.Replicas
+	machinesToDelete := 0
+	if currentReplicas > int(desiredReplicas) {
+		machinesToDelete = currentReplicas - int(desiredReplicas)
+		replicasToReport = kcp.Status.Replicas
+	}

 	if kcp.Status.Version != "" && kcp.Spec.Version != kcp.Status.Version {
-		kubeClient, err := c.getKubeClient(ctx, cluster)
-		if err != nil {
-			return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
-		}
+		if kcp.Spec.UpdateStrategy == cpv1beta1.UpdateRecreate {
+			// Recreate: provision a full set of machines at the new version,
+			// then remove the old machines one by one.
+			desiredReplicas += kcp.Spec.Replicas
+			machinesToDelete = int(kcp.Spec.Replicas)
+			replicasToReport = desiredReplicas
+		} else {
+			kubeClient, err := c.getKubeClient(ctx, cluster)
+			if err != nil {
+				return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
+			}

-		err = c.createAutopilotPlan(ctx, kcp, cluster, kubeClient)
-		if err != nil {
-			return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
+			err = c.createAutopilotPlan(ctx, kcp, cluster, kubeClient)
+			if err != nil {
+				return replicasToReport, fmt.Errorf("error creating autopilot plan: %w", err)
+			}
 		}
 	}

@@ -266,8 +249,10 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
-	for i := 0; i < int(kcp.Spec.Replicas); i++ {
-		name := machineName(kcp.Name, i)
+	for i := currentReplicas; i < int(desiredReplicas); i++ {
+		name := names.SimpleNameGenerator.GenerateName(fmt.Sprintf("%s-%d", kcp.Name, i))

 		machineFromTemplate, err := c.createMachineFromTemplate(ctx, name, cluster, kcp)
 		if err != nil {
@@ -285,6 +270,7 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
 		if err != nil {
 			return replicasToReport, fmt.Errorf("error creating machine: %w", err)
 		}
+		machines[machine.Name] = machine

 		err = c.createBootstrapConfig(ctx, name, cluster, kcp, machine)
 		if err != nil {
@@ -292,6 +278,91 @@ func (c *K0sController) reconcileMachines(ctx context.Context, cluster *clusterv
 		}
 	}

+	// Before scaling down, make sure every machine already at the desired
+	// version has joined the cluster as an autopilot ControlNode.
+	if machinesToDelete > 0 {
+		desiredVersion := semver.MustParse(kcp.Spec.Version)
+		desiredMachineVersion := fmt.Sprintf("v%d.%d.%d", desiredVersion.Major(), desiredVersion.Minor(), desiredVersion.Patch())
+
+		kubeClient, err := c.getKubeClient(ctx, cluster)
+		if err != nil {
+			return replicasToReport, fmt.Errorf("error getting cluster client set for machine update: %w", err)
+		}
+
+		for _, m := range machines {
+			if m.Spec.Version == nil || *m.Spec.Version != desiredMachineVersion {
+				continue
+			}
+
+			var cn autopilot.ControlNode
+			err = kubeClient.RESTClient().Get().AbsPath("/apis/autopilot.k0sproject.io/v1beta2/controlnodes").Name(m.Name).Do(ctx).Into(&cn)
+			if err != nil {
+				if apierrors.IsNotFound(err) {
+					return int32(machines.Len()), fmt.Errorf("waiting for new machines")
+				}
+				return replicasToReport, fmt.Errorf("error getting controlnode: %w", err)
+			}
+		}
+	}
+
+	if machinesToDelete > 0 {
+		kubeClient, err := c.getKubeClient(ctx, cluster)
+		if err != nil {
+			return replicasToReport, fmt.Errorf("error getting cluster client set for deletion: %w", err)
+		}
+
+		// Remove the oldest machine and report the new number of replicas to status.
+		// On the next reconcile, the next machine will be removed.
+		// Wait for the previous machine to be deleted to avoid etcd issues.
+		machine := machines.Oldest()
+		if machine.Status.Phase == string(clusterv1.MachinePhaseDeleting) {
+			return kcp.Status.Replicas, fmt.Errorf("waiting for previous machine to be deleted")
+		}
+
+		// Give etcd a moment to settle before the next member is removed.
+		time.Sleep(time.Second * 10)
+
+		replicasToReport -= 1
+		name := machine.Name
+		if err := c.markChildControlNodeToLeave(ctx, name, kubeClient); err != nil {
+			return replicasToReport, fmt.Errorf("error marking controlnode to leave: %w", err)
+		}
+
+		if err := c.deleteBootstrapConfig(ctx, name, kcp); err != nil {
+			return replicasToReport, fmt.Errorf("error deleting bootstrap config: %w", err)
+		}
+
+		if err := c.deleteMachineFromTemplate(ctx, name, cluster, kcp); err != nil {
+			return replicasToReport, fmt.Errorf("error deleting machine from template: %w", err)
+		}
+
+		if err := c.deleteMachine(ctx, name, kcp); err != nil {
+			return replicasToReport, fmt.Errorf("error deleting machine: %w", err)
+		}
+
+		return replicasToReport, nil
+	}
+
 	return replicasToReport, nil
 }
diff --git a/internal/controller/infrastructure/remote_machine_controller.go b/internal/controller/infrastructure/remote_machine_controller.go
index 8d9ddc99..3904d495 100644
--- a/internal/controller/infrastructure/remote_machine_controller.go
+++ b/internal/controller/infrastructure/remote_machine_controller.go
@@ -351,7 +351,7 @@ func (r *RemoteMachineController) returnMachineToPool(ctx context.Context, rm *i
 		}
 	}
 	log := log.FromContext(ctx).WithValues("remotemachine", rm.Name)
-	log.Error(fmt.Errorf("no pooled machine found for remote machine"), "pooled machine not found", "namespace", rm.Namespace, "name", rm.Name)
+	log.Error(fmt.Errorf("no pooled machine found for remote machine"), "failed to find pooled remote machine", "namespace", rm.Namespace, "name", rm.Name)
 	return nil
 }

diff --git a/inttest/Makefile b/inttest/Makefile
index 668aa993..3c9d763b 100644
--- a/inttest/Makefile
+++ b/inttest/Makefile
@@ -48,3 +48,4 @@ check-capi-remote-machine-template: TIMEOUT=12m
 check-capi-remote-machine-template-update: TIMEOUT=10m
 check-capi-docker-machine-template-update: TIMEOUT=10m
 check-capi-remote-machine-job-provision: TIMEOUT=10m
+check-capi-docker-machine-template-update-rollout: TIMEOUT=10m
diff --git a/inttest/Makefile.variables b/inttest/Makefile.variables
index 77293411..8b99c2ad 100644
--- a/inttest/Makefile.variables
+++ b/inttest/Makefile.variables
@@ -30,3 +30,4 @@ smoketests := \
 	check-capi-remote-machine-template \
 	check-capi-remote-machine-template-update \
 	check-capi-docker-machine-template-update \
+	check-capi-docker-machine-template-update-rollout \
diff --git a/inttest/capi-docker-machine-template-update-rollout/capi_docker_machine_template_update_rollout_test.go b/inttest/capi-docker-machine-template-update-rollout/capi_docker_machine_template_update_rollout_test.go
new file mode 100644
index 00000000..45a04650
--- /dev/null
+++ b/inttest/capi-docker-machine-template-update-rollout/capi_docker_machine_template_update_rollout_test.go
@@ -0,0 +1,315 @@
+/*
+Copyright 2023.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+*/
+
+package capidockermachinetemplateupdaterollout
+
+import (
+	"context"
+	"fmt"
+	"os"
+	"os/exec"
+	"strconv"
+	"strings"
+	"testing"
+	"time"
+
+	k0stestutil "github.com/k0sproject/k0s/inttest/common"
+	"github.com/k0sproject/k0smotron/inttest/util"
+
+	"github.com/stretchr/testify/suite"
+	corev1 "k8s.io/api/core/v1"
+	"k8s.io/apimachinery/pkg/util/json"
+	"k8s.io/apimachinery/pkg/util/wait"
+	"k8s.io/client-go/kubernetes"
+	"k8s.io/client-go/rest"
+	"k8s.io/client-go/tools/clientcmd"
+)
+
+type CAPIDockerMachineTemplateUpdateRollout struct {
+	suite.Suite
+	client                 *kubernetes.Clientset
+	restConfig             *rest.Config
+	clusterYamlsPath       string
+	clusterYamlsUpdatePath string
+	ctx                    context.Context
+}
+
+func TestCAPIDockerMachineTemplateUpdateRollout(t *testing.T) {
+	s := CAPIDockerMachineTemplateUpdateRollout{}
+	suite.Run(t, &s)
+}
+
+func (s *CAPIDockerMachineTemplateUpdateRollout) SetupSuite() {
+	kubeConfigPath := os.Getenv("KUBECONFIG")
+	s.Require().NotEmpty(kubeConfigPath, "KUBECONFIG env var must be set and point to kind cluster")
+	// Get rest config from kubeconfig
+	restCfg, err := clientcmd.BuildConfigFromFlags("", kubeConfigPath)
+	s.Require().NoError(err)
+	s.Require().NotNil(restCfg)
+	s.restConfig = restCfg
+
+	// Get kube client from kubeconfig
+	kubeClient, err := kubernetes.NewForConfig(restCfg)
+	s.Require().NoError(err)
+	s.Require().NotNil(kubeClient)
+	s.client = kubeClient
+
+	tmpDir := s.T().TempDir()
+	s.clusterYamlsPath = tmpDir + "/cluster.yaml"
+	s.Require().NoError(os.WriteFile(s.clusterYamlsPath, []byte(dockerClusterYaml), 0644))
+	s.clusterYamlsUpdatePath = tmpDir + "/update.yaml"
+	s.Require().NoError(os.WriteFile(s.clusterYamlsUpdatePath, []byte(controlPlaneUpdate), 0644))
+
+	s.ctx, _ = util.NewSuiteContext(s.T())
+}
+
+func (s *CAPIDockerMachineTemplateUpdateRollout) TestCAPIControlPlaneDockerRolloutUpdate() {
+	// Apply the child cluster objects
+	s.applyClusterObjects()
+	defer func() {
+		keep := os.Getenv("KEEP_AFTER_TEST")
+		if keep == "true" {
+			return
+		}
+		if keep == "on-failure" && s.T().Failed() {
+			return
+		}
+		s.T().Log("Deleting cluster objects")
+		s.deleteCluster()
+	}()
+	s.T().Log("cluster objects applied, waiting for cluster to be ready")
+
+	var localPort int
+	// nolint:staticcheck
+	err := wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
+		localPort, _ = getLBPort("docker-test-lb")
+		return localPort > 0, nil
+	})
+	s.Require().NoError(err)
+
+	s.T().Log("waiting to see admin kubeconfig secret")
+	kmcKC, err := util.GetKMCClientSet(s.ctx, s.client, "docker-test", "default", localPort)
+	s.Require().NoError(err)
+
+	// nolint:staticcheck
+	err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
+		b, _ := s.client.RESTClient().
+			Get().
+			AbsPath("/healthz").
+			DoRaw(context.Background())
+
+		return string(b) == "ok", nil
+	})
+	s.Require().NoError(err)
+
+	// Control plane machine names now carry a random suffix, so we cannot
+	// poll specific controller containers here; give the control plane
+	// time to settle instead.
+	time.Sleep(time.Minute * 3)
+
+	s.T().Log("waiting for node to be ready")
+	s.Require().NoError(k0stestutil.WaitForNodeReadyStatus(s.ctx, kmcKC, "docker-test-worker-0", corev1.ConditionTrue))
+
+	s.updateClusterObjects()
+	// nolint:staticcheck
+	err = wait.PollImmediateUntilWithContext(s.ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
+		output, err := exec.Command("docker", "exec", "docker-test-0", "k0s", "status").CombinedOutput()
+		if err != nil {
+			return false, nil
+		}
+
+		return strings.Contains(string(output), "Version: v1.28"), nil
+	})
+	s.Require().NoError(err)
+
+	s.Require().NoError(k0stestutil.WaitForNodeReadyStatus(s.ctx, kmcKC, "docker-test-worker-0", corev1.ConditionTrue))
+}
+
+func (s *CAPIDockerMachineTemplateUpdateRollout) applyClusterObjects() {
+	// Exec via kubectl
+	out, err := exec.Command("kubectl", "apply", "-f", s.clusterYamlsPath).CombinedOutput()
+	s.Require().NoError(err, "failed to apply cluster objects: %s", string(out))
+}
+
+func (s *CAPIDockerMachineTemplateUpdateRollout) updateClusterObjects() {
+	// Exec via kubectl
+	out, err := exec.Command("kubectl", "apply", "-f", s.clusterYamlsUpdatePath).CombinedOutput()
+	s.Require().NoError(err, "failed to update cluster objects: %s", string(out))
+}
+
+func (s *CAPIDockerMachineTemplateUpdateRollout) deleteCluster() {
+	// Exec via kubectl
+	out, err := exec.Command("kubectl", "delete", "-f", s.clusterYamlsPath).CombinedOutput()
+	s.Require().NoError(err, "failed to delete cluster objects: %s", string(out))
+}
+
+func getLBPort(name string) (int, error) {
+	b, err := exec.Command("docker", "inspect", name, "--format", "{{json .NetworkSettings.Ports}}").Output()
+	if err != nil {
+		return 0, fmt.Errorf("failed to get inspect info from container %s: %w", name, err)
+	}
+
+	var ports map[string][]map[string]string
+	err = json.Unmarshal(b, &ports)
+	if err != nil {
+		return 0, fmt.Errorf("failed to unmarshal inspect info from container %s: %w", name, err)
+	}
+
+	return strconv.Atoi(ports["6443/tcp"][0]["HostPort"])
+}
+
+var dockerClusterYaml = `
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Cluster
+metadata:
+  name: docker-test
+  namespace: default
+spec:
+  clusterNetwork:
+    pods:
+      cidrBlocks:
+      - 192.168.0.0/16
+    serviceDomain: cluster.local
+    services:
+      cidrBlocks:
+      - 10.128.0.0/12
+  controlPlaneRef:
+    apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+    kind: K0sControlPlane
+    name: docker-test
+  infrastructureRef:
+    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+    kind: DockerCluster
+    name: docker-test
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachineTemplate
+metadata:
+  name: docker-test-cp-template
+  namespace: default
+spec:
+  template:
+    spec: {}
+---
+apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+kind: K0sControlPlane
+metadata:
+  name: docker-test
+spec:
+  replicas: 3
+  version: v1.27.1+k0s.0
+  updateStrategy: Recreate
+  k0sConfigSpec:
+    k0s:
+      apiVersion: k0s.k0sproject.io/v1beta1
+      kind: ClusterConfig
+      metadata:
+        name: k0s
+      spec:
+        api:
+          extraArgs:
+            anonymous-auth: "true"
+        telemetry:
+          enabled: false
+  machineTemplate:
+    infrastructureRef:
+      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+      kind: DockerMachineTemplate
+      name: docker-test-cp-template
+      namespace: default
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerCluster
+metadata:
+  name: docker-test
+  namespace: default
+spec:
+---
+apiVersion: cluster.x-k8s.io/v1beta1
+kind: Machine
+metadata:
+  name: docker-test-worker-0
+  namespace: default
+spec:
+  version: v1.27.1
+  clusterName: docker-test
+  bootstrap:
+    configRef:
+      apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
+      kind: K0sWorkerConfig
+      name: docker-test-worker-0
+  infrastructureRef:
+    apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+    kind: DockerMachine
+    name: docker-test-worker-0
+---
+apiVersion: bootstrap.cluster.x-k8s.io/v1beta1
+kind: K0sWorkerConfig
+metadata:
+  name: docker-test-worker-0
+  namespace: default
+spec:
+  # version is deliberately different to be able to verify we actually pick it up :)
+  version: v1.27.1+k0s.0
+---
+apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+kind: DockerMachine
+metadata:
+  name: docker-test-worker-0
+  namespace: default
+spec:
+`
+
+var controlPlaneUpdate = `
+apiVersion: controlplane.cluster.x-k8s.io/v1beta1
+kind: K0sControlPlane
+metadata:
+  name: docker-test
+spec:
+  replicas: 3
+  version: v1.28.7+k0s.0
+  updateStrategy: Recreate
+  k0sConfigSpec:
+    k0s:
+      apiVersion: k0s.k0sproject.io/v1beta1
+      kind: ClusterConfig
+      metadata:
+        name: k0s
+      spec:
+        api:
+          extraArgs:
+            anonymous-auth: "true"
+        telemetry:
+          enabled: false
+  machineTemplate:
+    infrastructureRef:
+      apiVersion: infrastructure.cluster.x-k8s.io/v1beta1
+      kind: DockerMachineTemplate
+      name: docker-test-cp-template
+      namespace: default
+`
diff --git a/inttest/capi-remote-machine-template-update/capi_remote_machine_template_update_test.go b/inttest/capi-remote-machine-template-update/capi_remote_machine_template_update_test.go
index ce956695..58d8402d 100644
--- a/inttest/capi-remote-machine-template-update/capi_remote_machine_template_update_test.go
+++ b/inttest/capi-remote-machine-template-update/capi_remote_machine_template_update_test.go
@@ -184,7 +184,7 @@ func (s *RemoteMachineTemplateUpdateSuite) TestCAPIRemoteMachine() {
 	s.Require().NoError(err)

 	s.T().Log("waiting for node to be ready")
-	s.Require().NoError(common.WaitForNodeReadyStatus(ctx, kmcKC, "remote-test-0", corev1.ConditionTrue))
+	s.Require().NoError(common.WaitForNodeReadyStatus(ctx, kmcKC, rmName, corev1.ConditionTrue))

 	s.T().Log("update cluster")
 	s.updateCluster()
@@ -200,7 +200,7 @@ func (s *RemoteMachineTemplateUpdateSuite) TestCAPIRemoteMachine() {
 	s.Require().NoError(err)

 	s.T().Log("waiting for node to be ready in updated cluster")
-	s.Require().NoError(common.WaitForNodeReadyStatus(ctx, kmcKC, "remote-test-0", corev1.ConditionTrue))
+	s.Require().NoError(common.WaitForNodeReadyStatus(ctx, kmcKC, rmName, corev1.ConditionTrue))
 }

 func (s *RemoteMachineTemplateUpdateSuite) findRemoteMachines(namespace string) ([]infra.RemoteMachine, error) {
@@ -230,7 +230,7 @@ func (s *RemoteMachineTemplateUpdateSuite) getRemoteMachine(name string, namespa
 }

 func (s *RemoteMachineTemplateUpdateSuite) deleteCluster() {
-	out, err := exec.Command("kubectl", "delete", "-f", s.clusterYamlsPath).CombinedOutput()
+	out, err := exec.Command("kubectl", "delete", "cluster", "remote-test-cluster").CombinedOutput()
 	s.Require().NoError(err, "failed to delete cluster objects: %s", string(out))
 }

@@ -269,8 +269,8 @@ func (s *RemoteMachineTemplateUpdateSuite) createCluster() {
 	s.Require().NoError(os.WriteFile(s.clusterYamlsPath, bytes, 0644))

 	out, err := exec.Command("kubectl", "apply", "-f", s.clusterYamlsPath).CombinedOutput()
+	s.Require().NoError(err, "failed to apply cluster objects: %s", string(out))

 	s.Require().NoError(os.WriteFile(s.updatedClusterYamlsPath, []byte(updatedClusterYaml), 0644))
-	s.Require().NoError(err, "failed to update cluster objects: %s", string(out))
 }

 func getLBPort(name string) (int, error) {
diff --git a/inttest/capi-remote-machine-template/capi_remote_machine_template_test.go b/inttest/capi-remote-machine-template/capi_remote_machine_template_test.go
index f2377f6e..abe9de58 100644
--- a/inttest/capi-remote-machine-template/capi_remote_machine_template_test.go
+++ b/inttest/capi-remote-machine-template/capi_remote_machine_template_test.go
@@ -151,11 +151,27 @@ func (s *RemoteMachineTemplateSuite) TestCAPIRemoteMachine() {
 	s.Require().NoError(err)

 	// Verify the RemoteMachine is at expected state
+	var rmName string
+	// nolint:staticcheck
+	err = wait.PollImmediateUntilWithContext(ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
+		rm, err := s.findRemoteMachines("default")
+		if err != nil {
+			return false, err
+		}
+
+		if len(rm) == 0 {
+			// Keep polling until a RemoteMachine shows up
+			return false, nil
+		}
+
+		rmName = rm[0].GetName()
+		return true, nil
+	})
+	s.Require().NoError(err)
 	expectedProviderID := fmt.Sprintf("remote-machine://%s:22", s.getWorkerIP())
 	// nolint:staticcheck
 	err = wait.PollImmediateUntilWithContext(ctx, 1*time.Second, func(ctx context.Context) (bool, error) {
-		rm, err := s.getRemoteMachine("remote-test-0", "default")
+		rm, err := s.getRemoteMachine(rmName, "default")
 		if err != nil {
 			return false, err
 		}
@@ -165,7 +181,7 @@ func (s *RemoteMachineTemplateSuite) TestCAPIRemoteMachine() {
 	s.Require().NoError(err)

 	s.T().Log("waiting for node to be ready")
-	s.Require().NoError(common.WaitForNodeReadyStatus(ctx, kmcKC, "remote-test-0", corev1.ConditionTrue))
+	s.Require().NoError(common.WaitForNodeReadyStatus(ctx, kmcKC, rmName, corev1.ConditionTrue))

 	err = wait.PollUntilContextCancel(ctx, time.Second, true, func(ctx context.Context) (done bool, err error) {
 		node, err := kmcKC.CoreV1().Nodes().Get(ctx, "remote-test-0", metav1.GetOptions{})
@@ -178,7 +194,7 @@ func (s *RemoteMachineTemplateSuite) TestCAPIRemoteMachine() {
 	s.Require().NoError(err)

 	s.T().Log("deleting node from cluster")
-	s.Require().NoError(s.deleteRemoteMachine("remote-test-0", "default"))
+	s.Require().NoError(s.deleteRemoteMachine(rmName, "default"))

 	nodes, err := kmcKC.CoreV1().Nodes().List(ctx, metav1.ListOptions{})
 	s.Require().NoError(err)
@@ -186,6 +202,19 @@ func (s *RemoteMachineTemplateSuite) TestCAPIRemoteMachine() {

 }

+func (s *RemoteMachineTemplateSuite) findRemoteMachines(namespace string) ([]infra.RemoteMachine, error) {
+	apiPath := fmt.Sprintf("/apis/infrastructure.cluster.x-k8s.io/v1beta1/namespaces/%s/remotemachines", namespace)
+	result, err := s.client.RESTClient().Get().AbsPath(apiPath).DoRaw(s.Context())
+	if err != nil {
+		return nil, err
+	}
+	rm := &infra.RemoteMachineList{}
+	if err := yaml.Unmarshal(result, rm); err != nil {
+		return nil, err
+	}
+	return rm.Items, nil
+}
+
 func (s *RemoteMachineTemplateSuite) getRemoteMachine(name string, namespace string) (*infra.RemoteMachine, error) {
 	apiPath := fmt.Sprintf("/apis/infrastructure.cluster.x-k8s.io/v1beta1/namespaces/%s/remotemachines/%s", namespace, name)
 	result, err := s.client.RESTClient().Get().AbsPath(apiPath).DoRaw(s.Context())
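
For reviewers, a minimal standalone sketch of the join-target ordering that machinesByVersionAndCreationTimestamp implements: latest version first, oldest creation timestamp first within a version, name as the final tie breaker. The types and names here (machine, byVersionAndCreation) are simplified stand-ins for illustration only, not the controller's actual Cluster API types, and version strings are compared lexically as in the patch.

    package main

    import (
        "fmt"
        "sort"
        "time"
    )

    // machine is a simplified stand-in for clusterv1.Machine.
    type machine struct {
        name    string
        version string
        created time.Time
    }

    type byVersionAndCreation []machine

    func (o byVersionAndCreation) Len() int      { return len(o) }
    func (o byVersionAndCreation) Swap(i, j int) { o[i], o[j] = o[j], o[i] }
    func (o byVersionAndCreation) Less(i, j int) bool {
        if o[i].version != o[j].version {
            return o[i].version > o[j].version // latest version first
        }
        if !o[i].created.Equal(o[j].created) {
            return o[i].created.Before(o[j].created) // oldest first within a version
        }
        return o[i].name < o[j].name // name as final tie breaker
    }

    func main() {
        t0 := time.Now()
        ms := byVersionAndCreation{
            {name: "cp-a", version: "v1.27.1", created: t0},
            {name: "cp-b", version: "v1.28.7", created: t0.Add(2 * time.Hour)},
            {name: "cp-c", version: "v1.28.7", created: t0.Add(time.Hour)},
        }
        sort.Sort(ms)
        // Prints cp-c: the earliest-created machine already running the
        // latest version, i.e. the controller that new nodes join through.
        fmt.Println(ms[0].name)
    }

During a Recreate update this ordering matters: while old- and new-version machines coexist, joining nodes must target a controller that is already at the new version rather than one about to be deleted.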