From 4ff1622f6cabaaa487f72e0368731e8cb11760ab Mon Sep 17 00:00:00 2001 From: pavel <36902936+pavel-github@users.noreply.github.com> Date: Fri, 23 Aug 2024 22:33:16 +0200 Subject: [PATCH] feat(instances): implement InstanceV2 interface for node metadata (#22) --- go.mod | 6 +- go.sum | 4 +- internal/xelon/cloud.go | 5 +- internal/xelon/instances_v2.go | 225 ++++++++++++++++++++++++++++ internal/xelon/instances_v2_test.go | 65 ++++++++ 5 files changed, 299 insertions(+), 6 deletions(-) create mode 100644 internal/xelon/instances_v2.go create mode 100644 internal/xelon/instances_v2_test.go diff --git a/go.mod b/go.mod index 0643068..b70f8e4 100644 --- a/go.mod +++ b/go.mod @@ -1,9 +1,11 @@ module github.com/Xelon-AG/xelon-cloud-controller-manager -go 1.22 +go 1.23 + +toolchain go1.23.0 require ( - github.com/Xelon-AG/xelon-sdk-go v0.15.0 + github.com/Xelon-AG/xelon-sdk-go v0.15.1 github.com/go-logr/logr v1.4.1 github.com/stretchr/testify v1.9.0 k8s.io/api v0.28.13 diff --git a/go.sum b/go.sum index 9dbff02..4f542e6 100644 --- a/go.sum +++ b/go.sum @@ -43,8 +43,8 @@ github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym github.com/NYTimes/gziphandler v1.1.1 h1:ZUDjpQae29j0ryrS0u/B8HZfJBtBQHjqw2rQ2cqUQ3I= github.com/NYTimes/gziphandler v1.1.1/go.mod h1:n/CVRwUEOgIxrgPvAQhUUr9oeUtvrhMomdKFjzJNB0c= github.com/OneOfOne/xxhash v1.2.2/go.mod h1:HSdplMjZKSmBqAxg5vPj2TmRDmfkzw+cTzAElWljhcU= -github.com/Xelon-AG/xelon-sdk-go v0.15.0 h1:7TzyLTCWFs9m86kQqrluazo4Kdu1UBSuEfy93SiOZAo= -github.com/Xelon-AG/xelon-sdk-go v0.15.0/go.mod h1:hc1DgVNBbWJPn0GK+HIiNv4npvPsFLY5wMiD//f6/OE= +github.com/Xelon-AG/xelon-sdk-go v0.15.1 h1:G8u8wleUvZiKw/6PWCfU7WzMqKi1lins92tX1R61tGI= +github.com/Xelon-AG/xelon-sdk-go v0.15.1/go.mod h1:Z8t2YsyXFopxTf6OTUWxIzYrdoPxb8XtzX6Ll50z4Kw= github.com/antihax/optional v1.0.0/go.mod h1:uupD/76wgC+ih3iEmQUL+0Ugr19nfwCT1kdvxnR2qWY= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df 
h1:7RFfzj4SSt6nnvCPbCqijJi1nWCd+TqAT3bYCStRC18= github.com/antlr/antlr4/runtime/Go/antlr/v4 v4.0.0-20230305170008-8188dc5388df/go.mod h1:pSwJ0fSY5KhvocuWSx4fz3BA8OrA1bQn+K1Eli3BRwM= diff --git a/internal/xelon/cloud.go b/internal/xelon/cloud.go index 4622dbd..6a88fd7 100644 --- a/internal/xelon/cloud.go +++ b/internal/xelon/cloud.go @@ -30,6 +30,7 @@ type clients struct { type cloud struct { clients *clients + instances cloudprovider.InstancesV2 loadBalancers cloudprovider.LoadBalancer } @@ -79,6 +80,7 @@ func newCloud() (cloudprovider.Interface, error) { return &cloud{ clients: clients, + instances: newInstances(clients, clusterID), loadBalancers: newLoadBalancers(clients, tenant.TenantID, cloudID, clusterID), }, nil } @@ -97,7 +99,7 @@ func (c *cloud) Instances() (cloudprovider.Instances, bool) { } func (c *cloud) InstancesV2() (cloudprovider.InstancesV2, bool) { - return nil, false + return c.instances, true } func (c *cloud) Zones() (cloudprovider.Zones, bool) { @@ -113,7 +115,6 @@ func (c *cloud) Routes() (cloudprovider.Routes, bool) { } func (c *cloud) ProviderName() string { - klog.V(5).Info("called ProviderName") return ProviderName } diff --git a/internal/xelon/instances_v2.go b/internal/xelon/instances_v2.go new file mode 100644 index 0000000..f661d10 --- /dev/null +++ b/internal/xelon/instances_v2.go @@ -0,0 +1,225 @@ +package xelon + +import ( + "context" + "errors" + "fmt" + "slices" + "strings" + "sync" + "time" + + v1 "k8s.io/api/core/v1" + cloudprovider "k8s.io/cloud-provider" + "k8s.io/klog/v2" + + "github.com/Xelon-AG/xelon-sdk-go/xelon" +) + +const providerIDPrefix = ProviderName + "://" + +var _ cloudprovider.InstancesV2 = (*instances)(nil) + +type xelonNode struct { + localVMID string + name string + nodeType string +} + +type instances struct { + client *clients + clusterID string + + nodes []xelonNode + lastUpdate time.Time + ttl time.Duration + + sync.RWMutex +} + +func newInstances(clients *clients, clusterID string) 
cloudprovider.InstancesV2 { + return &instances{ + client: clients, + clusterID: clusterID, + + nodes: make([]xelonNode, 0), + ttl: 15 * time.Second, + } +} + +func (i *instances) InstanceExists(ctx context.Context, node *v1.Node) (bool, error) { + _, err := i.lookupXelonNode(ctx, node) + if err != nil { + if errors.Is(err, cloudprovider.InstanceNotFound) { + return false, nil + } + return false, err + } + return true, nil +} + +func (i *instances) InstanceShutdown(ctx context.Context, node *v1.Node) (bool, error) { + _, err := i.lookupXelonNode(ctx, node) + if err != nil { + if errors.Is(err, cloudprovider.InstanceNotFound) { + return false, nil + } + return false, err + } + return false, nil +} + +func (i *instances) InstanceMetadata(ctx context.Context, node *v1.Node) (*cloudprovider.InstanceMetadata, error) { + meta := &cloudprovider.InstanceMetadata{} + if node == nil { + return meta, nil + } + + xn, err := i.lookupXelonNode(ctx, node) + if err != nil { + return meta, err + } + + meta.ProviderID = fmt.Sprintf("%s%s", providerIDPrefix, xn.localVMID) + meta.NodeAddresses = node.Status.Addresses + meta.InstanceType = xn.nodeType + + klog.V(5).InfoS("Setting instance metadata for node", "node", node.Name, "metadata", meta) + + return meta, nil +} + +func (i *instances) lookupXelonNode(ctx context.Context, node *v1.Node) (*xelonNode, error) { + err := i.refreshNodes(ctx) + if err != nil { + return nil, err + } + + providerID := node.Spec.ProviderID + if providerID != "" && isXelonProviderID(providerID) { + klog.V(5).InfoS("Use providerID to get Xelon node", "provider_id", providerID) + + localVMID, err := parseProviderID(providerID) + if err != nil { + return nil, err + } + xn, err := i.getXelonNodeByLocalVMID(localVMID) + if err != nil { + return nil, err + } + return xn, nil + } else { + klog.V(5).InfoS("Use name to get Xelon node", "name", node.Name) + + xn, err := i.getXelonNodeByName(node.Name) + if err != nil { + return nil, err + } + return xn, nil + } +} + 
+// refreshNodes conditionally loads all control plane nodes, cluster pool nodes from Xelon API +// and caches them. It does not refresh if the last update happened less than 'nodeCache.ttl' ago. +func (i *instances) refreshNodes(ctx context.Context) error { + i.Lock() + defer i.Unlock() + + sinceLastUpdate := time.Since(i.lastUpdate) + if sinceLastUpdate < i.ttl { + klog.V(2).InfoS("Skip refreshing nodes", "since_last_update", sinceLastUpdate, "ttl", i.ttl) + return nil + } + + klog.V(5).InfoS("Getting control planes from Xelon API", "cluster_id", i.clusterID) + controlPlane, _, err := i.client.xelon.Kubernetes.ListControlPlanes(ctx, i.clusterID) + if err != nil { + return err + } + klog.V(5).InfoS("Got control planes from Xelon API", "data", controlPlane) + var controlPlaneNodes []xelonNode + for _, controlPlaneNode := range controlPlane.Nodes { + controlPlaneNodes = append(controlPlaneNodes, xelonNode{ + localVMID: controlPlaneNode.LocalVMID, + name: controlPlaneNode.Name, + nodeType: getNodeTypeFromControlPlaneNode(controlPlane), + }) + } + + klog.V(5).InfoS("Getting cluster pools from Xelon API", "cluster_id", i.clusterID) + clusterPools, _, err := i.client.xelon.Kubernetes.ListClusterPools(ctx, i.clusterID) + if err != nil { + return err + } + klog.V(5).InfoS("Got cluster pools from Xelon API", "data", clusterPools) + var clusterPoolNodes []xelonNode + for _, clusterPool := range clusterPools { + for _, clusterPoolNode := range clusterPool.Nodes { + clusterPoolNodes = append(clusterPoolNodes, xelonNode{ + localVMID: clusterPoolNode.LocalVMID, + name: clusterPoolNode.Name, + nodeType: getNodeTypeFromClusterPool(&clusterPool), + }) + } + } + + i.nodes = slices.Concat(controlPlaneNodes, clusterPoolNodes) + i.lastUpdate = time.Now() + + return nil +} + +func (i *instances) getXelonNodeByLocalVMID(localVMID string) (*xelonNode, error) { + for _, node := range i.nodes { + if node.localVMID == localVMID { + return &node, nil + } + } + + return nil, 
cloudprovider.InstanceNotFound +} + +func (i *instances) getXelonNodeByName(name string) (*xelonNode, error) { + for _, node := range i.nodes { + if node.name == name { + return &node, nil + } + } + + return nil, cloudprovider.InstanceNotFound +} + +// getNodeTypeFromControlPlaneNode formats a node type from control plane parameters +// in the following form <cpu_info>-<memory_info>-<disk_info>: +// - cpu_info: shows CPU core count (e.g. c2c - 2 cores) +// - memory_info: shows RAM in gigabytes (e.g. m4g - 4 GB) +// - disk_info: shows disk size in gigabytes (e.g. d50g - 50 GB) +func getNodeTypeFromControlPlaneNode(controlPlane *xelon.ClusterControlPlane) string { + if controlPlane == nil { + return "" + } + return fmt.Sprintf("c%dc-m%dg-d%dg", controlPlane.CPUCoreCount, controlPlane.Memory, controlPlane.DiskSize) +} + +// getNodeTypeFromClusterPool formats a node type from cluster pool parameters +// in the following form <cpu_info>-<memory_info>-<disk_info>: +// - cpu_info: shows CPU core count (e.g. c2c - 2 cores) +// - memory_info: shows RAM in gigabytes (e.g. m4g - 4 GB) +// - disk_info: shows disk size in gigabytes (e.g.
d50g - 50 GB) +func getNodeTypeFromClusterPool(clusterPool *xelon.ClusterPool) string { + if clusterPool == nil { + return "" + } + return fmt.Sprintf("c%dc-m%dg-d%dg", clusterPool.CPUCoreCount, clusterPool.Memory, clusterPool.DiskSize) +} + +func parseProviderID(providerID string) (string, error) { + if !isXelonProviderID(providerID) { + return "", fmt.Errorf("invalid provider ID: %s", providerID) + } + return strings.TrimPrefix(providerID, providerIDPrefix), nil +} + +func isXelonProviderID(providerID string) bool { + return strings.HasPrefix(providerID, providerIDPrefix) +} diff --git a/internal/xelon/instances_v2_test.go b/internal/xelon/instances_v2_test.go new file mode 100644 index 0000000..a4ce46e --- /dev/null +++ b/internal/xelon/instances_v2_test.go @@ -0,0 +1,65 @@ +package xelon + +import ( + "testing" + + "github.com/stretchr/testify/assert" + + "github.com/Xelon-AG/xelon-sdk-go/xelon" +) + +func TestInstances_getNodeTypeFromControlPlaneNode(t *testing.T) { + type testCase struct { + input *xelon.ClusterControlPlane + expected string + } + tests := map[string]testCase{ + "nil": { + input: nil, + expected: "", + }, + "valid values": { + input: &xelon.ClusterControlPlane{ + CPUCoreCount: 2, + DiskSize: 50, + Memory: 4, + }, + expected: "c2c-m4g-d50g", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + actual := getNodeTypeFromControlPlaneNode(test.input) + assert.Equal(t, test.expected, actual) + }) + } +} + +func TestInstances_getNodeTypeFromClusterPool(t *testing.T) { + type testCase struct { + input *xelon.ClusterPool + expected string + } + tests := map[string]testCase{ + "nil": { + input: nil, + expected: "", + }, + "valid values": { + input: &xelon.ClusterPool{ + CPUCoreCount: 2, + DiskSize: 50, + Memory: 4, + }, + expected: "c2c-m4g-d50g", + }, + } + + for name, test := range tests { + t.Run(name, func(t *testing.T) { + actual := getNodeTypeFromClusterPool(test.input) + assert.Equal(t, test.expected, actual) + }) + 
} +}