From 32c9ad3e79af2b86d49734c18d8cce307c960a5f Mon Sep 17 00:00:00 2001 From: Hari Radhakrishnan <94331832+hari-rad@users.noreply.github.com> Date: Tue, 31 Dec 2024 22:26:03 +0530 Subject: [PATCH] Add SQS InterruptionEvent.InstanceType (#1104) * Add SQS InterruptionEvent.InstanceType * Fix indentation * Merge InstanceType fields in webhook.combinedDrainData * Fix formatting --- pkg/monitor/sqsevent/asg-lifecycle-event.go | 1 + .../sqsevent/ec2-state-change-event.go | 1 + .../rebalance-recommendation-event.go | 1 + .../sqsevent/scheduled-change-event.go | 1 + pkg/monitor/sqsevent/spot-itn-event.go | 1 + pkg/monitor/sqsevent/sqs-monitor.go | 24 ++++++++++--------- .../sqsevent/sqs-monitor_internal_test.go | 1 + pkg/monitor/sqsevent/sqs-monitor_test.go | 1 + pkg/monitor/types.go | 1 + pkg/webhook/webhook.go | 12 +++++++--- 10 files changed, 30 insertions(+), 14 deletions(-) diff --git a/pkg/monitor/sqsevent/asg-lifecycle-event.go b/pkg/monitor/sqsevent/asg-lifecycle-event.go index dede29cc..fc034931 100644 --- a/pkg/monitor/sqsevent/asg-lifecycle-event.go +++ b/pkg/monitor/sqsevent/asg-lifecycle-event.go @@ -91,6 +91,7 @@ func (m SQSMonitor) asgTerminationToInterruptionEvent(event *EventBridgeEvent, m IsManaged: nodeInfo.IsManaged, InstanceID: lifecycleDetail.EC2InstanceID, ProviderID: nodeInfo.ProviderID, + InstanceType: nodeInfo.InstanceType, Description: fmt.Sprintf("ASG Lifecycle Termination event received. Instance will be interrupted at %s \n", event.getTime()), } diff --git a/pkg/monitor/sqsevent/ec2-state-change-event.go b/pkg/monitor/sqsevent/ec2-state-change-event.go index ba4f08c5..9ea065b6 100644 --- a/pkg/monitor/sqsevent/ec2-state-change-event.go +++ b/pkg/monitor/sqsevent/ec2-state-change-event.go @@ -75,6 +75,7 @@ func (m SQSMonitor) ec2StateChangeToInterruptionEvent(event *EventBridgeEvent, m AutoScalingGroupName: nodeInfo.AsgName, InstanceID: ec2StateChangeDetail.InstanceID, ProviderID: nodeInfo.ProviderID, + InstanceType: nodeInfo.InstanceType, Description: fmt.Sprintf("EC2 State Change event received. Instance %s went into %s at %s \n", ec2StateChangeDetail.InstanceID, ec2StateChangeDetail.State, event.getTime()), } diff --git a/pkg/monitor/sqsevent/rebalance-recommendation-event.go b/pkg/monitor/sqsevent/rebalance-recommendation-event.go index 9b2b9f42..0ecc30fb 100644 --- a/pkg/monitor/sqsevent/rebalance-recommendation-event.go +++ b/pkg/monitor/sqsevent/rebalance-recommendation-event.go @@ -67,6 +67,7 @@ func (m SQSMonitor) rebalanceRecommendationToInterruptionEvent(event *EventBridg IsManaged: nodeInfo.IsManaged, InstanceID: nodeInfo.InstanceID, ProviderID: nodeInfo.ProviderID, + InstanceType: nodeInfo.InstanceType, Description: fmt.Sprintf("Rebalance recommendation event received. Instance %s will be cordoned at %s \n", rebalanceRecDetail.InstanceID, event.getTime()), } interruptionEvent.PostDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { diff --git a/pkg/monitor/sqsevent/scheduled-change-event.go b/pkg/monitor/sqsevent/scheduled-change-event.go index e66f0453..1fe68cda 100644 --- a/pkg/monitor/sqsevent/scheduled-change-event.go +++ b/pkg/monitor/sqsevent/scheduled-change-event.go @@ -102,6 +102,7 @@ func (m SQSMonitor) scheduledEventToInterruptionEvents(event *EventBridgeEvent, NodeName: nodeInfo.Name, InstanceID: nodeInfo.InstanceID, ProviderID: nodeInfo.ProviderID, + InstanceType: nodeInfo.InstanceType, IsManaged: nodeInfo.IsManaged, Description: fmt.Sprintf("AWS Health scheduled change event received. Instance %s will be interrupted at %s \n", nodeInfo.InstanceID, event.getTime()), } diff --git a/pkg/monitor/sqsevent/spot-itn-event.go b/pkg/monitor/sqsevent/spot-itn-event.go index d8acdeba..d0aa476e 100644 --- a/pkg/monitor/sqsevent/spot-itn-event.go +++ b/pkg/monitor/sqsevent/spot-itn-event.go @@ -69,6 +69,7 @@ func (m SQSMonitor) spotITNTerminationToInterruptionEvent(event *EventBridgeEven IsManaged: nodeInfo.IsManaged, InstanceID: spotInterruptionDetail.InstanceID, ProviderID: nodeInfo.ProviderID, + InstanceType: nodeInfo.InstanceType, Description: fmt.Sprintf("Spot Interruption notice for instance %s was sent at %s \n", spotInterruptionDetail.InstanceID, event.getTime()), } interruptionEvent.PostDrainTask = func(interruptionEvent monitor.InterruptionEvent, n node.Node) error { diff --git a/pkg/monitor/sqsevent/sqs-monitor.go b/pkg/monitor/sqsevent/sqs-monitor.go index e46fc95e..7ad0513d 100644 --- a/pkg/monitor/sqsevent/sqs-monitor.go +++ b/pkg/monitor/sqsevent/sqs-monitor.go @@ -340,12 +340,13 @@ func (m SQSMonitor) completeLifecycleAction(input *autoscaling.CompleteLifecycle // NodeInfo is relevant information about a single node type NodeInfo struct { - AsgName string - InstanceID string - ProviderID string - IsManaged bool - Name string - Tags map[string]string + AsgName string + InstanceID string + ProviderID string + InstanceType string + IsManaged bool + Name string + Tags map[string]string } // getNodeInfo returns the NodeInfo record for the given instanceID. @@ -411,11 +412,12 @@ func (m SQSMonitor) getNodeInfo(instanceID string) (*NodeInfo, error) { } nodeInfo := &NodeInfo{ - Name: *instance.PrivateDnsName, - InstanceID: instanceID, - ProviderID: providerID, - Tags: make(map[string]string), - IsManaged: true, + Name: *instance.PrivateDnsName, + InstanceID: instanceID, + ProviderID: providerID, + InstanceType: *instance.InstanceType, + Tags: make(map[string]string), + IsManaged: true, } for _, t := range (*instance).Tags { nodeInfo.Tags[*t.Key] = *t.Value diff --git a/pkg/monitor/sqsevent/sqs-monitor_internal_test.go b/pkg/monitor/sqsevent/sqs-monitor_internal_test.go index 128be769..c7caa10f 100644 --- a/pkg/monitor/sqsevent/sqs-monitor_internal_test.go +++ b/pkg/monitor/sqsevent/sqs-monitor_internal_test.go @@ -199,6 +199,7 @@ func getDescribeInstancesResp(instanceID string, privateDNSName string, tags map GroupName: aws.String(""), Tenancy: aws.String("default"), }, + InstanceType: aws.String("t3.medium"), PrivateDnsName: aws.String(privateDNSName), Tags: awsTags, }, diff --git a/pkg/monitor/sqsevent/sqs-monitor_test.go b/pkg/monitor/sqsevent/sqs-monitor_test.go index 8e827377..2b93085e 100644 --- a/pkg/monitor/sqsevent/sqs-monitor_test.go +++ b/pkg/monitor/sqsevent/sqs-monitor_test.go @@ -929,6 +929,7 @@ func getDescribeInstancesResp(privateDNSName string, withASGTag bool, withManage GroupName: aws.String(""), Tenancy: aws.String("default"), }, + InstanceType: aws.String("t3.medium"), PrivateDnsName: &privateDNSName, Tags: tags, }, diff --git a/pkg/monitor/types.go b/pkg/monitor/types.go index 93d56625..6367868c 100644 --- a/pkg/monitor/types.go +++ b/pkg/monitor/types.go @@ -53,6 +53,7 @@ type InterruptionEvent struct { Pods []string InstanceID string ProviderID string + InstanceType string IsManaged bool StartTime time.Time EndTime time.Time diff --git a/pkg/webhook/webhook.go b/pkg/webhook/webhook.go index f3446f81..4c6ced4d 100644 --- a/pkg/webhook/webhook.go +++ b/pkg/webhook/webhook.go @@ -33,7 +33,8 @@ import ( type combinedDrainData struct { ec2metadata.NodeMetadata monitor.InterruptionEvent - InstanceID string + InstanceID string + InstanceType string } // Post makes a http post to send drain event data to webhook url @@ -60,12 +61,17 @@ func Post(additionalInfo ec2metadata.NodeMetadata, event *monitor.InterruptionEv return } - // Need to merge the two data sources manually since both have an InstanceID field + // Need to merge the two data sources manually since both have + // InstanceID and InstanceType fields instanceID := additionalInfo.InstanceID if event.InstanceID != "" { instanceID = event.InstanceID } - var combined = combinedDrainData{NodeMetadata: additionalInfo, InterruptionEvent: *event, InstanceID: instanceID} + instanceType := additionalInfo.InstanceType + if event.InstanceType != "" { + instanceType = event.InstanceType + } + var combined = combinedDrainData{NodeMetadata: additionalInfo, InterruptionEvent: *event, InstanceID: instanceID, InstanceType: instanceType} var byteBuffer bytes.Buffer err = webhookTemplate.Execute(&byteBuffer, combined)