From 9c74844210ab841cc2439ec0860298aaa382b41b Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Sat, 27 Sep 2025 21:31:38 -0400 Subject: [PATCH 1/6] Initial testing --- pkg/config/controller_config.go | 7 +++++++ pkg/targetgroupbinding/targets_manager.go | 12 +++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/pkg/config/controller_config.go b/pkg/config/controller_config.go index 001952076c..4bb0329776 100644 --- a/pkg/config/controller_config.go +++ b/pkg/config/controller_config.go @@ -34,6 +34,7 @@ const ( flagBackendSecurityGroup = "backend-security-group" flagEnableEndpointSlices = "enable-endpoint-slices" flagDisableRestrictedSGRules = "disable-restricted-sg-rules" + flagMaxTargetsPerInstance = "max-targets-per-instance" defaultLogLevel = "info" defaultMaxConcurrentReconciles = 3 defaultMaxExponentialBackoffDelay = time.Second * 1000 @@ -43,6 +44,7 @@ const ( defaultEnableEndpointSlices = true defaultDisableRestrictedSGRules = false defaultLbStabilizationMonitorInterval = time.Second * 120 + defaultMaxTargetsPerInstance = 0 ) var ( @@ -133,6 +135,9 @@ type ControllerConfig struct { // LBStabilizationMonitorInterval specifies the duration of interval to monitor the load balancer state for stabilization LBStabilizationMonitorInterval time.Duration + // MaxTargetsPerInstance limits the number of targets that will be added to an ELB instance + MaxTargetsPerInstance int + FeatureGates FeatureGates } @@ -177,6 +182,8 @@ func (cfg *ControllerConfig) BindFlags(fs *pflag.FlagSet) { "Disable the usage of restricted security group rules") fs.StringToStringVar(&cfg.ServiceTargetENISGTags, flagServiceTargetENISGTags, nil, "AWS Tags, in addition to cluster tags, for finding the target ENI security group to which to add inbound rules from NLBs") + fs.IntVar(&cfg.MaxTargetsPerInstance, flagMaxTargetsPerInstance, defaultMaxTargetsPerInstance, + "Maximum number of targets that can be added to an ELB instance. 
Use this to prevent TargetGroup quotas being exceeded from blocking reconciliation.") cfg.FeatureGates.BindFlags(fs) cfg.AWSConfig.BindFlags(fs) cfg.RuntimeConfig.BindFlags(fs) diff --git a/pkg/targetgroupbinding/targets_manager.go b/pkg/targetgroupbinding/targets_manager.go index 990c2cefda..1900e45924 100644 --- a/pkg/targetgroupbinding/targets_manager.go +++ b/pkg/targetgroupbinding/targets_manager.go @@ -18,6 +18,7 @@ const ( defaultTargetsCacheTTL = 5 * time.Minute defaultRegisterTargetsChunkSize = 200 defaultDeregisterTargetsChunkSize = 200 + maxTargetsPerInstance = 500 ) // TargetsManager is an abstraction around ELBV2's targets API. @@ -80,7 +81,16 @@ type targetsCacheItem struct { func (m *cachedTargetsManager) RegisterTargets(ctx context.Context, tgb *elbv2api.TargetGroupBinding, targets []elbv2types.TargetDescription) error { tgARN := tgb.Spec.TargetGroupARN - targetsChunks := chunkTargetDescriptions(targets, m.registerTargetsChunkSize) + sampledTargets := targets + + m.logger.Info("Number of targets", len(targets), "registering a subset per max-targets-per-instance", maxTargetsPerInstance) + if maxTargetsPerInstance > 0 && len(targets) > maxTargetsPerInstance { + m.logger.Info("Max number of targets exceeded", len(targets), "registering a subset per max-targets-per-instance", maxTargetsPerInstance) + m.logger.Info("Max number of targets exceeded", len(targets), "registering a subset per max-targets-per-instance", maxTargetsPerInstance) + sampledTargets = targets[:maxTargetsPerInstance] + } + + targetsChunks := chunkTargetDescriptions(sampledTargets, m.registerTargetsChunkSize) for _, targetsChunk := range targetsChunks { req := &elbv2sdk.RegisterTargetsInput{ TargetGroupArn: aws.String(tgARN), From 7a2218b2116db68174e6520f708e26ad9305865b Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Sat, 27 Sep 2025 21:31:38 -0400 Subject: [PATCH 2/6] working implementation on both target-type IP and NodePort --- main.go | 2 +- 
pkg/targetgroupbinding/resource_manager.go | 40 +++++++++++++++++++++- pkg/targetgroupbinding/targets_manager.go | 12 +------ 3 files changed, 41 insertions(+), 13 deletions(-) diff --git a/main.go b/main.go index f90441857c..f4077c9845 100644 --- a/main.go +++ b/main.go @@ -185,7 +185,7 @@ func main() { tgbResManager := targetgroupbinding.NewDefaultResourceManager(mgr.GetClient(), cloud.ELBV2(), podInfoRepo, networkingManager, vpcInfoProvider, multiClusterManager, lbcMetricsCollector, cloud.VpcID(), controllerCFG.FeatureGates.Enabled(config.EndpointsFailOpen), controllerCFG.EnableEndpointSlices, - mgr.GetEventRecorderFor("targetGroupBinding"), ctrl.Log) + mgr.GetEventRecorderFor("targetGroupBinding"), ctrl.Log, controllerCFG.MaxTargetsPerInstance) backendSGProvider := networking.NewBackendSGProvider(controllerCFG.ClusterName, controllerCFG.BackendSecurityGroup, cloud.VpcID(), cloud.EC2(), mgr.GetClient(), controllerCFG.DefaultTags, nlbGatewayEnabled || albGatewayEnabled, ctrl.Log.WithName("backend-sg-provider")) sgResolver := networking.NewDefaultSecurityGroupResolver(cloud.EC2(), cloud.VpcID()) diff --git a/pkg/targetgroupbinding/resource_manager.go b/pkg/targetgroupbinding/resource_manager.go index 3d9418d3e9..312a13fedd 100644 --- a/pkg/targetgroupbinding/resource_manager.go +++ b/pkg/targetgroupbinding/resource_manager.go @@ -48,7 +48,8 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB podInfoRepo k8s.PodInfoRepo, networkingManager networking.NetworkingManager, vpcInfoProvider networking.VPCInfoProvider, multiClusterManager MultiClusterManager, metricsCollector lbcmetrics.MetricCollector, vpcID string, failOpenEnabled bool, endpointSliceEnabled bool, - eventRecorder record.EventRecorder, logger logr.Logger) *defaultResourceManager { + eventRecorder record.EventRecorder, logger logr.Logger, maxTargetsPerInstance int) *defaultResourceManager { + targetsManager := NewCachedTargetsManager(elbv2Client, logger) endpointResolver := 
backend.NewDefaultEndpointResolver(k8sClient, podInfoRepo, failOpenEnabled, endpointSliceEnabled, logger) return &defaultResourceManager{ @@ -61,6 +62,7 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB vpcID: vpcID, vpcInfoProvider: vpcInfoProvider, podInfoRepo: podInfoRepo, + maxTargetsPerInstance: maxTargetsPerInstance, multiClusterManager: multiClusterManager, metricsCollector: metricsCollector, @@ -83,6 +85,7 @@ type defaultResourceManager struct { logger logr.Logger vpcInfoProvider networking.VPCInfoProvider podInfoRepo k8s.PodInfoRepo + maxTargetsPerInstance int multiClusterManager MultiClusterManager metricsCollector lbcmetrics.MetricCollector vpcID string @@ -180,6 +183,7 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, if err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "list_targets_error", err, m.metricsCollector) } + totalTargets := len(targets) notDrainingTargets, _ := partitionTargetsByDrainingStatus(targets) matchedEndpointAndTargets, unmatchedEndpoints, unmatchedTargets := matchPodEndpointWithTargets(endpoints, notDrainingTargets) @@ -240,6 +244,23 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_ip_targets_error", err, m.metricsCollector) } + // Log that we're witholding target additions to prevent exceeding max-targets-per-instance + var limitedUnmatchedEndpoints []backend.PodEndpoint + + if m.maxTargetsPerInstance > 0 && len(unmatchedEndpoints) + totalTargets > m.maxTargetsPerInstance { + maxAdditions := m.maxTargetsPerInstance - totalTargets + if maxAdditions > 0 { + limitedUnmatchedEndpoints = unmatchedEndpoints[:maxAdditions] + } + tgbScopedLogger.Info("Limiting target additions due to max-targets-per-instance configuration", + "currentTargets", totalTargets, + "maxTargetsPerInstance", m.maxTargetsPerInstance, + 
"proposedAdditions", len(unmatchedEndpoints), + "numberOmitted", len(unmatchedEndpoints) - len(limitedUnmatchedEndpoints)) + + unmatchedEndpoints = limitedUnmatchedEndpoints + } + if err := m.registerPodEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "register_pod_endpoint_error", err, m.metricsCollector) } @@ -307,6 +328,7 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con if err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "list_targets_error", err, m.metricsCollector) } + totalTargets := len(targets) notDrainingTargets, _ := partitionTargetsByDrainingStatus(targets) @@ -341,6 +363,22 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_instance_targets_error", err, m.metricsCollector) } + var limitedUnmatchedEndpoints []backend.NodePortEndpoint + + if m.maxTargetsPerInstance > 0 && len(unmatchedEndpoints) + totalTargets > m.maxTargetsPerInstance { + maxAdditions := m.maxTargetsPerInstance - totalTargets + if maxAdditions > 0 { + limitedUnmatchedEndpoints = unmatchedEndpoints[:maxAdditions] + } + tgbScopedLogger.Info("Limiting target additions due to max-targets-per-instance configuration", + "currentTargets", totalTargets, + "maxTargetsPerInstance", m.maxTargetsPerInstance, + "proposedAdditions", len(unmatchedEndpoints), + "numberOmitted", len(unmatchedEndpoints) - len(limitedUnmatchedEndpoints)) + + unmatchedEndpoints = limitedUnmatchedEndpoints + } + if err := m.registerNodePortEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_node_port_endpoints_error", err, m.metricsCollector) } diff --git a/pkg/targetgroupbinding/targets_manager.go b/pkg/targetgroupbinding/targets_manager.go index 1900e45924..990c2cefda 100644 --- 
a/pkg/targetgroupbinding/targets_manager.go +++ b/pkg/targetgroupbinding/targets_manager.go @@ -18,7 +18,6 @@ const ( defaultTargetsCacheTTL = 5 * time.Minute defaultRegisterTargetsChunkSize = 200 defaultDeregisterTargetsChunkSize = 200 - maxTargetsPerInstance = 500 ) // TargetsManager is an abstraction around ELBV2's targets API. @@ -81,16 +80,7 @@ type targetsCacheItem struct { func (m *cachedTargetsManager) RegisterTargets(ctx context.Context, tgb *elbv2api.TargetGroupBinding, targets []elbv2types.TargetDescription) error { tgARN := tgb.Spec.TargetGroupARN - sampledTargets := targets - - m.logger.Info("Number of targets", len(targets), "registering a subset per max-targets-per-instance", maxTargetsPerInstance) - if maxTargetsPerInstance > 0 && len(targets) > maxTargetsPerInstance { - m.logger.Info("Max number of targets exceeded", len(targets), "registering a subset per max-targets-per-instance", maxTargetsPerInstance) - m.logger.Info("Max number of targets exceeded", len(targets), "registering a subset per max-targets-per-instance", maxTargetsPerInstance) - sampledTargets = targets[:maxTargetsPerInstance] - } - - targetsChunks := chunkTargetDescriptions(sampledTargets, m.registerTargetsChunkSize) + targetsChunks := chunkTargetDescriptions(targets, m.registerTargetsChunkSize) for _, targetsChunk := range targetsChunks { req := &elbv2sdk.RegisterTargetsInput{ TargetGroupArn: aws.String(tgARN), From 4cc76b0ad6c4853c0670fcd6d6a9b7aa44fca2ac Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Sat, 27 Sep 2025 21:31:38 -0400 Subject: [PATCH 3/6] dedup max addition calculation --- pkg/targetgroupbinding/resource_manager.go | 92 +++++++++------------- 1 file changed, 39 insertions(+), 53 deletions(-) diff --git a/pkg/targetgroupbinding/resource_manager.go b/pkg/targetgroupbinding/resource_manager.go index 312a13fedd..c5c393da22 100644 --- a/pkg/targetgroupbinding/resource_manager.go +++ b/pkg/targetgroupbinding/resource_manager.go @@ -53,18 +53,18 @@ func 
NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB targetsManager := NewCachedTargetsManager(elbv2Client, logger) endpointResolver := backend.NewDefaultEndpointResolver(k8sClient, podInfoRepo, failOpenEnabled, endpointSliceEnabled, logger) return &defaultResourceManager{ - k8sClient: k8sClient, - targetsManager: targetsManager, - endpointResolver: endpointResolver, - networkingManager: networkingManager, - eventRecorder: eventRecorder, - logger: logger, - vpcID: vpcID, - vpcInfoProvider: vpcInfoProvider, - podInfoRepo: podInfoRepo, + k8sClient: k8sClient, + targetsManager: targetsManager, + endpointResolver: endpointResolver, + networkingManager: networkingManager, + eventRecorder: eventRecorder, + logger: logger, + vpcID: vpcID, + vpcInfoProvider: vpcInfoProvider, + podInfoRepo: podInfoRepo, maxTargetsPerInstance: maxTargetsPerInstance, - multiClusterManager: multiClusterManager, - metricsCollector: metricsCollector, + multiClusterManager: multiClusterManager, + metricsCollector: metricsCollector, invalidVpcCache: cache.NewExpiring(), invalidVpcCacheTTL: defaultTargetsCacheTTL, @@ -77,18 +77,18 @@ var _ ResourceManager = &defaultResourceManager{} // default implementation for ResourceManager. 
type defaultResourceManager struct { - k8sClient client.Client - targetsManager TargetsManager - endpointResolver backend.EndpointResolver - networkingManager networking.NetworkingManager - eventRecorder record.EventRecorder - logger logr.Logger - vpcInfoProvider networking.VPCInfoProvider - podInfoRepo k8s.PodInfoRepo + k8sClient client.Client + targetsManager TargetsManager + endpointResolver backend.EndpointResolver + networkingManager networking.NetworkingManager + eventRecorder record.EventRecorder + logger logr.Logger + vpcInfoProvider networking.VPCInfoProvider + podInfoRepo k8s.PodInfoRepo maxTargetsPerInstance int - multiClusterManager MultiClusterManager - metricsCollector lbcmetrics.MetricCollector - vpcID string + multiClusterManager MultiClusterManager + metricsCollector lbcmetrics.MetricCollector + vpcID string invalidVpcCache *cache.Expiring invalidVpcCacheTTL time.Duration @@ -244,22 +244,8 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_ip_targets_error", err, m.metricsCollector) } - // Log that we're witholding target additions to prevent exceeding max-targets-per-instance - var limitedUnmatchedEndpoints []backend.PodEndpoint - - if m.maxTargetsPerInstance > 0 && len(unmatchedEndpoints) + totalTargets > m.maxTargetsPerInstance { - maxAdditions := m.maxTargetsPerInstance - totalTargets - if maxAdditions > 0 { - limitedUnmatchedEndpoints = unmatchedEndpoints[:maxAdditions] - } - tgbScopedLogger.Info("Limiting target additions due to max-targets-per-instance configuration", - "currentTargets", totalTargets, - "maxTargetsPerInstance", m.maxTargetsPerInstance, - "proposedAdditions", len(unmatchedEndpoints), - "numberOmitted", len(unmatchedEndpoints) - len(limitedUnmatchedEndpoints)) - - unmatchedEndpoints = limitedUnmatchedEndpoints - } + eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), totalTargets, 
tgbScopedLogger) + unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] if err := m.registerPodEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "register_pod_endpoint_error", err, m.metricsCollector) @@ -363,21 +349,8 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_instance_targets_error", err, m.metricsCollector) } - var limitedUnmatchedEndpoints []backend.NodePortEndpoint - - if m.maxTargetsPerInstance > 0 && len(unmatchedEndpoints) + totalTargets > m.maxTargetsPerInstance { - maxAdditions := m.maxTargetsPerInstance - totalTargets - if maxAdditions > 0 { - limitedUnmatchedEndpoints = unmatchedEndpoints[:maxAdditions] - } - tgbScopedLogger.Info("Limiting target additions due to max-targets-per-instance configuration", - "currentTargets", totalTargets, - "maxTargetsPerInstance", m.maxTargetsPerInstance, - "proposedAdditions", len(unmatchedEndpoints), - "numberOmitted", len(unmatchedEndpoints) - len(limitedUnmatchedEndpoints)) - - unmatchedEndpoints = limitedUnmatchedEndpoints - } + eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), totalTargets, tgbScopedLogger) + unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] if err := m.registerNodePortEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_node_port_endpoints_error", err, m.metricsCollector) @@ -841,3 +814,16 @@ func isVPCNotFoundError(err error) bool { } return false } + +func (m *defaultResourceManager) getMaxNewTargets(newTargetCount int, currentTargetCount int, tgbScopedLogger logr.Logger) (maxAdditions int) { + if m.maxTargetsPerInstance > 0 && newTargetCount+currentTargetCount > m.maxTargetsPerInstance { + maxAdditions = m.maxTargetsPerInstance - currentTargetCount + tgbScopedLogger.Info("Limiting 
target additions due to max-targets-per-instance configuration", + "currentTargets", currentTargetCount, + "maxTargetsPerInstance", m.maxTargetsPerInstance, + "proposedAdditions", newTargetCount) + return maxAdditions + } + + return newTargetCount +} From 79d47c04e06150eff2c914563b55bad3f70bce4a Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Sat, 27 Sep 2025 21:31:38 -0400 Subject: [PATCH 4/6] update docs --- docs/deploy/configurations.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/deploy/configurations.md b/docs/deploy/configurations.md index af8d03a0fc..b73ab142dc 100644 --- a/docs/deploy/configurations.md +++ b/docs/deploy/configurations.md @@ -117,6 +117,7 @@ Currently, you can set only 1 namespace to watch in this flag. See [this Kuberne | kube-ca-pem-filepath | string | | The file path to the CA to validate webhook callers, when unspecified all webhook callers are permitted. | | alb-gateway-max-concurrent-reconciles | int | 3 | Maximum number of concurrently running reconcile loops for ALB gateways, if enabled | | nlb-gateway-max-concurrent-reconciles | int | 3 | Maximum number of concurrently running reconcile loops for NLB gateways, if enabled | +| max-targets-per-instance | int | 0 | Maximum number of targets that will be added to a given ELB instance. The default value of zero will leave the number of targets unlimited | ### disable-ingress-class-annotation `--disable-ingress-class-annotation` controls whether to disable new usage of the `kubernetes.io/ingress.class` annotation. 
From ca9d382ff8da9e01c4e29e6bb6122dc89f7338f7 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Sat, 27 Sep 2025 22:13:46 -0400 Subject: [PATCH 5/6] some tidying --- pkg/targetgroupbinding/resource_manager.go | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/pkg/targetgroupbinding/resource_manager.go b/pkg/targetgroupbinding/resource_manager.go index c5c393da22..8f6c170eb2 100644 --- a/pkg/targetgroupbinding/resource_manager.go +++ b/pkg/targetgroupbinding/resource_manager.go @@ -183,7 +183,6 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, if err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "list_targets_error", err, m.metricsCollector) } - totalTargets := len(targets) notDrainingTargets, _ := partitionTargetsByDrainingStatus(targets) matchedEndpointAndTargets, unmatchedEndpoints, unmatchedTargets := matchPodEndpointWithTargets(endpoints, notDrainingTargets) @@ -244,7 +243,7 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_ip_targets_error", err, m.metricsCollector) } - eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), totalTargets, tgbScopedLogger) + eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), len(targets), tgbScopedLogger) unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] if err := m.registerPodEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { @@ -314,7 +313,6 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con if err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "list_targets_error", err, m.metricsCollector) } - totalTargets := len(targets) notDrainingTargets, _ := partitionTargetsByDrainingStatus(targets) @@ -349,7 +347,7 @@ func (m *defaultResourceManager) reconcileWithInstanceTargetType(ctx context.Con return "", "", false, 
ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_instance_targets_error", err, m.metricsCollector) } - eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), totalTargets, tgbScopedLogger) + eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), len(targets), tgbScopedLogger) unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] if err := m.registerNodePortEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { From 1becaac642b1697195e4fe619ebabffbf08387e8 Mon Sep 17 00:00:00 2001 From: Matt Miller Date: Mon, 29 Sep 2025 21:24:37 -0400 Subject: [PATCH 6/6] updates per review suggestions --- docs/deploy/configurations.md | 2 +- main.go | 2 +- pkg/config/controller_config.go | 10 +-- pkg/targetgroupbinding/resource_manager.go | 80 ++++++++++++---------- 4 files changed, 49 insertions(+), 45 deletions(-) diff --git a/docs/deploy/configurations.md b/docs/deploy/configurations.md index b73ab142dc..09feb3cb44 100644 --- a/docs/deploy/configurations.md +++ b/docs/deploy/configurations.md @@ -117,7 +117,7 @@ Currently, you can set only 1 namespace to watch in this flag. See [this Kuberne | kube-ca-pem-filepath | string | | The file path to the CA to validate webhook callers, when unspecified all webhook callers are permitted. | | alb-gateway-max-concurrent-reconciles | int | 3 | Maximum number of concurrently running reconcile loops for ALB gateways, if enabled | | nlb-gateway-max-concurrent-reconciles | int | 3 | Maximum number of concurrently running reconcile loops for NLB gateways, if enabled | -| max-targets-per-instance | int | 0 | Maximum number of targets that will be added to a given ELB instance. The default value of zero will leave the number of targets unlimited | +| max-targets-per-target-group | int | 0 | Maximum number of targets that will be added to a given Target Group. 
The default value of zero will leave the number of targets unlimited | ### disable-ingress-class-annotation `--disable-ingress-class-annotation` controls whether to disable new usage of the `kubernetes.io/ingress.class` annotation. diff --git a/main.go b/main.go index f4077c9845..0d68b6e66c 100644 --- a/main.go +++ b/main.go @@ -185,7 +185,7 @@ func main() { tgbResManager := targetgroupbinding.NewDefaultResourceManager(mgr.GetClient(), cloud.ELBV2(), podInfoRepo, networkingManager, vpcInfoProvider, multiClusterManager, lbcMetricsCollector, cloud.VpcID(), controllerCFG.FeatureGates.Enabled(config.EndpointsFailOpen), controllerCFG.EnableEndpointSlices, - mgr.GetEventRecorderFor("targetGroupBinding"), ctrl.Log, controllerCFG.MaxTargetsPerInstance) + mgr.GetEventRecorderFor("targetGroupBinding"), ctrl.Log, controllerCFG.MaxTargetsPerTargetGroup) backendSGProvider := networking.NewBackendSGProvider(controllerCFG.ClusterName, controllerCFG.BackendSecurityGroup, cloud.VpcID(), cloud.EC2(), mgr.GetClient(), controllerCFG.DefaultTags, nlbGatewayEnabled || albGatewayEnabled, ctrl.Log.WithName("backend-sg-provider")) sgResolver := networking.NewDefaultSecurityGroupResolver(cloud.EC2(), cloud.VpcID()) diff --git a/pkg/config/controller_config.go b/pkg/config/controller_config.go index 4bb0329776..89d51ade3f 100644 --- a/pkg/config/controller_config.go +++ b/pkg/config/controller_config.go @@ -34,7 +34,7 @@ const ( flagBackendSecurityGroup = "backend-security-group" flagEnableEndpointSlices = "enable-endpoint-slices" flagDisableRestrictedSGRules = "disable-restricted-sg-rules" - flagMaxTargetsPerInstance = "max-targets-per-instance" + flagMaxTargetsPerTargetGroup = "max-targets-per-target-group" defaultLogLevel = "info" defaultMaxConcurrentReconciles = 3 defaultMaxExponentialBackoffDelay = time.Second * 1000 @@ -44,7 +44,7 @@ const ( defaultEnableEndpointSlices = true defaultDisableRestrictedSGRules = false defaultLbStabilizationMonitorInterval = time.Second * 120 - 
defaultMaxTargetsPerInstance = 0 + defaultMaxTargetsPerTargetGroup = 0 ) var ( @@ -135,8 +135,8 @@ type ControllerConfig struct { // LBStabilizationMonitorInterval specifies the duration of interval to monitor the load balancer state for stabilization LBStabilizationMonitorInterval time.Duration - // MaxTargetsPerInstance limits the number of targets that will be added to an ELB instance - MaxTargetsPerInstance int + // MaxTargetsPerTargetGroup limits the number of targets that will be registered with a single target group; zero means unlimited + MaxTargetsPerTargetGroup int FeatureGates FeatureGates } @@ -182,7 +182,7 @@ func (cfg *ControllerConfig) BindFlags(fs *pflag.FlagSet) { "Disable the usage of restricted security group rules") fs.StringToStringVar(&cfg.ServiceTargetENISGTags, flagServiceTargetENISGTags, nil, "AWS Tags, in addition to cluster tags, for finding the target ENI security group to which to add inbound rules from NLBs") - fs.IntVar(&cfg.MaxTargetsPerInstance, flagMaxTargetsPerInstance, defaultMaxTargetsPerInstance, + fs.IntVar(&cfg.MaxTargetsPerTargetGroup, flagMaxTargetsPerTargetGroup, defaultMaxTargetsPerTargetGroup, "Maximum number of targets that can be added to an ELB instance. 
Use this to prevent TargetGroup quotas being exceeded from blocking reconciliation.") cfg.FeatureGates.BindFlags(fs) cfg.AWSConfig.BindFlags(fs) diff --git a/pkg/targetgroupbinding/resource_manager.go b/pkg/targetgroupbinding/resource_manager.go index 8f6c170eb2..ec424f637e 100644 --- a/pkg/targetgroupbinding/resource_manager.go +++ b/pkg/targetgroupbinding/resource_manager.go @@ -48,23 +48,23 @@ func NewDefaultResourceManager(k8sClient client.Client, elbv2Client services.ELB podInfoRepo k8s.PodInfoRepo, networkingManager networking.NetworkingManager, vpcInfoProvider networking.VPCInfoProvider, multiClusterManager MultiClusterManager, metricsCollector lbcmetrics.MetricCollector, vpcID string, failOpenEnabled bool, endpointSliceEnabled bool, - eventRecorder record.EventRecorder, logger logr.Logger, maxTargetsPerInstance int) *defaultResourceManager { + eventRecorder record.EventRecorder, logger logr.Logger, maxTargetsPerTargetGroup int) *defaultResourceManager { targetsManager := NewCachedTargetsManager(elbv2Client, logger) endpointResolver := backend.NewDefaultEndpointResolver(k8sClient, podInfoRepo, failOpenEnabled, endpointSliceEnabled, logger) return &defaultResourceManager{ - k8sClient: k8sClient, - targetsManager: targetsManager, - endpointResolver: endpointResolver, - networkingManager: networkingManager, - eventRecorder: eventRecorder, - logger: logger, - vpcID: vpcID, - vpcInfoProvider: vpcInfoProvider, - podInfoRepo: podInfoRepo, - maxTargetsPerInstance: maxTargetsPerInstance, - multiClusterManager: multiClusterManager, - metricsCollector: metricsCollector, + k8sClient: k8sClient, + targetsManager: targetsManager, + endpointResolver: endpointResolver, + networkingManager: networkingManager, + eventRecorder: eventRecorder, + logger: logger, + vpcID: vpcID, + vpcInfoProvider: vpcInfoProvider, + podInfoRepo: podInfoRepo, + maxTargetsPerTargetGroup: maxTargetsPerTargetGroup, + multiClusterManager: multiClusterManager, + metricsCollector: metricsCollector, 
invalidVpcCache: cache.NewExpiring(), invalidVpcCacheTTL: defaultTargetsCacheTTL, @@ -77,18 +77,18 @@ var _ ResourceManager = &defaultResourceManager{} // default implementation for ResourceManager. type defaultResourceManager struct { - k8sClient client.Client - targetsManager TargetsManager - endpointResolver backend.EndpointResolver - networkingManager networking.NetworkingManager - eventRecorder record.EventRecorder - logger logr.Logger - vpcInfoProvider networking.VPCInfoProvider - podInfoRepo k8s.PodInfoRepo - maxTargetsPerInstance int - multiClusterManager MultiClusterManager - metricsCollector lbcmetrics.MetricCollector - vpcID string + k8sClient client.Client + targetsManager TargetsManager + endpointResolver backend.EndpointResolver + networkingManager networking.NetworkingManager + eventRecorder record.EventRecorder + logger logr.Logger + vpcInfoProvider networking.VPCInfoProvider + podInfoRepo k8s.PodInfoRepo + maxTargetsPerTargetGroup int + multiClusterManager MultiClusterManager + metricsCollector lbcmetrics.MetricCollector + vpcID string invalidVpcCache *cache.Expiring invalidVpcCacheTTL time.Duration @@ -243,8 +243,10 @@ func (m *defaultResourceManager) reconcileWithIPTargetType(ctx context.Context, return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_ip_targets_error", err, m.metricsCollector) } - eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), len(targets), tgbScopedLogger) - unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] + if m.maxTargetsPerTargetGroup != 0 { + eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), len(targets), tgbScopedLogger) + unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] + } if err := m.registerPodEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "register_pod_endpoint_error", err, m.metricsCollector) @@ -347,8 +349,10 @@ func (m *defaultResourceManager) 
reconcileWithInstanceTargetType(ctx context.Con return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_tracked_instance_targets_error", err, m.metricsCollector) } - eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), len(targets), tgbScopedLogger) - unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] + if m.maxTargetsPerTargetGroup != 0 { + eligibleTargetsCount := m.getMaxNewTargets(len(unmatchedEndpoints), len(targets), tgbScopedLogger) + unmatchedEndpoints = unmatchedEndpoints[:eligibleTargetsCount] + } if err := m.registerNodePortEndpoints(ctx, tgb, unmatchedEndpoints); err != nil { return "", "", false, ctrlerrors.NewErrorWithMetrics(controllerName, "update_node_port_endpoints_error", err, m.metricsCollector) @@ -814,14 +818,14 @@ func isVPCNotFoundError(err error) bool { } func (m *defaultResourceManager) getMaxNewTargets(newTargetCount int, currentTargetCount int, tgbScopedLogger logr.Logger) (maxAdditions int) { - if m.maxTargetsPerInstance > 0 && newTargetCount+currentTargetCount > m.maxTargetsPerInstance { - maxAdditions = m.maxTargetsPerInstance - currentTargetCount - tgbScopedLogger.Info("Limiting target additions due to max-targets-per-instance configuration", - "currentTargets", currentTargetCount, - "maxTargetsPerInstance", m.maxTargetsPerInstance, - "proposedAdditions", newTargetCount) - return maxAdditions - } + if newTargetCount+currentTargetCount > m.maxTargetsPerTargetGroup { + maxAdditions = m.maxTargetsPerTargetGroup - currentTargetCount + tgbScopedLogger.Info("Limiting target additions due to max-targets-per-target-group configuration", + "currentTargets", currentTargetCount, + "maxTargetsPerTargetGroup", m.maxTargetsPerTargetGroup, + "proposedAdditions", newTargetCount) + return maxAdditions + } - return newTargetCount + return newTargetCount }