Skip to content

Commit 3eb2b52

Browse files
Refactor infra creation to improve the overall infra setup time (#1869)
1 parent eea78e7 commit 3eb2b52

File tree

2 files changed

+172
-80
lines changed

2 files changed

+172
-80
lines changed

cloud/scope/powervs_cluster.go

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -823,13 +823,13 @@ func (s *PowerVSClusterScope) createServiceInstance() (*resourcecontrollerv2.Res
823823
func (s *PowerVSClusterScope) ReconcileNetwork() (bool, error) {
824824
if s.GetDHCPServerID() != nil {
825825
s.V(3).Info("DHCP server ID is set, fetching details", "id", s.GetDHCPServerID())
826-
requeue, err := s.isDHCPServerActive()
826+
active, err := s.isDHCPServerActive()
827827
if err != nil {
828828
return false, err
829829
}
830830
// If the DHCP server exists and is in active state, it is assumed that the DHCP network exists.
831831
// TODO(Phase 2): Verify that the DHCP network exists.
832-
return requeue, nil
832+
return active, nil
833833
// TODO(karthik-k-n): If needed set dhcp status here
834834
}
835835
// check network exist in cloud
@@ -851,7 +851,7 @@ func (s *PowerVSClusterScope) ReconcileNetwork() (bool, error) {
851851

852852
s.Info("Created DHCP Server", "id", *dhcpServer)
853853
s.SetStatus(infrav1beta2.ResourceTypeDHCPServer, infrav1beta2.ResourceReference{ID: dhcpServer, ControllerCreated: ptr.To(true)})
854-
return true, nil
854+
return false, nil
855855
}
856856

857857
// checkNetwork checks whether the network exists in the cloud.
@@ -913,25 +913,25 @@ func (s *PowerVSClusterScope) isDHCPServerActive() (bool, error) {
913913
return false, err
914914
}
915915

916-
requeue, err := s.checkDHCPServerStatus(*dhcpServer)
916+
active, err := s.checkDHCPServerStatus(*dhcpServer)
917917
if err != nil {
918918
return false, err
919919
}
920-
return requeue, nil
920+
return active, nil
921921
}
922922

923923
// checkDHCPServerStatus checks the state of a DHCP server.
924-
// If state is BUILD, true is returned indicating a requeue for reconciliation.
924+
// If state is active, true is returned.
925925
// In all other cases, it returns false.
926926
func (s *PowerVSClusterScope) checkDHCPServerStatus(dhcpServer models.DHCPServerDetail) (bool, error) {
927927
s.V(3).Info("Checking the status of DHCP server", "id", *dhcpServer.ID)
928928
switch *dhcpServer.Status {
929929
case string(infrav1beta2.DHCPServerStateActive):
930930
s.V(3).Info("DHCP server is in active state")
931-
return false, nil
931+
return true, nil
932932
case string(infrav1beta2.DHCPServerStateBuild):
933933
s.V(3).Info("DHCP server is in build state")
934-
return true, nil
934+
return false, nil
935935
case string(infrav1beta2.DHCPServerStateError):
936936
return false, fmt.Errorf("DHCP server creation failed and is in error state")
937937
}
@@ -1917,9 +1917,9 @@ func (s *PowerVSClusterScope) ReconcileLoadBalancers() (bool, error) {
19171917
}
19181918
s.Info("Created VPC load balancer", "id", loadBalancerStatus.ID)
19191919
s.SetLoadBalancerStatus(loadBalancer.Name, *loadBalancerStatus)
1920-
return true, nil
1920+
return false, nil
19211921
}
1922-
return false, nil
1922+
return true, nil
19231923
}
19241924

19251925
// checkLoadBalancerStatus checks the state of a VPC load balancer.

controllers/ibmpowervscluster_controller.go

Lines changed: 162 additions & 70 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,12 @@ import (
2020
"context"
2121
"fmt"
2222
"strings"
23+
"sync"
2324
"time"
2425

2526
"github.com/pkg/errors"
2627

28+
corev1 "k8s.io/api/core/v1"
2729
apierrors "k8s.io/apimachinery/pkg/api/errors"
2830
"k8s.io/apimachinery/pkg/api/meta"
2931
"k8s.io/apimachinery/pkg/runtime"
@@ -114,6 +116,116 @@ func (r *IBMPowerVSClusterReconciler) Reconcile(ctx context.Context, req ctrl.Re
114116
return r.reconcile(clusterScope)
115117
}
116118

119+
type powerVSCluster struct {
120+
cluster *infrav1beta2.IBMPowerVSCluster
121+
mu sync.Mutex
122+
}
123+
124+
type reconcileResult struct {
125+
reconcile.Result
126+
error
127+
}
128+
129+
func (update *powerVSCluster) updateCondition(condition bool, conditionArgs ...interface{}) {
130+
update.mu.Lock()
131+
defer update.mu.Unlock()
132+
if condition {
133+
conditions.MarkTrue(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType))
134+
return
135+
}
136+
137+
conditions.MarkFalse(update.cluster, conditionArgs[0].(capiv1beta1.ConditionType), conditionArgs[1].(string), conditionArgs[2].(capiv1beta1.ConditionSeverity), conditionArgs[3].(string), conditionArgs[4:]...)
138+
}
139+
140+
func (r *IBMPowerVSClusterReconciler) reconcilePowerVSResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
141+
defer wg.Done()
142+
powerVSLog := clusterScope.WithName("powervs")
143+
// reconcile PowerVS service instance
144+
powerVSLog.Info("Reconciling PowerVS service instance")
145+
if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
146+
powerVSLog.Error(err, "failed to reconcile PowerVS service instance")
147+
powerVSCluster.updateCondition(false, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
148+
ch <- reconcileResult{reconcile.Result{}, err}
149+
return
150+
} else if requeue {
151+
powerVSLog.Info("PowerVS service instance creation is pending, requeuing")
152+
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
153+
return
154+
}
155+
powerVSCluster.updateCondition(true, infrav1beta2.ServiceInstanceReadyCondition)
156+
157+
clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})
158+
159+
// reconcile network
160+
powerVSLog.Info("Reconciling network")
161+
if dhcpServerActive, err := clusterScope.ReconcileNetwork(); err != nil {
162+
powerVSLog.Error(err, "failed to reconcile PowerVS network")
163+
powerVSCluster.updateCondition(false, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
164+
ch <- reconcileResult{reconcile.Result{}, err}
165+
return
166+
} else if dhcpServerActive {
167+
powerVSCluster.updateCondition(true, infrav1beta2.NetworkReadyCondition)
168+
return
169+
}
170+
// Do not want to block the reconciliation of other resources like setting up TG and COS, so skipping the requeue and only logging the info.
171+
powerVSLog.Info("PowerVS network creation is pending")
172+
}
173+
174+
func (r *IBMPowerVSClusterReconciler) reconcileVPCResources(clusterScope *scope.PowerVSClusterScope, powerVSCluster *powerVSCluster, ch chan reconcileResult, wg *sync.WaitGroup) {
175+
defer wg.Done()
176+
vpcLog := clusterScope.WithName("vpc")
177+
vpcLog.Info("Reconciling VPC")
178+
if requeue, err := clusterScope.ReconcileVPC(); err != nil {
179+
clusterScope.Error(err, "failed to reconcile VPC")
180+
powerVSCluster.updateCondition(false, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
181+
ch <- reconcileResult{reconcile.Result{}, err}
182+
return
183+
} else if requeue {
184+
vpcLog.Info("VPC creation is pending, requeuing")
185+
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
186+
return
187+
}
188+
powerVSCluster.updateCondition(true, infrav1beta2.VPCReadyCondition)
189+
190+
// reconcile VPC Subnet
191+
vpcLog.Info("Reconciling VPC subnets")
192+
if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
193+
vpcLog.Error(err, "failed to reconcile VPC subnets")
194+
powerVSCluster.updateCondition(false, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
195+
ch <- reconcileResult{reconcile.Result{}, err}
196+
return
197+
} else if requeue {
198+
vpcLog.Info("VPC subnet creation is pending, requeuing")
199+
ch <- reconcileResult{reconcile.Result{Requeue: true}, nil}
200+
return
201+
}
202+
powerVSCluster.updateCondition(true, infrav1beta2.VPCSubnetReadyCondition)
203+
204+
// reconcile VPC security group
205+
vpcLog.Info("Reconciling VPC security group")
206+
if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
207+
vpcLog.Error(err, "failed to reconcile VPC security groups")
208+
powerVSCluster.updateCondition(false, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
209+
ch <- reconcileResult{reconcile.Result{}, err}
210+
return
211+
}
212+
powerVSCluster.updateCondition(true, infrav1beta2.VPCSecurityGroupReadyCondition)
213+
214+
// reconcile LoadBalancer
215+
vpcLog.Info("Reconciling VPC load balancers")
216+
if loadBalancerReady, err := clusterScope.ReconcileLoadBalancers(); err != nil {
217+
vpcLog.Error(err, "failed to reconcile VPC load balancers")
218+
powerVSCluster.updateCondition(false, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
219+
ch <- reconcileResult{reconcile.Result{}, err}
220+
return
221+
} else if loadBalancerReady {
222+
powerVSCluster.updateCondition(true, infrav1beta2.LoadBalancerReadyCondition)
223+
return
224+
}
225+
// Do not want to block the reconciliation of other resources like setting up TG and COS, so skipping the requeue and only logging the info.
226+
vpcLog.Info("VPC load balancer creation is pending")
227+
}
228+
117229
func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClusterScope) (ctrl.Result, error) { //nolint:gocyclo
118230
if controllerutil.AddFinalizer(clusterScope.IBMPowerVSCluster, infrav1beta2.IBMPowerVSClusterFinalizer) {
119231
return ctrl.Result{}, nil
@@ -140,116 +252,96 @@ func (r *IBMPowerVSClusterReconciler) reconcile(clusterScope *scope.PowerVSClust
140252
return reconcile.Result{}, err
141253
}
142254

143-
powerVSCluster := clusterScope.IBMPowerVSCluster
144-
// reconcile PowerVS service instance
145-
clusterScope.Info("Reconciling PowerVS service instance")
146-
if requeue, err := clusterScope.ReconcilePowerVSServiceInstance(); err != nil {
147-
clusterScope.Error(err, "failed to reconcile PowerVS service instance")
148-
conditions.MarkFalse(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition, infrav1beta2.ServiceInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
149-
return reconcile.Result{}, err
150-
} else if requeue {
151-
clusterScope.Info("PowerVS service instance creation is pending, requeuing")
152-
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
255+
powerVSCluster := &powerVSCluster{
256+
cluster: clusterScope.IBMPowerVSCluster,
153257
}
154-
conditions.MarkTrue(powerVSCluster, infrav1beta2.ServiceInstanceReadyCondition)
155258

156-
clusterScope.IBMPowerVSClient.WithClients(powervs.ServiceOptions{CloudInstanceID: clusterScope.GetServiceInstanceID()})
259+
var wg sync.WaitGroup
260+
ch := make(chan reconcileResult)
157261

158-
// reconcile network
159-
clusterScope.Info("Reconciling network")
160-
if requeue, err := clusterScope.ReconcileNetwork(); err != nil {
161-
clusterScope.Error(err, "failed to reconcile PowerVS network")
162-
conditions.MarkFalse(powerVSCluster, infrav1beta2.NetworkReadyCondition, infrav1beta2.NetworkReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
163-
return reconcile.Result{}, err
164-
} else if requeue {
165-
clusterScope.Info("PowerVS network creation is pending, requeuing")
166-
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
167-
}
168-
conditions.MarkTrue(powerVSCluster, infrav1beta2.NetworkReadyCondition)
262+
// reconcile PowerVS resources
263+
wg.Add(1)
264+
go r.reconcilePowerVSResources(clusterScope, powerVSCluster, ch, &wg)
169265

170266
// reconcile VPC
171-
clusterScope.Info("Reconciling VPC")
172-
if requeue, err := clusterScope.ReconcileVPC(); err != nil {
173-
clusterScope.Error(err, "failed to reconcile VPC")
174-
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCReadyCondition, infrav1beta2.VPCReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
175-
return reconcile.Result{}, err
176-
} else if requeue {
177-
clusterScope.Info("VPC creation is pending, requeuing")
178-
return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
179-
}
180-
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCReadyCondition)
267+
wg.Add(1)
268+
go r.reconcileVPCResources(clusterScope, powerVSCluster, ch, &wg)
181269

182-
// reconcile VPC Subnet
183-
clusterScope.Info("Reconciling VPC subnets")
184-
if requeue, err := clusterScope.ReconcileVPCSubnets(); err != nil {
185-
clusterScope.Error(err, "failed to reconcile VPC subnets")
186-
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition, infrav1beta2.VPCSubnetReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
187-
return reconcile.Result{}, err
188-
} else if requeue {
189-
clusterScope.Info("VPC subnet creation is pending, requeuing")
190-
return reconcile.Result{RequeueAfter: 15 * time.Second}, nil
270+
// wait for above reconcile to complete and close the channel
271+
go func() {
272+
wg.Wait()
273+
close(ch)
274+
}()
275+
276+
var requeue bool
277+
var errList []error
278+
// receive return values from the channel and decide the requeue
279+
for val := range ch {
280+
if val.Requeue {
281+
requeue = true
282+
}
283+
if val.error != nil {
284+
errList = append(errList, val.error)
285+
}
191286
}
192-
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSubnetReadyCondition)
193287

194-
// reconcile VPC security group
195-
clusterScope.Info("Reconciling VPC security group")
196-
if err := clusterScope.ReconcileVPCSecurityGroups(); err != nil {
197-
clusterScope.Error(err, "failed to reconcile VPC security groups")
198-
conditions.MarkFalse(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition, infrav1beta2.VPCSecurityGroupReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
199-
return reconcile.Result{}, err
288+
if requeue && len(errList) > 1 {
289+
return ctrl.Result{RequeueAfter: 30 * time.Second}, kerrors.NewAggregate(errList)
290+
} else if requeue {
291+
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
292+
} else if len(errList) > 1 {
293+
return ctrl.Result{}, kerrors.NewAggregate(errList)
200294
}
201-
conditions.MarkTrue(powerVSCluster, infrav1beta2.VPCSecurityGroupReadyCondition)
202295

203296
// reconcile Transit Gateway
204297
clusterScope.Info("Reconciling Transit Gateway")
205298
if requeue, err := clusterScope.ReconcileTransitGateway(); err != nil {
206299
clusterScope.Error(err, "failed to reconcile transit gateway")
207-
conditions.MarkFalse(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
300+
conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition, infrav1beta2.TransitGatewayReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
208301
return reconcile.Result{}, err
209302
} else if requeue {
210303
clusterScope.Info("Transit gateway creation is pending, requeuing")
211304
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
212305
}
213-
conditions.MarkTrue(powerVSCluster, infrav1beta2.TransitGatewayReadyCondition)
214-
215-
// reconcile LoadBalancer
216-
clusterScope.Info("Reconciling VPC load balancers")
217-
if requeue, err := clusterScope.ReconcileLoadBalancers(); err != nil {
218-
clusterScope.Error(err, "failed to reconcile VPC load balancers")
219-
conditions.MarkFalse(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition, infrav1beta2.LoadBalancerReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
220-
return reconcile.Result{}, err
221-
} else if requeue {
222-
clusterScope.Info("VPC load balancer creation is pending, requeuing")
223-
return reconcile.Result{RequeueAfter: 1 * time.Minute}, nil
224-
}
306+
conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.TransitGatewayReadyCondition)
225307

226308
// reconcile COSInstance
227309
if clusterScope.IBMPowerVSCluster.Spec.Ignition != nil {
228310
clusterScope.Info("Reconciling COS service instance")
229311
if err := clusterScope.ReconcileCOSInstance(); err != nil {
230-
conditions.MarkFalse(powerVSCluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
312+
conditions.MarkFalse(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition, infrav1beta2.COSInstanceReconciliationFailedReason, capiv1beta1.ConditionSeverityError, err.Error())
231313
return reconcile.Result{}, err
232314
}
233-
conditions.MarkTrue(powerVSCluster, infrav1beta2.COSInstanceReadyCondition)
315+
conditions.MarkTrue(powerVSCluster.cluster, infrav1beta2.COSInstanceReadyCondition)
316+
}
317+
318+
var networkReady, loadBalancerReady bool
319+
for _, cond := range clusterScope.IBMPowerVSCluster.Status.Conditions {
320+
if cond.Type == infrav1beta2.NetworkReadyCondition && cond.Status == corev1.ConditionTrue {
321+
networkReady = true
322+
}
323+
if cond.Type == infrav1beta2.LoadBalancerReadyCondition && cond.Status == corev1.ConditionTrue {
324+
loadBalancerReady = true
325+
}
326+
}
327+
328+
if !networkReady || !loadBalancerReady {
329+
clusterScope.Info("Network or LoadBalancer still not ready, requeuing")
330+
return ctrl.Result{RequeueAfter: 30 * time.Second}, nil
234331
}
235332

236333
// update cluster object with loadbalancer host
237334
loadBalancer := clusterScope.PublicLoadBalancer()
238335
if loadBalancer == nil {
239336
return reconcile.Result{}, fmt.Errorf("failed to fetch public loadbalancer")
240337
}
241-
if clusterScope.GetLoadBalancerState(loadBalancer.Name) == nil || *clusterScope.GetLoadBalancerState(loadBalancer.Name) != infrav1beta2.VPCLoadBalancerStateActive {
242-
clusterScope.Info("LoadBalancer state is not active")
243-
return reconcile.Result{RequeueAfter: time.Minute}, nil
244-
}
245338

246339
clusterScope.Info("Getting load balancer host")
247340
hostName := clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
248341
if hostName == nil || *hostName == "" {
249342
clusterScope.Info("LoadBalancer hostname is not yet available, requeuing")
250343
return reconcile.Result{RequeueAfter: time.Minute}, nil
251344
}
252-
conditions.MarkTrue(powerVSCluster, infrav1beta2.LoadBalancerReadyCondition)
253345

254346
clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Host = *clusterScope.GetLoadBalancerHostName(loadBalancer.Name)
255347
clusterScope.IBMPowerVSCluster.Spec.ControlPlaneEndpoint.Port = clusterScope.APIServerPort()

0 commit comments

Comments
 (0)