Skip to content

Commit

Permalink
feat: add a timeoutSeconds field to check if the opsRequest is runnin…
Browse files Browse the repository at this point in the history
…g timed out (#7680)

(cherry picked from commit fe34a3a)
  • Loading branch information
wangyelei committed Jul 4, 2024
1 parent cb45da5 commit 96cf553
Show file tree
Hide file tree
Showing 9 changed files with 111 additions and 6 deletions.
6 changes: 3 additions & 3 deletions apis/apps/v1alpha1/opsrequest_conditions.go
Original file line number Diff line number Diff line change
Expand Up @@ -111,13 +111,13 @@ func NewCancelFailedCondition(ops *OpsRequest, err error) *metav1.Condition {
}

// NewAbortedCondition creates a condition for aborted phase.
func NewAbortedCondition(ops *OpsRequest) metav1.Condition {
return metav1.Condition{
func NewAbortedCondition(message string) *metav1.Condition {
return &metav1.Condition{
Type: ConditionTypeAborted,
Status: metav1.ConditionTrue,
Reason: ConditionTypeAborted,
LastTransitionTime: metav1.Now(),
Message: fmt.Sprintf(`Aborted as a result of the latest opsRequest "%s"`, ops.Name),
Message: message,
}
}

Expand Down
7 changes: 7 additions & 0 deletions apis/apps/v1alpha1/opsrequest_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,13 @@ type OpsRequestSpec struct {
// +optional
PreConditionDeadlineSeconds *int32 `json:"preConditionDeadlineSeconds,omitempty"`

// Specifies the maximum duration (in seconds) that an opsRequest is allowed to run.
// If the opsRequest runs longer than this duration, its phase will be marked as Aborted.
// If this value is not set or set to 0, the timeout will be ignored and the opsRequest will run indefinitely.
// +optional
// +kubebuilder:validation:Minimum=0
TimeoutSeconds *int32 `json:"timeoutSeconds,omitempty"`

// Exactly one of its members must be set.
SpecificOpsRequest `json:",inline"`
}
Expand Down
5 changes: 5 additions & 0 deletions apis/apps/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

7 changes: 7 additions & 0 deletions config/crd/bases/apps.kubeblocks.io_opsrequests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3634,6 +3634,13 @@ spec:
x-kubernetes-validations:
- message: forbidden to update spec.switchover
rule: self == oldSelf
timeoutSeconds:
description: |-
Specifies the maximum duration (in seconds) that an opsRequest is allowed to run.
If the opsRequest runs longer than this duration, its phase will be marked as Aborted.
If this value is not set or set to 0, the timeout will be ignored and the opsRequest will run indefinitely.
format: int32
type: integer
ttlSecondsAfterSucceed:
description: Specifies the duration in seconds that an OpsRequest
will remain in the system after successfully completing (when `opsRequest.status.phase`
Expand Down
22 changes: 21 additions & 1 deletion controllers/apps/operations/ops_manager.go
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ func (opsMgr *OpsManager) Reconcile(reqCtx intctrlutil.RequestCtx, cli client.Cl
return 0, opsMgr.handleOpsCompleted(reqCtx, cli, opsRes, opsRequestPhase,
appsv1alpha1.NewCancelFailedCondition(opsRequest, err), appsv1alpha1.NewFailedCondition(opsRequest, err))
default:
return requeueAfter, nil
return opsMgr.checkAndHandleOpsTimeout(reqCtx, cli, opsRes, requeueAfter)
}
}

Expand Down Expand Up @@ -231,6 +231,26 @@ func (opsMgr *OpsManager) validateDependOnSuccessfulOps(reqCtx intctrlutil.Reque
return true, nil
}

// checkAndHandleOpsTimeout aborts the opsRequest once it has been running for longer than
// spec.timeoutSeconds, measured from status.startTimestamp.
//
// Behavior:
//   - If timeoutSeconds is unset, zero or negative, no timeout applies and the given
//     requeueAfter is returned unchanged.
//   - If the deadline has been reached, the opsRequest is patched to the Aborted phase with an
//     Aborted condition and a zero requeue duration is returned together with the patch error, if any.
//   - Otherwise the caller's requeueAfter is returned when it is non-zero; when it is zero, the
//     remaining time until the deadline is returned so the opsRequest is reconciled again at
//     the moment it times out.
func (opsMgr *OpsManager) checkAndHandleOpsTimeout(reqCtx intctrlutil.RequestCtx,
	cli client.Client,
	opsRes *OpsResource,
	requeueAfter time.Duration) (time.Duration, error) {
	timeoutSeconds := opsRes.OpsRequest.Spec.TimeoutSeconds
	// Non-positive values mean "no timeout"; a negative value would otherwise place the
	// deadline before the start time and abort the opsRequest immediately.
	if timeoutSeconds == nil || *timeoutSeconds <= 0 {
		return requeueAfter, nil
	}
	// time.Duration counts nanoseconds, so the seconds value must be scaled by time.Second.
	timeout := time.Duration(*timeoutSeconds) * time.Second
	timeoutPoint := opsRes.OpsRequest.Status.StartTimestamp.Add(timeout)
	if !time.Now().Before(timeoutPoint) {
		return 0, PatchOpsStatus(reqCtx.Ctx, cli, opsRes, appsv1alpha1.OpsAbortedPhase,
			appsv1alpha1.NewAbortedCondition("Aborted due to exceeding the specified timeout period (timeoutSeconds)"))
	}
	if requeueAfter != 0 {
		return requeueAfter, nil
	}
	return time.Until(timeoutPoint), nil
}

func GetOpsManager() *OpsManager {
opsManagerOnce.Do(func() {
opsManager = &OpsManager{OpsMap: make(map[appsv1alpha1.OpsType]OpsBehaviour)}
Expand Down
4 changes: 2 additions & 2 deletions controllers/apps/operations/ops_util.go
Original file line number Diff line number Diff line change
Expand Up @@ -268,8 +268,8 @@ func abortEarlierOpsRequestWithSameKind(reqCtx intctrlutil.RequestCtx,
// abort the opsRequest that matches the abort condition.
patch := client.MergeFrom(earlierOps.DeepCopy())
earlierOps.Status.Phase = appsv1alpha1.OpsAbortedPhase
abortedCondition := appsv1alpha1.NewAbortedCondition(earlierOps)
earlierOps.SetStatusCondition(abortedCondition)
abortedCondition := appsv1alpha1.NewAbortedCondition(fmt.Sprintf(`Aborted as a result of the latest opsRequest "%s" being overridden`, earlierOps.Name))
earlierOps.SetStatusCondition(*abortedCondition)
earlierOps.Status.CompletionTimestamp = metav1.Time{Time: time.Now()}
if err = cli.Status().Patch(reqCtx.Ctx, earlierOps, patch); err != nil {
return err
Expand Down
31 changes: 31 additions & 0 deletions controllers/apps/opsrequest_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -885,5 +885,36 @@ var _ = Describe("OpsRequest Controller", func() {
Eventually(testapps.GetOpsRequestPhase(&testCtx, client.ObjectKeyFromObject(restartOps2))).Should(Equal(appsv1alpha1.OpsRunningPhase))
Eventually(testapps.GetOpsRequestPhase(&testCtx, client.ObjectKeyFromObject(exposeOps2))).Should(Equal(appsv1alpha1.OpsSucceedPhase))
})

// Verifies that an OpsRequest with spec.timeoutSeconds set ends up in the Aborted phase
// after the timeout has elapsed, and that an opsRequest created while it was running
// subsequently leaves the Pending phase.
It("test opsRequest timeout", func() {
By("create cluster and mock it to running")
replicas := int32(3)
createMysqlCluster(replicas)
mockCompRunning(replicas, false)

By("create a opsRequest and specified the timeoutSeconds to 1")
// NOTE(review): the object is named "restart-ops-1" but its type is HorizontalScaling.
ops := testapps.NewOpsRequestObj("restart-ops-1", testCtx.DefaultNamespace,
clusterObj.Name, appsv1alpha1.HorizontalScalingType)
// Allow the opsRequest to run for at most 1 second.
ops.Spec.TimeoutSeconds = pointer.Int32(1)
// Scale out the mysql component by one replica.
ops.Spec.HorizontalScalingList = []appsv1alpha1.HorizontalScaling{
{
ComponentOps: appsv1alpha1.ComponentOps{ComponentName: mysqlCompName},
ScaleOut: &appsv1alpha1.ScaleOut{ReplicaChanger: appsv1alpha1.ReplicaChanger{ReplicaChanges: pointer.Int32(1)}},
},
}
testapps.CreateOpsRequest(ctx, testCtx, ops)
Eventually(testapps.GetOpsRequestPhase(&testCtx, client.ObjectKeyFromObject(ops))).Should(Equal(appsv1alpha1.OpsRunningPhase))

By("create a next opsRequest")
// createRestartOps is defined elsewhere in this file; the assertion below expects ops1 to be
// Pending while ops is still Running — presumably it is queued behind ops (TODO confirm).
ops1 := createRestartOps(clusterObj.Name, 2, true)
Eventually(testapps.GetOpsRequestPhase(&testCtx, client.ObjectKeyFromObject(ops1))).Should(Equal(appsv1alpha1.OpsPendingPhase))

By("mock timeout")
// Wait out the 1-second timeoutSeconds before polling for the Aborted phase.
time.Sleep(time.Second)
Eventually(testapps.GetOpsRequestPhase(&testCtx, client.ObjectKeyFromObject(ops))).Should(Equal(appsv1alpha1.OpsAbortedPhase))

By("expect for the next ops is running")
// Only asserts that ops1 is no longer Pending; no specific target phase is pinned.
Eventually(testapps.GetOpsRequestPhase(&testCtx, client.ObjectKeyFromObject(ops1))).ShouldNot(Equal(appsv1alpha1.OpsPendingPhase))
})
})
})
7 changes: 7 additions & 0 deletions deploy/helm/crds/apps.kubeblocks.io_opsrequests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3634,6 +3634,13 @@ spec:
x-kubernetes-validations:
- message: forbidden to update spec.switchover
rule: self == oldSelf
timeoutSeconds:
description: |-
Specifies the maximum duration (in seconds) that an opsRequest is allowed to run.
If the opsRequest runs longer than this duration, its phase will be marked as Aborted.
If this value is not set or set to 0, the timeout will be ignored and the opsRequest will run indefinitely.
format: int32
type: integer
ttlSecondsAfterSucceed:
description: Specifies the duration in seconds that an OpsRequest
will remain in the system after successfully completing (when `opsRequest.status.phase`
Expand Down
28 changes: 28 additions & 0 deletions docs/developer_docs/api-reference/cluster.md
Original file line number Diff line number Diff line change
Expand Up @@ -2890,6 +2890,20 @@ If set to 0 (default), pre-conditions must be satisfied immediately for the OpsR
</tr>
<tr>
<td>
<code>timeoutSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Specifies the maximum duration (in seconds) that an opsRequest is allowed to run.
If the opsRequest runs longer than this duration, its phase will be marked as Aborted.
If this value is not set or set to 0, the timeout will be ignored and the opsRequest will run indefinitely.</p>
</td>
</tr>
<tr>
<td>
<code>SpecificOpsRequest</code><br/>
<em>
<a href="#apps.kubeblocks.io/v1alpha1.SpecificOpsRequest">
Expand Down Expand Up @@ -15147,6 +15161,20 @@ If set to 0 (default), pre-conditions must be satisfied immediately for the OpsR
</tr>
<tr>
<td>
<code>timeoutSeconds</code><br/>
<em>
int32
</em>
</td>
<td>
<em>(Optional)</em>
<p>Specifies the maximum duration (in seconds) that an opsRequest is allowed to run.
If the opsRequest runs longer than this duration, its phase will be marked as Aborted.
If this value is not set or set to 0, the timeout will be ignored and the opsRequest will run indefinitely.</p>
</td>
</tr>
<tr>
<td>
<code>SpecificOpsRequest</code><br/>
<em>
<a href="#apps.kubeblocks.io/v1alpha1.SpecificOpsRequest">
Expand Down

0 comments on commit 96cf553

Please sign in to comment.