Skip to content

Commit a490857

Browse files
committed
Add successfulJobsHistoryLimit to UpgradeConfig spec
Allows cleanup of older successful jobs. I did not add the same field for failed jobs since they need to be cleaned up manually anyways. It is not possible by design to delete the most recent successful job.
1 parent 02ad41c commit a490857

File tree

4 files changed

+158
-1
lines changed

4 files changed

+158
-1
lines changed

api/v1beta1/upgradeconfig_types.go

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,10 @@ type UpgradeConfigSpec struct {
2424
// +kubebuilder:validation:Format=duration
2525
// +kubebuilder:default:="1h"
2626
MaxUpgradeStartDelay metav1.Duration `json:"maxUpgradeStartDelay"`
27+
// SuccessfulJobsHistoryLimit is the number of successful jobs to keep.
28+
// A value smaller or equal to zero indicates no limit.
29+
// +optional
30+
SuccessfulJobsHistoryLimit int `json:"successfulJobsHistoryLimit,omitempty"`
2731

2832
// JobTemplate defines the template for the upgrade job
2933
JobTemplate UpgradeConfigJobTemplate `json:"jobTemplate"`

config/crd/bases/managedupgrade.appuio.io_upgradeconfigs.yaml

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -244,6 +244,11 @@ spec:
244244
- cron
245245
- location
246246
type: object
247+
successfulJobsHistoryLimit:
248+
description: |-
249+
SuccessfulJobsHistoryLimit is the number of successful jobs to keep.
250+
A value smaller or equal to zero indicates no limit.
251+
type: integer
247252
required:
248253
- jobTemplate
249254
- maxSchedulingDelay

controllers/upgradeconfig_controller.go

Lines changed: 38 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ import (
44
"context"
55
"fmt"
66
"math"
7+
"slices"
78
"strconv"
89
"strings"
910
"time"
@@ -55,6 +56,8 @@ type UpgradeConfigReconciler struct {
5556
//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradesuspensionwindows,verbs=get;list;watch
5657
//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradesuspensionwindows/status,verbs=get
5758

59+
//+kubebuilder:rbac:groups=managedupgrade.appuio.io,resources=upgradejobs,verbs=get;list;watch;create;update;patch;delete
60+
5861
// Reconcile implements the reconcile loop for UpgradeConfig.
5962
// It schedules UpgradeJobs based on the UpgradeConfig's schedule - if an update is available.
6063
func (r *UpgradeConfigReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ctrl.Result, error) {
@@ -86,7 +89,8 @@ func (r *UpgradeConfigReconciler) Reconcile(ctx context.Context, req ctrl.Reques
8689

8790
jobSchedRequeue, schedErr := r.scheduleJob(ctx, &uc, sched, location)
8891
statusRequeue, stErr := r.updateNextPossibleSchedulesStatus(ctx, &uc, sched, location)
89-
if err := multierr.Combine(schedErr, stErr); err != nil {
92+
cleanupErr := r.cleanupSuccessfulJobs(ctx, &uc)
93+
if err := multierr.Combine(schedErr, stErr, cleanupErr); err != nil {
9094
return ctrl.Result{}, err
9195
}
9296

@@ -195,6 +199,39 @@ findNextRun:
195199
return 0, r.setLastScheduledUpgrade(ctx, uc, nextRun)
196200
}
197201

202+
func (r *UpgradeConfigReconciler) cleanupSuccessfulJobs(ctx context.Context, uc *managedupgradev1beta1.UpgradeConfig) error {
203+
l := log.FromContext(ctx).WithName("UpgradeConfigReconciler.cleanupSuccessfulJobs")
204+
205+
jobs, err := r.getControlledJobs(ctx, uc)
206+
if err != nil {
207+
return fmt.Errorf("could not get controlled jobs: %w", err)
208+
}
209+
210+
successfulJobs := make([]managedupgradev1beta1.UpgradeJob, 0, len(jobs))
211+
for _, job := range jobs {
212+
if apimeta.IsStatusConditionTrue(job.Status.Conditions, managedupgradev1beta1.UpgradeJobConditionSucceeded) {
213+
successfulJobs = append(successfulJobs, job)
214+
}
215+
}
216+
217+
slices.SortFunc(successfulJobs, func(a, b managedupgradev1beta1.UpgradeJob) int {
218+
return b.Spec.StartAfter.Time.Compare(a.Spec.StartAfter.Time)
219+
})
220+
221+
var delErrs []error
222+
if uc.Spec.SuccessfulJobsHistoryLimit > 0 && len(successfulJobs) > uc.Spec.SuccessfulJobsHistoryLimit {
223+
for _, job := range successfulJobs[uc.Spec.SuccessfulJobsHistoryLimit:] {
224+
l.Info("deleting successful job", "job", job.Name)
225+
delErrs = append(delErrs, r.Delete(ctx, &job))
226+
}
227+
}
228+
if err := multierr.Combine(delErrs...); err != nil {
229+
return fmt.Errorf("could not delete jobs: %w", err)
230+
}
231+
232+
return nil
233+
}
234+
198235
func (r *UpgradeConfigReconciler) setLastScheduledUpgrade(ctx context.Context, uc *managedupgradev1beta1.UpgradeConfig, t time.Time) error {
199236
uc.Status.LastScheduledUpgrade = &metav1.Time{Time: t}
200237
return r.Status().Update(ctx, uc)

controllers/upgradeconfig_controller_test.go

Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,10 +10,12 @@ import (
1010
configv1 "github.com/openshift/api/config/v1"
1111
"github.com/stretchr/testify/assert"
1212
"github.com/stretchr/testify/require"
13+
apimeta "k8s.io/apimachinery/pkg/api/meta"
1314
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
1415
"k8s.io/apimachinery/pkg/types"
1516
"k8s.io/client-go/tools/record"
1617
"sigs.k8s.io/controller-runtime/pkg/client"
18+
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"
1719
"sigs.k8s.io/controller-runtime/pkg/reconcile"
1820

1921
managedupgradev1beta1 "github.com/appuio/openshift-upgrade-controller/api/v1beta1"
@@ -409,6 +411,115 @@ func requireEventMatches(t *testing.T, recorder *record.FakeRecorder, substrings
409411
require.NotEmpty(t, matchingEvent, "no event matches %v, got %v", substrings, events)
410412
}
411413

414+
func Test_UpgradeConfigReconciler_Reconcile_CleanupSuccessfulJobs(t *testing.T) {
415+
ctx := context.Background()
416+
clock := mockClock{now: time.Date(2022, time.April, 4, 8, 0, 0, 0, time.UTC)}
417+
t.Log("Now: ", clock.Now())
418+
require.Equal(t, 14, func() int { _, isoweek := clock.Now().ISOWeek(); return isoweek }())
419+
require.Equal(t, time.Monday, clock.Now().Weekday())
420+
421+
ucv := &configv1.ClusterVersion{
422+
ObjectMeta: metav1.ObjectMeta{
423+
Name: "version",
424+
},
425+
}
426+
427+
upgradeConfig := &managedupgradev1beta1.UpgradeConfig{
428+
ObjectMeta: metav1.ObjectMeta{
429+
Name: "daily-maintenance",
430+
Namespace: "appuio-openshift-upgrade-controller",
431+
CreationTimestamp: metav1.Time{Time: clock.Now().Add(-time.Hour)},
432+
},
433+
Spec: managedupgradev1beta1.UpgradeConfigSpec{
434+
Schedule: managedupgradev1beta1.UpgradeConfigSchedule{
435+
Cron: "0 22 * * *", // At 22:00 every day
436+
Location: "UTC",
437+
Suspend: true,
438+
},
439+
MaxSchedulingDelay: metav1.Duration{Duration: time.Minute},
440+
SuccessfulJobsHistoryLimit: 2,
441+
},
442+
}
443+
444+
client := controllerClient(t, ucv, upgradeConfig)
445+
446+
jobStartTs := []time.Time{
447+
clock.Now().Add(-24 * time.Hour),
448+
clock.Now().Add(-24 * time.Hour * 2),
449+
clock.Now().Add(-24 * time.Hour * 3),
450+
}
451+
452+
for i, ts := range jobStartTs {
453+
job := &managedupgradev1beta1.UpgradeJob{
454+
ObjectMeta: metav1.ObjectMeta{
455+
Name: "job-" + strconv.Itoa(i),
456+
Namespace: upgradeConfig.Namespace,
457+
},
458+
Spec: managedupgradev1beta1.UpgradeJobSpec{
459+
StartAfter: metav1.NewTime(ts),
460+
},
461+
}
462+
{
463+
successfulOwned := job.DeepCopy()
464+
successfulOwned.Name = "successful-owned-" + successfulOwned.Name
465+
require.NoError(t, controllerutil.SetControllerReference(upgradeConfig, successfulOwned, client.Scheme()))
466+
require.NoError(t, client.Create(ctx, successfulOwned))
467+
apimeta.SetStatusCondition(&successfulOwned.Status.Conditions, metav1.Condition{
468+
Type: managedupgradev1beta1.UpgradeJobConditionSucceeded,
469+
Status: metav1.ConditionTrue,
470+
})
471+
client.Status().Update(ctx, successfulOwned)
472+
}
473+
{
474+
notSuccessfulOwned := job.DeepCopy()
475+
notSuccessfulOwned.Name = "not-successful-owned-" + notSuccessfulOwned.Name
476+
require.NoError(t, controllerutil.SetControllerReference(upgradeConfig, notSuccessfulOwned, client.Scheme()))
477+
require.NoError(t, client.Create(ctx, notSuccessfulOwned))
478+
}
479+
{
480+
successfulNotOwned := job.DeepCopy()
481+
successfulNotOwned.Name = "successful-not-owned-" + successfulNotOwned.Name
482+
require.NoError(t, client.Create(ctx, successfulNotOwned))
483+
apimeta.SetStatusCondition(&successfulNotOwned.Status.Conditions, metav1.Condition{
484+
Type: managedupgradev1beta1.UpgradeJobConditionSucceeded,
485+
Status: metav1.ConditionTrue,
486+
})
487+
client.Status().Update(ctx, successfulNotOwned)
488+
}
489+
}
490+
491+
recorder := record.NewFakeRecorder(5)
492+
subject := &UpgradeConfigReconciler{
493+
Client: client,
494+
Scheme: client.Scheme(),
495+
Recorder: recorder,
496+
497+
Clock: &clock,
498+
499+
ManagedUpstreamClusterVersionName: "version",
500+
}
501+
502+
_, err := subject.Reconcile(ctx, requestForObject(upgradeConfig))
503+
require.NoError(t, err)
504+
505+
var nj managedupgradev1beta1.UpgradeJobList
506+
require.NoError(t, client.List(ctx, &nj))
507+
names := []string{}
508+
for _, j := range nj.Items {
509+
names = append(names, j.Name)
510+
}
511+
require.ElementsMatch(t, names, []string{
512+
"not-successful-owned-job-0",
513+
"not-successful-owned-job-1",
514+
"not-successful-owned-job-2",
515+
"successful-not-owned-job-0",
516+
"successful-not-owned-job-1",
517+
"successful-not-owned-job-2",
518+
"successful-owned-job-0",
519+
"successful-owned-job-1",
520+
}, "only owned and successful jobs should be cleaned up, higher numbers are older")
521+
}
522+
412523
// requireTimeEqual asserts that two times are equal, ignoring their timezones.
413524
func requireTimeEqual(t *testing.T, expected, actual time.Time, msgAndArgs ...any) {
414525
t.Helper()

0 commit comments

Comments
 (0)