Skip to content

Commit

Permalink
fix(recurringjob): keep snapshots for the backup
Browse files Browse the repository at this point in the history
Add a new global setting that lets users keep the configured retain
count of snapshots when a recurring job performs a backup.

Ref: 2997

Signed-off-by: James Lu <james.lu@suse.com>
  • Loading branch information
mantissahz committed Oct 5, 2023
1 parent 6b910e9 commit a471f6c
Show file tree
Hide file tree
Showing 2 changed files with 41 additions and 6 deletions.
32 changes: 26 additions & 6 deletions app/recurring_job.go
Original file line number Diff line number Diff line change
Expand Up @@ -445,7 +445,12 @@ func (job *Job) doSnapshotCleanup(backupDone bool) (err error) {
return err
}

cleanupSnapshotNames := job.listSnapshotNamesToCleanup(collection.Data, backupDone)
allowBackupSnapshotDeleted, err := job.GetSettingAsBool(string(types.SettingNameAutoCleanupRecurringJobBackupSnapshot))
if err != nil {
return err
}

cleanupSnapshotNames := job.listSnapshotNamesToCleanup(collection.Data, backupDone, allowBackupSnapshotDeleted)
for _, snapshotName := range cleanupSnapshotNames {
if _, err := job.api.Volume.ActionSnapshotCRDelete(volume, &longhornclient.SnapshotCRInput{
Name: snapshotName,
Expand Down Expand Up @@ -545,18 +550,18 @@ type NameWithTimestamp struct {
Timestamp time.Time
}

func (job *Job) listSnapshotNamesToCleanup(snapshotCRs []longhornclient.SnapshotCR, backupDone bool) []string {
func (job *Job) listSnapshotNamesToCleanup(snapshotCRs []longhornclient.SnapshotCR, backupDone, allowBackupSnapshotDeleted bool) []string {
switch job.task {
case longhorn.RecurringJobTypeSnapshotDelete:
return job.filterExpiredSnapshots(snapshotCRs)
case longhorn.RecurringJobTypeSnapshotCleanup:
return []string{}
default:
return job.filterExpiredSnapshotsOfCurrentRecurringJob(snapshotCRs, backupDone)
return job.filterExpiredSnapshotsOfCurrentRecurringJob(snapshotCRs, backupDone, allowBackupSnapshotDeleted)
}
}

func (job *Job) filterExpiredSnapshotsOfCurrentRecurringJob(snapshotCRs []longhornclient.SnapshotCR, backupDone bool) []string {
func (job *Job) filterExpiredSnapshotsOfCurrentRecurringJob(snapshotCRs []longhornclient.SnapshotCR, backupDone, allowBackupSnapshotDeleted bool) []string {
jobLabel, found := job.labels[types.RecurringJobLabel]
if !found {
return []string{}
Expand All @@ -565,11 +570,12 @@ func (job *Job) filterExpiredSnapshotsOfCurrentRecurringJob(snapshotCRs []longho
// Only consider deleting the snapshots that were created by our current job
snapshotCRs = filterSnapshotCRsWithLabel(snapshotCRs, types.RecurringJobLabel, jobLabel)

if job.task == longhorn.RecurringJobTypeSnapshot || job.task == longhorn.RecurringJobTypeSnapshotForceCreate {
// For a recurring snapshot job, or when AutoCleanupRecurringJobBackupSnapshot is disabled, keep the number of snapshots at job.retain.
if job.task == longhorn.RecurringJobTypeSnapshot || job.task == longhorn.RecurringJobTypeSnapshotForceCreate || !allowBackupSnapshotDeleted {
return filterExpiredItems(snapshotCRsToNameWithTimestamps(snapshotCRs), job.retain)
}

// For the recurring backup job, only keep the snapshot of the last backup and the current snapshot
// For the recurring backup job, only keep the snapshot of the last backup and the current snapshot when AutoCleanupRecurringJobBackupSnapshot is enabled.
retainingSnapshotCRs := map[string]struct{}{job.snapshotName: {}}
if !backupDone {
lastBackup, err := job.getLastBackup()
Expand Down Expand Up @@ -785,6 +791,20 @@ func (job *Job) UpdateVolumeStatus(v *longhorn.Volume) (*longhorn.Volume, error)
return job.lhClient.LonghornV1beta2().Volumes(job.namespace).UpdateStatus(context.TODO(), v, metav1.UpdateOptions{})
}

// GetSettingAsBool fetches the setting called name from the job's namespace
// and returns its value parsed as a boolean. An error is returned when the
// setting cannot be fetched or when its value is not a valid boolean string.
func (job *Job) GetSettingAsBool(name string) (bool, error) {
	setting, getErr := job.lhClient.LonghornV1beta2().Settings(job.namespace).Get(context.TODO(), name, metav1.GetOptions{})
	if getErr != nil {
		return false, getErr
	}

	// strconv.ParseBool already yields (false, err) for an unparsable value,
	// so its result can be handed back unchanged.
	return strconv.ParseBool(setting.Value)
}

// waitForVolumeState timeout in second
func (job *Job) waitForVolumeState(state string, timeout int) (*longhornclient.Volume, error) {
volumeAPI := job.api.Volume
Expand Down
15 changes: 15 additions & 0 deletions types/setting.go
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ const (
SettingNameSystemManagedPodsImagePullPolicy = SettingName("system-managed-pods-image-pull-policy")
SettingNameAllowVolumeCreationWithDegradedAvailability = SettingName("allow-volume-creation-with-degraded-availability")
SettingNameAutoCleanupSystemGeneratedSnapshot = SettingName("auto-cleanup-system-generated-snapshot")
SettingNameAutoCleanupRecurringJobBackupSnapshot = SettingName("auto-cleanup-recurring-job-backup-snapshot")
SettingNameConcurrentAutomaticEngineUpgradePerNodeLimit = SettingName("concurrent-automatic-engine-upgrade-per-node-limit")
SettingNameBackingImageCleanupWaitInterval = SettingName("backing-image-cleanup-wait-interval")
SettingNameBackingImageRecoveryWaitInterval = SettingName("backing-image-recovery-wait-interval")
Expand Down Expand Up @@ -157,6 +158,7 @@ var (
SettingNameSystemManagedPodsImagePullPolicy,
SettingNameAllowVolumeCreationWithDegradedAvailability,
SettingNameAutoCleanupSystemGeneratedSnapshot,
SettingNameAutoCleanupRecurringJobBackupSnapshot,
SettingNameConcurrentAutomaticEngineUpgradePerNodeLimit,
SettingNameBackingImageCleanupWaitInterval,
SettingNameBackingImageRecoveryWaitInterval,
Expand Down Expand Up @@ -258,6 +260,7 @@ var (
SettingNameSystemManagedPodsImagePullPolicy: SettingDefinitionSystemManagedPodsImagePullPolicy,
SettingNameAllowVolumeCreationWithDegradedAvailability: SettingDefinitionAllowVolumeCreationWithDegradedAvailability,
SettingNameAutoCleanupSystemGeneratedSnapshot: SettingDefinitionAutoCleanupSystemGeneratedSnapshot,
SettingNameAutoCleanupRecurringJobBackupSnapshot: SettingDefinitionAutoCleanupRecurringJobBackupSnapshot,
SettingNameConcurrentAutomaticEngineUpgradePerNodeLimit: SettingDefinitionConcurrentAutomaticEngineUpgradePerNodeLimit,
SettingNameBackingImageCleanupWaitInterval: SettingDefinitionBackingImageCleanupWaitInterval,
SettingNameBackingImageRecoveryWaitInterval: SettingDefinitionBackingImageRecoveryWaitInterval,
Expand Down Expand Up @@ -805,6 +808,16 @@ var (
Default: "true",
}

// SettingDefinitionAutoCleanupRecurringJobBackupSnapshot describes the
// required, writable boolean setting registered under
// SettingNameAutoCleanupRecurringJobBackupSnapshot. It defaults to "true".
// The recurring job's snapshot cleanup reads this setting: when it is
// disabled, a recurring backup job keeps job.retain snapshots, the same way
// a recurring snapshot job does, instead of keeping only the snapshot of the
// last backup and the current snapshot.
SettingDefinitionAutoCleanupRecurringJobBackupSnapshot = SettingDefinition{
DisplayName: "Automatically Cleanup Recurring Job Backup Snapshot",
Description: "This setting enables Longhorn to automatically cleanup the snapshot generated by a recurring backup job.",
Category: SettingCategorySnapshot,
Type: SettingTypeBool,
Required: true,
ReadOnly: false,
Default: "true",
}

SettingDefinitionConcurrentAutomaticEngineUpgradePerNodeLimit = SettingDefinition{
DisplayName: "Concurrent Automatic Engine Upgrade Per Node Limit",
Description: "This setting controls how Longhorn automatically upgrades volumes' engines after upgrading Longhorn manager. " +
Expand Down Expand Up @@ -1231,6 +1244,8 @@ func ValidateSetting(name, value string) (err error) {
fallthrough
case SettingNameAutoCleanupSystemGeneratedSnapshot:
fallthrough
case SettingNameAutoCleanupRecurringJobBackupSnapshot:
fallthrough
case SettingNameAutoDeletePodWhenVolumeDetachedUnexpectedly:
fallthrough
case SettingNameKubernetesClusterAutoscalerEnabled:
Expand Down

0 comments on commit a471f6c

Please sign in to comment.