Skip to content

Commit

Permalink
add restore finalizer to clean up external resources
Browse files Browse the repository at this point in the history
Signed-off-by: allenxu404 <qix2@vmware.com>
  • Loading branch information
allenxu404 committed Jul 17, 2023
1 parent 55987c3 commit 3b0db77
Show file tree
Hide file tree
Showing 4 changed files with 168 additions and 20 deletions.
1 change: 1 addition & 0 deletions changelogs/unreleased/6479-allenxu404
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Add restore finalizer to clean up external resources
54 changes: 53 additions & 1 deletion pkg/cmd/cli/uninstall/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,7 @@ import (
"k8s.io/apimachinery/pkg/util/wait"
kbclient "sigs.k8s.io/controller-runtime/pkg/client"

velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
"github.com/vmware-tanzu/velero/pkg/cmd/cli"
Expand Down Expand Up @@ -159,6 +160,12 @@ func Run(ctx context.Context, kbClient kbclient.Client, namespace string) error
}

func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// delete resources with finalizer attached first to ensure finalizer can be handled by corresponding controller and resources can be deleted successfully before controller's pod is deleted.
// otherwise the process of deleting namespace will get stuck in deleting those resources forever.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}

ns := &corev1.Namespace{}
key := kbclient.ObjectKey{Name: namespace}
if err := kbClient.Get(ctx, key, ns); err != nil {
Expand All @@ -176,7 +183,7 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
}
return err
}

fmt.Println()
fmt.Printf("Waiting for velero namespace %q to be deleted\n", namespace)
ctx, cancel := context.WithCancel(ctx)
defer cancel()
Expand All @@ -203,3 +210,48 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
fmt.Printf("Velero namespace %q deleted\n", namespace)
return nil
}

func deleteResourcesWithFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// delete restores
restoreList := &velerov1api.RestoreList{}
if err := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return err
}

for i := range restoreList.Items {
if err := kbClient.Delete(ctx, &restoreList.Items[i]); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return err
}
}

fmt.Println("Waiting for resource with finalizer attached to be deleted")
ctx, cancel := context.WithCancel(ctx)
defer cancel()

var err error
checkFunc := func() {
restoreList := &velerov1api.RestoreList{}
if err := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
cancel()
return
}

if len(restoreList.Items) > 0 {
fmt.Print(".")
} else {
cancel()
return
}
}

// wait until all the restores are deleted
wait.Until(checkFunc, 5*time.Millisecond, ctx.Done())
if err != nil {
return err
}

return nil
}
104 changes: 88 additions & 16 deletions pkg/controller/restore_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@ import (

"github.com/pkg/errors"
"github.com/sirupsen/logrus"
apierrors "k8s.io/apimachinery/pkg/api/errors"
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
"k8s.io/apimachinery/pkg/labels"
"k8s.io/apimachinery/pkg/types"
Expand All @@ -37,6 +38,7 @@ import (
"k8s.io/utils/clock"
ctrl "sigs.k8s.io/controller-runtime"
"sigs.k8s.io/controller-runtime/pkg/client"
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

"github.com/vmware-tanzu/velero/internal/hook"
api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
Expand Down Expand Up @@ -84,6 +86,8 @@ var nonRestorableResources = []string{
"backuprepositories.velero.io",
}

var ExternalResourcesFinalizer = "restores.velero.io/external-resources-finalizer"

type restoreReconciler struct {
ctx context.Context
namespace string
Expand Down Expand Up @@ -155,10 +159,58 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
restore := &api.Restore{}
err := r.kbClient.Get(ctx, client.ObjectKey{Namespace: req.Namespace, Name: req.Name}, restore)
if err != nil {
log.Infof("Fail to get restore %s: %s", req.NamespacedName.String(), err.Error())
if apierrors.IsNotFound(err) {
log.Debugf("restore[%s] not found", req.Name)
return ctrl.Result{}, nil
}

log.Errorf("Fail to get restore %s: %s", req.NamespacedName.String(), err.Error())
return ctrl.Result{}, err
}

// deal with finalizer
if !restore.DeletionTimestamp.IsZero() && len(restore.Finalizers) != 0 {
// check the finalizer and run clean-up
if controllerutil.ContainsFinalizer(restore, ExternalResourcesFinalizer) {
if err := r.deleteExternalResources(restore); err != nil {
log.Errorf("fail to delete external resources: %s", err.Error())
return ctrl.Result{}, err
}
// once finish clean-up, remove the finalizer from the restore so that the restore will be unlocked and deleted.
original := restore.DeepCopy()
controllerutil.RemoveFinalizer(restore, ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.Errorf("fail to remove finalizer: %s", err.Error())
return ctrl.Result{}, err
}
return ctrl.Result{}, nil
} else {
log.Error("DeletionTimestamp is marked but can't find the expected finalizer")
return ctrl.Result{}, fmt.Errorf("DeletionTimestamp is marked but can't find the expected finalizer")
}
}

// add finalizer
if restore.DeletionTimestamp.IsZero() && !controllerutil.ContainsFinalizer(restore, ExternalResourcesFinalizer) {
original := restore.DeepCopy()
controllerutil.AddFinalizer(restore, ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.Errorf("fail to add finalizer: %s", err.Error())
return ctrl.Result{}, err
}
}

switch restore.Status.Phase {
case "", api.RestorePhaseNew:
// only process new restores
default:
r.logger.WithFields(logrus.Fields{
"restore": kubeutil.NamespaceAndName(restore),
"phase": restore.Status.Phase,
}).Debug("Restore is not handled")
return ctrl.Result{}, nil
}

// store a copy of the original restore for creating patch
original := restore.DeepCopy()

Expand Down Expand Up @@ -211,6 +263,7 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct
restore.Status.CompletionTimestamp = &metav1.Time{Time: r.clock.Now()}
}
log.Debug("Updating restore's final status")

if err = kubeutil.PatchResource(original, restore, r.kbClient); err != nil {
log.WithError(errors.WithStack(err)).Info("Error updating restore's final status")
// No need to re-enqueue here, because restore's already set to InProgress before.
Expand All @@ -222,21 +275,6 @@ func (r *restoreReconciler) Reconcile(ctx context.Context, req ctrl.Request) (ct

func (r *restoreReconciler) SetupWithManager(mgr ctrl.Manager) error {
return ctrl.NewControllerManagedBy(mgr).
WithEventFilter(kubeutil.NewCreateEventPredicate(func(obj client.Object) bool {
restore := obj.(*api.Restore)

switch restore.Status.Phase {
case "", api.RestorePhaseNew:
// only process new restores
return true
default:
r.logger.WithFields(logrus.Fields{
"restore": kubeutil.NamespaceAndName(restore),
"phase": restore.Status.Phase,
}).Debug("Restore is not new, skipping")
return false
}
})).
For(&api.Restore{}).
Complete(r)
}
Expand Down Expand Up @@ -600,6 +638,40 @@ func (r *restoreReconciler) updateTotalRestoreMetric() {
}()
}

// deleteExternalResources deletes all the external resources related to the restore
func (r *restoreReconciler) deleteExternalResources(restore *api.Restore) error {
r.logger.Infof("Finalizer is deleting external resources, backup: %s", restore.Spec.BackupName)

if restore.Spec.BackupName == "" {
return nil
}

backupInfo, err := r.fetchBackupInfo(restore.Spec.BackupName)
if err != nil {
return errors.Wrap(err, fmt.Sprintf("can't get backup info, backup: %s", restore.Spec.BackupName))
}

// if storage locations is read-only, skip deletion
if backupInfo.location.Spec.AccessMode == api.BackupStorageLocationAccessModeReadOnly {
return nil
}

// delete restore files in object storage
pluginManager := r.newPluginManager(r.logger)
defer pluginManager.CleanupClients()

backupStore, err := r.backupStoreGetter.Get(backupInfo.location, pluginManager, r.logger)
if err != nil {
return errors.Wrap(err, fmt.Sprintf("can't get backupStore, backup: %s", restore.Spec.BackupName))
}

if err = backupStore.DeleteRestore(restore.Name); err != nil {
return errors.Wrap(err, fmt.Sprintf("can't delete restore files in object storage, backup: %s", restore.Spec.BackupName))
}

return nil
}

func putResults(restore *api.Restore, results map[string]results.Result, backupStore persistence.BackupStore) error {
buf := new(bytes.Buffer)
gzw := gzip.NewWriter(buf)
Expand Down
29 changes: 26 additions & 3 deletions pkg/controller/restore_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -161,10 +161,10 @@ func TestProcessQueueItemSkips(t *testing.T) {
expectError: true,
},
{
name: "missing restore returns error",
name: "missing restore returns nil",
namespace: "foo",
restoreName: "bar",
expectError: true,
expectError: false,
},
}

Expand Down Expand Up @@ -234,6 +234,7 @@ func TestRestoreReconcile(t *testing.T) {
backupStoreGetBackupContentsErr error
putRestoreLogErr error
expectedFinalPhase string
addValidFinalizer bool
}{
{
name: "restore with both namespace in both includedNamespaces and excludedNamespaces fails validation",
Expand Down Expand Up @@ -391,6 +392,22 @@ func TestRestoreReconcile(t *testing.T) {
backupStoreGetBackupContentsErr: errors.New("Couldn't download backup"),
backup: defaultBackup().StorageLocation("default").Result(),
},
{
name: "restore attached with an expected finalizer gets cleaned up successfully",
location: defaultStorageLocation,
restore: NewRestore("foo", "bar", "backup-1", "ns-1", "", velerov1api.RestorePhaseCompleted).ObjectMeta(builder.WithFinalizers(ExternalResourcesFinalizer), builder.WithDeletionTimestamp(timestamp.Time)).Result(),
backup: defaultBackup().StorageLocation("default").Result(),
expectedErr: false,
addValidFinalizer: true,
},
{
name: "restore attached with an unknown finalizer is not supported",
location: defaultStorageLocation,
restore: NewRestore("foo", "bar", "backup-1", "ns-1", "", velerov1api.RestorePhaseCompleted).ObjectMeta(builder.WithFinalizers("restores.velero.io/unknown-finalizer"), builder.WithDeletionTimestamp(timestamp.Time)).Result(),
backup: defaultBackup().StorageLocation("default").Result(),
expectedErr: true,
addValidFinalizer: false,
},
}

formatFlag := logging.FormatText
Expand Down Expand Up @@ -483,6 +500,10 @@ func TestRestoreReconcile(t *testing.T) {
pluginManager.On("CleanupClients")
}

if test.addValidFinalizer {
backupStore.On("DeleteRestore", test.restore.Name).Return(nil)
}

//err = r.processQueueItem(key)
_, err = r.Reconcile(context.Background(), ctrl.Request{NamespacedName: types.NamespacedName{
Namespace: test.restore.Namespace,
Expand Down Expand Up @@ -536,7 +557,9 @@ func TestRestoreReconcile(t *testing.T) {
assert.Zero(t, restorer.calledWithArg)
return
}
assert.Equal(t, 1, len(restorer.Calls))
if !test.addValidFinalizer {
assert.Equal(t, 1, len(restorer.Calls))
}

// validate Patch call 2 (setting phase)

Expand Down

0 comments on commit 3b0db77

Please sign in to comment.