Skip to content

Commit

Permalink
Fix delete dataupload datadownload failure when Velero uninstall
Browse files Browse the repository at this point in the history
Signed-off-by: Ming <mqiu@vmware.com>
  • Loading branch information
qiuming-best committed Aug 24, 2023
1 parent 3e61386 commit 7f3b7fe
Show file tree
Hide file tree
Showing 5 changed files with 175 additions and 61 deletions.
200 changes: 157 additions & 43 deletions pkg/cmd/cli/uninstall/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package uninstall
import (
"context"
"fmt"
"reflect"
"sync"
"time"

"github.com/pkg/errors"
Expand All @@ -40,6 +42,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
"github.com/vmware-tanzu/velero/pkg/cmd/cli"
Expand All @@ -50,6 +53,12 @@ import (

var gracefulDeletionMaximumDuration = 1 * time.Minute

var resToDelete = []kbclient.ObjectList{
&velerov1api.RestoreList{},
&velerov2alpha1api.DataUploadList{},
&velerov2alpha1api.DataDownloadList{},
}

// uninstallOptions collects all the options for uninstalling Velero from a Kubernetes cluster.
type uninstallOptions struct {
wait bool // deprecated
Expand Down Expand Up @@ -167,11 +176,7 @@ func Run(ctx context.Context, kbClient kbclient.Client, namespace string) error
}

func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Deal with resources with attached finalizers to ensure proper handling of those finalizers.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}

// First check if it's already been deleted
ns := &corev1.Namespace{}
key := kbclient.ObjectKey{Name: namespace}
if err := kbClient.Get(ctx, key, ns); err != nil {
Expand All @@ -182,6 +187,11 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
return err
}

// Deal with resources with attached finalizers to ensure proper handling of those finalizers.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}

if err := kbClient.Delete(ctx, ns); err != nil {
if apierrors.IsNotFound(err) {
fmt.Printf("Velero namespace %q does not exist, skipping.\n", namespace)
Expand Down Expand Up @@ -223,34 +233,46 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
// 2. The controller may encounter errors while handling the finalizer, in such case, the controller will keep trying until it succeeds.
// So it is important to set a timeout, once the process exceed the timeout, we will forcedly delete the resources by removing the finalizer from them,
// otherwise the deletion process may get stuck indefinitely.
// 3. There is only restore finalizer supported as of v1.12. If any new finalizers are added in the future, the corresponding deletion logic can be
// 3. There is only resources finalizer supported as of v1.12. If any new finalizers are added in the future, the corresponding deletion logic can be
// incorporated into this function.
func deleteResourcesWithFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
fmt.Println("Waiting for resource with attached finalizer to be deleted")
return deleteRestore(ctx, kbClient, namespace)
return deleteResources(ctx, kbClient, namespace)
}

func deleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Check if restore crd exists, if it does not exist, return immediately.
func checkResources(ctx context.Context, kbClient kbclient.Client) error {
checkCRDs := []string{"restores.velero.io", "datauploads.velero.io", "datadownloads.velero.io"}
var err error
v1crd := &apiextv1.CustomResourceDefinition{}
key := kbclient.ObjectKey{Name: "restores.velero.io"}
if err = kbClient.Get(ctx, key, v1crd); err != nil {
if apierrors.IsNotFound(err) {
return nil
} else {
return errors.Wrap(err, "Error getting restore crd")
for _, crd := range checkCRDs {
key := kbclient.ObjectKey{Name: crd}
if err = kbClient.Get(ctx, key, v1crd); err != nil {
if apierrors.IsNotFound(err) {
return nil
} else {
return errors.Wrapf(err, "Error getting %s crd", crd)
}
}
}
return nil
}

func deleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Check if resources crd exists, if it does not exist, return immediately.
err := checkResources(ctx, kbClient)
if err != nil {
return err
}

// First attempt to gracefully delete all the restore within a specified time frame, If the process exceeds the timeout limit,
// First attempt to gracefully delete all the resources within a specified time frame, If the process exceeds the timeout limit,
// it is likely that there may be errors during the finalization of restores. In such cases, we should proceed with forcefully deleting the restores.
err = gracefullyDeleteRestore(ctx, kbClient, namespace)
err = gracefullyDeleteResources(ctx, kbClient, namespace)
if err != nil && err != wait.ErrWaitTimeout {
return errors.Wrap(err, "Error deleting restores")
return errors.Wrap(err, "Error deleting resources")
}

if err == wait.ErrWaitTimeout {
err = forcedlyDeleteRestore(ctx, kbClient, namespace)
err = forcedlyDeleteResources(ctx, kbClient, namespace)
if err != nil {
return errors.Wrap(err, "Error deleting restores")
}
Expand All @@ -259,30 +281,89 @@ func deleteRestore(ctx context.Context, kbClient kbclient.Client, namespace stri
return nil
}

func gracefullyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
func gracefullyDeleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
errorChan := make(chan error)

var wg sync.WaitGroup
wg.Add(len(resToDelete))

for i := range resToDelete {
go func(index int) {
defer wg.Done()
errorChan <- gracefullyDeleteResource(ctx, kbClient, namespace, resToDelete[index])
}(i)
}

go func() {
wg.Wait()
close(errorChan)
}()

for err := range errorChan {
if err != nil {
return err
}
}

return waitDeletingResources(ctx, kbClient, namespace)
}

func gracefullyDeleteResource(ctx context.Context, kbClient kbclient.Client, namespace string, list kbclient.ObjectList) error {
var err error
restoreList := &velerov1api.RestoreList{}
if err = kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting restores during graceful deletion")

if err = kbClient.List(ctx, list, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting resources during graceful deletion")
}

for i := range restoreList.Items {
if err = kbClient.Delete(ctx, &restoreList.Items[i]); err != nil {
var objectsToDelete []kbclient.Object
items := reflect.ValueOf(list).Elem().FieldByName("Items")

for i := 0; i < items.Len(); i++ {
item := items.Index(i).Addr().Interface()
// Type assertion to cast item to the appropriate type
switch typedItem := item.(type) {
case *velerov1api.Restore:
objectsToDelete = append(objectsToDelete, typedItem)
case *velerov2alpha1api.DataUpload:
objectsToDelete = append(objectsToDelete, typedItem)
case *velerov2alpha1api.DataDownload:
objectsToDelete = append(objectsToDelete, typedItem)
default:
return errors.New("Unsupported resource type")
}
}

// Delete collected resources in a batch
for _, resource := range objectsToDelete {
if err = kbClient.Delete(ctx, resource); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return errors.Wrap(err, "Error deleting restores during graceful deletion")
return errors.Wrap(err, "Error deleting resources during graceful deletion")
}
}
return err
}

func waitDeletingResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Wait for the deletion of all the restores within a specified time frame
err = wait.PollImmediate(time.Second, gracefulDeletionMaximumDuration, func() (bool, error) {
err := wait.PollImmediate(time.Second, gracefulDeletionMaximumDuration, func() (bool, error) {
restoreList := &velerov1api.RestoreList{}
if errList := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}

if len(restoreList.Items) > 0 {
dataUploadList := &velerov2alpha1api.DataUploadList{}
if errList := kbClient.List(ctx, dataUploadList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}

dataDownloadList := &velerov2alpha1api.DataDownloadList{}
if errList := kbClient.List(ctx, dataDownloadList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}

if len(restoreList.Items)+len(dataUploadList.Items)+len(dataDownloadList.Items) > 0 {
fmt.Print(".")
return false, nil
} else {
Expand All @@ -293,10 +374,10 @@ func gracefullyDeleteRestore(ctx context.Context, kbClient kbclient.Client, name
return err
}

func forcedlyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
func forcedlyDeleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Delete velero deployment first in case:
// 1. finalizers will be added back by restore controller after they are removed at next step;
// 2. new restores attached with finalizer will be created by restore controller after we remove all the restores' finalizer at next step;
// 1. finalizers will be added back by resources related controller after they are removed at next step;
// 2. new resources attached with finalizer will be created by controller after we remove all the resources' finalizer at next step;
deploy := &appsv1api.Deployment{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Expand Down Expand Up @@ -330,23 +411,56 @@ func forcedlyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namesp
if err != nil {
return errors.Wrap(err, "Error deleting velero deployment during force deletion")
}
return removeResourcesFinalizer(ctx, kbClient, namespace)
}

// Remove all the restores' finalizer so they can be deleted during the deletion of velero namespace.
restoreList := &velerov1api.RestoreList{}
if err := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting restores during force deletion")
func removeResourcesFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
for i := range resToDelete {
if err := removeResourceFinalizer(ctx, kbClient, namespace, resToDelete[i]); err != nil {
return err
}
}
return nil
}

for i := range restoreList.Items {
if controllerutil.ContainsFinalizer(&restoreList.Items[i], controller.ExternalResourcesFinalizer) {
update := &restoreList.Items[i]
original := update.DeepCopy()
controllerutil.RemoveFinalizer(update, controller.ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, update, kbClient); err != nil {
return errors.Wrap(err, "Error removing restore finalizer during force deletion")
}
func removeResourceFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string, resourceList kbclient.ObjectList) error {
listOptions := &kbclient.ListOptions{Namespace: namespace}
if err := kbClient.List(ctx, resourceList, listOptions); err != nil {
return errors.Wrap(err, fmt.Sprintf("Error getting resources of type %T during force deletion", resourceList))
}

items := reflect.ValueOf(resourceList).Elem().FieldByName("Items")
var err error
for i := 0; i < items.Len(); i++ {
item := items.Index(i).Addr().Interface()
// Type assertion to cast item to the appropriate type
switch typedItem := item.(type) {
case *velerov1api.Restore:
err = removeFinalizerForObject(typedItem, controller.ExternalResourcesFinalizer, kbClient)
case *velerov2alpha1api.DataUpload:
err = removeFinalizerForObject(typedItem, controller.DataUploadDownloadFinalizer, kbClient)
case *velerov2alpha1api.DataDownload:
err = removeFinalizerForObject(typedItem, controller.DataUploadDownloadFinalizer, kbClient)
default:
err = errors.Errorf("Unsupported resource type %T", typedItem)
}
if err != nil {
return err
}
}

return nil
}

func removeFinalizerForObject(obj kbclient.Object, finalizer string, kbClient kbclient.Client) error {
if controllerutil.ContainsFinalizer(obj, finalizer) {
update := obj.DeepCopyObject().(kbclient.Object)
original := obj.DeepCopyObject().(kbclient.Object)

controllerutil.RemoveFinalizer(update, finalizer)
if err := kubeutil.PatchResource(original, update, kbClient); err != nil {
return errors.Wrap(err, fmt.Sprintf("Error removing finalizer %q during force deletion", finalizer))
}
}
return nil
}
10 changes: 5 additions & 5 deletions pkg/controller/data_download_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
// Add finalizer
// Logic for clear resources when datadownload been deleted
if dd.DeletionTimestamp.IsZero() { // add finalizer for all cr at beginning
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) {
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dd *velerov2alpha1api.DataDownload) {
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
})
if err != nil {
log.Errorf("failed to add finalizer with error %s for %s/%s", err.Error(), dd.Namespace, dd.Name)
Expand All @@ -135,7 +135,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{Requeue: true}, nil
}
}
} else if controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
} else if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
// when delete cr we need to clear up internal resources created by Velero, here we use the cancel mechanism
// to help clear up resources instead of clear them directly in case of some conflict with Expose action
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataDownload *velerov2alpha1api.DataDownload) {
Expand Down Expand Up @@ -309,9 +309,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else {
// put the finilizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state
if isDataDownloadInFinalState(dd) && !dd.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) {
if isDataDownloadInFinalState(dd) && !dd.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
original := dd.DeepCopy()
controllerutil.RemoveFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.RemoveFinalizer(dd, DataUploadDownloadFinalizer)
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error to remove finalizer")
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/data_download_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ func TestDataDownloadReconcile(t *testing.T) {
name: "dataDownload with enabled cancel",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result()
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
Expand All @@ -312,12 +312,12 @@ func TestDataDownloadReconcile(t *testing.T) {
name: "dataDownload with remove finalizer and should not be retrieved",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Cancel(true).Result()
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
checkFunc: func(dd velerov2alpha1api.DataDownload) bool {
return !controllerutil.ContainsFinalizer(&dd, dataUploadDownloadFinalizer)
return !controllerutil.ContainsFinalizer(&dd, DataUploadDownloadFinalizer)
},
},
}
Expand Down Expand Up @@ -428,7 +428,7 @@ func TestDataDownloadReconcile(t *testing.T) {
assert.Contains(t, dd.Status.Message, test.expectedStatusMsg)
}
if test.dd.Namespace == velerov1api.DefaultNamespace {
if controllerutil.ContainsFinalizer(test.dd, dataUploadDownloadFinalizer) {
if controllerutil.ContainsFinalizer(test.dd, DataUploadDownloadFinalizer) {
assert.True(t, true, apierrors.IsNotFound(err))
} else {
require.Nil(t, err)
Expand Down
Loading

0 comments on commit 7f3b7fe

Please sign in to comment.