Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix delete dataupload datadownload failure when Velero uninstall #6689

Merged
merged 1 commit into from
Aug 25, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
200 changes: 157 additions & 43 deletions pkg/cmd/cli/uninstall/uninstall.go
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ package uninstall
import (
"context"
"fmt"
"reflect"
"sync"
"time"

"github.com/pkg/errors"
Expand All @@ -40,6 +42,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/controller/controllerutil"

velerov1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v1"
velerov2alpha1api "github.com/vmware-tanzu/velero/pkg/apis/velero/v2alpha1"
"github.com/vmware-tanzu/velero/pkg/client"
"github.com/vmware-tanzu/velero/pkg/cmd"
"github.com/vmware-tanzu/velero/pkg/cmd/cli"
Expand All @@ -50,6 +53,12 @@ import (

var gracefulDeletionMaximumDuration = 1 * time.Minute

var resToDelete = []kbclient.ObjectList{
&velerov1api.RestoreList{},
&velerov2alpha1api.DataUploadList{},
&velerov2alpha1api.DataDownloadList{},
}

// uninstallOptions collects all the options for uninstalling Velero from a Kubernetes cluster.
type uninstallOptions struct {
wait bool // deprecated
Expand Down Expand Up @@ -167,11 +176,7 @@ func Run(ctx context.Context, kbClient kbclient.Client, namespace string) error
}

func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Deal with resources with attached finalizers to ensure proper handling of those finalizers.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}

// First check if it's already been deleted
ns := &corev1.Namespace{}
key := kbclient.ObjectKey{Name: namespace}
if err := kbClient.Get(ctx, key, ns); err != nil {
Expand All @@ -182,6 +187,11 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
return err
}

// Deal with resources with attached finalizers to ensure proper handling of those finalizers.
if err := deleteResourcesWithFinalizer(ctx, kbClient, namespace); err != nil {
return errors.Wrap(err, "Fail to remove finalizer from restores")
}

if err := kbClient.Delete(ctx, ns); err != nil {
if apierrors.IsNotFound(err) {
fmt.Printf("Velero namespace %q does not exist, skipping.\n", namespace)
Expand Down Expand Up @@ -223,34 +233,46 @@ func deleteNamespace(ctx context.Context, kbClient kbclient.Client, namespace st
// 2. The controller may encounter errors while handling the finalizer, in such case, the controller will keep trying until it succeeds.
// So it is important to set a timeout, once the process exceed the timeout, we will forcedly delete the resources by removing the finalizer from them,
// otherwise the deletion process may get stuck indefinitely.
// 3. There is only restore finalizer supported as of v1.12. If any new finalizers are added in the future, the corresponding deletion logic can be
// 3. There is only resources finalizer supported as of v1.12. If any new finalizers are added in the future, the corresponding deletion logic can be
// incorporated into this function.
func deleteResourcesWithFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
fmt.Println("Waiting for resource with attached finalizer to be deleted")
return deleteRestore(ctx, kbClient, namespace)
return deleteResources(ctx, kbClient, namespace)
}

func deleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Check if restore crd exists, if it does not exist, return immediately.
func checkResources(ctx context.Context, kbClient kbclient.Client) error {
checkCRDs := []string{"restores.velero.io", "datauploads.velero.io", "datadownloads.velero.io"}
var err error
v1crd := &apiextv1.CustomResourceDefinition{}
key := kbclient.ObjectKey{Name: "restores.velero.io"}
if err = kbClient.Get(ctx, key, v1crd); err != nil {
if apierrors.IsNotFound(err) {
return nil
} else {
return errors.Wrap(err, "Error getting restore crd")
for _, crd := range checkCRDs {
key := kbclient.ObjectKey{Name: crd}
if err = kbClient.Get(ctx, key, v1crd); err != nil {
if apierrors.IsNotFound(err) {
return nil
} else {
return errors.Wrapf(err, "Error getting %s crd", crd)
}
}
}
return nil
}

func deleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Check if resources crd exists, if it does not exist, return immediately.
err := checkResources(ctx, kbClient)
if err != nil {
return err
}

// First attempt to gracefully delete all the restore within a specified time frame, If the process exceeds the timeout limit,
// First attempt to gracefully delete all the resources within a specified time frame, If the process exceeds the timeout limit,
// it is likely that there may be errors during the finalization of restores. In such cases, we should proceed with forcefully deleting the restores.
err = gracefullyDeleteRestore(ctx, kbClient, namespace)
err = gracefullyDeleteResources(ctx, kbClient, namespace)
if err != nil && err != wait.ErrWaitTimeout {
return errors.Wrap(err, "Error deleting restores")
return errors.Wrap(err, "Error deleting resources")
}

if err == wait.ErrWaitTimeout {
err = forcedlyDeleteRestore(ctx, kbClient, namespace)
err = forcedlyDeleteResources(ctx, kbClient, namespace)
if err != nil {
return errors.Wrap(err, "Error deleting restores")
}
Expand All @@ -259,30 +281,89 @@ func deleteRestore(ctx context.Context, kbClient kbclient.Client, namespace stri
return nil
}

func gracefullyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
func gracefullyDeleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
errorChan := make(chan error)

var wg sync.WaitGroup
wg.Add(len(resToDelete))

for i := range resToDelete {
go func(index int) {
defer wg.Done()
errorChan <- gracefullyDeleteResource(ctx, kbClient, namespace, resToDelete[index])
}(i)
}

go func() {
wg.Wait()
close(errorChan)
}()

for err := range errorChan {
if err != nil {
return err
}
}

return waitDeletingResources(ctx, kbClient, namespace)
}

func gracefullyDeleteResource(ctx context.Context, kbClient kbclient.Client, namespace string, list kbclient.ObjectList) error {
var err error
restoreList := &velerov1api.RestoreList{}
if err = kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting restores during graceful deletion")

if err = kbClient.List(ctx, list, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting resources during graceful deletion")
}

for i := range restoreList.Items {
if err = kbClient.Delete(ctx, &restoreList.Items[i]); err != nil {
var objectsToDelete []kbclient.Object
items := reflect.ValueOf(list).Elem().FieldByName("Items")

for i := 0; i < items.Len(); i++ {
item := items.Index(i).Addr().Interface()
// Type assertion to cast item to the appropriate type
switch typedItem := item.(type) {
case *velerov1api.Restore:
objectsToDelete = append(objectsToDelete, typedItem)
case *velerov2alpha1api.DataUpload:
objectsToDelete = append(objectsToDelete, typedItem)
case *velerov2alpha1api.DataDownload:
objectsToDelete = append(objectsToDelete, typedItem)
default:
return errors.New("Unsupported resource type")
}
}

// Delete collected resources in a batch
for _, resource := range objectsToDelete {
if err = kbClient.Delete(ctx, resource); err != nil {
if apierrors.IsNotFound(err) {
continue
}
return errors.Wrap(err, "Error deleting restores during graceful deletion")
return errors.Wrap(err, "Error deleting resources during graceful deletion")
}
}
return err
}

func waitDeletingResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Wait for the deletion of all the restores within a specified time frame
err = wait.PollImmediate(time.Second, gracefulDeletionMaximumDuration, func() (bool, error) {
err := wait.PollImmediate(time.Second, gracefulDeletionMaximumDuration, func() (bool, error) {
restoreList := &velerov1api.RestoreList{}
if errList := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}

if len(restoreList.Items) > 0 {
dataUploadList := &velerov2alpha1api.DataUploadList{}
if errList := kbClient.List(ctx, dataUploadList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}

dataDownloadList := &velerov2alpha1api.DataDownloadList{}
if errList := kbClient.List(ctx, dataDownloadList, &kbclient.ListOptions{Namespace: namespace}); errList != nil {
return false, errList
}

if len(restoreList.Items)+len(dataUploadList.Items)+len(dataDownloadList.Items) > 0 {
fmt.Print(".")
return false, nil
} else {
Expand All @@ -293,10 +374,10 @@ func gracefullyDeleteRestore(ctx context.Context, kbClient kbclient.Client, name
return err
}

func forcedlyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namespace string) error {
func forcedlyDeleteResources(ctx context.Context, kbClient kbclient.Client, namespace string) error {
// Delete velero deployment first in case:
// 1. finalizers will be added back by restore controller after they are removed at next step;
// 2. new restores attached with finalizer will be created by restore controller after we remove all the restores' finalizer at next step;
// 1. finalizers will be added back by resources related controller after they are removed at next step;
// 2. new resources attached with finalizer will be created by controller after we remove all the resources' finalizer at next step;
deploy := &appsv1api.Deployment{
ObjectMeta: metav1.ObjectMeta{
Namespace: "velero",
Expand Down Expand Up @@ -330,23 +411,56 @@ func forcedlyDeleteRestore(ctx context.Context, kbClient kbclient.Client, namesp
if err != nil {
return errors.Wrap(err, "Error deleting velero deployment during force deletion")
}
return removeResourcesFinalizer(ctx, kbClient, namespace)
}

// Remove all the restores' finalizer so they can be deleted during the deletion of velero namespace.
restoreList := &velerov1api.RestoreList{}
if err := kbClient.List(ctx, restoreList, &kbclient.ListOptions{Namespace: namespace}); err != nil {
return errors.Wrap(err, "Error getting restores during force deletion")
func removeResourcesFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string) error {
for i := range resToDelete {
if err := removeResourceFinalizer(ctx, kbClient, namespace, resToDelete[i]); err != nil {
return err
}
}
return nil
}

for i := range restoreList.Items {
if controllerutil.ContainsFinalizer(&restoreList.Items[i], controller.ExternalResourcesFinalizer) {
update := &restoreList.Items[i]
original := update.DeepCopy()
controllerutil.RemoveFinalizer(update, controller.ExternalResourcesFinalizer)
if err := kubeutil.PatchResource(original, update, kbClient); err != nil {
return errors.Wrap(err, "Error removing restore finalizer during force deletion")
}
func removeResourceFinalizer(ctx context.Context, kbClient kbclient.Client, namespace string, resourceList kbclient.ObjectList) error {
listOptions := &kbclient.ListOptions{Namespace: namespace}
if err := kbClient.List(ctx, resourceList, listOptions); err != nil {
return errors.Wrap(err, fmt.Sprintf("Error getting resources of type %T during force deletion", resourceList))
}

items := reflect.ValueOf(resourceList).Elem().FieldByName("Items")
var err error
for i := 0; i < items.Len(); i++ {
item := items.Index(i).Addr().Interface()
// Type assertion to cast item to the appropriate type
switch typedItem := item.(type) {
case *velerov1api.Restore:
err = removeFinalizerForObject(typedItem, controller.ExternalResourcesFinalizer, kbClient)
case *velerov2alpha1api.DataUpload:
err = removeFinalizerForObject(typedItem, controller.DataUploadDownloadFinalizer, kbClient)
case *velerov2alpha1api.DataDownload:
err = removeFinalizerForObject(typedItem, controller.DataUploadDownloadFinalizer, kbClient)
default:
err = errors.Errorf("Unsupported resource type %T", typedItem)
}
if err != nil {
return err
}
}

return nil
}

func removeFinalizerForObject(obj kbclient.Object, finalizer string, kbClient kbclient.Client) error {
if controllerutil.ContainsFinalizer(obj, finalizer) {
update := obj.DeepCopyObject().(kbclient.Object)
original := obj.DeepCopyObject().(kbclient.Object)

controllerutil.RemoveFinalizer(update, finalizer)
if err := kubeutil.PatchResource(original, update, kbClient); err != nil {
return errors.Wrap(err, fmt.Sprintf("Error removing finalizer %q during force deletion", finalizer))
}
}
return nil
}
10 changes: 5 additions & 5 deletions pkg/controller/data_download_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
// Add finalizer
// Logic for clear resources when datadownload been deleted
if dd.DeletionTimestamp.IsZero() { // add finalizer for all cr at beginning
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) {
if !isDataDownloadInFinalState(dd) && !controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
succeeded, err := r.exclusiveUpdateDataDownload(ctx, dd, func(dd *velerov2alpha1api.DataDownload) {
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
})
if err != nil {
log.Errorf("failed to add finalizer with error %s for %s/%s", err.Error(), dd.Namespace, dd.Name)
Expand All @@ -135,7 +135,7 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
return ctrl.Result{Requeue: true}, nil
}
}
} else if controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
} else if controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) && !dd.Spec.Cancel && !isDataDownloadInFinalState(dd) {
// when delete cr we need to clear up internal resources created by Velero, here we use the cancel mechanism
// to help clear up resources instead of clear them directly in case of some conflict with Expose action
if err := UpdateDataDownloadWithRetry(ctx, r.client, req.NamespacedName, log, func(dataDownload *velerov2alpha1api.DataDownload) {
Expand Down Expand Up @@ -309,9 +309,9 @@ func (r *DataDownloadReconciler) Reconcile(ctx context.Context, req ctrl.Request
} else {
// put the finilizer remove action here for all cr will goes to the final status, we could check finalizer and do remove action in final status
// instead of intermediate state
if isDataDownloadInFinalState(dd) && !dd.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(dd, dataUploadDownloadFinalizer) {
if isDataDownloadInFinalState(dd) && !dd.DeletionTimestamp.IsZero() && controllerutil.ContainsFinalizer(dd, DataUploadDownloadFinalizer) {
original := dd.DeepCopy()
controllerutil.RemoveFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.RemoveFinalizer(dd, DataUploadDownloadFinalizer)
if err := r.client.Patch(ctx, dd, client.MergeFrom(original)); err != nil {
log.WithError(err).Error("error to remove finalizer")
}
Expand Down
8 changes: 4 additions & 4 deletions pkg/controller/data_download_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -299,7 +299,7 @@ func TestDataDownloadReconcile(t *testing.T) {
name: "dataDownload with enabled cancel",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseAccepted).Result()
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
Expand All @@ -312,12 +312,12 @@ func TestDataDownloadReconcile(t *testing.T) {
name: "dataDownload with remove finalizer and should not be retrieved",
dd: func() *velerov2alpha1api.DataDownload {
dd := dataDownloadBuilder().Phase(velerov2alpha1api.DataDownloadPhaseFailed).Cancel(true).Result()
controllerutil.AddFinalizer(dd, dataUploadDownloadFinalizer)
controllerutil.AddFinalizer(dd, DataUploadDownloadFinalizer)
dd.DeletionTimestamp = &metav1.Time{Time: time.Now()}
return dd
}(),
checkFunc: func(dd velerov2alpha1api.DataDownload) bool {
return !controllerutil.ContainsFinalizer(&dd, dataUploadDownloadFinalizer)
return !controllerutil.ContainsFinalizer(&dd, DataUploadDownloadFinalizer)
},
},
}
Expand Down Expand Up @@ -428,7 +428,7 @@ func TestDataDownloadReconcile(t *testing.T) {
assert.Contains(t, dd.Status.Message, test.expectedStatusMsg)
}
if test.dd.Namespace == velerov1api.DefaultNamespace {
if controllerutil.ContainsFinalizer(test.dd, dataUploadDownloadFinalizer) {
if controllerutil.ContainsFinalizer(test.dd, DataUploadDownloadFinalizer) {
assert.True(t, true, apierrors.IsNotFound(err))
} else {
require.Nil(t, err)
Expand Down
Loading
Loading