From ec84e71fbdc02027d0178d0a22030ce0bf5ebc78 Mon Sep 17 00:00:00 2001 From: Phan Le Date: Fri, 31 Jan 2025 15:04:32 -0800 Subject: [PATCH] fix: do not panic when looking up BackupVolume of non-existing Volume 1. One Volume can have multiple BackupVolumes, need to check all BackupVolumes when finding backups 2. It is totally valid to look up BackupVolumes of non-existing Volume. A deleted Volume can still have Backups in the cluster. Do not panic Signed-off-by: Phan Le --- api/backup.go | 6 +-- csi/controller_server.go | 100 +++++++++++++++++++++------------------ manager/engine.go | 4 +- 3 files changed, 59 insertions(+), 51 deletions(-) diff --git a/api/backup.go b/api/backup.go index 4abea91b82..a96e196305 100644 --- a/api/backup.go +++ b/api/backup.go @@ -317,7 +317,7 @@ func (s *Server) BackupGet(w http.ResponseWriter, req *http.Request) error { } backupVolumeName := mux.Vars(req)["backupVolumeName"] - backup, err := s.m.GetBackup(input.Name, backupVolumeName) + backup, err := s.m.GetBackup(input.Name) if err != nil { return errors.Wrapf(err, "failed to get backup '%v' of volume '%v'", input.Name, backupVolumeName) } @@ -344,13 +344,13 @@ func (s *Server) BackupDelete(w http.ResponseWriter, req *http.Request) error { backupVolumeName := mux.Vars(req)["backupVolumeName"] - backup, err := s.m.GetBackup(input.Name, backupVolumeName) + backup, err := s.m.GetBackup(input.Name) if err != nil { logrus.WithError(err).Warnf("failed to get backup '%v' of volume '%v'", input.Name, backupVolumeName) } if backup != nil { - if err := s.m.DeleteBackup(input.Name, backupVolumeName); err != nil { + if err := s.m.DeleteBackup(input.Name); err != nil { return errors.Wrapf(err, "failed to delete backup '%v' of volume '%v'", input.Name, backupVolumeName) } } diff --git a/csi/controller_server.go b/csi/controller_server.go index 8d6e0f3369..722409dd67 100644 --- a/csi/controller_server.go +++ b/csi/controller_server.go @@ -126,17 +126,25 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol if id == "" { return nil, status.Errorf(codes.NotFound, "volume source snapshot %v is not found", snapshot.SnapshotId) } - backupVolume, backupName := sourceVolumeName, id - bv, err := cs.getBackupVolume(backupVolume) + backupName := id + bvs, err := cs.getBackupVolumes(sourceVolumeName) if err != nil { - return nil, status.Errorf(codes.Internal, "failed to retrieve backup volume %s: %v", backupVolume, err) + return nil, status.Errorf(codes.Internal, "failed to retrieve backup volumes of volume %v: %v", sourceVolumeName, err) } - if bv == nil { - return nil, status.Errorf(codes.NotFound, "failed to restore CSI snapshot %s backup volume %s unavailable", snapshot.SnapshotId, backupVolume) + if len(bvs) == 0 { + return nil, status.Errorf(codes.NotFound, "failed to restore CSI snapshot %v of volume %v: there is no backup volume", snapshot.SnapshotId, sourceVolumeName) } - - backup, err := cs.apiClient.BackupVolume.ActionBackupGet(bv, &longhornclient.BackupInput{Name: backupName}) - if err != nil { + var backup *longhornclient.Backup + for _, bv := range bvs { + backup, err = cs.apiClient.BackupVolume.ActionBackupGet(bv, &longhornclient.BackupInput{Name: backupName}) + if err != nil { + return nil, status.Errorf(codes.NotFound, "failed to restore CSI snapshot %v : failed to get backup %v: %v", snapshot.SnapshotId, backupName, err) + } + if backup != nil { + break + } + } + if backup == nil { return nil, status.Errorf(codes.NotFound, "failed to restore CSI snapshot %v backup %s unavailable", snapshot.SnapshotId, backupName) } @@ -268,29 +276,22 @@ func (cs *ControllerServer) CreateVolume(ctx context.Context, req *csi.CreateVol }, nil } -func (cs *ControllerServer) getBackupVolume(volumeName string) (*longhornclient.BackupVolume, error) { +func (cs *ControllerServer) getBackupVolumes(volumeName string) ([]*longhornclient.BackupVolume, error) { + bvs := []*longhornclient.BackupVolume{} log := cs.log.WithFields(logrus.Fields{"function": "getBackupVolume"}) list, err := cs.apiClient.BackupVolume.List(&longhornclient.ListOpts{}) if err != nil { return nil, err } - if len(list.Data) == 0 { - return nil, nil - } - - vol, err := cs.apiClient.Volume.ById(volumeName) - if err != nil { - return nil, errors.Wrapf(err, "getBackupVolume: fail to get source volume %v", volumeName) - } - for _, bv := range list.Data { - if bv.BackupTargetName == vol.BackupTargetName && bv.VolumeName == volumeName { - return &bv, nil + if bv.VolumeName == volumeName { + bvs = append(bvs, &bv) } } - log.Debugf("cannot find backup volume with backup target %s and volume %s", vol.BackupTargetName, volumeName) - // return no error if no backup volume found as backup is not completed and backup volume is not created yet. - return nil, nil + if len(bvs) == 0 { + log.Debugf("cannot find backup volume for volume %s", volumeName) + } + return bvs, nil } func (cs *ControllerServer) checkAndPrepareBackingImage(volumeName, backingImageName string, volumeParameters map[string]string, dataEngine string) error { @@ -1032,7 +1033,7 @@ func (cs *ControllerServer) DeleteSnapshot(ctx context.Context, req *csi.DeleteS if id == "" { return nil, status.Errorf(codes.NotFound, "volume source snapshot %v is not found", snapshotID) } - if err := cs.cleanupBackupVolume(sourceVolumeName, id); err != nil { + if err := cs.cleanupBackup(sourceVolumeName, id); err != nil { return nil, status.Error(codes.Internal, err.Error()) } } @@ -1069,19 +1070,28 @@ func (cs *ControllerServer) cleanupSnapshot(sourceVolumeName, id string) error { return nil } -func (cs *ControllerServer) cleanupBackupVolume(sourceVolumeName, id string) error { +func (cs *ControllerServer) cleanupBackup(sourceVolumeName, id string) error { backupVolumeName, backupName := sourceVolumeName, id - backupVolume, err := cs.getBackupVolume(backupVolumeName) + backupVolumes, err := cs.getBackupVolumes(backupVolumeName) if err != nil { return err } - if backupVolume != nil && backupVolume.Name != "" { - if _, err = cs.apiClient.BackupVolume.ActionBackupDelete(backupVolume, &longhornclient.BackupInput{ - Name: backupName, - }); err != nil { - return err + for _, bv := range backupVolumes { + // The BackupDelete API actually doesn't care about bv when deleting a backup. + // The bv is there for backward compatibility. + // Any bv will work + if bv.Name != "" { + _, err = cs.apiClient.BackupVolume.ActionBackupDelete(bv, &longhornclient.BackupInput{ + Name: backupName, + }) + if err != nil { + return err + } else { + return nil + } } } + return nil } @@ -1285,30 +1295,28 @@ func (cs *ControllerServer) waitForBackupControllerSync(volumeName, snapshotName // snapshot. It does not rely on volume.BackupStatus because volume.BackupStatus.Snapshot is only set if the backup // successfully initializes and after the backup monitor has had a chance to sync. We want to retrieve the backup // (and in particular, its name) as quickly as possible and in any state. +// Note: if the backup doesn't exist it will return nil for the backup and nil for the error func (cs *ControllerServer) getBackup(volumeName, snapshotName string) (*longhornclient.Backup, error) { - // Successfully returns an empty BackupVolume with volumeName even if one doesn't exist. - backupVolume, err := cs.getBackupVolume(volumeName) - if err != nil { - return nil, status.Error(codes.Internal, err.Error()) - } - if backupVolume == nil { - return nil, nil - } - - backupListOutput, err := cs.apiClient.BackupVolume.ActionBackupList(backupVolume) + backupVolumes, err := cs.getBackupVolumes(volumeName) if err != nil { return nil, status.Error(codes.Internal, err.Error()) } var backup *longhornclient.Backup - for _, b := range backupListOutput.Data { - if b.SnapshotName == snapshotName { - backup = &b - break + for _, bv := range backupVolumes { + backupListOutput, err := cs.apiClient.BackupVolume.ActionBackupList(bv) + if err != nil { + return nil, status.Error(codes.Internal, err.Error()) + } + for _, b := range backupListOutput.Data { + if b.SnapshotName == snapshotName { + backup = &b + return backup, nil + } } } - return backup, nil + return nil, nil } func (cs *ControllerServer) validateVolumeCapabilities(volumeCaps []*csi.VolumeCapability) error { diff --git a/manager/engine.go b/manager/engine.go index 1157e266a3..58ff97f8c8 100644 --- a/manager/engine.go +++ b/manager/engine.go @@ -527,10 +527,10 @@ func (m *VolumeManager) ListBackupsForBackupVolumeSorted(backupVolumeName string return backups, nil } -func (m *VolumeManager) GetBackup(backupName, volumeName string) (*longhorn.Backup, error) { +func (m *VolumeManager) GetBackup(backupName string) (*longhorn.Backup, error) { return m.ds.GetBackupRO(backupName) } -func (m *VolumeManager) DeleteBackup(backupName, volumeName string) error { +func (m *VolumeManager) DeleteBackup(backupName string) error { return m.ds.DeleteBackup(backupName) }