From 7ddb25a0f7c78afab4fca1e7de473e62b17ba956 Mon Sep 17 00:00:00 2001 From: Phan Le Date: Wed, 28 Jun 2023 15:23:57 -0700 Subject: [PATCH] Fix migration stuck due to snapshot chains in old engine and new engine mismatch Longhorn-6215 Signed-off-by: Phan Le --- controller/volume_attachment_controller.go | 31 +++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/controller/volume_attachment_controller.go b/controller/volume_attachment_controller.go index 86708a1194..27768b6d45 100644 --- a/controller/volume_attachment_controller.go +++ b/controller/volume_attachment_controller.go @@ -352,7 +352,7 @@ func (vac *VolumeAttachmentController) handleVolumeMigrationConfirmation(va *lon break } } - migratingEngineSnapSynced, err := vac.checkMigratingEngineSyncSnapshots(vol) + migratingEngineSnapSynced, err := vac.checkMigratingEngineSyncSnapshots(va, vol) if err != nil { vac.logger.WithError(err).Warn("Failed to check migrating engine snapshot status") return @@ -364,7 +364,7 @@ func (vac *VolumeAttachmentController) handleVolumeMigrationConfirmation(va *lon } } -func (vac *VolumeAttachmentController) checkMigratingEngineSyncSnapshots(vol *longhorn.Volume) (bool, error) { +func (vac *VolumeAttachmentController) checkMigratingEngineSyncSnapshots(va *longhorn.VolumeAttachment, vol *longhorn.Volume) (bool, error) { engines, err := vac.ds.ListVolumeEngines(vol.Name) if err != nil { return false, err @@ -390,26 +390,31 @@ func (vac *VolumeAttachmentController) checkMigratingEngineSyncSnapshots(vol *lo return false, fmt.Errorf("failed to find the migrating engine for volume %v", vol.Name) } - if !reflect.DeepEqual(oldEngine.Status.Snapshots, migratingEngine.Status.Snapshots) { + if !hasSameKeys(oldEngine.Status.Snapshots, migratingEngine.Status.Snapshots) { vac.logger.Infof("Volume migration (%v) is in progress for synchronizing snapshots", vol.Name) - // there is a chance that synchronizing engine snapshots does not finish and volume attachment controller will not receive changes anymore // check volumeAttachments again to ensure that migration will be finished - volumeAttachments, err := vac.ds.ListLonghornVolumeAttachmentByVolumeRO(migratingEngine.Spec.VolumeName) - if err != nil { - utilruntime.HandleError(fmt.Errorf("failed to list Longhorn VolumeAttachment of volume %v: %v", migratingEngine.Name, err)) - return false, err - } - - for _, va := range volumeAttachments { - vac.enqueueVolumeAttachmentAfter(va, 10*time.Second) - } + vac.enqueueVolumeAttachmentAfter(va, 10*time.Second) return false, nil } return true, nil } +func hasSameKeys(map1, map2 map[string]*longhorn.SnapshotInfo) bool { + if len(map1) != len(map2) { + return false + } + + for key := range map1 { + if _, ok := map2[key]; !ok { + return false + } + } + + return true +} + func (vac *VolumeAttachmentController) handleVolumeMigrationRollback(va *longhorn.VolumeAttachment, vol *longhorn.Volume) { // Nothing to rollback if vol.Spec.MigrationNodeID == "" {