Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fix migration stuck due to snapshot chains in old engine and new engine mismatch #2035

Merged
merged 2 commits into from
Jun 29, 2023
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 18 additions & 13 deletions controller/volume_attachment_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -352,7 +352,7 @@ func (vac *VolumeAttachmentController) handleVolumeMigrationConfirmation(va *lon
break
}
}
migratingEngineSnapSynced, err := vac.checkMigratingEngineSyncSnapshots(vol)
migratingEngineSnapSynced, err := vac.checkMigratingEngineSyncSnapshots(va, vol)
if err != nil {
vac.logger.WithError(err).Warn("Failed to check migrating engine snapshot status")
return
Expand All @@ -364,7 +364,7 @@ func (vac *VolumeAttachmentController) handleVolumeMigrationConfirmation(va *lon
}
}

func (vac *VolumeAttachmentController) checkMigratingEngineSyncSnapshots(vol *longhorn.Volume) (bool, error) {
func (vac *VolumeAttachmentController) checkMigratingEngineSyncSnapshots(va *longhorn.VolumeAttachment, vol *longhorn.Volume) (bool, error) {
engines, err := vac.ds.ListVolumeEngines(vol.Name)
if err != nil {
return false, err
Expand All @@ -390,26 +390,31 @@ func (vac *VolumeAttachmentController) checkMigratingEngineSyncSnapshots(vol *lo
return false, fmt.Errorf("failed to find the migrating engine for volume %v", vol.Name)
}

if !reflect.DeepEqual(oldEngine.Status.Snapshots, migratingEngine.Status.Snapshots) {
if !hasSameKeys(oldEngine.Status.Snapshots, migratingEngine.Status.Snapshots) {
vac.logger.Infof("Volume migration (%v) is in progress for synchronizing snapshots", vol.Name)

// there is a chance that synchronizing engine snapshots does not finish and volume attachment controller will not receive changes anymore
// check volumeAttachments again to ensure that migration will be finished
volumeAttachments, err := vac.ds.ListLonghornVolumeAttachmentByVolumeRO(migratingEngine.Spec.VolumeName)
if err != nil {
utilruntime.HandleError(fmt.Errorf("failed to list Longhorn VolumeAttachment of volume %v: %v", migratingEngine.Name, err))
return false, err
}

for _, va := range volumeAttachments {
vac.enqueueVolumeAttachmentAfter(va, 10*time.Second)
}
vac.enqueueVolumeAttachmentAfter(va, 10*time.Second)
return false, nil
}

return true, nil
}

// hasSameKeys reports whether map1 and map2 hold exactly the same set of keys.
// Only the keys are compared; the SnapshotInfo values are never inspected.
func hasSameKeys(map1, map2 map[string]*longhorn.SnapshotInfo) bool {
	// With equal sizes, every key of map1 being present in map2 implies the
	// two key sets are identical, so a one-directional scan is sufficient.
	sameSize := len(map1) == len(map2)
	if !sameSize {
		return false
	}

	for name := range map1 {
		_, present := map2[name]
		if present {
			continue
		}
		return false
	}

	return true
}

func (vac *VolumeAttachmentController) handleVolumeMigrationRollback(va *longhorn.VolumeAttachment, vol *longhorn.Volume) {
// Nothing to rollback
if vol.Spec.MigrationNodeID == "" {
Expand Down