Skip to content

Commit

Permalink
Try to reattach volume if volume is detached or engine is dead unexpe…
Browse files Browse the repository at this point in the history
…ctedly

Reattach the volume if
- the volume is detached unexpectedly and there are still healthy replicas
- the engine dies unexpectedly and there are still healthy replicas while the volume is not attached

Longhorn 6155

Signed-off-by: Derek Su <derek.su@suse.com>
  • Loading branch information
derekbit committed Jun 24, 2023
1 parent ec130dc commit a74ad4a
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 15 deletions.
6 changes: 3 additions & 3 deletions constant/events.go
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ const (
EventReasonUnknown = "Unknown"
EventReasonFailedEviction = "FailedEviction"

EventReasonDetachedUnexpectly = "DetachedUnexpectly"
EventReasonRemount = "Remount"
EventReasonAutoSalvaged = "AutoSalvaged"
EventReasonDetachedUnexpectedly = "DetachedUnexpectedly"
EventReasonRemount = "Remount"
EventReasonAutoSalvaged = "AutoSalvaged"

EventReasonFetching = "Fetching"
EventReasonFetched = "Fetched"
Expand Down
26 changes: 15 additions & 11 deletions controller/volume_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1330,19 +1330,23 @@ func (c *VolumeController) ReconcileVolumeState(v *longhorn.Volume, es map[strin
return nil
}

// reattach volume if detached unexpected and there are still healthy replicas
if e.Status.CurrentState == longhorn.InstanceStateError && v.Status.CurrentNodeID != "" {
log.Warn("Reattaching the volume since engine of volume dead unexpectedly")
msg := fmt.Sprintf("Engine of volume %v dead unexpectedly, reattach the volume", v.Name)
c.eventRecorder.Event(v, corev1.EventTypeWarning, constant.EventReasonDetachedUnexpectly, msg)
e.Spec.LogRequested = true
for _, r := range rs {
if r.Status.CurrentState == longhorn.InstanceStateRunning {
r.Spec.LogRequested = true
rs[r.Name] = r
// Reattach volume if
// - volume is detached unexpectedly and there are still healthy replicas
// - engine dead unexpectedly and there are still healthy replicas when the volume is not attached
if e.Status.CurrentState == longhorn.InstanceStateError {
if v.Status.CurrentNodeID != "" || (v.Spec.NodeID != "" && v.Status.CurrentNodeID == "" && v.Status.State != longhorn.VolumeStateAttached) {
log.Warn("Reattaching the volume since engine of volume dead unexpectedly")
msg := fmt.Sprintf("Engine of volume %v dead unexpectedly, reattach the volume", v.Name)
c.eventRecorder.Event(v, corev1.EventTypeWarning, constant.EventReasonDetachedUnexpectedly, msg)
e.Spec.LogRequested = true
for _, r := range rs {
if r.Status.CurrentState == longhorn.InstanceStateRunning {
r.Spec.LogRequested = true
rs[r.Name] = r
}
}
v.Status.Robustness = longhorn.VolumeRobustnessFaulted
}
v.Status.Robustness = longhorn.VolumeRobustnessFaulted
}
}

Expand Down
2 changes: 1 addition & 1 deletion manager/volume.go
Original file line number Diff line number Diff line change
Expand Up @@ -754,7 +754,7 @@ func (m *VolumeManager) EngineUpgrade(volumeName, image string) (v *longhorn.Vol
return nil, err
}
if image != defaultEngineImage {
return nil, fmt.Errorf("updrading to %v is not allowed. "+
return nil, fmt.Errorf("upgrading to %v is not allowed. "+
"Only allow to upgrade to the default engine image %v because the setting "+
"`Concurrent Automatic Engine Upgrade Per Node Limit` is greater than 0",
image, defaultEngineImage)
Expand Down

0 comments on commit a74ad4a

Please sign in to comment.