Skip to content

Commit

Permalink
Do not create any instance manager pods for v2 data engine if one is …
Browse files Browse the repository at this point in the history
…already running

An extra instance manager pod for the v2 data engine requires an additional CPU
core and 2 GiB of memory. If these resources are insufficient, the instance manager
pod is recreated and retried repeatedly. To minimize unnecessary retries, do not
create any new instance manager pods if one is already running.

Longhorn 8456

Signed-off-by: Derek Su <derek.su@suse.com>
  • Loading branch information
derekbit committed May 16, 2024
1 parent 6c7ab5a commit 1abde1f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 7 deletions.
9 changes: 9 additions & 0 deletions controller/instance_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -537,6 +537,15 @@ func (imc *InstanceManagerController) handlePod(im *longhorn.InstanceManager) er
return err
}

// An instance manager pod for v2 volumes needs to consume huge pages, and the disks managed by
// one pod cannot be managed by another pod. Therefore, if an instance manager pod is already running
// on a node, an extra instance manager pod for v2 volumes should not be created.
if types.IsDataEngineV2(im.Spec.DataEngine) {
if im.Spec.DesireState == longhorn.InstanceManagerStateStopped {
return nil
}
}

if err := imc.createInstanceManagerPod(im); err != nil {
return err
}
Expand Down
47 changes: 40 additions & 7 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1126,13 +1126,30 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error {
if err := nc.ds.DeleteInstanceManager(im.Name); err != nil {
return err
}

if types.IsDataEngineV2(dataEngine) {
im, err := nc.ds.GetDefaultInstanceManagerByNodeRO(nc.controllerID, dataEngine)
if err != nil {
return errors.Wrap(err, "failed to get default instance manager for v2 data engine")
}

if im.Spec.DesireState != longhorn.InstanceManagerStateRunning {
nc.logger.Infof("Updating default instance manager %v to running state for v2 data engine", im.Name)
im.Spec.DesireState = longhorn.InstanceManagerStateRunning
if _, err := nc.ds.UpdateInstanceManager(im); err != nil {
return errors.Wrap(err, "failed to update default instance manager for v2 data engine")
}
}
}
}
}
if !defaultInstanceManagerCreated && imType == longhorn.InstanceManagerTypeAllInOne {
imName, err := types.GetInstanceManagerName(imType, node.Name, defaultInstanceManagerImage, string(dataEngine))
if err != nil {
return err
}

desireState := longhorn.InstanceManagerStateRunning
if types.IsDataEngineV2(dataEngine) {
disabled, err := nc.ds.IsV2DataEngineDisabledForNode(node.Name)
if err != nil {
Expand All @@ -1141,10 +1158,25 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error {
if disabled {
continue
}

ims, err := nc.ds.ListInstanceManagersBySelectorRO(nc.controllerID, "", longhorn.InstanceManagerTypeAllInOne, dataEngine)
if err != nil {
return errors.Wrap(err, "failed to list instance managers for v2 data engine")
}
foundRunningInstanceManager := false
for _, im := range ims {
if im.Status.CurrentState == longhorn.InstanceManagerStateRunning {
foundRunningInstanceManager = true
break
}
}
if foundRunningInstanceManager {
desireState = longhorn.InstanceManagerStateStopped
}
}

log.Infof("Creating default instance manager %v, image: %v, dataEngine: %v", imName, defaultInstanceManagerImage, dataEngine)
if _, err := nc.createInstanceManager(node, imName, defaultInstanceManagerImage, imType, dataEngine); err != nil {
log.Infof("Creating default instance manager %v, image: %v, dataEngine: %v, desireState: %v", imName, defaultInstanceManagerImage, dataEngine, desireState)
if _, err := nc.createInstanceManager(node, imName, defaultInstanceManagerImage, imType, dataEngine, desireState); err != nil {
return err
}
}
Expand All @@ -1153,16 +1185,17 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error {
return nil
}

func (nc *NodeController) createInstanceManager(node *longhorn.Node, imName, imImage string, imType longhorn.InstanceManagerType, dataEngine longhorn.DataEngineType) (*longhorn.InstanceManager, error) {
func (nc *NodeController) createInstanceManager(node *longhorn.Node, imName, imImage string, imType longhorn.InstanceManagerType, dataEngine longhorn.DataEngineType, desireState longhorn.InstanceManagerState) (*longhorn.InstanceManager, error) {
instanceManager := &longhorn.InstanceManager{
ObjectMeta: metav1.ObjectMeta{
Name: imName,
},
Spec: longhorn.InstanceManagerSpec{
Image: imImage,
NodeID: node.Name,
Type: imType,
DataEngine: dataEngine,
Image: imImage,
NodeID: node.Name,
Type: imType,
DataEngine: dataEngine,
DesireState: desireState,
},
}

Expand Down

0 comments on commit 1abde1f

Please sign in to comment.