Skip to content

Commit

Permalink
Do not create any instance manager pods if one already exists
Browse files Browse the repository at this point in the history
An extra v2 instance manager requires an additional CPU core and 2 GiB of memory.
If the resources are insufficient, the instance manager pod will be recreated and
retried. To minimize unnecessary retries, do not create any new instance manager
pods if one is already running.

Longhorn 6001
Longhorn 8456

Signed-off-by: Derek Su <derek.su@suse.com>
  • Loading branch information
derekbit committed Apr 27, 2024
1 parent 4fe5c79 commit 9d96d0f
Show file tree
Hide file tree
Showing 2 changed files with 49 additions and 7 deletions.
9 changes: 9 additions & 0 deletions controller/instance_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -532,6 +532,15 @@ func (imc *InstanceManagerController) handlePod(im *longhorn.InstanceManager) er
return err
}

// An instance manager pod for a v2 volume needs to consume huge pages, and the disks managed by
// one pod cannot be managed by another pod. Therefore, if an instance manager pod is already running on a node,
// an extra instance manager pod for v2 volumes should not be created.
if types.IsDataEngineV2(im.Spec.DataEngine) {
if im.Spec.DesireState == longhorn.InstanceManagerStateStopped {
return nil
}
}

if err := imc.createInstanceManagerPod(im); err != nil {
return err
}
Expand Down
47 changes: 40 additions & 7 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -1122,13 +1122,30 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error {
if err := nc.ds.DeleteInstanceManager(im.Name); err != nil {
return err
}

if types.IsDataEngineV2(dataEngine) {
im, err := nc.ds.GetDefaultInstanceManagerByNodeRO(nc.controllerID, dataEngine)
if err != nil {
return errors.Wrap(err, "failed to get default instance manager for v2 data engine")
}

if im.Spec.DesireState != longhorn.InstanceManagerStateRunning {
nc.logger.Infof("Updating default instance manager %v to running state for v2 data engine", im.Name)
im.Spec.DesireState = longhorn.InstanceManagerStateRunning
if _, err := nc.ds.UpdateInstanceManager(im); err != nil {
return errors.Wrap(err, "failed to update default instance manager for v2 data engine")
}
}
}
}
}
if !defaultInstanceManagerCreated && imType == longhorn.InstanceManagerTypeAllInOne {
imName, err := types.GetInstanceManagerName(imType, node.Name, defaultInstanceManagerImage, string(dataEngine))
if err != nil {
return err
}

desireState := longhorn.InstanceManagerStateRunning
if types.IsDataEngineV2(dataEngine) {
disabled, err := nc.ds.IsV2DataEngineDisabledForNode(node.Name)
if err != nil {
Expand All @@ -1137,10 +1154,25 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error {
if disabled {
continue
}

ims, err := nc.ds.ListInstanceManagersBySelectorRO(nc.controllerID, "", longhorn.InstanceManagerTypeAllInOne, dataEngine)
if err != nil {
return errors.Wrap(err, "failed to list instance managers for v2 data engine")
}
foundRunningInstanceManager := false
for _, im := range ims {
if im.Status.CurrentState == longhorn.InstanceManagerStateRunning {
foundRunningInstanceManager = true
break
}
}
if foundRunningInstanceManager {
desireState = longhorn.InstanceManagerStateStopped
}
}

log.Infof("Creating default instance manager %v, image: %v, dataEngine: %v", imName, defaultInstanceManagerImage, dataEngine)
if _, err := nc.createInstanceManager(node, imName, defaultInstanceManagerImage, imType, dataEngine); err != nil {
log.Infof("Creating default instance manager %v, image: %v, dataEngine: %v, desireState: %v", imName, defaultInstanceManagerImage, dataEngine, desireState)
if _, err := nc.createInstanceManager(node, imName, defaultInstanceManagerImage, imType, dataEngine, desireState); err != nil {
return err
}
}
Expand All @@ -1149,16 +1181,17 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error {
return nil
}

func (nc *NodeController) createInstanceManager(node *longhorn.Node, imName, imImage string, imType longhorn.InstanceManagerType, dataEngine longhorn.DataEngineType) (*longhorn.InstanceManager, error) {
func (nc *NodeController) createInstanceManager(node *longhorn.Node, imName, imImage string, imType longhorn.InstanceManagerType, dataEngine longhorn.DataEngineType, desireState longhorn.InstanceManagerState) (*longhorn.InstanceManager, error) {
instanceManager := &longhorn.InstanceManager{
ObjectMeta: metav1.ObjectMeta{
Name: imName,
},
Spec: longhorn.InstanceManagerSpec{
Image: imImage,
NodeID: node.Name,
Type: imType,
DataEngine: dataEngine,
Image: imImage,
NodeID: node.Name,
Type: imType,
DataEngine: dataEngine,
DesireState: desireState,
},
}

Expand Down

0 comments on commit 9d96d0f

Please sign in to comment.