From 1abde1f45a719af0fbcfed12a06a5abf65df6638 Mon Sep 17 00:00:00 2001 From: Derek Su Date: Tue, 27 Feb 2024 05:57:10 +0000 Subject: [PATCH] Do not create any instance manager pods for v2 data engine if one already exists An extra instance manager pod for v2 data engine requires an additional CPU core and 2 GiB of memory. If the resources are insufficient, the instance manager pod will be recreated and retried. To minimize unnecessary retries, do not create any new instance manager pods if one is already running. Longhorn 8456 Signed-off-by: Derek Su --- controller/instance_manager_controller.go | 9 +++++ controller/node_controller.go | 47 +++++++++++++++++++---- 2 files changed, 49 insertions(+), 7 deletions(-) diff --git a/controller/instance_manager_controller.go b/controller/instance_manager_controller.go index 10e568f109..84190737af 100644 --- a/controller/instance_manager_controller.go +++ b/controller/instance_manager_controller.go @@ -537,6 +537,15 @@ func (imc *InstanceManagerController) handlePod(im *longhorn.InstanceManager) er return err } + // An instance manager pod for a v2 volume needs to consume huge pages, and disks managed by the + // pod cannot be managed by another pod. Therefore, if an instance manager pod is running on a node, + // an extra instance manager pod for v2 volume should not be created. 
+ if types.IsDataEngineV2(im.Spec.DataEngine) { + if im.Spec.DesireState == longhorn.InstanceManagerStateStopped { + return nil + } + } + if err := imc.createInstanceManagerPod(im); err != nil { return err } diff --git a/controller/node_controller.go b/controller/node_controller.go index 85c9961ed2..14e5996386 100644 --- a/controller/node_controller.go +++ b/controller/node_controller.go @@ -1126,6 +1126,21 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error { if err := nc.ds.DeleteInstanceManager(im.Name); err != nil { return err } + + if types.IsDataEngineV2(dataEngine) { + im, err := nc.ds.GetDefaultInstanceManagerByNodeRO(nc.controllerID, dataEngine) + if err != nil { + return errors.Wrap(err, "failed to get default instance manager for v2 data engine") + } + + if im.Spec.DesireState != longhorn.InstanceManagerStateRunning { + nc.logger.Infof("Updating default instance manager %v to running state for v2 data engine", im.Name) + im.Spec.DesireState = longhorn.InstanceManagerStateRunning + if _, err := nc.ds.UpdateInstanceManager(im); err != nil { + return errors.Wrap(err, "failed to update default instance manager for v2 data engine") + } + } + } } } if !defaultInstanceManagerCreated && imType == longhorn.InstanceManagerTypeAllInOne { @@ -1133,6 +1148,8 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error { if err != nil { return err } + + desireState := longhorn.InstanceManagerStateRunning if types.IsDataEngineV2(dataEngine) { disabled, err := nc.ds.IsV2DataEngineDisabledForNode(node.Name) if err != nil { @@ -1141,10 +1158,25 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error { if disabled { continue } + + ims, err := nc.ds.ListInstanceManagersBySelectorRO(nc.controllerID, "", longhorn.InstanceManagerTypeAllInOne, dataEngine) + if err != nil { + return errors.Wrap(err, "failed to list instance managers for v2 data engine") + } + foundRunningInstanceManager := false + for _, im := range 
ims { + if im.Status.CurrentState == longhorn.InstanceManagerStateRunning { + foundRunningInstanceManager = true + break + } + } + if foundRunningInstanceManager { + desireState = longhorn.InstanceManagerStateStopped + } } - log.Infof("Creating default instance manager %v, image: %v, dataEngine: %v", imName, defaultInstanceManagerImage, dataEngine) - if _, err := nc.createInstanceManager(node, imName, defaultInstanceManagerImage, imType, dataEngine); err != nil { + log.Infof("Creating default instance manager %v, image: %v, dataEngine: %v, desireState: %v", imName, defaultInstanceManagerImage, dataEngine, desireState) + if _, err := nc.createInstanceManager(node, imName, defaultInstanceManagerImage, imType, dataEngine, desireState); err != nil { return err } } @@ -1153,16 +1185,17 @@ func (nc *NodeController) syncInstanceManagers(node *longhorn.Node) error { return nil } -func (nc *NodeController) createInstanceManager(node *longhorn.Node, imName, imImage string, imType longhorn.InstanceManagerType, dataEngine longhorn.DataEngineType) (*longhorn.InstanceManager, error) { +func (nc *NodeController) createInstanceManager(node *longhorn.Node, imName, imImage string, imType longhorn.InstanceManagerType, dataEngine longhorn.DataEngineType, desireState longhorn.InstanceManagerState) (*longhorn.InstanceManager, error) { instanceManager := &longhorn.InstanceManager{ ObjectMeta: metav1.ObjectMeta{ Name: imName, }, Spec: longhorn.InstanceManagerSpec{ - Image: imImage, - NodeID: node.Name, - Type: imType, - DataEngine: dataEngine, + Image: imImage, + NodeID: node.Name, + Type: imType, + DataEngine: dataEngine, + DesireState: desireState, }, }