From b11d5e4eb21c12793ef76473e7c7480178138731 Mon Sep 17 00:00:00 2001 From: Derek Su Date: Sun, 23 Jun 2024 16:43:27 +0000 Subject: [PATCH] Do not create any instance manager pods for v2 volumes if one already exists An extra instance manager pod for the v2 data engine requires an additional CPU core and 2 GiB of memory. If the resources are insufficient, the instance manager pod will be recreated and retried. To minimize unnecessary retries, do not create any new instance manager pods if one is already running. Longhorn 8456 Signed-off-by: Derek Su --- controller/instance_manager_controller.go | 34 +++++++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/controller/instance_manager_controller.go b/controller/instance_manager_controller.go index 10e568f109..87f40e3293 100644 --- a/controller/instance_manager_controller.go +++ b/controller/instance_manager_controller.go @@ -334,6 +334,21 @@ func (imc *InstanceManagerController) syncInstanceManager(key string) (err error } }() + // An instance manager pod for v2 volumes needs to consume huge pages, and disks managed by the + // pod cannot be managed by another pod. Therefore, if an instance manager pod is running on a node, + // an extra instance manager pod for v2 volumes should not be created.
+	if types.IsDataEngineV2(im.Spec.DataEngine) {
+		if im.Status.CurrentState == longhorn.InstanceManagerStateStopped {
+			syncable, err := imc.canProceedWithInstanceManagerSync(im)
+			if err != nil {
+				return err
+			}
+			if !syncable {
+				return nil
+			}
+		}
+	}
+
 	if err := imc.syncStatusWithPod(im); err != nil {
 		return err
 	}
@@ -365,6 +380,33 @@ func (imc *InstanceManagerController) syncInstanceManager(key string) (err error
 	return nil
 }
 
+// canProceedWithInstanceManagerSync reports whether the sync of im may continue.
+// It returns false if any instance manager returned by ListInstanceManagersByNodeRO
+// for im's node (all-in-one type, v2 data engine) is not in the stopped state.
+// Otherwise it returns whether im.Spec.Image equals the value of the
+// SettingNameDefaultInstanceManagerImage setting. If either datastore call
+// fails, it returns false together with that error.
+func (imc *InstanceManagerController) canProceedWithInstanceManagerSync(im *longhorn.InstanceManager) (bool, error) {
+	ims, err := imc.ds.ListInstanceManagersByNodeRO(im.Spec.NodeID, longhorn.InstanceManagerTypeAllInOne, longhorn.DataEngineTypeV2)
+	if err != nil {
+		return false, err
+	}
+	// The loop variable is deliberately not named im: im is read again after
+	// the loop and must keep referring to the instance manager being synced.
+	for _, other := range ims {
+		if other.Status.CurrentState != longhorn.InstanceManagerStateStopped {
+			return false, nil
+		}
+	}
+
+	defaultInstanceManagerImage, err := imc.ds.GetSettingValueExisted(types.SettingNameDefaultInstanceManagerImage)
+	if err != nil {
+		return false, err
+	}
+
+	return im.Spec.Image == defaultInstanceManagerImage, nil
+}
+
 // syncStatusWithPod updates the InstanceManager based on the pod current phase only,
 // regardless of the InstanceManager previous status.
 func (imc *InstanceManagerController) syncStatusWithPod(im *longhorn.InstanceManager) error {