Skip to content

Commit

Permalink
fix(storage-network): annotated pod in creation loop
Browse files Browse the repository at this point in the history
longhorn-7640

Co-authored-by: Chin-Ya Huang <chin-ya.huang@suse.com>
Signed-off-by: Eric Weber <eric.weber@suse.com>
(cherry picked from commit 43e997c)
  • Loading branch information
ejweber committed Apr 4, 2024
1 parent 4ec5242 commit 5136187
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 126 deletions.
125 changes: 0 additions & 125 deletions controller/instance_manager_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -507,131 +507,6 @@ func (imc *InstanceManagerController) annotateCASafeToEvict(im *longhorn.Instanc
return nil
}

<<<<<<< HEAD
=======
// areDangerZoneSettingsSyncedToIMPod reports whether the pod backing im already
// reflects every setting returned by types.GetDangerZoneSettings.
//
// Results (at most one of the three booleans is true):
//   - isSynced: every checked setting matched the pod.
//   - isPodDeletedOrNotRunning: im is not in the Running state, or no pod was
//     found for it.
//   - areInstancesRunningInPod: at least one instance of im is Running or
//     Starting; the settings are not compared in that case.
//
// A non-nil err is returned when the pod or a setting cannot be read, or when
// one of the per-setting checks fails.
func (imc *InstanceManagerController) areDangerZoneSettingsSyncedToIMPod(im *longhorn.InstanceManager) (isSynced, isPodDeletedOrNotRunning, areInstancesRunningInPod bool, err error) {
	if im.Status.CurrentState != longhorn.InstanceManagerStateRunning {
		return false, true, false, nil
	}

	// nolint:all
	for _, inst := range types.ConsolidateInstances(im.Status.InstanceEngines, im.Status.InstanceReplicas, im.Status.Instances) {
		switch inst.Status.State {
		case longhorn.InstanceStateRunning, longhorn.InstanceStateStarting:
			return false, false, true, nil
		}
	}

	imPod, podErr := imc.ds.GetPodRO(im.Namespace, im.Name)
	if podErr != nil {
		return false, false, false, errors.Wrapf(podErr, "cannot get pod for instance manager %v", im.Name)
	}
	if imPod == nil {
		return false, true, false, nil
	}

	for name := range types.GetDangerZoneSettings() {
		current, settingErr := imc.ds.GetSettingWithAutoFillingRO(name)
		if settingErr != nil {
			return false, false, false, settingErr
		}

		// Settings without a dedicated check below are treated as synced.
		synced := true
		var checkErr error
		switch name {
		case types.SettingNameTaintToleration:
			synced, checkErr = imc.isSettingTaintTolerationSynced(current, imPod)
		case types.SettingNameSystemManagedComponentsNodeSelector:
			synced, checkErr = imc.isSettingNodeSelectorSynced(current, imPod)
		case types.SettingNameGuaranteedInstanceManagerCPU, types.SettingNameV2DataEngineGuaranteedInstanceManagerCPU:
			synced, checkErr = imc.isSettingGuaranteedInstanceManagerCPUSynced(current, imPod)
		case types.SettingNamePriorityClass:
			synced, checkErr = imc.isSettingPriorityClassSynced(current, imPod)
		case types.SettingNameStorageNetwork:
			synced, checkErr = imc.isSettingStorageNetworkSynced(current, imPod)
		case types.SettingNameV1DataEngine, types.SettingNameV2DataEngine:
			synced, checkErr = imc.isSettingDataEngineSynced(name, im)
		}
		if checkErr != nil {
			return false, false, false, checkErr
		}
		if !synced {
			return false, false, false, nil
		}
	}

	return true, false, false, nil
}

// isSettingTaintTolerationSynced reports whether the tolerations parsed from
// setting.Value equal the tolerations last applied to pod, comparing both as
// maps built by util.TolerationListToMap. An error is returned when either
// list cannot be obtained.
func (imc *InstanceManagerController) isSettingTaintTolerationSynced(setting *longhorn.Setting, pod *corev1.Pod) (bool, error) {
	wantList, err := types.UnmarshalTolerations(setting.Value)
	if err != nil {
		return false, err
	}
	want := util.TolerationListToMap(wantList)

	appliedList, err := getLastAppliedTolerationsList(pod)
	if err != nil {
		return false, err
	}
	applied := util.TolerationListToMap(appliedList)

	return reflect.DeepEqual(applied, want), nil
}

// isSettingNodeSelectorSynced reports whether pod's node selector equals the
// node selector parsed from setting.Value. A nil selector on the pod is
// considered equal to an empty parsed selector.
func (imc *InstanceManagerController) isSettingNodeSelectorSynced(setting *longhorn.Setting, pod *corev1.Pod) (bool, error) {
	desired, err := types.UnmarshalNodeSelector(setting.Value)
	if err != nil {
		return false, err
	}

	current := pod.Spec.NodeSelector
	// reflect.DeepEqual distinguishes a nil map from an empty one, so the
	// "nothing selected on either side" case is handled explicitly.
	if current == nil && len(desired) == 0 {
		return true, nil
	}

	return reflect.DeepEqual(current, desired), nil
}

// isSettingGuaranteedInstanceManagerCPUSynced reports whether the resources of
// pod's first container match the CPU requirement computed by
// GetInstanceManagerCPURequirement for that pod.
//
// When the Longhorn node named by pod.Spec.NodeName does not have a true Ready
// condition, the setting is reported as synced without comparing resources.
// The setting argument is not consulted.
func (imc *InstanceManagerController) isSettingGuaranteedInstanceManagerCPUSynced(setting *longhorn.Setting, pod *corev1.Pod) (bool, error) {
	node, err := imc.ds.GetNode(pod.Spec.NodeName)
	if err != nil {
		return false, err
	}

	readyCond := types.GetCondition(node.Status.Conditions, longhorn.NodeConditionTypeReady)
	if readyCond.Status != longhorn.ConditionStatusTrue {
		return true, nil
	}

	wantResources, err := GetInstanceManagerCPURequirement(imc.ds, pod.Name)
	if err != nil {
		return false, err
	}

	// NOTE(review): assumes the pod always has at least one container;
	// indexing panics otherwise — confirm against how these pods are created.
	gotResources := pod.Spec.Containers[0].Resources
	return IsSameGuaranteedCPURequirement(wantResources, &gotResources), nil
}

// isSettingPriorityClassSynced reports whether pod's priority class name is
// exactly setting.Value. It never returns an error.
func (imc *InstanceManagerController) isSettingPriorityClassSynced(setting *longhorn.Setting, pod *corev1.Pod) (bool, error) {
	matches := setting.Value == pod.Spec.PriorityClassName
	return matches, nil
}

// isSettingStorageNetworkSynced reports whether pod's
// types.CNIAnnotationNetworks annotation equals the annotation value derived
// from setting by types.CreateCniAnnotationFromSetting. It never returns an
// error.
//
// The comparison deliberately uses the derived annotation value rather than
// the raw setting.Value: the setting controller decides whether to delete a
// pod by comparing against the derived value, so comparing against the raw
// value here would make the two controllers disagree about whether a pod is
// in sync.
func (imc *InstanceManagerController) isSettingStorageNetworkSynced(setting *longhorn.Setting, pod *corev1.Pod) (bool, error) {
	nadAnnot := string(types.CNIAnnotationNetworks)
	nadAnnotValue := types.CreateCniAnnotationFromSetting(setting)

	return pod.Annotations[nadAnnot] == nadAnnotValue, nil
}

// isSettingDataEngineSynced reports whether im is consistent with the boolean
// data engine setting named settingName. The result is false only when the
// setting is disabled and im.Spec.DataEngine is the engine type that setting
// maps to (V1 for types.SettingNameV1DataEngine, V2 for
// types.SettingNameV2DataEngine). An error is returned when the setting cannot
// be read as a bool.
func (imc *InstanceManagerController) isSettingDataEngineSynced(settingName types.SettingName, im *longhorn.InstanceManager) (bool, error) {
	enabled, err := imc.ds.GetSettingAsBool(settingName)
	if err != nil {
		return false, errors.Wrapf(err, "failed to get %v setting for updating data engine", settingName)
	}
	if enabled {
		return true, nil
	}

	// The setting is disabled: the instance manager is out of sync only when
	// it uses the engine type this setting governs. For any other setting
	// name the zero DataEngineType is compared.
	var governed longhorn.DataEngineType
	switch settingName {
	case types.SettingNameV1DataEngine:
		governed = longhorn.DataEngineTypeV1
	case types.SettingNameV2DataEngine:
		governed = longhorn.DataEngineTypeV2
	}

	return im.Spec.DataEngine != governed, nil
}

>>>>>>> 6c305ae3 (perf: remove redundant deep copy)
func (imc *InstanceManagerController) syncInstanceManagerAPIVersion(im *longhorn.InstanceManager) error {
// Avoid changing API versions when InstanceManagers are state Unknown.
// Then once required (in the future), the monitor could still talk with the pod and update processes in some corner cases. e.g., kubelet restart.
Expand Down
10 changes: 9 additions & 1 deletion controller/setting_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -743,6 +743,8 @@ func (sc *SettingController) updateCNI() error {
}

nadAnnot := string(types.CNIAnnotationNetworks)
nadAnnotValue := types.CreateCniAnnotationFromSetting(storageNetwork)

imPodList, err := sc.ds.ListInstanceManagerPods()
if err != nil {
return errors.Wrapf(err, "failed to list instance manager Pods for %v setting update", types.SettingNameStorageNetwork)
Expand All @@ -755,10 +757,16 @@ func (sc *SettingController) updateCNI() error {

pods := append(imPodList, bimPodList...)
for _, pod := range pods {
if pod.Annotations[nadAnnot] == storageNetwork.Value {
if pod.Annotations[nadAnnot] == nadAnnotValue {
continue
}

logrus.WithFields(logrus.Fields{
"pod": pod.Name,
"oldValue": pod.Annotations[nadAnnot],
"newValue": nadAnnotValue,
}).Infof("Deleting pod to update the %v annotation", nadAnnot)

if err := sc.ds.DeletePod(pod.Name); err != nil {
return err
}
Expand Down

0 comments on commit 5136187

Please sign in to comment.