From 8bf826dd08cf23d5f17b80b09c8d65f6e4cb6564 Mon Sep 17 00:00:00 2001 From: James Lu Date: Fri, 27 Sep 2024 14:26:00 +0800 Subject: [PATCH] fix(node): add the node.Status.Condition `ModulesLoaded` Check if the module `dm_crypt` is enabled as the first module. Add a unit test to check the kernel modules condition. ref: longhorn/longhorn 9153 Signed-off-by: James Lu (cherry picked from commit 6370a249fa9fa72179be8458f90b6a9a3225ea26) --- controller/controller_test.go | 7 + controller/node_controller.go | 183 +++++++++++++++++++------- controller/node_controller_test.go | 131 ++++++++++++++++-- k8s/pkg/apis/longhorn/v1beta2/node.go | 16 ++- 4 files changed, 273 insertions(+), 64 deletions(-) diff --git a/controller/controller_test.go b/controller/controller_test.go index eefcaea71a..bf541a9fe1 100644 --- a/controller/controller_test.go +++ b/controller/controller_test.go @@ -108,6 +108,10 @@ const ( TestVolumeAttachmentName = "test-volume" TestDiskPathFSType = "ext4" + + TestKernelVersion = "6.2.0-32-generic" + TestKernelConfigDIR = "/host/boot" + TestKernelConfigFilePath = TestKernelConfigDIR + "/config-" + TestKernelVersion ) var ( @@ -515,6 +519,9 @@ func newKubernetesNode(name string, readyStatus, diskPressureStatus, memoryStatu Status: networkStatus, }, }, + NodeInfo: corev1.NodeSystemInfo{ + KernelVersion: TestKernelVersion, + }, }, } } diff --git a/controller/node_controller.go b/controller/node_controller.go index 1c2d48c489..1efe732be7 100644 --- a/controller/node_controller.go +++ b/controller/node_controller.go @@ -26,6 +26,7 @@ import ( v1core "k8s.io/client-go/kubernetes/typed/core/v1" lhexec "github.com/longhorn/go-common-libs/exec" + lhio "github.com/longhorn/go-common-libs/io" lhns "github.com/longhorn/go-common-libs/ns" lhtypes "github.com/longhorn/go-common-libs/types" @@ -47,9 +48,16 @@ const ( unknownDiskID = "UNKNOWN_DISKID" + kernelConfigFilePathPrefix = "/host/boot/config-" + snapshotChangeEventQueueMax = 1048576 ) +var ( + kernelModules = map[string]string{"CONFIG_DM_CRYPT": "dm_crypt"} + nfsClientVersions = map[string]string{"CONFIG_NFS_V4_2": "nfs", "CONFIG_NFS_V4_1": "nfs", "CONFIG_NFS_V4": "nfs"} +) + type NodeController struct { *baseController @@ -922,6 +930,7 @@ func (nc *NodeController) environmentCheck(kubeNode *corev1.Node, node *longhorn namespaces := []lhtypes.Namespace{lhtypes.NamespaceMnt, lhtypes.NamespaceNet} nc.syncPackagesInstalled(kubeNode, node, namespaces) nc.syncMultipathd(node, namespaces) + nc.checkKernelModulesLoaded(kubeNode, node, namespaces) nc.syncNFSClientVersion(kubeNode, node, namespaces) } @@ -1005,7 +1014,8 @@ func (nc *NodeController) syncPackagesInstalled(kubeNode *corev1.Node, node *lon return } - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusTrue, "", "") + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusTrue, "", + fmt.Sprintf("All required packages %v are installed on node %v", packages, node.Name)) } func (nc *NodeController) syncMultipathd(node *longhorn.Node, namespaces []lhtypes.Namespace) { @@ -1027,66 +1037,145 @@ func (nc *NodeController) syncMultipathd(node *longhorn.Node, namespaces []lhtyp node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, "", "") } -func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) { - kernelVersion := kubeNode.Status.NodeInfo.KernelVersion - nfsClientVersions := []string{"CONFIG_NFS_V4_2", "CONFIG_NFS_V4_1", "CONFIG_NFS_V4"} - - nsexec, err := lhns.NewNamespaceExecutor(lhtypes.ProcessNone, lhtypes.HostProcDirectory, namespaces) +func (nc *NodeController) checkKernelModulesLoaded(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) { + notFoundModulesUsingkmod, err := checkModulesLoadedUsingkmod(kernelModules) if err != nil { - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, string(longhorn.NodeConditionReasonNamespaceExecutorErr), - fmt.Sprintf("Failed to get namespace executor: %v", err.Error())) + fmt.Sprintf("Failed to check kernel modules: %v", err.Error())) return } - kernelConfigPath := "/boot/config-" + kernelVersion - args := []string{kernelConfigPath} - if _, err := nsexec.Execute(nil, "ls", args, lhtypes.ExecuteDefaultTimeout); err != nil { - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, - string(longhorn.NodeConditionReasonKernelConfigIsNotFound), - fmt.Sprintf("Unable to find %v for checking %v: %v", kernelConfigPath, nfsClientVersions, err.Error())) + if len(notFoundModulesUsingkmod) == 0 { + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusTrue, "", + fmt.Sprintf("Kernel modules %v are loaded on node %v", getModulesConfigsList(kernelModules, false), node.Name)) return } - for _, ver := range nfsClientVersions { - args := []string{ver + "=", kernelConfigPath} - result, err := nsexec.Execute(nil, "grep", args, lhtypes.ExecuteDefaultTimeout) + notLoadedModules, err := checkModulesLoadedByConfigFile(nc.logger, notFoundModulesUsingkmod, kubeNode.Status.NodeInfo.KernelVersion) + if err != nil { + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, + string(longhorn.NodeConditionReasonCheckKernelConfigFailed), + fmt.Sprintf("Failed to check kernel config file for kernel modules %v: %v", notFoundModulesUsingkmod, err.Error())) + return + } + + if len(notLoadedModules) != 0 { + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, + string(longhorn.NodeConditionReasonKernelModulesNotLoaded), + fmt.Sprintf("Kernel modules %v are not loaded on node %v", notLoadedModules, node.Name)) + return + } + + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusTrue, "", + fmt.Sprintf("Kernel modules %v are loaded on node %v", getModulesConfigsList(kernelModules, false), node.Name)) +} + +func checkModulesLoadedUsingkmod(modules map[string]string) (map[string]string, error) { + kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return nil, err + } + + notFoundModules := map[string]string{} + for config, module := range modules { + if !strings.Contains(kmodResult, module) { + notFoundModules[config] = module + } + } + + return notFoundModules, nil +} + +func checkModulesLoadedByConfigFile(log *logrus.Entry, modules map[string]string, kernelVersion string) ([]string, error) { + kernelConfigPath := kernelConfigFilePathPrefix + kernelVersion + kernelConfigContent, err := lhio.ReadFileContent(kernelConfigPath) + if err != nil { + return nil, err + } + kernelConfigMap := getKernelModuleConfigMap(kernelConfigContent) + + notLoadedModules := []string{} + for config, module := range modules { + moduleEnabled, err := checkKernelModuleEnabled(log, kernelConfigContent, config, module, kernelConfigMap) if err != nil { - nc.logger.WithError(err).Debugf("Failed to find kernel config %v on node %v", ver, node.Name) + return nil, err + } + if !moduleEnabled { + notLoadedModules = append(notLoadedModules, module) + } + } + + return notLoadedModules, nil +} + +func getKernelModuleConfigMap(kernelConfigContent string) map[string]string { + configMap := map[string]string{} + configs := strings.Split(kernelConfigContent, "\n") + for _, config := range configs { + if !strings.HasPrefix(config, "CONFIG_") { continue } - enabled := strings.TrimSpace(strings.Split(result, "=")[1]) - switch enabled { - case "y": - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "") - return - case "m": - kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout) - if err != nil { - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, - string(longhorn.NodeConditionReasonNFSClientIsNotFound), - fmt.Sprintf("Failed to execute command `kmod`: %v", err.Error())) - return - } - res, err := lhexec.NewExecutor().ExecuteWithStdinPipe("grep", []string{"nfs"}, kmodResult, lhtypes.ExecuteDefaultTimeout) - if err != nil { - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, - string(longhorn.NodeConditionReasonNFSClientIsNotFound), - fmt.Sprintf("Failed to execute command `grep`: %v", err.Error())) - return - } - if res != "" { - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "") - return - } - default: - nc.logger.Debugf("Unknown kernel config value for %v: %v", ver, enabled) + configSplits := strings.Split(config, "=") + configMap[strings.TrimSpace(configSplits[0])] = strings.TrimSpace(configSplits[1]) + } + return configMap +} + +func checkKernelModuleEnabled(log *logrus.Entry, kernelConfigContent, module, kmodName string, kernelConfigMap map[string]string) (bool, error) { + enabled, exists := kernelConfigMap[module] + if !exists { + log.Debugf("Kernel config value for %v is not found", module) + return false, nil + } + + switch enabled { + case "y": + return true, nil + case "m": + kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout) + if err != nil { + return false, errors.Wrap(err, "Failed to execute command `kmod`") + } + if strings.Contains(kmodResult, kmodName) { + return true, nil } + default: + log.Debugf("Unknown kernel config value for %v: %v", module, enabled) + } + + return false, nil +} + +func getModulesConfigsList(modulesMap map[string]string, needModules bool) []string { + modulesConfigs := []string{} + for mod, config := range modulesMap { + appendingObj := config + if needModules { + appendingObj = mod + } + modulesConfigs = append(modulesConfigs, appendingObj) + } + return modulesConfigs +} + +func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) { + notLoadedModules, err := checkModulesLoadedByConfigFile(nc.logger, nfsClientVersions, kubeNode.Status.NodeInfo.KernelVersion) + if err != nil { + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, + string(longhorn.NodeConditionReasonCheckKernelConfigFailed), + fmt.Sprintf("Failed to check kernel config file for kernel modules %v: %v", nfsClientVersions, err.Error())) + return + } + + if len(notLoadedModules) == len(nfsClientVersions) { + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, + string(longhorn.NodeConditionReasonNFSClientIsNotFound), + fmt.Sprintf("NFS clients %v not found. At least one should be enabled", getModulesConfigsList(nfsClientVersions, true))) + return } - node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, - string(longhorn.NodeConditionReasonNFSClientIsNotFound), - fmt.Sprintf("NFS clients %v not found. At least one should be enabled", nfsClientVersions)) + node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "") } func (nc *NodeController) getImTypeDataEngines(node *longhorn.Node) map[longhorn.InstanceManagerType][]longhorn.DataEngineType { diff --git a/controller/node_controller_test.go b/controller/node_controller_test.go index 1dfb971260..db72be125a 100644 --- a/controller/node_controller_test.go +++ b/controller/node_controller_test.go @@ -3,6 +3,7 @@ package controller import ( "context" "fmt" + "os" "strings" "github.com/sirupsen/logrus" @@ -188,7 +189,8 @@ func (s *NodeControllerSuite) TestManagerPodUp(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, }, TestNode2: { @@ -275,7 +277,8 @@ func (s *NodeControllerSuite) TestManagerPodDown(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonNoMountPropagationSupport), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, }, TestNode2: { @@ -362,7 +365,8 @@ func (s *NodeControllerSuite) TestKubeNodeDown(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, }, TestNode2: { @@ -449,7 +453,8 @@ func (s *NodeControllerSuite) TestKubeNodePressure(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, }, TestNode2: { @@ -571,7 +576,8 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, DiskStatus: map[string]*longhorn.DiskStatus{ TestDiskID1: { @@ -722,7 +728,8 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, DiskStatus: map[string]*longhorn.DiskStatus{ TestDiskID1: { @@ -879,7 +886,8 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, DiskStatus: map[string]*longhorn.DiskStatus{ TestDiskID1: { @@ -1007,7 +1015,8 @@ func (s *NodeControllerSuite) TestCreateDefaultInstanceManager(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, DiskStatus: map[string]*longhorn.DiskStatus{ TestDiskID1: { @@ -1152,7 +1161,8 @@ func (s *NodeControllerSuite) TestCleanupRedundantInstanceManagers(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, DiskStatus: map[string]*longhorn.DiskStatus{ TestDiskID1: { @@ -1267,7 +1277,8 @@ func (s *NodeControllerSuite) TestCleanupAllInstanceManagers(c *C) { newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), - newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed), }, DiskStatus: map[string]*longhorn.DiskStatus{}, }, @@ -1929,6 +1940,106 @@ func (s *NodeControllerSuite) TestSyncInstanceManagers(c *C) { } } +func (s *NodeControllerSuite) TestKubeNodeKernelModulesCondition(c *C) { + var err error + + // Create a temporary Kernel config file + err = os.MkdirAll(TestKernelConfigDIR, 0755) + c.Assert(err, IsNil) + tmpKernelConfigFile, err := os.Create(TestKernelConfigFilePath) + c.Assert(err, IsNil) + defer tmpKernelConfigFile.Close() + defer os.Remove(TestKernelConfigFilePath) + + // Write some fake content to the temporary file + fakeFileContent := `CONFIG_DM_CRYPT=y +CONFIG_NFS_V4=m +CONFIG_NFS_V4_1=m +CONFIG_NFS_V4_2=y` + + _, err = tmpKernelConfigFile.Write([]byte(fakeFileContent)) + c.Assert(err, IsNil) + + fixture := &NodeControllerFixture{ + lhNodes: map[string]*longhorn.Node{ + TestNode1: newNode(TestNode1, TestNamespace, true, longhorn.ConditionStatusUnknown, ""), + TestNode2: newNode(TestNode2, TestNamespace, true, longhorn.ConditionStatusUnknown, ""), + }, + lhSettings: map[string]*longhorn.Setting{ + string(types.SettingNameDefaultInstanceManagerImage): newDefaultInstanceManagerImageSetting(), + }, + lhInstanceManagers: map[string]*longhorn.InstanceManager{ + TestInstanceManagerName: DefaultInstanceManagerTestNode1, + }, + lhOrphans: map[string]*longhorn.Orphan{ + DefaultOrphanTestNode1.Name: DefaultOrphanTestNode1, + }, + pods: map[string]*corev1.Pod{ + TestDaemon1: newDaemonPod(corev1.PodRunning, TestDaemon1, TestNamespace, TestNode1, TestIP1, &MountPropagationBidirectional), + TestDaemon2: newDaemonPod(corev1.PodRunning, TestDaemon2, TestNamespace, TestNode2, TestIP2, &MountPropagationBidirectional), + }, + nodes: map[string]*corev1.Node{ + TestNode1: newKubernetesNode( + TestNode1, + corev1.ConditionTrue, + corev1.ConditionFalse, + corev1.ConditionFalse, + corev1.ConditionFalse, + corev1.ConditionFalse, + corev1.ConditionTrue, + ), + TestNode2: newKubernetesNode( + TestNode2, + corev1.ConditionTrue, + corev1.ConditionFalse, + corev1.ConditionFalse, + corev1.ConditionFalse, + corev1.ConditionFalse, + corev1.ConditionTrue, + ), + }, + } + + expectation := &NodeControllerExpectation{ + nodeStatus: map[string]*longhorn.NodeStatus{ + TestNode1: { + Conditions: []longhorn.Condition{ + newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""), + newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""), + newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""), + newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS), + newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""), + newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusTrue, ""), + newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, ""), + }, + }, + TestNode2: { + Conditions: []longhorn.Condition{ + newNodeCondition(longhorn.NodeConditionTypeSchedulable, longhorn.ConditionStatusTrue, ""), + newNodeCondition(longhorn.NodeConditionTypeReady, longhorn.ConditionStatusTrue, ""), + }, + }, + }, + } + + s.initTest(c, fixture) + + for _, node := range fixture.lhNodes { + if s.controller.controllerID == node.Name { + err = s.controller.diskMonitor.RunOnce() + c.Assert(err, IsNil) + } + + err = s.controller.syncNode(getKey(node, c)) + c.Assert(err, IsNil) + + n, err := s.lhClient.LonghornV1beta2().Nodes(TestNamespace).Get(context.TODO(), node.Name, metav1.GetOptions{}) + c.Assert(err, IsNil) + + s.checkNodeConditions(c, expectation, n) + } +} + // -- Helpers -- func (s *NodeControllerSuite) checkNodeConditions(c *C, expectation *NodeControllerExpectation, node *longhorn.Node) { diff --git a/k8s/pkg/apis/longhorn/v1beta2/node.go b/k8s/pkg/apis/longhorn/v1beta2/node.go index 7943543bbf..f79dbe1c67 100644 --- a/k8s/pkg/apis/longhorn/v1beta2/node.go +++ b/k8s/pkg/apis/longhorn/v1beta2/node.go @@ -3,12 +3,13 @@ package v1beta2 import metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" const ( - NodeConditionTypeReady = "Ready" - NodeConditionTypeMountPropagation = "MountPropagation" - NodeConditionTypeMultipathd = "Multipathd" - NodeConditionTypeRequiredPackages = "RequiredPackages" - NodeConditionTypeNFSClientInstalled = "NFSClientInstalled" - NodeConditionTypeSchedulable = "Schedulable" + NodeConditionTypeReady = "Ready" + NodeConditionTypeMountPropagation = "MountPropagation" + NodeConditionTypeMultipathd = "Multipathd" + NodeConditionTypeKernelModulesLoaded = "KernelModulesLoaded" + NodeConditionTypeRequiredPackages = "RequiredPackages" + NodeConditionTypeNFSClientInstalled = "NFSClientInstalled" + NodeConditionTypeSchedulable = "Schedulable" ) const ( @@ -22,8 +23,9 @@ const ( NodeConditionReasonMultipathdIsRunning = "MultipathdIsRunning" NodeConditionReasonUnknownOS = "UnknownOS" NodeConditionReasonNamespaceExecutorErr = "NamespaceExecutorErr" + NodeConditionReasonKernelModulesNotLoaded = "KernelModulesNotLoaded" NodeConditionReasonPackagesNotInstalled = "PackagesNotInstalled" - NodeConditionReasonKernelConfigIsNotFound = "KernelConfigIsNotFound" + NodeConditionReasonCheckKernelConfigFailed = "CheckKernelConfigFailed" NodeConditionReasonNFSClientIsNotFound = "NFSClientIsNotFound" NodeConditionReasonKubernetesNodeCordoned = "KubernetesNodeCordoned" )