Skip to content

Commit

Permalink
fix(node): add the node.Status.Condition ModulesLoaded
Browse files Browse the repository at this point in the history
Check if the module `dm_crypt` is enabled as the first module.

ref: longhorn/longhorn 9153

Signed-off-by: James Lu <james.lu@suse.com>
  • Loading branch information
mantissahz committed Sep 26, 2024
1 parent 8bc7575 commit 4687cac
Show file tree
Hide file tree
Showing 3 changed files with 159 additions and 65 deletions.
178 changes: 130 additions & 48 deletions controller/node_controller.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
v1core "k8s.io/client-go/kubernetes/typed/core/v1"

lhexec "github.com/longhorn/go-common-libs/exec"
lhio "github.com/longhorn/go-common-libs/io"
lhns "github.com/longhorn/go-common-libs/ns"
lhtypes "github.com/longhorn/go-common-libs/types"

Expand All @@ -47,9 +48,16 @@ const (

unknownDiskID = "UNKNOWN_DISKID"

kernelConfigFilePathPrefix = "/host/boot/config-"

snapshotChangeEventQueueMax = 1048576
)

var (
// kernelModules maps a kernel config option name to the kernel module name
// that is looked for in the `kmod list` output. Every entry is checked when
// the kernel-modules-loaded node condition is evaluated.
kernelModules = map[string]string{"CONFIG_DM_CRYPT": "dm_crypt"}
// nfsClientVersions maps the kernel config options of the NFS client
// versions to the kernel module name looked for in the `kmod list` output.
// The NFS client condition is reported as not found only when none of these
// options turns out to be enabled.
nfsClientVersions = map[string]string{"CONFIG_NFS_V4_2": "nfs", "CONFIG_NFS_V4_1": "nfs", "CONFIG_NFS_V4": "nfs"}
)

type NodeController struct {
*baseController

Expand Down Expand Up @@ -922,6 +930,7 @@ func (nc *NodeController) environmentCheck(kubeNode *corev1.Node, node *longhorn
namespaces := []lhtypes.Namespace{lhtypes.NamespaceMnt, lhtypes.NamespaceNet}
nc.syncPackagesInstalled(kubeNode, node, namespaces)
nc.syncMultipathd(node, namespaces)
nc.checkKernelModulesLoaded(kubeNode, node, namespaces)
nc.syncNFSClientVersion(kubeNode, node, namespaces)
}

Expand Down Expand Up @@ -1005,7 +1014,8 @@ func (nc *NodeController) syncPackagesInstalled(kubeNode *corev1.Node, node *lon
return
}

node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusTrue, "", "")
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusTrue, "",
fmt.Sprintf("All required packages %v are installed on node %v", packages, node.Name))
}

func (nc *NodeController) syncMultipathd(node *longhorn.Node, namespaces []lhtypes.Namespace) {
Expand All @@ -1027,66 +1037,138 @@ func (nc *NodeController) syncMultipathd(node *longhorn.Node, namespaces []lhtyp
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, "", "")
}

func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) {
kernelVersion := kubeNode.Status.NodeInfo.KernelVersion
nfsClientVersions := []string{"CONFIG_NFS_V4_2", "CONFIG_NFS_V4_1", "CONFIG_NFS_V4"}

nsexec, err := lhns.NewNamespaceExecutor(lhtypes.ProcessNone, lhtypes.HostProcDirectory, namespaces)
func (nc *NodeController) checkKernelModulesLoaded(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) {
notFoundModulesUsingkmod, err := checkModulesLoadedUsingkmod(kernelModules)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNamespaceExecutorErr),
fmt.Sprintf("Failed to get namespace executor: %v", err.Error()))
fmt.Sprintf("Failed to check kernel modules: %v", err.Error()))
return
}

kernelConfigPath := "/boot/config-" + kernelVersion
args := []string{kernelConfigPath}
if _, err := nsexec.Execute(nil, "ls", args, lhtypes.ExecuteDefaultTimeout); err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonKernelConfigIsNotFound),
fmt.Sprintf("Unable to find %v for checking %v: %v", kernelConfigPath, nfsClientVersions, err.Error()))
if len(notFoundModulesUsingkmod) == 0 {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusTrue, "",
fmt.Sprintf("Kernel modules %v are loaded on node %v", getModulesConfigsList(kernelModules), node.Name))
return
}

for _, ver := range nfsClientVersions {
args := []string{ver + "=", kernelConfigPath}
result, err := nsexec.Execute(nil, "grep", args, lhtypes.ExecuteDefaultTimeout)
notLoadedModules, err := checkModulesLoadedByConfigFile(nc.logger, notFoundModulesUsingkmod, kubeNode.Status.NodeInfo.KernelVersion)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonCheckKernelConfigFailed),
fmt.Sprintf("Failed to check kernel config file for kernel modules %v: %v", notFoundModulesUsingkmod, err.Error()))
return
}

if len(notLoadedModules) != 0 {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonKernelModulesNotLoaded),
fmt.Sprintf("Kernel modules %v are not loaded on node %v", notLoadedModules, node.Name))
return
}

node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusTrue, "",
fmt.Sprintf("Kernel modules %v are loaded on node %v", getModulesConfigsList(kernelModules), node.Name))
}

// checkModulesLoadedUsingkmod runs `kmod list` and reports which of the given
// modules are absent from its output.
//
// modules maps a kernel config option name to a kernel module name. The
// returned map contains the entries of modules whose module name is not listed
// as loaded; it is empty (never nil) when every module is listed. An error is
// returned only when executing `kmod list` fails.
func checkModulesLoadedUsingkmod(modules map[string]string) (map[string]string, error) {
	kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout)
	if err != nil {
		return nil, err
	}

	// The first column of each `kmod list` row is the module name. Compare
	// whole names instead of searching the raw output so that a module is not
	// treated as loaded just because its name is a substring of another
	// module's name or shows up in the "Used by" column.
	loaded := map[string]struct{}{}
	for _, line := range strings.Split(kmodResult, "\n") {
		if fields := strings.Fields(line); len(fields) > 0 {
			loaded[fields[0]] = struct{}{}
		}
	}

	notFoundModules := map[string]string{}
	for config, module := range modules {
		if _, ok := loaded[module]; !ok {
			notFoundModules[config] = module
		}
	}

	return notFoundModules, nil
}

func checkModulesLoadedByConfigFile(log *logrus.Entry, modules map[string]string, kernelVersion string) ([]string, error) {
kernelConfigPath := kernelConfigFilePathPrefix + kernelVersion
kernelConfigContent, err := lhio.ReadFileContent(kernelConfigPath)
if err != nil {
return nil, err
}

notLoadedModules := []string{}
for config, module := range modules {
moduleEnabled, err := checkKernelModuleEnabled(log, kernelConfigContent, config, module)
if err != nil {
nc.logger.WithError(err).Debugf("Failed to find kernel config %v on node %v", ver, node.Name)
continue
return nil, err
}
enabled := strings.TrimSpace(strings.Split(result, "=")[1])
switch enabled {
case "y":
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "")
return
case "m":
kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNFSClientIsNotFound),
fmt.Sprintf("Failed to execute command `kmod`: %v", err.Error()))
return
}
res, err := lhexec.NewExecutor().ExecuteWithStdinPipe("grep", []string{"nfs"}, kmodResult, lhtypes.ExecuteDefaultTimeout)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNFSClientIsNotFound),
fmt.Sprintf("Failed to execute command `grep`: %v", err.Error()))
return
}
if res != "" {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "")
return
}
default:
nc.logger.Debugf("Unknown kernel config value for %v: %v", ver, enabled)
if !moduleEnabled {
notLoadedModules = append(notLoadedModules, module)
}
}

return notLoadedModules, nil
}

// checkKernelModuleEnabled reports whether the kernel config option module is
// usable according to kernelConfigContent.
//
// The option's line is looked up with getModuleConfigInKernelConfigFile and
// its value decides the result:
//   - "y": the option is built in, returns true.
//   - "m": the option is built as a module, returns true only when kmodName is
//     listed by `kmod list`.
//   - anything else, a line without "=", or no matching line: returns false
//     and writes a debug message to log.
//
// An error is returned only when executing `kmod list` fails.
func checkKernelModuleEnabled(log *logrus.Entry, kernelConfigContent, module, kmodName string) (bool, error) {
	configLine := getModuleConfigInKernelConfigFile(module, kernelConfigContent)
	if configLine == "" {
		log.Debugf("Kernel config %v not found", module)
		return false, nil
	}

	// A matched line is not guaranteed to carry a value (for example
	// "# CONFIG_DM_CRYPT is not set"); indexing the split result blindly would
	// panic, so treat such a line as "not enabled".
	parts := strings.SplitN(configLine, "=", 2)
	if len(parts) != 2 {
		log.Debugf("Kernel config %v has no value in line %q", module, configLine)
		return false, nil
	}

	enabled := strings.TrimSpace(parts[1])
	switch enabled {
	case "y":
		return true, nil
	case "m":
		kmodResult, err := lhexec.NewExecutor().Execute(nil, "kmod", []string{"list"}, lhtypes.ExecuteDefaultTimeout)
		if err != nil {
			return false, errors.Wrap(err, "Failed to execute command `kmod`")
		}
		// Match on the module-name column only, so that e.g. "nfs" is not
		// satisfied by a loaded "nfsd" module.
		for _, line := range strings.Split(kmodResult, "\n") {
			if fields := strings.Fields(line); len(fields) > 0 && fields[0] == kmodName {
				return true, nil
			}
		}
	default:
		log.Debugf("Unknown kernel config value for %v: %v", module, enabled)
	}

	return false, nil
}

// getModuleConfigInKernelConfigFile returns the line of kernelConfigContent
// that assigns a value to the kernel config option module, for example
// "CONFIG_DM_CRYPT=m", with surrounding whitespace removed.
//
// Only lines that start with "<module>=" are considered. Comment lines such as
// "# CONFIG_DM_CRYPT is not set" and longer option names that merely contain
// module (CONFIG_NFS_V4_1 when asked for CONFIG_NFS_V4) are skipped. It returns
// "" when no such line exists.
func getModuleConfigInKernelConfigFile(module, kernelConfigContent string) string {
	prefix := module + "="
	for _, line := range strings.Split(kernelConfigContent, "\n") {
		// Trimming also drops a trailing "\r" from CRLF-terminated content.
		line = strings.TrimSpace(line)
		if strings.HasPrefix(line, prefix) {
			return line
		}
	}
	return ""
}

// getModulesConfigsList returns the keys of modulesMap (the kernel config
// option names) in ascending order.
//
// Go randomizes map iteration order, so the keys are ordered explicitly; this
// keeps messages built from the list identical from one call to the next. The
// result is an empty, non-nil slice when modulesMap is empty or nil.
func getModulesConfigsList(modulesMap map[string]string) []string {
	modulesConfigs := make([]string, 0, len(modulesMap))
	for config := range modulesMap {
		// Insert each key at its sorted position. The maps passed here hold
		// only a handful of entries, so a simple insertion is sufficient.
		modulesConfigs = append(modulesConfigs, config)
		i := len(modulesConfigs) - 1
		for ; i > 0 && modulesConfigs[i-1] > config; i-- {
			modulesConfigs[i] = modulesConfigs[i-1]
		}
		modulesConfigs[i] = config
	}
	return modulesConfigs
}

func (nc *NodeController) syncNFSClientVersion(kubeNode *corev1.Node, node *longhorn.Node, namespaces []lhtypes.Namespace) {
notLoadedModules, err := checkModulesLoadedByConfigFile(nc.logger, nfsClientVersions, kubeNode.Status.NodeInfo.KernelVersion)
if err != nil {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonCheckKernelConfigFailed),
fmt.Sprintf("Failed to check kernel config file for kernel modules %v: %v", nfsClientVersions, err.Error()))
return
}

if len(notLoadedModules) == len(nfsClientVersions) {
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNFSClientIsNotFound),
fmt.Sprintf("NFS clients %v not found. At least one should be enabled", getModulesConfigsList(nfsClientVersions)))
return
}

node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse,
string(longhorn.NodeConditionReasonNFSClientIsNotFound),
fmt.Sprintf("NFS clients %v not found. At least one should be enabled", nfsClientVersions))
node.Status.Conditions = types.SetCondition(node.Status.Conditions, longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusTrue, "", "")
}

func (nc *NodeController) getImTypeDataEngines(node *longhorn.Node) map[longhorn.InstanceManagerType][]longhorn.DataEngineType {
Expand Down
30 changes: 20 additions & 10 deletions controller/node_controller_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -188,7 +188,8 @@ func (s *NodeControllerSuite) TestManagerPodUp(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
},
TestNode2: {
Expand Down Expand Up @@ -275,7 +276,8 @@ func (s *NodeControllerSuite) TestManagerPodDown(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonNoMountPropagationSupport),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
},
TestNode2: {
Expand Down Expand Up @@ -362,7 +364,8 @@ func (s *NodeControllerSuite) TestKubeNodeDown(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
},
TestNode2: {
Expand Down Expand Up @@ -449,7 +452,8 @@ func (s *NodeControllerSuite) TestKubeNodePressure(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
},
TestNode2: {
Expand Down Expand Up @@ -571,7 +575,8 @@ func (s *NodeControllerSuite) TestUpdateDiskStatus(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
DiskStatus: map[string]*longhorn.DiskStatus{
TestDiskID1: {
Expand Down Expand Up @@ -722,7 +727,8 @@ func (s *NodeControllerSuite) TestCleanDiskStatus(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
DiskStatus: map[string]*longhorn.DiskStatus{
TestDiskID1: {
Expand Down Expand Up @@ -879,7 +885,8 @@ func (s *NodeControllerSuite) TestDisableDiskOnFilesystemChange(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
DiskStatus: map[string]*longhorn.DiskStatus{
TestDiskID1: {
Expand Down Expand Up @@ -1007,7 +1014,8 @@ func (s *NodeControllerSuite) TestCreateDefaultInstanceManager(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
DiskStatus: map[string]*longhorn.DiskStatus{
TestDiskID1: {
Expand Down Expand Up @@ -1152,7 +1160,8 @@ func (s *NodeControllerSuite) TestCleanupRedundantInstanceManagers(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
DiskStatus: map[string]*longhorn.DiskStatus{
TestDiskID1: {
Expand Down Expand Up @@ -1267,7 +1276,8 @@ func (s *NodeControllerSuite) TestCleanupAllInstanceManagers(c *C) {
newNodeCondition(longhorn.NodeConditionTypeMountPropagation, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeRequiredPackages, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonUnknownOS),
newNodeCondition(longhorn.NodeConditionTypeMultipathd, longhorn.ConditionStatusTrue, ""),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonKernelConfigIsNotFound),
newNodeCondition(longhorn.NodeConditionTypeKernelModulesLoaded, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
newNodeCondition(longhorn.NodeConditionTypeNFSClientInstalled, longhorn.ConditionStatusFalse, longhorn.NodeConditionReasonCheckKernelConfigFailed),
},
DiskStatus: map[string]*longhorn.DiskStatus{},
},
Expand Down
Loading

0 comments on commit 4687cac

Please sign in to comment.