From 135eee08fdcf82bc1807a00f78a55933806900b1 Mon Sep 17 00:00:00 2001 From: Robin Lu Date: Fri, 17 May 2024 11:24:28 +0800 Subject: [PATCH] iocost: adding strict mode adding a strict mode to prevent a potential kernel deadlock bug Signed-off-by: Robin Lu --- .../app/options/qrm/io_plugin.go | 5 +++ .../qrm-plugins/io/handlers/iocost/const.go | 2 + .../io/handlers/iocost/iocost_linux.go | 39 +++++++++++++++++++ .../io/handlers/iocost/iocost_linux_test.go | 13 +++++++ pkg/config/agent/qrm/io_plugin.go | 1 + 5 files changed, 60 insertions(+) diff --git a/cmd/katalyst-agent/app/options/qrm/io_plugin.go b/cmd/katalyst-agent/app/options/qrm/io_plugin.go index f0bf4afe8..81000423e 100644 --- a/cmd/katalyst-agent/app/options/qrm/io_plugin.go +++ b/cmd/katalyst-agent/app/options/qrm/io_plugin.go @@ -41,6 +41,7 @@ type WritebackThrottlingOption struct { type IOCostOption struct { EnableSettingIOCost bool + CheckWBTDisabled bool IOCostQoSConfigFile string IOCostModelConfigFile string } @@ -62,6 +63,7 @@ func NewIOOptions() *IOOptions { }, IOCostOption: IOCostOption{ EnableSettingIOCost: false, + CheckWBTDisabled: true, IOCostQoSConfigFile: "", IOCostModelConfigFile: "", }, @@ -88,6 +90,8 @@ func (o *IOOptions) AddFlags(fss *cliflag.NamedFlagSets) { o.WBTValueNVME, "writeback throttling value for NVME") fs.BoolVar(&o.EnableSettingIOCost, "enable-io-cost", o.EnableSettingIOCost, "if set it to true, io.cost setting will be executed") + fs.BoolVar(&o.CheckWBTDisabled, "check-wbt-disabled", + o.CheckWBTDisabled, "if set it to true, wbt should be disabled") fs.StringVar(&o.IOCostQoSConfigFile, "io-cost-qos-config-file", o.IOCostQoSConfigFile, "the absolute path of io.cost.qos qos config file") fs.StringVar(&o.IOCostModelConfigFile, "io-cost-model-config-file", @@ -107,6 +111,7 @@ func (o *IOOptions) ApplyTo(conf *qrmconfig.IOQRMPluginConfig) error { conf.WBTValueSSD = o.WBTValueSSD conf.WBTValueNVME = o.WBTValueNVME conf.EnableSettingIOCost = o.EnableSettingIOCost + conf.CheckWBTDisabled = o.CheckWBTDisabled conf.IOCostQoSConfigFile = o.IOCostQoSConfigFile conf.IOCostModelConfigFile = o.IOCostModelConfigFile conf.EnableSettingIOWeight = o.EnableSettingIOWeight diff --git a/pkg/agent/qrm-plugins/io/handlers/iocost/const.go b/pkg/agent/qrm-plugins/io/handlers/iocost/const.go index b0e0f3d18..0ad9f1508 100644 --- a/pkg/agent/qrm-plugins/io/handlers/iocost/const.go +++ b/pkg/agent/qrm-plugins/io/handlers/iocost/const.go @@ -29,6 +29,8 @@ const ( DevModelBcache DevModel = "bcache" DevModelDeviceMapper DevModel = "dm" + sysDiskPrefix = "/sys/block" + IOStatMetricCostVrate = "cost.vrate" MetricNameIOCostVrate = "iocost_vrate" diff --git a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go index 64d6ba1ea..097dee6f8 100644 --- a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go +++ b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux.go @@ -21,6 +21,7 @@ package iocost import ( "fmt" + "io/ioutil" "path/filepath" "strconv" "sync" @@ -28,7 +29,9 @@ import ( "github.com/kubewharf/katalyst-core/pkg/config" coreconfig "github.com/kubewharf/katalyst-core/pkg/config" dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic" + coreconsts "github.com/kubewharf/katalyst-core/pkg/consts" "github.com/kubewharf/katalyst-core/pkg/metaserver" + "github.com/kubewharf/katalyst-core/pkg/metaserver/agent/metric/helper" "github.com/kubewharf/katalyst-core/pkg/metrics" "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" cgcommon "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common" @@ -236,6 +239,33 @@ func applyIOCostConfig(conf *config.Configuration, emitter metrics.MetricEmitter applyIOCostModelWithDefault(ioCostModelConfigs, devsIDToModel) } +func checkWBTDisabled(targetDiskType float64, diskPath string, emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) (bool, error) { + dir, err := ioutil.ReadDir(diskPath) + if err != nil { + general.Errorf("failed to readdir:%v, err:%v", sysDiskPrefix, err) + return false, err + } + for _, entry := range dir { + diskType, err := helper.GetDeviceMetric(metaServer.MetricsFetcher, emitter, coreconsts.MetricIODiskType, entry.Name()) + if err != nil { + general.Errorf("failed to read MetricIODiskType, err:%v", err) + return false, err + } + + if diskType == targetDiskType { + WBTValue, err := helper.GetDeviceMetric(metaServer.MetricsFetcher, emitter, coreconsts.MetricIODiskWBTValue, entry.Name()) + if err != nil { + general.Errorf("failed to read MetricIODiskWBTValue, err:%v", err) + return false, err + } + if WBTValue != 0 { + return false, nil + } + } + } + return true, nil +} + func SetIOCost(conf *coreconfig.Configuration, _ interface{}, _ *dynamicconfig.DynamicAgentConfiguration, @@ -265,6 +295,15 @@ func SetIOCost(conf *coreconfig.Configuration, return } + // Strict mode: checking wbt file. + if conf.CheckWBTDisabled { + disabled, err := checkWBTDisabled(coreconsts.DiskTypeHDD, sysDiskPrefix, emitter, metaServer) + if !disabled { + general.Infof("wbt for HDD disks should be disabled, err=%v", err) + return + } + } + if !cgcommon.CheckCgroup2UnifiedMode() { general.Infof("not in cgv2 environment, skip IOAsyncTaskFunc") return diff --git a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go index 8e17c46a3..f26e4ff3b 100644 --- a/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go +++ b/pkg/agent/qrm-plugins/io/handlers/iocost/iocost_linux_test.go @@ -133,6 +133,7 @@ func TestSetIOCost(t *testing.T) { IOQRMPluginConfig: &qrm.IOQRMPluginConfig{ IOCostOption: qrm.IOCostOption{ EnableSettingIOCost: true, + CheckWBTDisabled: true, }, }, }, @@ -411,3 +412,15 @@ func TestApplyIOCostQoSWithDefault(t *testing.T) { // Call the function under test applyIOCostQoSWithDefault(ioCostQoSConfigs, devsIDToModel) } + +func TestCheckWBTDisabled(t *testing.T) { + t.Parallel() + metaServer, err := makeMetaServer() + assert.NoError(t, err) + + result, _ := checkWBTDisabled(1.0, sysDiskPrefix, metrics.DummyMetrics{}, metaServer) + assert.False(t, result) + + result, _ = checkWBTDisabled(1.0, "notExist", metrics.DummyMetrics{}, metaServer) + assert.False(t, result) +} diff --git a/pkg/config/agent/qrm/io_plugin.go b/pkg/config/agent/qrm/io_plugin.go index 589063675..1c25fc811 100644 --- a/pkg/config/agent/qrm/io_plugin.go +++ b/pkg/config/agent/qrm/io_plugin.go @@ -34,6 +34,7 @@ type WritebackThrottlingOption struct { type IOCostOption struct { EnableSettingIOCost bool + CheckWBTDisabled bool IOCostQoSConfigFile string IOCostModelConfigFile string }