Skip to content

Commit

Permalink
feat: adding support to qos level memory.low protection
Browse files Browse the repository at this point in the history
This patch provides the memory.low protection feature.
There are a couple of benefits of memory.low protection:

1, it provides a gradient of protection.
 As a cgroup's usage grows past the protected amount,
 the protected amount remains protected,
 but reclaim pressure for the excess amount gradually increases.
2, it's work-conserving - if the protected cgroup doesn't use the memory,
 it's available for others to use.

Signed-off-by: Robin Lu <robin.lu@bytedance.com>
  • Loading branch information
lubinszARM committed Mar 1, 2024
1 parent 8dbc7d5 commit 531c7f8
Show file tree
Hide file tree
Showing 6 changed files with 453 additions and 0 deletions.
16 changes: 16 additions & 0 deletions cmd/katalyst-agent/app/options/qrm/memory_plugin.go
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ type MemoryOptions struct {
OOMPriorityPinnedMapAbsPath string

SockMemOptions
MemProtectionOptions
}

type SockMemOptions struct {
Expand All @@ -43,6 +44,11 @@ type SockMemOptions struct {
SetCgroupTCPMemRatio int
}

// MemProtectionOptions holds the command-line options for QoS-level
// memory protection (memory.low).
type MemProtectionOptions struct {
	// EnableSettingMemProtection turns on memory protection at QoS level
	// (flag: enable-setting-mem-protection).
	EnableSettingMemProtection bool
	// MemSoftLimitQoSLevelConfigFile is the absolute path of the mem.low
	// QoS-level config file (flag: mem-softlimit-qos-config-file).
	MemSoftLimitQoSLevelConfigFile string
}

func NewMemoryOptions() *MemoryOptions {
return &MemoryOptions{
PolicyName: "dynamic",
Expand All @@ -56,6 +62,10 @@ func NewMemoryOptions() *MemoryOptions {
SetGlobalTCPMemRatio: 20, // default: 20% * {host total memory}
SetCgroupTCPMemRatio: 100, // default: 100% * {cgroup memory}
},
MemProtectionOptions: MemProtectionOptions{
EnableSettingMemProtection: false,
MemSoftLimitQoSLevelConfigFile: "",
},
}
}

Expand Down Expand Up @@ -84,6 +94,10 @@ func (o *MemoryOptions) AddFlags(fss *cliflag.NamedFlagSets) {
o.SetGlobalTCPMemRatio, "limit global max tcp memory usage")
fs.IntVar(&o.SetCgroupTCPMemRatio, "qrm-memory-cgroup-tcpmem-ratio",
o.SetCgroupTCPMemRatio, "limit cgroup max tcp memory usage")
fs.BoolVar(&o.EnableSettingMemProtection, "enable-setting-mem-protection",
o.EnableSettingMemProtection, "if set true, we will do memory protection in qos level")
fs.StringVar(&o.MemSoftLimitQoSLevelConfigFile, "mem-softlimit-qos-config-file",
o.MemSoftLimitQoSLevelConfigFile, "the absolute path of mem.low qos level config file")
}
func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
conf.PolicyName = o.PolicyName
Expand All @@ -97,5 +111,7 @@ func (o *MemoryOptions) ApplyTo(conf *qrmconfig.MemoryQRMPluginConfig) error {
conf.EnableSettingSockMem = o.EnableSettingSockMem
conf.SetGlobalTCPMemRatio = o.SetGlobalTCPMemRatio
conf.SetCgroupTCPMemRatio = o.SetCgroupTCPMemRatio
conf.EnableSettingMemProtection = o.EnableSettingMemProtection
conf.MemSoftLimitQoSLevelConfigFile = o.MemSoftLimitQoSLevelConfigFile
return nil
}
12 changes: 12 additions & 0 deletions pkg/agent/qrm-plugins/memory/dynamicpolicy/policy.go
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ import (
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/memoryadvisor"
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/oom"
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/dynamicpolicy/state"
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/memprotection"
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/memory/handlers/sockmem"
"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/util"
"github.com/kubewharf/katalyst-core/pkg/agent/utilcomponent/periodicalhandler"
Expand Down Expand Up @@ -130,6 +131,7 @@ type DynamicPolicy struct {

enableSettingMemoryMigrate bool
enableSettingSockMem bool
enableSettingMemProtection bool
enableMemoryAdvisor bool
memoryAdvisorSocketAbsPath string
memoryPluginSocketAbsPath string
Expand Down Expand Up @@ -191,6 +193,7 @@ func NewDynamicPolicy(agentCtx *agent.GenericContext, conf *config.Configuration
asyncWorkers: asyncworker.NewAsyncWorkers(memoryPluginAsyncWorkersName, wrappedEmitter),
enableSettingMemoryMigrate: conf.EnableSettingMemoryMigrate,
enableSettingSockMem: conf.EnableSettingSockMem,
enableSettingMemProtection: conf.EnableSettingMemProtection,
enableMemoryAdvisor: conf.EnableMemoryAdvisor,
memoryAdvisorSocketAbsPath: conf.MemoryAdvisorSocketAbsPath,
memoryPluginSocketAbsPath: conf.MemoryPluginSocketAbsPath,
Expand Down Expand Up @@ -300,6 +303,15 @@ func (p *DynamicPolicy) Start() (err error) {
}
}

// Register the periodical task that applies QoS-level memory protection.
// A registration failure is only logged; it is not propagated to the caller.
if p.enableSettingMemProtection {
	general.Infof("setMemProtection enabled")
	if err := periodicalhandler.RegisterPeriodicalHandler(qrm.QRMMemoryPluginPeriodicalHandlerGroupName,
		memprotection.EnableSetMemProtectionPeriodicalHandlerName, memprotection.MemProtectionTaskFunc, 60*time.Second); err != nil {
		// Name the handler that actually failed (the previous message
		// referred to setSockMem).
		general.Infof("setMemProtection failed, err=%v", err)
	}
}

go wait.Until(func() {
periodicalhandler.ReadyToStartHandlersByGroup(qrm.QRMMemoryPluginPeriodicalHandlerGroupName)
}, 5*time.Second, p.stopCh)
Expand Down
31 changes: 31 additions & 0 deletions pkg/agent/qrm-plugins/memory/handlers/memprotection/const.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memprotection

const (
	// EnableSetMemProtectionPeriodicalHandlerName is the name used when the
	// memory protection task is registered as a periodical handler.
	EnableSetMemProtectionPeriodicalHandlerName = "SetCGMemProtection"

	// Upper (2 GiB) and lower (128 MiB) byte bounds applied to the computed
	// memory protection value.
	cgroupMemoryLimit2G   = 2 << 30
	cgroupMemoryLimit128M = 128 << 20

	// controlKnobKeyMemSoftLimit is the key of the soft-limit entry in the
	// loaded extra control knob configs.
	controlKnobKeyMemSoftLimit = "mem_softlimit"

	// metricNameMemLow is the metric name emitted after a soft limit has
	// been applied to a container cgroup.
	metricNameMemLow = "async_handler_cgroup_memlow"
)
Original file line number Diff line number Diff line change
@@ -0,0 +1,162 @@
//go:build linux
// +build linux

/*
Copyright 2022 The Katalyst Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package memprotection

import (
"context"
"strconv"

"github.com/kubewharf/katalyst-core/pkg/agent/qrm-plugins/commonstate"
coreconfig "github.com/kubewharf/katalyst-core/pkg/config"
dynamicconfig "github.com/kubewharf/katalyst-core/pkg/config/agent/dynamic"
"github.com/kubewharf/katalyst-core/pkg/metaserver"
"github.com/kubewharf/katalyst-core/pkg/metrics"
cgroupcm "github.com/kubewharf/katalyst-core/pkg/util/cgroup/common"
cgroupmgr "github.com/kubewharf/katalyst-core/pkg/util/cgroup/manager"
"github.com/kubewharf/katalyst-core/pkg/util/general"
"github.com/kubewharf/katalyst-core/pkg/util/native"
)

// convertMemRatioToBytes turns a percentage (memRatio) of memLimit into a
// byte count and passes it through general.AlignToPageSize, since any value
// related to a cgroup memory limitation should be aligned with the page size.
func convertMemRatioToBytes(memLimit, memRatio uint64) uint64 {
	// One percent of the limit is computed first (integer division), then
	// scaled by the ratio.
	onePercent := memLimit / 100
	return general.AlignToPageSize(onePercent * memRatio)
}

func getMemProtectionInBytes(memLimit, memRatio uint64) uint64 {
// Step1, convert ratio into bytes
result := convertMemRatioToBytes(memLimit, memRatio)
// Step2, performing specific operations within the memory.low
// Notice: we limited memory.low between {128M, 2G}
result = uint64(general.Clamp(float64(result), float64(cgroupMemoryLimit128M), float64(cgroupMemoryLimit2G)))

return result
}

func getUserSpecifiedMemoryProtectionInBytes(relCgroupPath, ratio string) int64 {
memStat, err := cgroupmgr.GetMemoryWithRelativePath(relCgroupPath)
if err != nil {
general.Warningf("getUserSpecifiedMemoryProtectionInBytes failed with err: %v", err)
return 0
}

memProtectionRatio, err := strconv.Atoi(ratio)
if err != nil {
general.Warningf("getUserSpecifiedMemoryProtectionInBytes failed with err: %v", err)
return 0
}

bytes := getMemProtectionInBytes(memStat.Limit, uint64(memProtectionRatio))
return int64(bytes)
}

// applyMemSoftLimitQoSLevelConfig loads the QoS-level soft-limit config file
// named by conf.MemSoftLimitQoSLevelConfigFile and, for every container of
// every active pod whose QoS level has a configured value, applies the
// resulting soft limit to the container cgroup and emits a metric with the
// applied value.
//
// Failures are logged and the affected pod/container is skipped; the
// function never returns an error.
func applyMemSoftLimitQoSLevelConfig(conf *coreconfig.Configuration,
	emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) {
	if conf.MemSoftLimitQoSLevelConfigFile == "" {
		general.Infof("no MemSoftLimitQoSLevelConfigFile found")
		return
	}

	var extraControlKnobConfigs commonstate.ExtraControlKnobConfigs
	if err := general.LoadJsonConfig(conf.MemSoftLimitQoSLevelConfigFile, &extraControlKnobConfigs); err != nil {
		general.Errorf("MemSoftLimitQoSLevelConfigFile load failed:%v", err)
		return
	}

	// The soft-limit knob lookup does not depend on the pod, so resolve it
	// once; without any configured QoS level there is nothing to apply and
	// listing pods can be skipped.
	qosLevelToRatio := extraControlKnobConfigs[controlKnobKeyMemSoftLimit].QoSLevelToDefaultValue
	if len(qosLevelToRatio) == 0 {
		general.Infof("no %v values configured in MemSoftLimitQoSLevelConfigFile", controlKnobKeyMemSoftLimit)
		return
	}

	podList, err := metaServer.GetPodList(context.Background(), native.PodIsActive)
	if err != nil {
		general.Errorf("get pod list failed: %v", err)
		return
	}

	for _, pod := range podList {
		if pod == nil {
			general.Warningf("get nil pod from metaServer")
			continue
		}

		qosLevel, err := conf.QoSConfiguration.GetQoSLevelForPod(pod)
		if err != nil {
			general.Warningf("GetQoSLevelForPod failed:%v", err)
			continue
		}

		// Pods whose QoS level has no configured value are left untouched.
		ratio, ok := qosLevelToRatio[qosLevel]
		if !ok {
			continue
		}

		podUID := string(pod.UID)
		for _, containerStatus := range pod.Status.ContainerStatuses {
			containerID := native.TrimContainerIDPrefix(containerStatus.ContainerID)
			relCgPath, err := cgroupcm.GetContainerRelativeCgroupPath(podUID, containerID)
			if err != nil {
				general.Warningf("GetContainerRelativeCgroupPath failed, pod=%v, container=%v, err=%v", podUID, containerID, err)
				continue
			}

			// A zero result signals that the value could not be computed
			// (the helper has already logged the reason).
			softLimit := getUserSpecifiedMemoryProtectionInBytes(relCgPath, ratio)
			if softLimit == 0 {
				general.Warningf("getUserSpecifiedMemoryProtectionBytes return 0, pod=%v, container=%v", podUID, containerID)
				continue
			}

			data := &cgroupcm.MemoryData{SoftLimitInBytes: softLimit}
			if err := cgroupmgr.ApplyMemoryWithRelativePath(relCgPath, data); err != nil {
				general.Warningf("ApplyMemoryWithRelativePath failed, cgpath=%v, err=%v", relCgPath, err)
				continue
			}

			// Metric emission is best-effort; a failure here must not stop
			// the remaining containers from being processed.
			_ = emitter.StoreInt64(metricNameMemLow, softLimit, metrics.MetricTypeNameRaw,
				metrics.ConvertMapToTags(map[string]string{
					"podUID":      podUID,
					"containerID": containerID,
				})...)
		}
	}
}

// MemProtectionTaskFunc is the periodical task that applies QoS-level memory
// protection. It returns early (after logging) when conf, emitter or
// metaServer is nil or when conf.EnableSettingMemProtection is false;
// otherwise, if a QoS-level soft-limit config file is configured, it applies
// that config via applyMemSoftLimitQoSLevelConfig.
//
// The second and third parameters are unused by this task.
func MemProtectionTaskFunc(conf *coreconfig.Configuration,
	_ interface{}, _ *dynamicconfig.DynamicAgentConfiguration,
	emitter metrics.MetricEmitter, metaServer *metaserver.MetaServer) {
	general.Infof("called")

	// Validate the dependencies this task actually uses.
	switch {
	case conf == nil:
		// The checked argument is conf, not the (ignored) extra conf.
		general.Errorf("nil conf")
		return
	case emitter == nil:
		general.Errorf("nil emitter")
		return
	case metaServer == nil:
		general.Errorf("nil metaServer")
		return
	}

	// SettingMemProtection featuregate.
	if !conf.EnableSettingMemProtection {
		general.Infof("EnableSettingMemProtection disabled")
		return
	}

	// checking qos-level memory.low configuration.
	if len(conf.MemSoftLimitQoSLevelConfigFile) > 0 {
		applyMemSoftLimitQoSLevelConfig(conf, emitter, metaServer)
	}
}
Loading

0 comments on commit 531c7f8

Please sign in to comment.