Skip to content

Commit

Permalink
fix(sysadvisor): fix target offloading memory of last round
Browse files Browse the repository at this point in the history
Signed-off-by: linzhecheng <linzhecheng@bytedance.com>
  • Loading branch information
cheney-lin committed Apr 29, 2024
1 parent d2f559c commit 1be73c5
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 27 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -64,14 +64,14 @@ var tmoPolicyFuncs sync.Map
var tmoBlockFuncs sync.Map

type TmoStats struct {
memUsage float64
memInactive float64
memPsiAvg60 float64
pgscan float64
pgsteal float64
refault float64
refaultActivate float64
lastOffloadingTargetSize float64
memUsage float64
memInactive float64
memPsiAvg60 float64
pgscan float64
pgsteal float64
refault float64
refaultActivate float64
offloadingTargetSize float64
}

type TmoPolicyFn func(
Expand Down Expand Up @@ -100,13 +100,20 @@ func refaultPolicyFunc(lastStats TmoStats, currStats TmoStats, conf *tmoconf.TMO
reclaimScanEfficiencyRatio = pgstealDelta / pgscanDelta
}

var result float64
if reclaimAccuracyRatio < conf.RefaultPolicyConf.ReclaimAccuracyTarget || reclaimScanEfficiencyRatio < conf.RefaultPolicyConf.ReclaimScanEfficiencyTarget {
// Decrease the offloading size when the reclaim accuracy or the scan efficiency is detected to be below its target
return nil, math.Max(0, currStats.lastOffloadingTargetSize*reclaimAccuracyRatio)
result = math.Max(0, lastStats.offloadingTargetSize*reclaimAccuracyRatio)
} else {
// Try to increase offloading size but make sure not exceed the max probe of memory usage and 10% of inactive memory
return nil, math.Min(math.Max(currStats.lastOffloadingTargetSize*OffloadingSizeScaleCoeff, currStats.memInactive*InactiveProbe), currStats.memUsage*conf.RefaultPolicyConf.MaxProbe)
// Try to increase the offloading size: scale up the target size of the last round, but use at least 10% of inactive memory when that target is relatively small,
// which means reclaim accuracy and reclaim scan efficiency were low, and never exceed the max probe of memory usage.
result = math.Min(math.Max(lastStats.offloadingTargetSize*OffloadingSizeScaleCoeff, currStats.memInactive*InactiveProbe), currStats.memUsage*conf.RefaultPolicyConf.MaxProbe)
}
general.InfoS("refault info", "reclaimAccuracyRatio", reclaimAccuracyRatio, "ReclaimAccuracyTarget", conf.RefaultPolicyConf.ReclaimAccuracyTarget,
"reclaimScanEfficiencyRatio", reclaimScanEfficiencyRatio, "ReclaimScanEfficiencyTarget", conf.RefaultPolicyConf.ReclaimScanEfficiencyTarget,
"refaultDelta", refaultDelta, "pgstealDelta", pgstealDelta, "pgscanDelta", pgscanDelta, "lastOffloadingTargetSize", general.FormatMemoryQuantity(lastStats.offloadingTargetSize),
"result", general.FormatMemoryQuantity(result))
return nil, result
}

type TMOBlockFn func(ci *types.ContainerInfo, conf interface{}) bool
Expand Down Expand Up @@ -229,7 +236,7 @@ func (tmoEngine *tmoEngineInstance) getStats() (TmoStats, error) {
tmoStats.pgscan = pgscan.Value
tmoStats.refault = refault.Value
tmoStats.refaultActivate = refaultActivate.Value
tmoStats.lastOffloadingTargetSize = tmoEngine.offloadingTargetSize
tmoStats.offloadingTargetSize = tmoEngine.offloadingTargetSize
general.Infof("Memory Usage of Cgroup %s, memUsage: %v", tmoEngine.cgpath, memUsage.Value)
return nil
}
Expand Down Expand Up @@ -273,7 +280,7 @@ func (tmoEngine *tmoEngineInstance) getStats() (TmoStats, error) {
tmoStats.pgscan = pgscan.Value
tmoStats.refault = refault.Value
tmoStats.refaultActivate = refaultActivate.Value
tmoStats.lastOffloadingTargetSize = tmoEngine.offloadingTargetSize
tmoStats.offloadingTargetSize = tmoEngine.offloadingTargetSize
general.Infof("Memory Usage of Pod %v, Container %v, memUsage: %v", podUID, containerName, memUsage.Value)
return nil
}
Expand Down Expand Up @@ -336,10 +343,11 @@ func (tmoEngine *tmoEngineInstance) CalculateOffloadingTargetSize() {
if policyFunc, ok := fn.(TmoPolicyFn); ok {
err, targetSize := policyFunc(tmoEngine.lastStats, currStats, tmoEngine.conf)
if err != nil {
general.Infof("Failed to calculate offloading memory size")
general.ErrorS(err, "Failed to calculate offloading memory size")
return
}
tmoEngine.offloadingTargetSize = targetSize
currStats.offloadingTargetSize = targetSize
tmoEngine.lastStats = currStats
tmoEngine.lastTime = currTime
}
Expand Down Expand Up @@ -481,13 +489,15 @@ func (tmo *transparentMemoryOffloading) Reconcile(status *types.MemoryPressureSt
// calculate memory offloading size for each container
for podContainerName, tmoEngine := range tmo.containerTmoEngines {
tmoEngine.CalculateOffloadingTargetSize()
general.Infof("Calculate target offloading size for podContainer: %v, result: %v", podContainerName, tmoEngine.GetOffloadingTargetSize())
general.InfoS("Calculate target offloading size", "podContainer", podContainerName,
"result", general.FormatMemoryQuantity(tmoEngine.GetOffloadingTargetSize()))
}

// calculate memory offloading size for each cgroup
for cgpath, tmoEngine := range tmo.cgpathTmoEngines {
tmoEngine.CalculateOffloadingTargetSize()
general.Infof("Calculate target offloading size for cgroup: %v, result: %v", cgpath, tmoEngine.GetOffloadingTargetSize())
general.InfoS("Calculate target offloading size", "groupPath", cgpath,
"result", general.FormatMemoryQuantity(tmoEngine.GetOffloadingTargetSize()))
}
return nil
}
Expand Down
6 changes: 4 additions & 2 deletions pkg/util/cgroup/manager/cgroup.go
Original file line number Diff line number Diff line change
Expand Up @@ -316,7 +316,7 @@ func SetSwapMaxWithAbsolutePathToParentCgroupRecursive(absCgroupPath string) err
}

parentDir := filepath.Dir(absCgroupPath)
if parentDir != absCgroupPath {
if parentDir != absCgroupPath && parentDir != common.GetCgroupRootPath(common.CgroupSubsysMemory) {
err = SetSwapMaxWithAbsolutePathToParentCgroupRecursive(parentDir)
if err != nil {
return err
Expand All @@ -333,7 +333,9 @@ func SetSwapMaxWithAbsolutePathRecursive(absCgroupPath string) error {
general.Infof("[SetSwapMaxWithAbsolutePathRecursive] on cgroup: %s", absCgroupPath)

// set swap max to parent cgroups recursively
_ = SetSwapMaxWithAbsolutePathToParentCgroupRecursive(filepath.Dir(absCgroupPath))
if err := SetSwapMaxWithAbsolutePathToParentCgroupRecursive(filepath.Dir(absCgroupPath)); err != nil {
return err
}

// set swap max to sub cgroups recursively
err := filepath.Walk(absCgroupPath, func(path string, info fs.FileInfo, err error) error {
Expand Down
37 changes: 28 additions & 9 deletions pkg/util/cgroup/manager/cgroup_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"context"
"fmt"
"io/ioutil"
"math"
"os"
"path/filepath"
"testing"
Expand All @@ -40,23 +41,25 @@ func TestManager(t *testing.T) {
t.Parallel()

_ = GetManager()

testV1Manager(t)
testV2Manager(t)
}

func TestV1Manager(t *testing.T) {
t.Parallel()
func testV1Manager(t *testing.T) {

_ = v1.NewManager()

testManager(t, "v1")
testNetCls(t, "v1")
}

func TestV2Manager(t *testing.T) {
t.Parallel()
func testV2Manager(t *testing.T) {

_ = v2.NewManager()

testManager(t, "v2")
testSwapMax(t)
}

func testManager(t *testing.T, version string) {
Expand Down Expand Up @@ -101,9 +104,7 @@ func testNetCls(t *testing.T, version string) {
assert.Error(t, err)
}

func TestSwapMax(t *testing.T) {
t.Parallel()

func testSwapMax(t *testing.T) {
defer monkey.UnpatchAll()
monkey.Patch(common.CheckCgroup2UnifiedMode, func() bool { return true })
monkey.Patch(GetManager, func() Manager { return v2.NewManager() })
Expand All @@ -120,14 +121,28 @@ func TestSwapMax(t *testing.T) {
return ioutil.WriteFile(f, []byte(data), 0700)
})

tmpDir, err := ioutil.TempDir("", "fake-cgroup")
rootDir := os.TempDir()
dir := filepath.Join(rootDir, "tmp")
err := os.Mkdir(dir, 0700)
assert.NoError(t, err)

tmpDir, err := ioutil.TempDir(dir, "fake-cgroup")
assert.NoError(t, err)
defer os.RemoveAll(tmpDir)
defer os.RemoveAll(dir)

monkey.Patch(common.GetCgroupRootPath, func(s string) string {
t.Logf("rootDir=%v", rootDir)
return rootDir
})

sawpFile := filepath.Join(tmpDir, "memory.swap.max")
err = ioutil.WriteFile(sawpFile, []byte{}, 0700)
assert.NoError(t, err)

sawpFile2 := filepath.Join(dir, "memory.swap.max")
err = ioutil.WriteFile(sawpFile2, []byte{}, 0700)
assert.NoError(t, err)

maxFile := filepath.Join(tmpDir, "memory.max")
err = ioutil.WriteFile(maxFile, []byte("12800"), 0700)
assert.NoError(t, err)
Expand All @@ -143,6 +158,10 @@ func TestSwapMax(t *testing.T) {
assert.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%v", 200), string(s))

s, err = ioutil.ReadFile(sawpFile2)
assert.NoError(t, err)
assert.Equal(t, fmt.Sprintf("%v", math.MaxInt64), string(s))

err = DisableSwapMaxWithAbsolutePathRecursive(tmpDir)
assert.NoError(t, err)

Expand Down

0 comments on commit 1be73c5

Please sign in to comment.