Skip to content

Commit

Permalink
Get limits from parent in cgroupv2 when max is specified in leaf cgro…
Browse files Browse the repository at this point in the history
…up node.

Without this fix, incorrect values were passed to --total-memory and --num-cpu.
This meant applications were reading incorrect values from /proc/meminfo, etc.
while the external cgroups were enforcing the correct limits. This can lead to
bad/surprising app behavior.

Fixes #9850

PiperOrigin-RevId: 579352625
  • Loading branch information
manninglucas authored and gvisor-bot committed Nov 4, 2023
1 parent a8a46b4 commit 42b69d0
Show file tree
Hide file tree
Showing 2 changed files with 127 additions and 14 deletions.
64 changes: 50 additions & 14 deletions runsc/cgroup/cgroup_v2.go
Original file line number Diff line number Diff line change
Expand Up @@ -40,9 +40,12 @@ import (
)

const (
subtreeControl = "cgroup.subtree_control"
controllersFile = "cgroup.controllers"
cgroup2Key = "cgroup2"
subtreeControl = "cgroup.subtree_control"
controllersFile = "cgroup.controllers"
cgroup2Key = "cgroup2"
memoryLimitCgroup = "memory.max"
cpuLimitCgroup = "cpu.max"
maxLimitStr = "max"

// https://www.kernel.org/doc/html/latest/admin-guide/cgroup-v2.html
defaultPeriod = 100000
Expand Down Expand Up @@ -230,14 +233,29 @@ func (c *cgroupV2) Join() (func(), error) {
return cu.Release(), nil
}

func getCPUQuota(path string) (float64, error) {
cpuMax, err := getValue(path, cpuLimitCgroup)
if err != nil {
return -1, err
}
return parseCPUQuota(cpuMax)
}

// CPUQuota returns the CFS CPU quota.
func (c *cgroupV2) CPUQuota() (float64, error) {
cpuMax, err := getValue(c.MakePath(""), "cpu.max")
cpuQuota, err := getCPUQuota(c.MakePath(""))
if err != nil {
return -1, err
}

return parseCPUQuota(cpuMax)
// In cgroupv2+systemd, limits are set in the parent slice rather
// than the leaf node. Check the parent to see if this is the case.
if cpuQuota == -1 {
cpuQuota, err = getCPUQuota(filepath.Dir(c.MakePath("")))
if err != nil && errors.Is(err, os.ErrNotExist) {
err = nil
}
}
return cpuQuota, nil
}

func parseCPUQuota(cpuMax string) (float64, error) {
Expand All @@ -247,7 +265,7 @@ func parseCPUQuota(cpuMax string) (float64, error) {
}

// no cpu limit if quota is max
if data[0] == "max" {
if data[0] == maxLimitStr {
return -1, nil
}

Expand Down Expand Up @@ -298,21 +316,39 @@ func (c *cgroupV2) NumCPU() (int, error) {
return countCpuset(strings.TrimSpace(cpuset))
}

func getMemoryLimit(path string) (string, error) {
limStr, err := getValue(path, memoryLimitCgroup)
if err != nil {
return "", err
}
return strings.TrimSpace(limStr), nil
}

// MemoryLimit returns the memory limit.
func (c *cgroupV2) MemoryLimit() (uint64, error) {
limStr, err := getValue(c.MakePath(""), "memory.max")
limStr, err := getMemoryLimit(c.MakePath(""))
if err != nil {
return 0, err
}
limStr = strings.TrimSpace(limStr)
if limStr == "max" {
return math.MaxUint64, nil
// In cgroupv2+systemd, limits are set in the parent slice rather
// than the leaf node. Check the parent to see if this is the case.
if limStr == maxLimitStr {
parentLimStr, err := getMemoryLimit(filepath.Dir(c.MakePath("")))
if err != nil && !errors.Is(err, os.ErrNotExist) {
return 0, err
}
if parentLimStr != "" {
limStr = parentLimStr
}
if limStr == maxLimitStr {
return math.MaxUint64, nil
}
}
return strconv.ParseUint(limStr, 10, 64)
}

// MakePath builds a path to the given controller.
func (c *cgroupV2) MakePath(controllerName string) string {
func (c *cgroupV2) MakePath(string) string {
return filepath.Join(c.Mountpoint, c.Path)
}

Expand Down Expand Up @@ -388,7 +424,7 @@ func (*cpu2) set(spec *specs.LinuxResources, path string) error {
}

if spec.CPU.Period != nil || spec.CPU.Quota != nil {
v := "max"
v := maxLimitStr
if spec.CPU.Quota != nil && *spec.CPU.Quota > 0 {
v = strconv.FormatInt(*spec.CPU.Quota, 10)
}
Expand Down Expand Up @@ -760,7 +796,7 @@ func numToStr(value int64) (ret string) {
case value == 0:
ret = ""
case value == -1:
ret = "max"
ret = maxLimitStr
default:
ret = strconv.FormatInt(value, 10)
}
Expand Down
77 changes: 77 additions & 0 deletions runsc/cgroup/cgroup_v2_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,83 @@ func TestLoadPathsCgroupv2(t *testing.T) {
}
}

func TestGetLimits(t *testing.T) {
for _, tc := range []struct {
name string
mem string
cpu string
expMem uint64
expCPU int
limitPath string
path string
}{
{
name: "get limit from parent cgroup",
mem: "150",
cpu: "100 50",
limitPath: "user.slice",
path: "user.slice/container.scope",
expMem: 150,
expCPU: 2,
},
{
name: "get limit from leaf cgroup",
mem: "150",
cpu: "100 50",
limitPath: "user.slice/container.scope",
path: "user.slice/container.scope",
expMem: 150,
expCPU: 2,
},
} {
t.Run(tc.name, func(t *testing.T) {
testutil.TmpDir()
dir, err := ioutil.TempDir(testutil.TmpDir(), "cgroup")
if err != nil {
t.Fatalf("error creating temporary directory: %v", err)
}
defer os.RemoveAll(dir)

fullPath := filepath.Join(dir, tc.path)
if err := os.MkdirAll(fullPath, 0o777); err != nil {
t.Fatalf("os.MkdirAll(): %v", err)
}
cg := cgroupV2{
Mountpoint: dir,
Path: tc.path,
}

if err := os.WriteFile(filepath.Join(dir, tc.path, "memory.max"), []byte("max"), 0o777); err != nil {
t.Fatalf("os.WriteFile(): %v", err)
}
if err := os.WriteFile(filepath.Join(dir, tc.path, "cpu.max"), []byte("max max"), 0o777); err != nil {
t.Fatalf("os.WriteFile(): %v", err)
}
if err := os.WriteFile(filepath.Join(dir, tc.limitPath, "memory.max"), []byte(tc.mem), 0o655); err != nil {
t.Fatalf("os.WriteFile(): %v", err)
}
if err := os.WriteFile(filepath.Join(dir, tc.limitPath, "cpu.max"), []byte(tc.cpu), 0o655); err != nil {
t.Fatalf("os.WriteFile(): %v", err)
}

quota, err := cg.CPUQuota()
if err != nil {
t.Fatalf("cg.CPUQuota(): %v", err)
}
if int(quota) != tc.expCPU {
t.Errorf("cg.CPUQuota() = %v, want %v", quota, tc.expCPU)
}
mem, err := cg.MemoryLimit()
if err != nil {
t.Fatalf("cg.MemoryLimit(): %v", err)
}
if mem != tc.expMem {
t.Errorf("cg.MemoryLimit() = %v, want %v", mem, tc.expMem)
}
})
}
}

func TestNumToStr(t *testing.T) {
cases := map[int64]string{
0: "",
Expand Down

0 comments on commit 42b69d0

Please sign in to comment.