Skip to content

Commit 8618a16

Browse files
committed
libct: fix resetting CPU affinity
unix.CPUSet is limited to 1024 CPUs. Calling unix.SchedSetaffinity(pid, cpuset) removes all CPUs starting from 1024 from the allowed CPUs of pid, even if cpuset is all ones. The consequence of runc trying to reset CPU affinity by default is that it prevents all containers from using those CPUs. This change uses a huge CPU mask to play it safe and get all possible CPUs enabled with a single sched_setaffinity call. Fixes: #5023 Signed-off-by: Antti Kervinen <antti.kervinen@intel.com>
1 parent 59a5ff1 commit 8618a16

File tree

2 files changed

+35
-26
lines changed

2 files changed

+35
-26
lines changed

internal/linux/linux.go

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package linux
22

33
import (
44
"os"
5+
"unsafe"
56

67
"golang.org/x/sys/unix"
78
)
@@ -65,6 +66,22 @@ func Recvfrom(fd int, p []byte, flags int) (n int, from unix.Sockaddr, err error
6566
return n, from, err
6667
}
6768

69+
// SchedSetaffinity wraps the sched_setaffinity syscall without the unix.CPUSet size limitation.
70+
func SchedSetaffinity(pid int, buf []byte) (err error) {
71+
err = retryOnEINTR(func() error {
72+
_, _, errno := unix.Syscall(
73+
unix.SYS_SCHED_SETAFFINITY,
74+
uintptr(pid),
75+
uintptr(len(buf)),
76+
uintptr((unsafe.Pointer)(&buf[0])))
77+
if errno != 0 {
78+
return os.NewSyscallError("sched_setaffinity", errno)
79+
}
80+
return nil
81+
})
82+
return err
83+
}
84+
6885
// Sendmsg wraps [unix.Sendmsg].
6986
func Sendmsg(fd int, p, oob []byte, to unix.Sockaddr, flags int) error {
7087
err := retryOnEINTR(func() error {

libcontainer/process_linux.go

Lines changed: 18 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,7 @@ import (
2424

2525
"github.com/opencontainers/cgroups"
2626
"github.com/opencontainers/cgroups/fs2"
27+
"github.com/opencontainers/runc/internal/linux"
2728
"github.com/opencontainers/runc/libcontainer/configs"
2829
"github.com/opencontainers/runc/libcontainer/intelrdt"
2930
"github.com/opencontainers/runc/libcontainer/internal/userns"
@@ -162,34 +163,25 @@ type setnsProcess struct {
162163
}
163164

164165
// tryResetCPUAffinity tries to reset the CPU affinity of the process
165-
// identified by pid to include all possible CPUs (notwithstanding cgroup
166-
// cpuset restrictions and isolated CPUs).
166+
// identified by pid to include all possible CPUs.
167167
func tryResetCPUAffinity(pid int) {
168-
// When resetting the CPU affinity, we want to match the configured cgroup
169-
// cpuset (or the default set of all CPUs, if no cpuset is configured)
170-
// rather than some more restrictive affinity we were spawned in (such as
171-
// one that may have been inherited from systemd). The cpuset cgroup used
172-
// to reconfigure the cpumask automatically for joining processes, but
173-
// kcommit da019032819a ("sched: Enforce user requested affinity") changed
174-
// this behaviour in Linux 6.2.
168+
// When resetting the CPU affinity, we want to allow all
169+
// possible CPUs in the system, including those not in
170+
// cpuset.cpus, online or even present (hot-plugged) at call
171+
// time. Using a cpumask any tighter than that may disallow
172+
// using those CPUs if they are added to cpuset.cpus later.
175173
//
176-
// Parsing cpuset.cpus.effective is quite inefficient (and looking at
177-
// things like /proc/stat would be wrong for most nested containers), but
178-
// luckily sched_setaffinity(2) will implicitly:
179-
//
180-
// * Clamp the cpumask so that it matches the current number of CPUs on
181-
// the system.
182-
// * Mask out any CPUs that are not a member of the target task's
183-
// configured cgroup cpuset.
184-
//
185-
// So we can just pass a very large array of set cpumask bits and the
186-
// kernel will silently convert that to the correct value very cheaply.
187-
var cpuset unix.CPUSet
188-
cpuset.Fill() // set all bits
189-
if err := unix.SchedSetaffinity(pid, &cpuset); err != nil {
190-
logrus.WithError(
191-
os.NewSyscallError("sched_setaffinity", err),
192-
).Warnf("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity", pid)
174+
// Use a similar huge buffer as the Go 1.25 runtime in getCPUCount()
175+
// does for mask. This avoids reading and parsing
176+
// /sys/devices/system/cpu/possible.
177+
const maxCPUs = 64 * 1024
178+
buf := make([]byte, maxCPUs/8)
179+
for i := range buf {
180+
buf[i] = 0xff
181+
}
182+
if err := linux.SchedSetaffinity(pid, buf); err != nil {
183+
logrus.WithError(err).Warnf("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity", pid)
184+
return
193185
}
194186
}
195187

0 commit comments

Comments
 (0)