@@ -24,6 +24,7 @@ import (
2424
2525 "github.com/opencontainers/cgroups"
2626 "github.com/opencontainers/cgroups/fs2"
27+ "github.com/opencontainers/runc/internal/linux"
2728 "github.com/opencontainers/runc/libcontainer/configs"
2829 "github.com/opencontainers/runc/libcontainer/intelrdt"
2930 "github.com/opencontainers/runc/libcontainer/internal/userns"
@@ -162,34 +163,25 @@ type setnsProcess struct {
162163}
163164
164165// tryResetCPUAffinity tries to reset the CPU affinity of the process
165- // identified by pid to include all possible CPUs (notwithstanding cgroup
166- // cpuset restrictions and isolated CPUs).
166+ // identified by pid to include all possible CPUs.
167167func tryResetCPUAffinity (pid int ) {
168- // When resetting the CPU affinity, we want to match the configured cgroup
169- // cpuset (or the default set of all CPUs, if no cpuset is configured)
170- // rather than some more restrictive affinity we were spawned in (such as
171- // one that may have been inherited from systemd). The cpuset cgroup used
172- // to reconfigure the cpumask automatically for joining processes, but
173- // kcommit da019032819a ("sched: Enforce user requested affinity") changed
174- // this behaviour in Linux 6.2.
168+ // When resetting the CPU affinity, we want to allow all
169+ // possible CPUs in the system, including those not in
170+ // cpuset.cpus, online or even present (hot-plugged) at call
171+ // time. Using a cpumask any tighter than that may disallow
172+ // using those CPUs if they are added to cpuset.cpus later.
175173 //
176- // Parsing cpuset.cpus.effective is quite inefficient (and looking at
177- // things like /proc/stat would be wrong for most nested containers), but
178- // luckily sched_setaffinity(2) will implicitly:
179- //
180- // * Clamp the cpumask so that it matches the current number of CPUs on
181- // the system.
182- // * Mask out any CPUs that are not a member of the target task's
183- // configured cgroup cpuset.
184- //
185- // So we can just pass a very large array of set cpumask bits and the
186- // kernel will silently convert that to the correct value very cheaply.
187- var cpuset unix.CPUSet
188- cpuset .Fill () // set all bits
189- if err := unix .SchedSetaffinity (pid , & cpuset ); err != nil {
190- logrus .WithError (
191- os .NewSyscallError ("sched_setaffinity" , err ),
192- ).Warnf ("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity" , pid )
174+ // Use a similarly huge buffer to the one the Go 1.25 runtime
175+ // uses for its mask in getCPUCount(). This avoids reading and
176+ // parsing /sys/devices/system/cpu/possible.
177+ const maxCPUs = 64 * 1024
178+ buf := make ([]byte , maxCPUs / 8 )
179+ for i := range buf {
180+ buf [i ] = 0xff
181+ }
182+ if err := linux .SchedSetaffinity (pid , buf ); err != nil {
183+ logrus .WithError (err ).Warnf ("resetting the CPU affinity of pid %d failed -- the container process may inherit runc's CPU affinity" , pid )
184+ return
193185 }
194186}
195187
0 commit comments