From 55e272e263ef72de89e1582a0bc3d2ec71362196 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavol=20=C5=BD=C3=A1=C4=8Dik?= Date: Fri, 6 Dec 2024 10:36:34 +0100 Subject: [PATCH 1/2] scheduler: Log process info when its affinity cannot be changed Include its current affinity, cmdline, and cgroup. This may help identify issues with tuning, e.g., in cases when the process disappears soon after affining it fails. Resolves: RHEL-69933 --- tuned/plugins/plugin_scheduler.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tuned/plugins/plugin_scheduler.py b/tuned/plugins/plugin_scheduler.py index 7d9bcbe0..b8763595 100644 --- a/tuned/plugins/plugin_scheduler.py +++ b/tuned/plugins/plugin_scheduler.py @@ -661,6 +661,9 @@ def _ignore_set_affinity_error(self, pid): log.warning("Affinity of task with PID %d cannot be changed, the task's affinity mask is fixed." % pid) return True + log.info("Task %d cmdline: %s" % (pid, self._get_cmdline(process))) + log.info("Task %d cgroup: %s" % (pid, self._get_stat_cgroup(process))) + log.info("Task %d affinity: %s" % (pid, list(self._scheduler_utils.get_affinity(pid)))) except (OSError, IOError) as e: if e.errno == errno.ENOENT or e.errno == errno.ESRCH: log.debug("Failed to get task info for PID %d, the task vanished." From 7113c91b5b0726dfd0fe7c3792beac1d54fba447 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Pavol=20=C5=BD=C3=A1=C4=8Dik?= Date: Wed, 15 Jan 2025 15:57:13 +0100 Subject: [PATCH 2/2] scheduler: Postpone cgroup blacklist check and double-check after fail It may happen that a process gets moved into a blacklisted cgroup when TuneD is already applying a profile. Postpone the filtering of processes according to the blacklist to the last possible moment to minimize the race window. When setting process affinity fails, re-check whether the process belongs to a blacklisted cgroup once again and do not report an error if that's the case. Resolves: RHEL-72981 --- tuned/plugins/plugin_scheduler.py | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) diff --git a/tuned/plugins/plugin_scheduler.py b/tuned/plugins/plugin_scheduler.py index b8763595..ffa54f0b 100644 --- a/tuned/plugins/plugin_scheduler.py +++ b/tuned/plugins/plugin_scheduler.py @@ -644,15 +644,24 @@ def _set_rt(self, pid, sched, prio): def _is_kthread(self, process): return process["stat"]["flags"] & procfs.pidstat.PF_KTHREAD != 0 + def _process_in_blacklisted_cgroup(self, process): + if self._cgroup_ps_blacklist_re == "": + return False + return re.search(self._cgroup_ps_blacklist_re, self._get_stat_cgroup(process)) is not None + # Returns True if we can ignore a failed affinity change of # a process with the given PID and therefore not report it as an error. - def _ignore_set_affinity_error(self, pid): + def _ignore_set_affinity_error(self, process): + pid = process.pid try: - process = procfs.process(pid) if process["stat"]["state"] == "Z": log.debug("Affinity of zombie task with PID %d could not be changed." % pid) return True + if self._process_in_blacklisted_cgroup(process): + log.debug("Affinity of task with PID %d could not be changed, the task was moved into a blacklisted cgroup." + % pid) + return True if process["stat"].is_bound_to_cpu(): if self._is_kthread(process): log.debug("Affinity of kernel thread with PID %d cannot be changed, the task's affinity mask is fixed." @@ -1168,13 +1177,17 @@ def _get_affinity(self, pid): return res def _set_affinity(self, pid, affinity): + process = procfs.process(pid) + if self._process_in_blacklisted_cgroup(process): + log.debug("Not setting CPU affinity of PID %d, the task belongs to a blacklisted cgroup." % pid) + return log.debug("Setting CPU affinity of PID %d to '%s'." % (pid, affinity)) try: self._scheduler_utils.set_affinity(pid, affinity) # Workaround for old python-schedutils (pre-0.4) which # incorrectly raised SystemError instead of OSError except (SystemError, OSError) as e: - if not self._ignore_set_affinity_error(pid): + if not self._ignore_set_affinity_error(process): log.error("Failed to set affinity of PID %d to '%s': %s" % (pid, affinity, e)) @@ -1191,9 +1204,6 @@ def _set_all_obj_affinity(self, objs, affinity, threads = False): if self._ps_blacklist != "": psl = [v for v in psl if re.search(self._ps_blacklist, self._get_stat_comm(v)) is None] - if self._cgroup_ps_blacklist_re != "": - psl = [v for v in psl if re.search(self._cgroup_ps_blacklist_re, - self._get_stat_cgroup(v)) is None] psd = dict([(v.pid, v) for v in psl]) for pid in psd: try: