
Commit 175ee25

mrutland-arm authored and okta-10 committed
rcu: Correctly handle sparse possible cpus
In many cases in the RCU tree code, we iterate over the set of cpus for a leaf
node described by rcu_node::grplo and rcu_node::grphi, checking per-cpu data
for each cpu in this range. However, if the set of possible cpus is sparse,
some cpus described in this range are not possible, and thus no per-cpu region
will have been allocated (or initialised) for them by the generic percpu code.

Erroneous accesses to a per-cpu area for these !possible cpus may fault or may
hit other data depending on the address generated when the erroneous per-cpu
offset is applied. In practice, both cases have been observed on arm64 hardware
(the former being silent, but detectable with additional patches).

To avoid issues resulting from this, we must iterate over the set of *possible*
cpus for a given leaf node. This patch adds a new helper,
for_each_leaf_node_possible_cpu, to enable this. As iteration is often
intertwined with rcu_node local bitmask manipulation, a new leaf_node_cpu_bit
helper is added to make this simpler and more consistent. The RCU tree code is
made to use both of these where appropriate.

Without this patch, running reboot at a shell can result in an oops like:

[ 3369.075979] Unable to handle kernel paging request at virtual address ffffff8008b21b4c
[ 3369.083881] pgd = ffffffc3ecdda000
[ 3369.087270] [ffffff8008b21b4c] *pgd=00000083eca48003, *pud=00000083eca48003, *pmd=0000000000000000
[ 3369.096222] Internal error: Oops: 96000007 [#1] PREEMPT SMP
[ 3369.101781] Modules linked in:
[ 3369.104825] CPU: 2 PID: 1817 Comm: NetworkManager Tainted: G W 4.6.0+ #3
[ 3369.121239] task: ffffffc0fa13e000 ti: ffffffc3eb940000 task.ti: ffffffc3eb940000
[ 3369.128708] PC is at sync_rcu_exp_select_cpus+0x188/0x510
[ 3369.134094] LR is at sync_rcu_exp_select_cpus+0x104/0x510
[ 3369.139479] pc : [<ffffff80081109a8>] lr : [<ffffff8008110924>] pstate: 200001c5
[ 3369.146860] sp : ffffffc3eb9435a0
[ 3369.150162] x29: ffffffc3eb9435a0 x28: ffffff8008be4f88
[ 3369.155465] x27: ffffff8008b66c80 x26: ffffffc3eceb2600
[ 3369.160767] x25: 0000000000000001 x24: ffffff8008be4f88
[ 3369.166070] x23: ffffff8008b51c3c x22: ffffff8008b66c80
[ 3369.171371] x21: 0000000000000001 x20: ffffff8008b21b40
[ 3369.176673] x19: ffffff8008b66c80 x18: 0000000000000000
[ 3369.181975] x17: 0000007fa951a010 x16: ffffff80086a30f0
[ 3369.187278] x15: 0000007fa9505590 x14: 0000000000000000
[ 3369.192580] x13: ffffff8008b51000 x12: ffffffc3eb940000
[ 3369.197882] x11: 0000000000000006 x10: ffffff8008b51b78
[ 3369.203184] x9 : 0000000000000001 x8 : ffffff8008be4000
[ 3369.208486] x7 : ffffff8008b21b40 x6 : 0000000000001003
[ 3369.213788] x5 : 0000000000000000 x4 : ffffff8008b27280
[ 3369.219090] x3 : ffffff8008b21b4c x2 : 0000000000000001
[ 3369.224406] x1 : 0000000000000001 x0 : 0000000000000140
...
[ 3369.972257] [<ffffff80081109a8>] sync_rcu_exp_select_cpus+0x188/0x510
[ 3369.978685] [<ffffff80081128b4>] synchronize_rcu_expedited+0x64/0xa8
[ 3369.985026] [<ffffff80086b987c>] synchronize_net+0x24/0x30
[ 3369.990499] [<ffffff80086ddb54>] dev_deactivate_many+0x28c/0x298
[ 3369.996493] [<ffffff80086b6bb8>] __dev_close_many+0x60/0xd0
[ 3370.002052] [<ffffff80086b6d48>] __dev_close+0x28/0x40
[ 3370.007178] [<ffffff80086bf62c>] __dev_change_flags+0x8c/0x158
[ 3370.012999] [<ffffff80086bf718>] dev_change_flags+0x20/0x60
[ 3370.018558] [<ffffff80086cf7f0>] do_setlink+0x288/0x918
[ 3370.023771] [<ffffff80086d0798>] rtnl_newlink+0x398/0x6a8
[ 3370.029158] [<ffffff80086cee84>] rtnetlink_rcv_msg+0xe4/0x220
[ 3370.034891] [<ffffff80086e274c>] netlink_rcv_skb+0xc4/0xf8
[ 3370.040364] [<ffffff80086ced8c>] rtnetlink_rcv+0x2c/0x40
[ 3370.045663] [<ffffff80086e1fe8>] netlink_unicast+0x160/0x238
[ 3370.051309] [<ffffff80086e24b8>] netlink_sendmsg+0x2f0/0x358
[ 3370.056956] [<ffffff80086a0070>] sock_sendmsg+0x18/0x30
[ 3370.062168] [<ffffff80086a21cc>] ___sys_sendmsg+0x26c/0x280
[ 3370.067728] [<ffffff80086a30ac>] __sys_sendmsg+0x44/0x88
[ 3370.073027] [<ffffff80086a3100>] SyS_sendmsg+0x10/0x20
[ 3370.078153] [<ffffff8008085e70>] el0_svc_naked+0x24/0x28

Signed-off-by: Mark Rutland <mark.rutland@arm.com>
Reported-by: Dennis Chen <dennis.chen@arm.com>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Josh Triplett <josh@joshtriplett.org>
Cc: Lai Jiangshan <jiangshanlai@gmail.com>
Cc: Mathieu Desnoyers <mathieu.desnoyers@efficios.com>
Cc: Steve Capper <steve.capper@arm.com>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Will Deacon <will.deacon@arm.com>
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Paul E. McKenney <paulmck@linux.vnet.ibm.com>
Signed-off-by: celtare21 <celtare21@gmail.com>
Signed-off-by: Oktapra Amtono <oktapra.amtono@gmail.com>
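As a rough illustration of the failure mode described in the commit message, the following is a minimal userspace sketch (not kernel code; the names possible[], percpu_data[], GRPLO and GRPHI are invented for the example): when the possible set is sparse, walking every ID in [grplo, grphi] touches storage that was never allocated, while iterating only possible IDs avoids it.

/*
 * Userspace model of the bug: "per-cpu" data exists only for possible CPUs,
 * so walking every ID in [GRPLO, GRPHI] reaches storage never allocated.
 */
#include <stdio.h>
#include <stdlib.h>

#define NR_CPUS 8
#define GRPLO   0
#define GRPHI   7

/* Sparse set of possible CPUs: 0, 2, 5, 7. */
static const int possible[NR_CPUS] = { 1, 0, 1, 0, 0, 1, 0, 1 };
static int *percpu_data[NR_CPUS];	/* allocated only for possible CPUs */

/* Rough stand-in for for_each_leaf_node_possible_cpu(): skip !possible IDs. */
#define for_each_possible_cpu_in(lo, hi, cpu) \
	for ((cpu) = (lo); (cpu) <= (hi); (cpu)++) \
		if (!possible[cpu]) continue; else

int main(void)
{
	int cpu;

	for (cpu = 0; cpu < NR_CPUS; cpu++)
		if (possible[cpu])
			percpu_data[cpu] = calloc(1, sizeof(int));

	/*
	 * Old-style loop: visits every ID in [GRPLO, GRPHI], including
	 * !possible ones where percpu_data[cpu] is NULL -- the userspace
	 * analogue of applying a per-cpu offset for a CPU that was never
	 * set up (here we only report it rather than dereference it).
	 */
	for (cpu = GRPLO; cpu <= GRPHI; cpu++)
		if (!percpu_data[cpu])
			printf("cpu %d: would access unallocated data\n", cpu);

	/* New-style loop: only possible CPUs are ever dereferenced. */
	for_each_possible_cpu_in(GRPLO, GRPHI, cpu)
		*percpu_data[cpu] += 1;

	return 0;
}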
1 parent 4c4a937 commit 175ee25

4 files changed: +34 -23 lines changed


kernel/rcu/tree.c

Lines changed: 9 additions & 12 deletions
@@ -1272,9 +1272,9 @@ static void rcu_dump_cpu_stacks(struct rcu_state *rsp)
 	rcu_for_each_leaf_node(rsp, rnp) {
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		if (rnp->qsmask != 0) {
-			for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-				if (rnp->qsmask & (1UL << cpu))
-					dump_cpu_task(rnp->grplo + cpu);
+			for_each_leaf_node_possible_cpu(rnp, cpu)
+				if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu))
+					dump_cpu_task(cpu);
 		}
 		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 	}
@@ -1345,10 +1345,9 @@ static void print_other_cpu_stall(struct rcu_state *rsp, unsigned long gpnum)
 		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 		ndetected += rcu_print_task_stall(rnp);
 		if (rnp->qsmask != 0) {
-			for (cpu = 0; cpu <= rnp->grphi - rnp->grplo; cpu++)
-				if (rnp->qsmask & (1UL << cpu)) {
-					print_cpu_stall_info(rsp,
-							     rnp->grplo + cpu);
+			for_each_leaf_node_possible_cpu(rnp, cpu)
+				if (rnp->qsmask & leaf_node_cpu_bit(rnp, cpu)) {
+					print_cpu_stall_info(rsp, cpu);
 					ndetected++;
 				}
 		}
@@ -2789,7 +2788,6 @@ static void force_qs_rnp(struct rcu_state *rsp,
 						  unsigned long *maxj),
 			 bool *isidle, unsigned long *maxj)
 {
-	unsigned long bit;
 	int cpu;
 	unsigned long flags;
 	unsigned long mask;
@@ -2824,9 +2822,8 @@ static void force_qs_rnp(struct rcu_state *rsp,
 				continue;
 			}
 		}
-		cpu = rnp->grplo;
-		bit = 1;
-		for (; cpu <= rnp->grphi; cpu++, bit <<= 1) {
+		for_each_leaf_node_possible_cpu(rnp, cpu) {
+			unsigned long bit = leaf_node_cpu_bit(rnp, cpu);
 			if ((rnp->qsmask & bit) != 0) {
 				if (f(per_cpu_ptr(rsp->rda, cpu), isidle, maxj))
 					mask |= bit;
@@ -3654,7 +3651,7 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)

 	/* Set up local state, ensuring consistent view of global state. */
 	raw_spin_lock_irqsave_rcu_node(rnp, flags);
-	rdp->grpmask = 1UL << (cpu - rdp->mynode->grplo);
+	rdp->grpmask = leaf_node_cpu_bit(rdp->mynode, cpu);
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
 	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_EXIT_IDLE);
 	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);

kernel/rcu/tree.h

Lines changed: 15 additions & 0 deletions
@@ -254,6 +254,13 @@ struct rcu_node {
 	wait_queue_head_t exp_wq[4];
 } ____cacheline_internodealigned_in_smp;

+/*
+ * Bitmasks in an rcu_node cover the interval [grplo, grphi] of CPU IDs, and
+ * are indexed relative to this interval rather than the global CPU ID space.
+ * This generates the bit for a CPU in node-local masks.
+ */
+#define leaf_node_cpu_bit(rnp, cpu)	(1UL << ((cpu) - (rnp)->grplo))
+
 /*
  * Do a full breadth-first scan of the rcu_node structures for the
  * specified rcu_state structure.
@@ -281,6 +288,14 @@ struct rcu_node {
 	for ((rnp) = (rsp)->level[rcu_num_lvls - 1]; \
 	     (rnp) < &(rsp)->node[rcu_num_nodes]; (rnp)++)

+/*
+ * Iterate over all possible CPUs in a leaf RCU node.
+ */
+#define for_each_leaf_node_possible_cpu(rnp, cpu) \
+	for ((cpu) = cpumask_next(rnp->grplo - 1, cpu_possible_mask); \
+	     cpu <= rnp->grphi; \
+	     cpu = cpumask_next((cpu), cpu_possible_mask))
+
 /*
  * Union to allow "aggregate OR" operation on the need for a quiescent
  * state by the normal and expedited grace periods.
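To see what the two new tree.h helpers do, here is a small self-contained userspace model (mask_next() is a toy stand-in for cpumask_next(); the macro names are reused for clarity but this is not the kernel implementation): starting the scan at grplo - 1 makes grplo itself the first candidate, holes in the possible mask are skipped, and iteration stops once past grphi.

/*
 * Userspace model of leaf_node_cpu_bit() / for_each_leaf_node_possible_cpu().
 */
#include <stdio.h>

#define NBITS 16

/* Toy cpumask_next(): first set bit strictly above n, or NBITS if none. */
static int mask_next(int n, unsigned int mask)
{
	for (int i = n + 1; i < NBITS; i++)
		if (mask & (1u << i))
			return i;
	return NBITS;
}

/* Node-local bit for a global CPU ID, relative to the leaf's grplo. */
#define leaf_node_cpu_bit(grplo, cpu)	(1UL << ((cpu) - (grplo)))

/* Visit only possible CPUs within [grplo, grphi]. */
#define for_each_leaf_node_possible_cpu(grplo, grphi, possible, cpu)	\
	for ((cpu) = mask_next((grplo) - 1, (possible));		\
	     (cpu) <= (grphi);						\
	     (cpu) = mask_next((cpu), (possible)))

int main(void)
{
	/* Leaf covers CPUs 4..7; only CPUs 4 and 6 are possible. */
	const int grplo = 4, grphi = 7;
	const unsigned int possible = (1u << 4) | (1u << 6) | (1u << 9);
	int cpu;

	for_each_leaf_node_possible_cpu(grplo, grphi, possible, cpu)
		printf("cpu %d -> node-local bit %#lx\n",
		       cpu, leaf_node_cpu_bit(grplo, cpu));

	/* CPU 9 is possible but outside [grplo, grphi], so it is not visited. */
	return 0;
}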

kernel/rcu/tree_exp.h

Lines changed: 7 additions & 9 deletions
@@ -344,7 +344,6 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 {
 	int cpu;
 	unsigned long flags;
-	unsigned long mask;
 	unsigned long mask_ofl_test;
 	unsigned long mask_ofl_ipi;
 	int ret;
@@ -356,7 +355,7 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,

 	/* Each pass checks a CPU for identity, offline, and idle. */
 	mask_ofl_test = 0;
-	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++) {
+	for_each_leaf_node_possible_cpu(rnp, cpu) {
 		struct rcu_data *rdp = per_cpu_ptr(rsp->rda, cpu);
 		struct rcu_dynticks *rdtp = &per_cpu(rcu_dynticks, cpu);

@@ -376,8 +375,8 @@ static void sync_rcu_exp_select_cpus(struct rcu_state *rsp,
 	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);

 	/* IPI the remaining CPUs for expedited quiescent state. */
-	mask = 1;
-	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+	for_each_leaf_node_possible_cpu(rnp, cpu) {
+		unsigned long mask = leaf_node_cpu_bit(rnp, cpu);
 		if (!(mask_ofl_ipi & mask))
 			continue;
 retry_ipi:
@@ -440,10 +439,10 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 	ndetected = 0;
 	rcu_for_each_leaf_node(rsp, rnp) {
 		ndetected += rcu_print_task_exp_stall(rnp);
-		mask = 1;
-		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+		for_each_leaf_node_possible_cpu(rnp, cpu) {
 			struct rcu_data *rdp;

+			mask = leaf_node_cpu_bit(rnp, cpu);
 			if (!(rnp->expmask & mask))
 				continue;
 			ndetected++;
@@ -453,7 +452,6 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 				"o."[!!(rdp->grpmask & rnp->expmaskinit)],
 				"N."[!!(rdp->grpmask & rnp->expmaskinitnext)]);
 		}
-		mask <<= 1;
 	}
 	pr_cont(" } %lu jiffies s: %lu root: %#lx/%c\n",
 		jiffies - jiffies_start, rsp->expedited_sequence,
@@ -473,8 +471,8 @@ static void synchronize_sched_expedited_wait(struct rcu_state *rsp)
 		pr_cont("\n");
 	}
 	rcu_for_each_leaf_node(rsp, rnp) {
-		mask = 1;
-		for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask <<= 1) {
+		for_each_leaf_node_possible_cpu(rnp, cpu) {
+			mask = leaf_node_cpu_bit(rnp, cpu);
 			if (!(rnp->expmask & mask))
 				continue;
 			dump_cpu_task(cpu);

kernel/rcu/tree_plugin.h

Lines changed: 3 additions & 2 deletions
@@ -1252,8 +1252,9 @@ static void rcu_boost_kthread_setaffinity(struct rcu_node *rnp, int outgoingcpu)
 		return;
 	if (!zalloc_cpumask_var(&cm, GFP_KERNEL))
 		return;
-	for (cpu = rnp->grplo; cpu <= rnp->grphi; cpu++, mask >>= 1)
-		if ((mask & 0x1) && cpu != outgoingcpu)
+	for_each_leaf_node_possible_cpu(rnp, cpu)
+		if ((mask & leaf_node_cpu_bit(rnp, cpu)) &&
+		    cpu != outgoingcpu)
 			cpumask_set_cpu(cpu, cm);
 	if (cpumask_weight(cm) == 0)
 		cpumask_setall(cm);
