blk-mq: move cpuhp callback registering out of q->sysfs_lock
[ Upstream commit 22465bb ]

Registering and unregistering a cpuhp callback requires the global CPU
hotplug lock, which is used everywhere. Meanwhile, q->sysfs_lock is used
almost everywhere in the block layer.

It is easy to trigger the lockdep warning[1] by connecting the two locks.

Fix the warning by moving blk-mq's cpuhp callback registration out of
q->sysfs_lock. Add a dedicated global lock that covers registering and
unregistering each hctx's cpuhp callbacks; this is safe because an hctx
is guaranteed to be live as long as its request_queue is live.

[1] https://lore.kernel.org/lkml/Z04pz3AlvI4o0Mr8@agluck-desk3/
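
A minimal userspace sketch of the resulting lock ordering may make the fix
easier to follow. It is only an illustration with hypothetical names
(sysfs_lock, cpuhp_lock and hotplug_lock stand in for q->sysfs_lock, the new
blk_mq_cpuhp_lock and the global CPU hotplug lock), not kernel code:

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t sysfs_lock = PTHREAD_MUTEX_INITIALIZER;   /* models q->sysfs_lock */
static pthread_mutex_t cpuhp_lock = PTHREAD_MUTEX_INITIALIZER;   /* models blk_mq_cpuhp_lock */
static pthread_mutex_t hotplug_lock = PTHREAD_MUTEX_INITIALIZER; /* models the CPU hotplug lock */

/* models cpuhp callback registration: it needs the (global) hotplug lock */
static void register_cpuhp(const char *hctx)
{
	pthread_mutex_lock(&hotplug_lock);
	printf("registered cpuhp callback for %s\n", hctx);
	pthread_mutex_unlock(&hotplug_lock);
}

static void realloc_hw_ctxs(void)
{
	/* queue (re)configuration that genuinely needs the per-queue lock */
	pthread_mutex_lock(&sysfs_lock);
	printf("reworking hw queues under sysfs_lock\n");
	pthread_mutex_unlock(&sysfs_lock);

	/*
	 * cpuhp registration now runs afterwards, serialized only by the
	 * dedicated lock, so no sysfs_lock -> hotplug_lock ordering is
	 * ever created.
	 */
	pthread_mutex_lock(&cpuhp_lock);
	register_cpuhp("hctx0");
	pthread_mutex_unlock(&cpuhp_lock);
}

int main(void)
{
	realloc_hw_ctxs();
	return 0;
}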

Cc: Reinette Chatre <reinette.chatre@intel.com>
Cc: Fenghua Yu <fenghua.yu@intel.com>
Cc: Peter Newman <peternewman@google.com>
Cc: Babu Moger <babu.moger@amd.com>
Reported-by: Luck Tony <tony.luck@intel.com>
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Tony Luck <tony.luck@intel.com>
Link: https://lore.kernel.org/r/20241206111611.978870-3-ming.lei@redhat.com
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Stable-dep-of: be26ba9 ("block: Fix potential deadlock while freezing queue and acquiring sysfs_lock")
Signed-off-by: Sasha Levin <sashal@kernel.org>
Ming Lei authored and gregkh committed Dec 19, 2024
1 parent 7ccd621 commit c1291ea
Showing 1 changed file with 92 additions and 6 deletions.
block/blk-mq.c
@@ -43,6 +43,7 @@
 
 static DEFINE_PER_CPU(struct llist_head, blk_cpu_done);
 static DEFINE_PER_CPU(call_single_data_t, blk_cpu_csd);
+static DEFINE_MUTEX(blk_mq_cpuhp_lock);
 
 static void blk_mq_insert_request(struct request *rq, blk_insert_t flags);
 static void blk_mq_request_bypass_insert(struct request *rq,
@@ -3740,13 +3741,91 @@ static int blk_mq_hctx_notify_dead(unsigned int cpu, struct hlist_node *node)
 	return 0;
 }
 
-static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+static void __blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
 {
-	if (!(hctx->flags & BLK_MQ_F_STACKING))
+	lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+	if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+	    !hlist_unhashed(&hctx->cpuhp_online)) {
 		cpuhp_state_remove_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
 						    &hctx->cpuhp_online);
-	cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
-					    &hctx->cpuhp_dead);
+		INIT_HLIST_NODE(&hctx->cpuhp_online);
+	}
+
+	if (!hlist_unhashed(&hctx->cpuhp_dead)) {
+		cpuhp_state_remove_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+						    &hctx->cpuhp_dead);
+		INIT_HLIST_NODE(&hctx->cpuhp_dead);
+	}
+}
+
+static void blk_mq_remove_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+	mutex_lock(&blk_mq_cpuhp_lock);
+	__blk_mq_remove_cpuhp(hctx);
+	mutex_unlock(&blk_mq_cpuhp_lock);
+}
+
+static void __blk_mq_add_cpuhp(struct blk_mq_hw_ctx *hctx)
+{
+	lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+	if (!(hctx->flags & BLK_MQ_F_STACKING) &&
+	    hlist_unhashed(&hctx->cpuhp_online))
+		cpuhp_state_add_instance_nocalls(CPUHP_AP_BLK_MQ_ONLINE,
+				&hctx->cpuhp_online);
+
+	if (hlist_unhashed(&hctx->cpuhp_dead))
+		cpuhp_state_add_instance_nocalls(CPUHP_BLK_MQ_DEAD,
+				&hctx->cpuhp_dead);
+}
+
+static void __blk_mq_remove_cpuhp_list(struct list_head *head)
+{
+	struct blk_mq_hw_ctx *hctx;
+
+	lockdep_assert_held(&blk_mq_cpuhp_lock);
+
+	list_for_each_entry(hctx, head, hctx_list)
+		__blk_mq_remove_cpuhp(hctx);
+}
+
+/*
+ * Unregister cpuhp callbacks from exited hw queues
+ *
+ * Safe to call if this `request_queue` is live
+ */
+static void blk_mq_remove_hw_queues_cpuhp(struct request_queue *q)
+{
+	LIST_HEAD(hctx_list);
+
+	spin_lock(&q->unused_hctx_lock);
+	list_splice_init(&q->unused_hctx_list, &hctx_list);
+	spin_unlock(&q->unused_hctx_lock);
+
+	mutex_lock(&blk_mq_cpuhp_lock);
+	__blk_mq_remove_cpuhp_list(&hctx_list);
+	mutex_unlock(&blk_mq_cpuhp_lock);
+
+	spin_lock(&q->unused_hctx_lock);
+	list_splice(&hctx_list, &q->unused_hctx_list);
+	spin_unlock(&q->unused_hctx_lock);
+}
+
+/*
+ * Register cpuhp callbacks from all hw queues
+ *
+ * Safe to call if this `request_queue` is live
+ */
+static void blk_mq_add_hw_queues_cpuhp(struct request_queue *q)
+{
+	struct blk_mq_hw_ctx *hctx;
+	unsigned long i;
+
+	mutex_lock(&blk_mq_cpuhp_lock);
+	queue_for_each_hw_ctx(q, hctx, i)
+		__blk_mq_add_cpuhp(hctx);
+	mutex_unlock(&blk_mq_cpuhp_lock);
 }
 
 /*
@@ -3797,8 +3876,6 @@ static void blk_mq_exit_hctx(struct request_queue *q,
 	if (set->ops->exit_hctx)
 		set->ops->exit_hctx(hctx, hctx_idx);
 
-	blk_mq_remove_cpuhp(hctx);
-
 	xa_erase(&q->hctx_table, hctx_idx);
 
 	spin_lock(&q->unused_hctx_lock);
@@ -3815,6 +3892,7 @@ static void blk_mq_exit_hw_queues(struct request_queue *q,
 	queue_for_each_hw_ctx(q, hctx, i) {
 		if (i == nr_queue)
 			break;
+		blk_mq_remove_cpuhp(hctx);
 		blk_mq_exit_hctx(q, set, hctx, i);
 	}
 }
@@ -3878,6 +3956,8 @@ blk_mq_alloc_hctx(struct request_queue *q, struct blk_mq_tag_set *set,
 	INIT_DELAYED_WORK(&hctx->run_work, blk_mq_run_work_fn);
 	spin_lock_init(&hctx->lock);
 	INIT_LIST_HEAD(&hctx->dispatch);
+	INIT_HLIST_NODE(&hctx->cpuhp_dead);
+	INIT_HLIST_NODE(&hctx->cpuhp_online);
 	hctx->queue = q;
 	hctx->flags = set->flags & ~BLK_MQ_F_TAG_QUEUE_SHARED;
 
@@ -4416,6 +4496,12 @@ static void blk_mq_realloc_hw_ctxs(struct blk_mq_tag_set *set,
 	xa_for_each_start(&q->hctx_table, j, hctx, j)
 		blk_mq_exit_hctx(q, set, hctx, j);
 	mutex_unlock(&q->sysfs_lock);
+
+	/* unregister cpuhp callbacks for exited hctxs */
+	blk_mq_remove_hw_queues_cpuhp(q);
+
+	/* register cpuhp for new initialized hctxs */
+	blk_mq_add_hw_queues_cpuhp(q);
 }
 
 int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,