Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Free htab element out of bucket lock #8374

Open
wants to merge 5 commits into
base: bpf-next_base
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 32 additions & 28 deletions kernel/bpf/hashtab.c
Original file line number Diff line number Diff line change
Expand Up @@ -824,13 +824,14 @@ static bool htab_lru_map_delete_node(void *arg, struct bpf_lru_node *node)
hlist_nulls_for_each_entry_rcu(l, n, head, hash_node)
if (l == tgt_l) {
hlist_nulls_del_rcu(&l->hash_node);
check_and_free_fields(htab, l);
bpf_map_dec_elem_count(&htab->map);
break;
}

htab_unlock_bucket(htab, b, tgt_l->hash, flags);

if (l == tgt_l)
check_and_free_fields(htab, l);
return l == tgt_l;
}

Expand Down Expand Up @@ -1634,41 +1635,44 @@ static int __htab_map_lookup_and_delete_elem(struct bpf_map *map, void *key,
l = lookup_elem_raw(head, hash, key, key_size);
if (!l) {
ret = -ENOENT;
} else {
if (is_percpu) {
u32 roundup_value_size = round_up(map->value_size, 8);
void __percpu *pptr;
int off = 0, cpu;
goto out_unlock;
}

pptr = htab_elem_get_ptr(l, key_size);
for_each_possible_cpu(cpu) {
copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
check_and_init_map_value(&htab->map, value + off);
off += roundup_value_size;
}
} else {
u32 roundup_key_size = round_up(map->key_size, 8);
if (is_percpu) {
u32 roundup_value_size = round_up(map->value_size, 8);
void __percpu *pptr;
int off = 0, cpu;

if (flags & BPF_F_LOCK)
copy_map_value_locked(map, value, l->key +
roundup_key_size,
true);
else
copy_map_value(map, value, l->key +
roundup_key_size);
/* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, value);
pptr = htab_elem_get_ptr(l, key_size);
for_each_possible_cpu(cpu) {
copy_map_value_long(&htab->map, value + off, per_cpu_ptr(pptr, cpu));
check_and_init_map_value(&htab->map, value + off);
off += roundup_value_size;
}
} else {
u32 roundup_key_size = round_up(map->key_size, 8);

hlist_nulls_del_rcu(&l->hash_node);
if (!is_lru_map)
free_htab_elem(htab, l);
if (flags & BPF_F_LOCK)
copy_map_value_locked(map, value, l->key +
roundup_key_size,
true);
else
copy_map_value(map, value, l->key +
roundup_key_size);
/* Zeroing special fields in the temp buffer */
check_and_init_map_value(map, value);
}
hlist_nulls_del_rcu(&l->hash_node);

out_unlock:
htab_unlock_bucket(htab, b, hash, bflags);

if (is_lru_map && l)
htab_lru_push_free(htab, l);
if (l) {
if (is_lru_map)
htab_lru_push_free(htab, l);
else
free_htab_elem(htab, l);
}

return ret;
}
Expand Down
18 changes: 16 additions & 2 deletions kernel/bpf/helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -1593,10 +1593,24 @@ void bpf_timer_cancel_and_free(void *val)
* To avoid these issues, punt to workqueue context when we are in a
* timer callback.
*/
if (this_cpu_read(hrtimer_running))
if (this_cpu_read(hrtimer_running)) {
queue_work(system_unbound_wq, &t->cb.delete_work);
else
return;
}

if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
/* If the timer is running on other CPU, also use a kworker to
* wait for the completion of the timer instead of trying to
* acquire a sleepable lock in hrtimer_cancel() to wait for its
* completion.
*/
if (hrtimer_try_to_cancel(&t->timer) >= 0)
kfree_rcu(t, cb.rcu);
else
queue_work(system_unbound_wq, &t->cb.delete_work);
} else {
bpf_timer_delete_work(&t->cb.delete_work);
}
}

/* This function is called by map_delete/update_elem for individual element and
Expand Down
165 changes: 165 additions & 0 deletions tools/testing/selftests/bpf/prog_tests/free_timer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,165 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
#define _GNU_SOURCE
#include <unistd.h>
#include <sys/syscall.h>
#include <test_progs.h>

#include "free_timer.skel.h"

struct run_ctx {
struct bpf_program *start_prog;
struct bpf_program *overwrite_prog;
pthread_barrier_t notify;
int loop;
bool start;
bool stop;
};

static void start_threads(struct run_ctx *ctx)
{
ctx->start = true;
}

static void stop_threads(struct run_ctx *ctx)
{
ctx->stop = true;
/* Guarantee the order between ->stop and ->start */
__atomic_store_n(&ctx->start, true, __ATOMIC_RELEASE);
}

static int wait_for_start(struct run_ctx *ctx)
{
while (!__atomic_load_n(&ctx->start, __ATOMIC_ACQUIRE))
usleep(10);

return ctx->stop;
}

static void *overwrite_timer_fn(void *arg)
{
struct run_ctx *ctx = arg;
int loop, fd, err;
cpu_set_t cpuset;
long ret = 0;

/* Pin on CPU 0 */
CPU_ZERO(&cpuset);
CPU_SET(0, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);

/* Is the thread being stopped ? */
err = wait_for_start(ctx);
if (err)
return NULL;

fd = bpf_program__fd(ctx->overwrite_prog);
loop = ctx->loop;
while (loop-- > 0) {
LIBBPF_OPTS(bpf_test_run_opts, opts);

/* Wait for start thread to complete */
pthread_barrier_wait(&ctx->notify);

/* Overwrite timers */
err = bpf_prog_test_run_opts(fd, &opts);
if (err)
ret |= 1;
else if (opts.retval)
ret |= 2;

/* Notify start thread to start timers */
pthread_barrier_wait(&ctx->notify);
}

return (void *)ret;
}

static void *start_timer_fn(void *arg)
{
struct run_ctx *ctx = arg;
int loop, fd, err;
cpu_set_t cpuset;
long ret = 0;

/* Pin on CPU 1 */
CPU_ZERO(&cpuset);
CPU_SET(1, &cpuset);
pthread_setaffinity_np(pthread_self(), sizeof(cpuset), &cpuset);

/* Is the thread being stopped ? */
err = wait_for_start(ctx);
if (err)
return NULL;

fd = bpf_program__fd(ctx->start_prog);
loop = ctx->loop;
while (loop-- > 0) {
LIBBPF_OPTS(bpf_test_run_opts, opts);

/* Run the prog to start timer */
err = bpf_prog_test_run_opts(fd, &opts);
if (err)
ret |= 4;
else if (opts.retval)
ret |= 8;

/* Notify overwrite thread to do overwrite */
pthread_barrier_wait(&ctx->notify);

/* Wait for overwrite thread to complete */
pthread_barrier_wait(&ctx->notify);
}

return (void *)ret;
}

void test_free_timer(void)
{
struct free_timer *skel;
struct bpf_program *prog;
struct run_ctx ctx;
pthread_t tid[2];
void *ret;
int err;

skel = free_timer__open_and_load();
if (!ASSERT_OK_PTR(skel, "open_load"))
return;

memset(&ctx, 0, sizeof(ctx));

prog = bpf_object__find_program_by_name(skel->obj, "start_timer");
if (!ASSERT_OK_PTR(prog, "find start prog"))
goto out;
ctx.start_prog = prog;

prog = bpf_object__find_program_by_name(skel->obj, "overwrite_timer");
if (!ASSERT_OK_PTR(prog, "find overwrite prog"))
goto out;
ctx.overwrite_prog = prog;

pthread_barrier_init(&ctx.notify, NULL, 2);
ctx.loop = 10;

err = pthread_create(&tid[0], NULL, start_timer_fn, &ctx);
if (!ASSERT_OK(err, "create start_timer"))
goto out;

err = pthread_create(&tid[1], NULL, overwrite_timer_fn, &ctx);
if (!ASSERT_OK(err, "create overwrite_timer")) {
stop_threads(&ctx);
goto out;
}

start_threads(&ctx);

ret = NULL;
err = pthread_join(tid[0], &ret);
ASSERT_EQ(err | (long)ret, 0, "start_timer");
ret = NULL;
err = pthread_join(tid[1], &ret);
ASSERT_EQ(err | (long)ret, 0, "overwrite_timer");
out:
free_timer__destroy(skel);
}
71 changes: 71 additions & 0 deletions tools/testing/selftests/bpf/progs/free_timer.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
// SPDX-License-Identifier: GPL-2.0
/* Copyright (C) 2025. Huawei Technologies Co., Ltd */
#include <linux/bpf.h>
#include <time.h>
#include <bpf/bpf_tracing.h>
#include <bpf/bpf_helpers.h>

#define MAX_ENTRIES 8

struct map_value {
struct bpf_timer timer;
};

struct {
__uint(type, BPF_MAP_TYPE_HASH);
__type(key, int);
__type(value, struct map_value);
__uint(max_entries, MAX_ENTRIES);
} map SEC(".maps");

static int timer_cb(void *map, void *key, struct map_value *value)
{
volatile int sum = 0;
int i;

bpf_for(i, 0, 1024 * 1024) sum += i;

return 0;
}

static int start_cb(int key)
{
struct map_value *value;

value = bpf_map_lookup_elem(&map, (void *)&key);
if (!value)
return 0;

bpf_timer_init(&value->timer, &map, CLOCK_MONOTONIC);
bpf_timer_set_callback(&value->timer, timer_cb);
/* Hope 100us will be enough to wake-up and run the overwrite thread */
bpf_timer_start(&value->timer, 100000, BPF_F_TIMER_CPU_PIN);

return 0;
}

static int overwrite_cb(int key)
{
struct map_value zero = {};

/* Free the timer which may run on other CPU */
bpf_map_update_elem(&map, (void *)&key, &zero, BPF_ANY);

return 0;
}

SEC("syscall")
int BPF_PROG(start_timer)
{
bpf_loop(MAX_ENTRIES, start_cb, NULL, 0);
return 0;
}

SEC("syscall")
int BPF_PROG(overwrite_timer)
{
bpf_loop(MAX_ENTRIES, overwrite_cb, NULL, 0);
return 0;
}

char _license[] SEC("license") = "GPL";
Loading