Skip to content

Commit

Permalink
core/uwsgi: stop using pthread_cancel()
Browse files Browse the repository at this point in the history
When working to reproduce unbit#2615 I saw many strange "defunct" (zombie)
workers.
The master called waitpid(-1, ...) but it return 0 even there are some
zombies.
Finally, master sends KILL signal (MERCY) and worker is restarted.

I believe this strange zombie was born from pthread_cancel. Subthreads
calls pthread_cancel() for main thread and it cause strange process.

pthread_cancel() is very hard to use and debug. I can not even attach the
strange zombie with gdb --pid. I think it is not maintainable.

In the end we can remove six_feet_under_lock and make wait_for_threads()
static.
  • Loading branch information
methane authored and xrmx committed Apr 7, 2024
1 parent f7856e5 commit 7610f52
Show file tree
Hide file tree
Showing 4 changed files with 6 additions and 65 deletions.
3 changes: 0 additions & 3 deletions core/loop.c
Original file line number Diff line number Diff line change
Expand Up @@ -81,9 +81,6 @@ void uwsgi_setup_thread_req(long core_id, struct wsgi_request *wsgi_req) {
int i;
sigset_t smask;


pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &i);
pthread_setcanceltype(PTHREAD_CANCEL_ASYNCHRONOUS, &i);
pthread_setspecific(uwsgi.tur_key, (void *) wsgi_req);

if (core_id > 0) {
Expand Down
21 changes: 0 additions & 21 deletions core/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -1034,12 +1034,6 @@ void uwsgi_destroy_request(struct wsgi_request *wsgi_req) {

close_and_free_request(wsgi_req);

int foo;
if (uwsgi.threads > 1) {
// now the thread can die...
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &foo);
}

// reset for avoiding following requests to fail on non-uwsgi protocols
// thanks Marko Tiikkaja for catching it
wsgi_req->uh->_pktsize = 0;
Expand Down Expand Up @@ -1131,11 +1125,6 @@ void uwsgi_close_request(struct wsgi_request *wsgi_req) {
func(wsgi_req);
}

if (uwsgi.threads > 1) {
// now the thread can die...
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &tmp_id);
}

// leave harakiri mode
if (uwsgi.workers[uwsgi.mywid].cores[wsgi_req->async_id].harakiri > 0) {
set_harakiri(wsgi_req, 0);
Expand Down Expand Up @@ -1583,18 +1572,12 @@ int wsgi_req_accept(int queue, struct wsgi_request *wsgi_req) {
}
}

// kill the thread after the request completion
if (uwsgi.threads > 1)
pthread_setcancelstate(PTHREAD_CANCEL_DISABLE, &ret);

if (uwsgi.signal_socket > -1 && (interesting_fd == uwsgi.signal_socket || interesting_fd == uwsgi.my_signal_socket)) {

thunder_unlock;

uwsgi_receive_signal(wsgi_req, interesting_fd, "worker", uwsgi.mywid);

if (uwsgi.threads > 1)
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &ret);
return -1;
}

Expand All @@ -1605,8 +1588,6 @@ int wsgi_req_accept(int queue, struct wsgi_request *wsgi_req) {
wsgi_req->fd = wsgi_req->socket->proto_accept(wsgi_req, interesting_fd);
thunder_unlock;
if (wsgi_req->fd < 0) {
if (uwsgi.threads > 1)
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &ret);
return -1;
}

Expand All @@ -1621,8 +1602,6 @@ int wsgi_req_accept(int queue, struct wsgi_request *wsgi_req) {
}

thunder_unlock;
if (uwsgi.threads > 1)
pthread_setcancelstate(PTHREAD_CANCEL_ENABLE, &ret);
return -1;
}

Expand Down
46 changes: 6 additions & 40 deletions core/uwsgi.c
Original file line number Diff line number Diff line change
Expand Up @@ -1226,30 +1226,17 @@ void warn_pipe() {
}
}

// in threading mode we need to use the cancel pthread subsystem
void wait_for_threads() {
// This function is called from signal handler or main thread to wait worker threads.
// `uwsgi.workers[uwsgi.mywid].manage_next_request` should be set to 0 to stop worker threads.
static void wait_for_threads() {
int i, ret;

// on some platform thread cancellation is REALLY flaky
// This option was added because we used pthread_cancel().
// thread cancellation is REALLY flaky
if (uwsgi.no_threads_wait) return;

int sudden_death = 0;

pthread_mutex_lock(&uwsgi.six_feet_under_lock);
for (i = 1; i < uwsgi.threads; i++) {
if (!pthread_equal(uwsgi.workers[uwsgi.mywid].cores[i].thread_id, pthread_self())) {
if (pthread_cancel(uwsgi.workers[uwsgi.mywid].cores[i].thread_id)) {
uwsgi_error("pthread_cancel()\n");
sudden_death = 1;
}
}
}

if (sudden_death)
goto end;

// wait for thread termination
for (i = 1; i < uwsgi.threads; i++) {
for (i = 0; i < uwsgi.threads; i++) {
if (!pthread_equal(uwsgi.workers[uwsgi.mywid].cores[i].thread_id, pthread_self())) {
ret = pthread_join(uwsgi.workers[uwsgi.mywid].cores[i].thread_id, NULL);
if (ret) {
Expand All @@ -1261,26 +1248,6 @@ void wait_for_threads() {
}
}
}

// cancel initial thread last since after pthread_cancel() and
// pthread_join() is called on it, the whole process will appear to be
// a zombie. although it won't eliminate process zombie time, but it
// should minimize it.
if (!pthread_equal(uwsgi.workers[uwsgi.mywid].cores[0].thread_id, pthread_self())) {
if (pthread_cancel(uwsgi.workers[uwsgi.mywid].cores[0].thread_id)) {
uwsgi_error("pthread_cancel() on initial thread\n");
goto end;
}

ret = pthread_join(uwsgi.workers[uwsgi.mywid].cores[0].thread_id, NULL);
if (ret) {
uwsgi_log("pthread_join() = %d on initial thread\n", ret);
}
}

end:

pthread_mutex_unlock(&uwsgi.six_feet_under_lock);
}


Expand Down Expand Up @@ -3641,7 +3608,6 @@ void uwsgi_worker_run() {

if (uwsgi.cores > 1) {
uwsgi.workers[uwsgi.mywid].cores[0].thread_id = pthread_self();
pthread_mutex_init(&uwsgi.six_feet_under_lock, NULL);
}

uwsgi_ignition();
Expand Down
1 change: 0 additions & 1 deletion uwsgi.h
Original file line number Diff line number Diff line change
Expand Up @@ -2526,7 +2526,6 @@ struct uwsgi_server {

// avoid thundering herd in threaded modes
pthread_mutex_t thunder_mutex;
pthread_mutex_t six_feet_under_lock;
pthread_mutex_t lock_static;

int use_thunder_lock;
Expand Down

0 comments on commit 7610f52

Please sign in to comment.