Skip to content

Commit b40140e

Browse files
authored
Improve redbean concurrency (#1332)
In the course of playing with redbean I was confused about how the state was behaving and then noticed that some stuff is maybe getting edited by multiple processes. I tried to improve things by changing the definition of the counter variables to be explicitly atomic. Claude assures me that most modern Unixes support cross-process atomics, so I just went with it on that front. I also added some mutexes to the shared state to try to synchronize some other things that might get written or read from workers but couldn't be made atomic, mainly the rusage and time values. I could've probably been less granular and just had a global shared-state lock, but I opted to be fairly granular as a starting point. This also reorders the resetting of the lastmeltdown timespec before the SIGUSR2 signal is sent; hopefully this is okay.
1 parent 3142758 commit b40140e

File tree

1 file changed

+67
-29
lines changed

1 file changed

+67
-29
lines changed

tool/net/redbean.c

Lines changed: 67 additions & 29 deletions
Original file line numberDiff line numberDiff line change
@@ -181,12 +181,8 @@ __static_yoink("blink_xnu_aarch64"); // is apple silicon
181181
#define HeaderLength(H) (cpm.msg.headers[H].b - cpm.msg.headers[H].a)
182182
#define HeaderEqualCase(H, S) \
183183
SlicesEqualCase(S, strlen(S), HeaderData(H), HeaderLength(H))
184-
#define LockInc(P) \
185-
atomic_fetch_add_explicit((_Atomic(typeof(*(P))) *)(P), +1, \
186-
memory_order_relaxed)
187-
#define LockDec(P) \
188-
atomic_fetch_add_explicit((_Atomic(typeof(*(P))) *)(P), -1, \
189-
memory_order_relaxed)
184+
#define LockInc(P) atomic_fetch_add_explicit(P, +1, memory_order_relaxed)
185+
#define LockDec(P) atomic_fetch_add_explicit(P, -1, memory_order_relaxed)
190186

191187
#define TRACE_BEGIN \
192188
do { \
@@ -377,19 +373,21 @@ struct Blackhole {
377373
} blackhole;
378374

379375
static struct Shared {
380-
int workers;
381-
struct timespec nowish;
382-
struct timespec lastreindex;
376+
_Atomic(int) workers;
383377
struct timespec lastmeltdown;
378+
struct timespec nowish;
384379
char currentdate[32];
385380
struct rusage server;
386381
struct rusage children;
387382
struct Counters {
388-
#define C(x) long x;
383+
#define C(x) _Atomic(long) x;
389384
#include "tool/net/counters.inc"
390385
#undef C
391386
} c;
392-
pthread_spinlock_t montermlock;
387+
pthread_mutex_t datetime_mu;
388+
pthread_mutex_t server_mu;
389+
pthread_mutex_t children_mu;
390+
pthread_mutex_t lastmeltdown_mu;
393391
} *shared;
394392

395393
static const char kCounterNames[] =
@@ -1350,8 +1348,8 @@ static void CallSimpleHookIfDefined(const char *s) {
13501348
}
13511349

13521350
static void ReportWorkerExit(int pid, int ws) {
1353-
int workers;
1354-
workers = atomic_fetch_sub((_Atomic(int) *)&shared->workers, 1) - 1;
1351+
int workers =
1352+
atomic_fetch_sub_explicit(&shared->workers, 1, memory_order_release);
13551353
if (WIFEXITED(ws)) {
13561354
if (WEXITSTATUS(ws)) {
13571355
LockInc(&shared->c.failedchildren);
@@ -1383,7 +1381,9 @@ static void ReportWorkerResources(int pid, struct rusage *ru) {
13831381

13841382
static void HandleWorkerExit(int pid, int ws, struct rusage *ru) {
13851383
LockInc(&shared->c.connectionshandled);
1384+
unassert(!pthread_mutex_lock(&shared->children_mu));
13861385
rusage_add(&shared->children, ru);
1386+
unassert(!pthread_mutex_unlock(&shared->children_mu));
13871387
ReportWorkerExit(pid, ws);
13881388
ReportWorkerResources(pid, ru);
13891389
if (hasonprocessdestroy) {
@@ -2129,9 +2129,11 @@ static void UpdateCurrentDate(struct timespec now) {
21292129
int64_t t;
21302130
struct tm tm;
21312131
t = now.tv_sec;
2132-
shared->nowish = now;
21332132
gmtime_r(&t, &tm);
2133+
unassert(!pthread_mutex_lock(&shared->datetime_mu));
2134+
shared->nowish = now;
21342135
FormatHttpDateTime(shared->currentdate, &tm);
2136+
unassert(!pthread_mutex_unlock(&shared->datetime_mu));
21352137
}
21362138

21372139
static int64_t GetGmtOffset(int64_t t) {
@@ -2364,7 +2366,10 @@ static char *AppendCache(char *p, int64_t seconds, char *directive) {
23642366
p = stpcpy(p, directive);
23652367
}
23662368
p = AppendCrlf(p);
2367-
return AppendExpires(p, shared->nowish.tv_sec + seconds);
2369+
unassert(!pthread_mutex_lock(&shared->datetime_mu));
2370+
long nowish_sec = shared->nowish.tv_sec;
2371+
unassert(!pthread_mutex_unlock(&shared->datetime_mu));
2372+
return AppendExpires(p, nowish_sec + seconds);
23682373
}
23692374

23702375
static inline char *AppendContentLength(char *p, size_t n) {
@@ -3103,9 +3108,12 @@ td { padding-right: 3em; }\r\n\
31033108
<td valign=\"top\">\r\n\
31043109
<a href=\"/statusz\">/statusz</a>\r\n\
31053110
");
3106-
if (shared->c.connectionshandled) {
3111+
if (atomic_load_explicit(&shared->c.connectionshandled,
3112+
memory_order_acquire)) {
31073113
appends(&cpm.outbuf, "says your redbean<br>\r\n");
3114+
unassert(!pthread_mutex_lock(&shared->children_mu));
31083115
AppendResourceReport(&cpm.outbuf, &shared->children, "<br>\r\n");
3116+
unassert(!pthread_mutex_unlock(&shared->children_mu));
31093117
}
31103118
appends(&cpm.outbuf, "<td valign=\"top\">\r\n");
31113119
and = "";
@@ -3127,12 +3135,12 @@ td { padding-right: 3em; }\r\n\
31273135
}
31283136
appendf(&cpm.outbuf, "%s%,ld second%s of operation<br>\r\n", and, y.rem,
31293137
y.rem == 1 ? "" : "s");
3130-
x = shared->c.messageshandled;
3138+
x = atomic_load_explicit(&shared->c.messageshandled, memory_order_relaxed);
31313139
appendf(&cpm.outbuf, "%,ld message%s handled<br>\r\n", x, x == 1 ? "" : "s");
3132-
x = shared->c.connectionshandled;
3140+
x = atomic_load_explicit(&shared->c.connectionshandled, memory_order_relaxed);
31333141
appendf(&cpm.outbuf, "%,ld connection%s handled<br>\r\n", x,
31343142
x == 1 ? "" : "s");
3135-
x = shared->workers;
3143+
x = atomic_load_explicit(&shared->workers, memory_order_relaxed);
31363144
appendf(&cpm.outbuf, "%,ld connection%s active<br>\r\n", x,
31373145
x == 1 ? "" : "s");
31383146
appends(&cpm.outbuf, "</table>\r\n");
@@ -3184,11 +3192,11 @@ static void AppendRusage(const char *a, struct rusage *ru) {
31843192
}
31853193

31863194
static void ServeCounters(void) {
3187-
const long *c;
3195+
const _Atomic(long) *c;
31883196
const char *s;
3189-
for (c = (const long *)&shared->c, s = kCounterNames; *s;
3197+
for (c = (const _Atomic(long) *)&shared->c, s = kCounterNames; *s;
31903198
++c, s += strlen(s) + 1) {
3191-
AppendLong1(s, *c);
3199+
AppendLong1(s, atomic_load_explicit(c, memory_order_relaxed));
31923200
}
31933201
}
31943202

@@ -3201,21 +3209,30 @@ static char *ServeStatusz(void) {
32013209
AppendLong1("pid", getpid());
32023210
AppendLong1("ppid", getppid());
32033211
AppendLong1("now", timespec_real().tv_sec);
3212+
unassert(!pthread_mutex_lock(&shared->datetime_mu));
32043213
AppendLong1("nowish", shared->nowish.tv_sec);
3214+
unassert(!pthread_mutex_unlock(&shared->datetime_mu));
32053215
AppendLong1("gmtoff", gmtoff);
32063216
AppendLong1("CLK_TCK", CLK_TCK);
32073217
AppendLong1("startserver", startserver.tv_sec);
3218+
unassert(!pthread_mutex_lock(&shared->lastmeltdown_mu));
32083219
AppendLong1("lastmeltdown", shared->lastmeltdown.tv_sec);
3209-
AppendLong1("workers", shared->workers);
3220+
unassert(!pthread_mutex_unlock(&shared->lastmeltdown_mu));
3221+
AppendLong1("workers",
3222+
atomic_load_explicit(&shared->workers, memory_order_relaxed));
32103223
AppendLong1("assets.n", assets.n);
32113224
#ifndef STATIC
32123225
lua_State *L = GL;
32133226
AppendLong1("lua.memory",
32143227
lua_gc(L, LUA_GCCOUNT) * 1024 + lua_gc(L, LUA_GCCOUNTB));
32153228
#endif
32163229
ServeCounters();
3230+
unassert(!pthread_mutex_lock(&shared->server_mu));
32173231
AppendRusage("server", &shared->server);
3232+
unassert(!pthread_mutex_unlock(&shared->server_mu));
3233+
unassert(!pthread_mutex_lock(&shared->children_mu));
32183234
AppendRusage("children", &shared->children);
3235+
unassert(!pthread_mutex_unlock(&shared->children_mu));
32193236
p = SetStatus(200, "OK");
32203237
p = AppendContentType(p, "text/plain");
32213238
if (cpm.msg.version >= 11) {
@@ -3980,7 +3997,9 @@ static int LuaNilTlsError(lua_State *L, const char *s, int r) {
39803997
#include "tool/net/fetch.inc"
39813998

39823999
static int LuaGetDate(lua_State *L) {
4000+
unassert(!pthread_mutex_lock(&shared->datetime_mu));
39834001
lua_pushinteger(L, shared->nowish.tv_sec);
4002+
unassert(!pthread_mutex_unlock(&shared->datetime_mu));
39844003
return 1;
39854004
}
39864005

@@ -5034,7 +5053,7 @@ static int LuaProgramTokenBucket(lua_State *L) {
50345053
npassert(pid != -1);
50355054
if (!pid)
50365055
Replenisher();
5037-
++shared->workers;
5056+
atomic_fetch_add_explicit(&shared->workers, 1, memory_order_acquire);
50385057
return 0;
50395058
}
50405059

@@ -5679,7 +5698,8 @@ static void LogClose(const char *reason) {
56795698
if (amtread || meltdown || killed) {
56805699
LockInc(&shared->c.fumbles);
56815700
INFOF("(stat) %s %s with %,ld unprocessed and %,d handled (%,d workers)",
5682-
DescribeClient(), reason, amtread, messageshandled, shared->workers);
5701+
DescribeClient(), reason, amtread, messageshandled,
5702+
atomic_load_explicit(&shared->workers, memory_order_relaxed));
56835703
} else {
56845704
DEBUGF("(stat) %s %s with %,d messages handled", DescribeClient(), reason,
56855705
messageshandled);
@@ -5737,14 +5757,18 @@ Content-Length: 22\r\n\
57375757
}
57385758

57395759
static void EnterMeltdownMode(void) {
5760+
unassert(!pthread_mutex_lock(&shared->lastmeltdown_mu));
57405761
if (timespec_cmp(timespec_sub(timespec_real(), shared->lastmeltdown),
57415762
(struct timespec){1}) < 0) {
5763+
unassert(!pthread_mutex_unlock(&shared->lastmeltdown_mu));
57425764
return;
57435765
}
5744-
WARNF("(srvr) server is melting down (%,d workers)", shared->workers);
5745-
LOGIFNEG1(kill(0, SIGUSR2));
57465766
shared->lastmeltdown = timespec_real();
5747-
++shared->c.meltdowns;
5767+
pthread_mutex_unlock(&shared->lastmeltdown_mu);
5768+
WARNF("(srvr) server is melting down (%,d workers)",
5769+
atomic_load_explicit(&shared->workers, memory_order_relaxed));
5770+
LOGIFNEG1(kill(0, SIGUSR2));
5771+
LockInc(&shared->c.meltdowns);
57485772
}
57495773

57505774
static char *HandlePayloadDisconnect(void) {
@@ -5861,7 +5885,9 @@ static void HandleHeartbeat(void) {
58615885
size_t i;
58625886
UpdateCurrentDate(timespec_real());
58635887
Reindex();
5888+
unassert(!pthread_mutex_lock(&shared->server_mu));
58645889
getrusage(RUSAGE_SELF, &shared->server);
5890+
unassert(!pthread_mutex_unlock(&shared->server_mu));
58655891
#ifndef STATIC
58665892
CallSimpleHookIfDefined("OnServerHeartbeat");
58675893
CollectGarbage();
@@ -6481,7 +6507,9 @@ static bool HandleMessageActual(void) {
64816507
DEBUGF("(clnt) could not synchronize message stream");
64826508
}
64836509
if (cpm.msg.version >= 10) {
6510+
unassert(!pthread_mutex_lock(&shared->datetime_mu));
64846511
p = AppendCrlf(stpcpy(stpcpy(p, "Date: "), shared->currentdate));
6512+
unassert(!pthread_mutex_unlock(&shared->datetime_mu));
64856513
if (!cpm.branded)
64866514
p = stpcpy(p, serverheader);
64876515
if (extrahdrs)
@@ -6751,7 +6779,9 @@ static int HandleConnection(size_t i) {
67516779
DEBUGF("(token) can't acquire accept() token for client");
67526780
}
67536781
startconnection = timespec_real();
6754-
if (UNLIKELY(maxworkers) && shared->workers >= maxworkers) {
6782+
if (UNLIKELY(maxworkers) &&
6783+
atomic_load_explicit(&shared->workers, memory_order_relaxed) >=
6784+
maxworkers) {
67556785
EnterMeltdownMode();
67566786
SendServiceUnavailable();
67576787
close(client);
@@ -7346,6 +7376,14 @@ void RedBean(int argc, char *argv[]) {
73467376
(shared = mmap(NULL, ROUNDUP(sizeof(struct Shared), getgransize()),
73477377
PROT_READ | PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS,
73487378
-1, 0)));
7379+
pthread_mutexattr_t attr;
7380+
unassert(!pthread_mutexattr_init(&attr));
7381+
unassert(!pthread_mutexattr_setpshared(&attr, PTHREAD_PROCESS_SHARED));
7382+
unassert(!pthread_mutex_init(&shared->datetime_mu, &attr));
7383+
unassert(!pthread_mutex_init(&shared->server_mu, &attr));
7384+
unassert(!pthread_mutex_init(&shared->children_mu, &attr));
7385+
unassert(!pthread_mutex_init(&shared->lastmeltdown_mu, &attr));
7386+
unassert(!pthread_mutexattr_destroy(&attr));
73497387
if (daemonize) {
73507388
for (int i = 0; i < 256; ++i) {
73517389
close(i);

0 commit comments

Comments
 (0)