From 0e51f033498426d622f6d0a0058927e43618f341 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Tue, 19 Nov 2024 18:45:10 +0100 Subject: [PATCH 1/6] Implement option `{otp,select_read}` --- erts/emulator/nifs/common/prim_socket_int.h | 1 + erts/emulator/nifs/common/prim_socket_nif.c | 74 +++++ erts/emulator/nifs/unix/unix_socket_syncio.c | 284 ++++++++++--------- erts/preloaded/ebin/prim_socket.beam | Bin 15644 -> 15672 bytes erts/preloaded/src/prim_socket.erl | 2 + lib/kernel/src/inet_epmd_socket.erl | 4 +- lib/kernel/src/socket.erl | 112 +++----- 7 files changed, 269 insertions(+), 208 deletions(-) diff --git a/erts/emulator/nifs/common/prim_socket_int.h b/erts/emulator/nifs/common/prim_socket_int.h index fede46d7b8ae..1343b7ff285a 100644 --- a/erts/emulator/nifs/common/prim_socket_int.h +++ b/erts/emulator/nifs/common/prim_socket_int.h @@ -543,6 +543,7 @@ typedef struct { SOCKET sock; SOCKET origFD; // A 'socket' created from this FD BOOLEAN_T closeOnClose; // Have we dup'ed or not + BOOLEAN_T selectRead; // Try to have read select active /* +++ The dbg flag for SSDBG +++ */ BOOLEAN_T dbg; BOOLEAN_T useReg; diff --git a/erts/emulator/nifs/common/prim_socket_nif.c b/erts/emulator/nifs/common/prim_socket_nif.c index 3fe99af5f71b..76f1eb6cfb4d 100644 --- a/erts/emulator/nifs/common/prim_socket_nif.c +++ b/erts/emulator/nifs/common/prim_socket_nif.c @@ -866,6 +866,7 @@ const int esock_ioctl_flags_length = NUM(esock_ioctl_flags); #define ESOCK_OPT_OTP_FD 1008 #define ESOCK_OPT_OTP_META 1009 #define ESOCK_OPT_OTP_USE_REGISTRY 1010 +#define ESOCK_OPT_OTP_SELECT_READ 1011 /**/ #define ESOCK_OPT_OTP_DOMAIN 1999 // INTERNAL AND ONLY GET #if 0 @@ -1228,6 +1229,7 @@ static ERL_NIF_TERM esock_setopt_otp(ErlNifEnv* env, /* *** esock_setopt_otp_debug *** * *** esock_setopt_otp_iow *** * *** esock_setopt_otp_ctrl_proc *** + * *** esock_setopt_otp_select_read *** * *** esock_setopt_otp_rcvbuf *** * *** esock_setopt_otp_rcvctrlbuf *** * *** esock_setopt_otp_sndctrlbuf *** @@ 
-1238,6 +1240,7 @@ static ERL_NIF_TERM esock_setopt_otp(ErlNifEnv* env, ESOCK_SETOPT_OTP_FUNC_DEF(debug); \ ESOCK_SETOPT_OTP_FUNC_DEF(iow); \ ESOCK_SETOPT_OTP_FUNC_DEF(ctrl_proc); \ + ESOCK_SETOPT_OTP_FUNC_DEF(select_read); \ ESOCK_SETOPT_OTP_FUNC_DEF(rcvbuf); \ ESOCK_SETOPT_OTP_FUNC_DEF(rcvctrlbuf); \ ESOCK_SETOPT_OTP_FUNC_DEF(sndctrlbuf); \ @@ -1256,6 +1259,7 @@ static ERL_NIF_TERM esock_getopt_otp(ErlNifEnv* env, /* *** esock_getopt_otp_debug *** * *** esock_getopt_otp_iow *** * *** esock_getopt_otp_ctrl_proc *** + * *** esock_getopt_otp_select_read *** * *** esock_getopt_otp_rcvbuf *** * *** esock_getopt_otp_rcvctrlbuf *** * *** esock_getopt_otp_sndctrlbuf *** @@ -1271,6 +1275,7 @@ static ERL_NIF_TERM esock_getopt_otp(ErlNifEnv* env, ESOCK_GETOPT_OTP_FUNC_DEF(debug); \ ESOCK_GETOPT_OTP_FUNC_DEF(iow); \ ESOCK_GETOPT_OTP_FUNC_DEF(ctrl_proc); \ + ESOCK_GETOPT_OTP_FUNC_DEF(select_read); \ ESOCK_GETOPT_OTP_FUNC_DEF(rcvbuf); \ ESOCK_GETOPT_OTP_FUNC_DEF(rcvctrlbuf); \ ESOCK_GETOPT_OTP_FUNC_DEF(sndctrlbuf); \ @@ -6779,6 +6784,12 @@ ERL_NIF_TERM esock_setopt_otp(ErlNifEnv* env, MUNLOCK(descP->readMtx); break; + case ESOCK_OPT_OTP_SELECT_READ: + MLOCK(descP->readMtx); + result = esock_setopt_otp_select_read(env, descP, eVal); + MUNLOCK(descP->readMtx); + break; + case ESOCK_OPT_OTP_RCVBUF: MLOCK(descP->readMtx); result = esock_setopt_otp_rcvbuf(env, descP, eVal); @@ -6888,6 +6899,34 @@ ERL_NIF_TERM esock_setopt_otp_iow(ErlNifEnv* env, +/* esock_setopt_otp_select_read - Handle the OTP (level) select_read option + */ + +static +ERL_NIF_TERM esock_setopt_otp_select_read(ErlNifEnv* env, + ESockDescriptor* descP, + ERL_NIF_TERM eVal) +{ + if (! IS_OPEN(descP->readState)) { + SSDBG( descP, + ("SOCKET", "esock_setopt_otp_select_read {%d} -> closed\r\n", + descP->sock) ); + return esock_make_error_closed(env); + } + + if (! 
esock_decode_bool(eVal, &descP->selectRead)) + return esock_make_invalid(env, esock_atom_value); + + SSDBG( descP, + ("SOCKET", "esock_setopt_otp_select_read {%d} -> ok" + "\r\n eVal: %T" + "\r\n", descP->sock, eVal) ); + + return esock_atom_ok; +} + + + /* esock_setopt_otp_ctrl_proc - Handle the OTP (level) * controlling_process options */ @@ -8542,6 +8581,12 @@ ERL_NIF_TERM esock_getopt_otp(ErlNifEnv* env, MUNLOCK(descP->readMtx); break; + case ESOCK_OPT_OTP_SELECT_READ: + MLOCK(descP->readMtx); + result = esock_getopt_otp_select_read(env, descP); + MUNLOCK(descP->readMtx); + break; + case ESOCK_OPT_OTP_RCVBUF: MLOCK(descP->readMtx); result = esock_getopt_otp_rcvbuf(env, descP); @@ -8676,6 +8721,34 @@ ERL_NIF_TERM esock_getopt_otp_iow(ErlNifEnv* env, +/* esock_getopt_otp_select_read - Handle the OTP (level) select_read option + */ + +static +ERL_NIF_TERM esock_getopt_otp_select_read(ErlNifEnv* env, + ESockDescriptor* descP) +{ + ERL_NIF_TERM eVal; + + if (! IS_OPEN(descP->readState)) { + SSDBG( descP, + ("SOCKET", "esock_getopt_otp_select_read {%d} -> done closed\r\n", + descP->sock) ); + return esock_make_error_closed(env); + } + + eVal = esock_encode_bool(descP->selectRead); + + SSDBG( descP, + ("SOCKET", "esock_getopt_otp_select_read {%d} ->" + "\r\n eVal: %T" + "\r\n", descP->sock, eVal) ); + + return esock_make_ok2(env, eVal); +} + + + /* esock_getopt_otp_ctrl_proc - Handle the OTP (level) controlling_process option */ @@ -11977,6 +12050,7 @@ ESockDescriptor* esock_alloc_descriptor(SOCKET sock) descP->wCtrlSz = ESOCK_SEND_CTRL_BUFFER_SIZE_DEFAULT; descP->iow = FALSE; descP->dbg = ESOCK_DEBUG_DEFAULT; // Overwritten by caller + descP->selectRead = FALSE; descP->useReg = ESOCK_USE_SOCKET_REGISTRY;// Overwritten by caller descP->meta.env = esock_alloc_env("esock_alloc_descriptor - " "meta-env"); diff --git a/erts/emulator/nifs/unix/unix_socket_syncio.c b/erts/emulator/nifs/unix/unix_socket_syncio.c index 4a952b8f06df..9a7398616312 100644 --- 
a/erts/emulator/nifs/unix/unix_socket_syncio.c +++ b/erts/emulator/nifs/unix/unix_socket_syncio.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2022-2024. All Rights Reserved. + * Copyright Ericsson AB 2022-2025. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -354,7 +354,12 @@ static ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, ERL_NIF_TERM recvRef); static ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, ESockDescriptor* descP, - ERL_NIF_TERM sockRef); + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef); +static ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, + ESockDescriptor* descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef); static ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESockDescriptor* descP, int saveErrno, @@ -368,10 +373,10 @@ static ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, ERL_NIF_TERM recvRef); -static ERL_NIF_TERM recv_check_retry(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); +static ERL_NIF_TERM recv_check_select(ErlNifEnv* env, + ESockDescriptor* descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef); static ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ESockDescriptor* descP, ssize_t toRead, @@ -381,10 +386,6 @@ static ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, ERL_NIF_TERM returnTag); -static ERL_NIF_TERM recv_check_partial_part(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); static void recv_init_current_reader(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM recvRef); @@ -2791,15 +2792,17 @@ ERL_NIF_TERM essio_recv(ErlNifEnv* env, descP->readResult = 0; } else { /* We already have a buffer. - * Only happens for SOCK_STREAM when a recv didn't - * fill the buffer (partial recv). 
+ * Happens for ERRNO_BLOCK when we return 'select' - we keep + * the empty buffer, and for SOCK_STREAM when a recv + * for a specified length didn't fill the buffer (partial recv). */ if ((len == 0) && (0 < descP->readResult)) { /* The request is for any amount of data * - deliver what we have */ ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); - return recv_check_full_done(env, descP, sockRef); + /* Return {ok|select, Bin} */ + return recv_check_full_done(env, descP, sockRef, recvRef); } else if (descP->readBuf.size < recvLen) { /* Our buffer is too small */ ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, recvLen) ); @@ -2814,7 +2817,8 @@ ERL_NIF_TERM essio_recv(ErlNifEnv* env, sys_memcpy(buf.data, descP->readBuf.data + recvLen, keepLen); ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, recvLen) ); - ret = recv_check_full_done(env, descP, sockRef); + /* Return {ok|select, Bin} */ + ret = recv_check_full_done(env, descP, sockRef, recvRef); descP->readBuf = buf; descP->readResult = keepLen; return ret; @@ -2829,7 +2833,8 @@ ERL_NIF_TERM essio_recv(ErlNifEnv* env, /* Our buffer contains exactly what is requested * - just deliver it all */ - return recv_check_full_done(env, descP, sockRef); + /* Return {ok|select, Bin} */ + return recv_check_full_done(env, descP, sockRef, recvRef); } } ESOCK_ASSERT( recvLen == descP->readBuf.size ); @@ -2913,8 +2918,10 @@ ERL_NIF_TERM recv_check_result(ErlNifEnv* env, ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); data = MKBIN(env, &descP->readBuf); descP->readBuf.data = NULL; + /* {error, {closed, Bin}} */ res = esock_make_error(env, MKT2(env, reason, data)); } else { + /* {error, closed} */ res = esock_make_error(env, reason); } @@ -2924,6 +2931,7 @@ ERL_NIF_TERM recv_check_result(ErlNifEnv* env, /* +++ Error handling +++ */ + /* 'timeout' | {error, SaveErrno} */ res = recv_check_fail(env, descP, saveErrno, sockRef, recvRef); } else if ((descP->readResult += readResult) < descP->readBuf.size) { @@ -3075,6 
+3083,7 @@ ERL_NIF_TERM recvfrom_check_result(ErlNifEnv* env, FREE_BIN(bufP); + /* Return {error, closed} */ return esock_make_error_closed(env); } @@ -7146,7 +7155,6 @@ void essio_down(ErlNifEnv* env, * here, since we do not actually know yet if we need to! We do that in * the [recv|recvfrom|recvmsg]_check_result function. */ - static BOOLEAN_T recv_check_reader(ErlNifEnv* env, ESockDescriptor* descP, @@ -7168,10 +7176,13 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, "\r\n", descP->sock, ref) ); if (! esock_reader_search4pid(env, descP, &caller)) { - if (COMPARE(ref, esock_atom_zero) == 0) - goto done_ok; - esock_reader_push(env, descP, caller, ref, NULL); - *checkResult = esock_atom_select; + if (COMPARE(ref, esock_atom_zero) == 0) { + *checkResult = esock_atom_timeout; + } + else { + esock_reader_push(env, descP, caller, ref, NULL); + *checkResult = esock_atom_select; + } } else { /* Reader already in queue */ *checkResult = esock_raise_invalid(env, esock_atom_state); @@ -7186,9 +7197,8 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, } } - done_ok: - // Does not actually matter in this case, but ... *checkResult = esock_atom_ok; + // *checkResult ignored by the caller that creates the actual result return TRUE; } @@ -7201,7 +7211,6 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, * toRead = 0 means: Give me everything you have => maybe * toRead > 0 means: Yes */ - static ERL_NIF_TERM recv_check_full(ErlNifEnv* env, ESockDescriptor* descP, @@ -7217,16 +7226,6 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, /* +++ Give us everything you have got => * * (maybe) needs to continue +++ */ - /* Send up each chunk of data for each of the read - * and let the erlang code assemble it: {more, Bin} - * (when complete it should return {ok, Bin}). - * We need to read at least one more time to be sure if its - * done... - * - * Also, we need to check if the rNumCnt has reached its max (rNum), - * in which case we will assume the read to be done! 
- */ - SSDBG( descP, ("UNIX-ESSIO", "recv_check_full(%T) {%d} -> shall we continue reading?" @@ -7237,6 +7236,7 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, (unsigned long) descP->readResult, descP->rNum, descP->rNumCnt) ); + /* Res = {more|ok|select, Bin} */ res = recv_check_full_maybe_done(env, descP, sockRef, recvRef); } else { @@ -7249,8 +7249,8 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, "we got exactly what we could fit\r\n", sockRef, descP->sock, (long) toRead) ); - res = recv_check_full_done(env, descP, sockRef); - + /* Res = {ok|select, Bin} */ + res = recv_check_full_done(env, descP, sockRef, recvRef); } return res; @@ -7260,16 +7260,12 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, /* *** recv_check_full_maybe_done *** * - * Send up each chunk of data for each of the read - * and let the erlang code assemble it: {more, Bin} - * (when complete it should return {ok, Bin}). - * We need to read at least one more time to be sure if its - * done... - * - * Also, we need to check if the rNumCnt has reached its max (rNum), - * in which case we will assume the read to be done! - */ - + * Increment and check rNumCnt. If it hasn't reached its max + * (rNum); return {more, Bin}, + * then more reads should be done, + * otherwise return {ok|select, Bin} + * depending on selectRead. 
+*/ static ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, ESockDescriptor* descP, @@ -7286,22 +7282,9 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, descP->rNumCnt++; if (descP->rNumCnt >= descP->rNum) { - descP->rNumCnt = 0; - - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_pkg, &descP->readPkgCnt, 1); - if (descP->readPkgMaxCnt > descP->readPkgMax) - descP->readPkgMax = descP->readPkgMaxCnt; - descP->readPkgMaxCnt = 0; - - recv_update_current_reader(env, descP, sockRef); + /* Ret = {ok|select, Bin} */ + ret = recv_check_full_done(env, descP, sockRef, recvRef); - /* This transfers "ownership" of the *allocated* binary to an - * erlang term (no need for an explicit free). - */ - - ret = esock_make_ok2(env, MKBIN(env, &descP->readBuf)); - descP->readBuf.data = NULL; } else { /* Yes, we *do* need to continue reading */ @@ -7318,6 +7301,7 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, "we are done for now - read more\r\n", sockRef, descP->sock, (unsigned long) descP->readBuf.size) ); + /* Ret = {more, Bin} */ ret = MKT2(env, esock_atom_more, MKBIN(env, &descP->readBuf)); descP->readBuf.data = NULL; } @@ -7330,17 +7314,20 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, /* *** recv_check_full_done *** * * A successful recv and we filled the buffer. - * - * Deliver the whole buffer as a binary. 
+ * - return {ok, Bin}, or {select, Bin} when selectRead is set and recvRef is not zero */ - static ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, ESockDescriptor* descP, - ERL_NIF_TERM sockRef) + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef) { ERL_NIF_TERM data; + if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) + /* {select, Bin} */ + return recv_check_select_done(env, descP, sockRef, recvRef); + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, @@ -7362,16 +7349,79 @@ ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, data = MKBIN(env, &descP->readBuf); descP->readBuf.data = NULL; + /* Return {ok, Bin} */ return esock_make_ok2(env, data); } +/* *** recv_check_select_done *** + * + * Deliver the binary, and initiate select_read + * - return {select, Bin} + */ +static +ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, + ESockDescriptor* descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef) +{ + ERL_NIF_TERM res; + int sres; + + descP->rNumCnt = 0; + + ESOCK_CNT_INC(env, descP, sockRef, + esock_atom_read_pkg, &descP->readPkgCnt, 1); + ESOCK_CNT_INC(env, descP, sockRef, + esock_atom_read_byte, &descP->readByteCnt, descP->readResult); + + descP->readPkgMaxCnt += descP->readResult; + if (descP->readPkgMaxCnt > descP->readPkgMax) + descP->readPkgMax = descP->readPkgMaxCnt; + descP->readPkgMaxCnt = 0; + + recv_init_current_reader(env, descP, recvRef); + + /* SELECT for more data */ + + sres = esock_select_read(env, descP->sock, descP, NULL, + sockRef, recvRef); + if (sres < 0) { + /* Unlikely that any next reader will have better luck, + * but why not give them a shot - the queue will be cleared + */ + recv_update_current_reader(env, descP, sockRef); + + /* Res = error({select_read, SRes}) */ + res = enif_raise_exception(env, + MKT2(env, esock_atom_select_read, + MKI(env, sres))); + } else { + ERL_NIF_TERM data; + /* This transfers "ownership" of the *allocated* binary to an + * 
erlang term (no need for an explicit free). 
+ */ + data = MKBIN(env, &descP->readBuf); + descP->readBuf.data = NULL; + + SSDBG( descP, + ("UNIX-ESSIO", + "recv_check_select_done(%T) {%d} -> [%ld] done\r\n", + sockRef, descP->sock, (long) descP->readResult) ); + + descP->readState |= ESOCK_STATE_SELECTED; + /* Res = {select, Bin} */ + res = MKT2(env, esock_atom_select, data); + } + + return res; +} + /* *** recv_check_fail *** * * Handle recv failure. */ - static ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESockDescriptor* descP, @@ -7397,6 +7447,7 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_fails, &descP->readFails, 1); + /* Res = {error, econnreset} */ res = recv_check_fail_econnreset(env, descP, sockRef, recvRef); } else if ((saveErrno == ERRNO_BLOCK) || @@ -7408,10 +7459,17 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, "\r\n recvRef: %T" "\r\n", sockRef, descP->sock, recvRef) ); - if (COMPARE(recvRef, esock_atom_zero) == 0) + if (COMPARE(recvRef, esock_atom_zero) == 0) { + /* Would block and zero time-out - this is a time-out + * Res = 'timeout' + */ res = esock_atom_timeout; - else - res = recv_check_retry(env, descP, sockRef, recvRef); + } + else { + descP->rNumCnt = 0; + /* Res = 'select' */ + res = recv_check_select(env, descP, sockRef, recvRef); + } } else { @@ -7424,6 +7482,7 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_fails, &descP->readFails, 1); + /* Res = {error, SaveErrno} */ res = recv_check_fail_gen(env, descP, saveErrno, sockRef); } @@ -7435,7 +7494,6 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, * * The recv call had a "general" failure. 
*/ - static ERL_NIF_TERM recv_check_fail_gen(ErlNifEnv* env, ESockDescriptor* descP, @@ -7446,6 +7504,7 @@ ERL_NIF_TERM recv_check_fail_gen(ErlNifEnv* env, recv_error_current_reader(env, descP, sockRef, reason); + /* Return {error, SaveErrno} */ return esock_make_error(env, reason); } @@ -7455,7 +7514,6 @@ ERL_NIF_TERM recv_check_fail_gen(ErlNifEnv* env, * We detected that the socket was closed while reading. * Inform current and waiting readers. */ - static ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, ESockDescriptor* descP, @@ -7482,17 +7540,18 @@ ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, recv_error_current_reader(env, descP, sockRef, reason); + /* Return {error, econnreset} */ return res; } -/* *** recv_check_retry *** +/* *** recv_check_select *** * - * The recv call would have blocked, so retry. + * The recv call should be retried + * - initiate and return 'select', keep the buffer binary */ - static -ERL_NIF_TERM recv_check_retry(ErlNifEnv* env, +ERL_NIF_TERM recv_check_select(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, ERL_NIF_TERM recvRef) @@ -7500,12 +7559,11 @@ ERL_NIF_TERM recv_check_retry(ErlNifEnv* env, int sres; ERL_NIF_TERM res; - descP->rNumCnt = 0; recv_init_current_reader(env, descP, recvRef); SSDBG( descP, ("UNIX-ESSIO", - "recv_check_retry(%T) {%d} -> SELECT for more" + "recv_check_select(%T) {%d} -> SELECT for more" "\r\n recvRef: %T" "\r\n", sockRef, descP->sock, recvRef) ); @@ -7516,24 +7574,25 @@ ERL_NIF_TERM recv_check_retry(ErlNifEnv* env, */ recv_update_current_reader(env, descP, sockRef); + /* Res = error({select_read, SRes}) */ res = enif_raise_exception(env, MKT2(env, esock_atom_select_read, MKI(env, sres))); } else { descP->readState |= ESOCK_STATE_SELECTED; + /* Res = 'select' */ res = esock_atom_select; } + /* Keep the buffer binary */ return res; } - /* *** recv_check_partial *** * * Handle a successful recv which only partly filled the specified buffer. 
*/ - static ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ESockDescriptor* descP, @@ -7555,7 +7614,7 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, if (COMPARE(recvRef, esock_atom_zero) == 0) { - /* Polling read - deliver as {timeout,Data} */ + /* Polling read */ SSDBG( descP, ("UNIX-ESSIO", @@ -7564,13 +7623,12 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, "\r\n", sockRef, descP->sock, (long) toRead, recvRef) ); + /* Res = {timeout, Bin} */ res = recv_check_partial_done(env, descP, sockRef, esock_atom_timeout); } else { - /* Incomplete data - * - return a select result to initiate a retry - */ + /* Incomplete data */ SSDBG( descP, ("UNIX-ESSIO", @@ -7580,11 +7638,15 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, "\r\n", sockRef, descP->sock, (long) toRead, recvRef) ); - res = recv_check_partial_part(env, descP, sockRef, recvRef); + + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, + &descP->readByteCnt, descP->readResult); + /* Initiate select read, Res = 'select' */ + res = recv_check_select(env, descP, sockRef, recvRef); } } else { - /* No more data is needed - deliver as {ok,Data} */ + /* No more data is needed */ SSDBG( descP, ("UNIX-ESSIO", @@ -7593,7 +7655,14 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, "\r\n", sockRef, descP->sock, (long) toRead, recvRef) ); - res = recv_check_partial_done(env, descP, sockRef, esock_atom_ok); + if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) { + ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); + /* Res = {select, Bin} */ + res = recv_check_select_done(env, descP, sockRef, recvRef); + } + else + /* Res = {ok, Bin} */ + res = recv_check_partial_done(env, descP, sockRef, esock_atom_ok); } return res; @@ -7604,7 +7673,6 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, * * A successful but only partial recv, which fulfilled the required read. 
*/ - static ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, ESockDescriptor* descP, @@ -7637,53 +7705,11 @@ ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, ("UNIX-ESSIO", "recv_check_partial_done(%T) {%d} -> [%ld] done\r\n", sockRef, descP->sock, (long) descP->readResult) ); + /* {ReturnTag, Bin} */ return MKT2(env, returnTag, data); } -/* *** recv_check_partial_part *** - * - * A successful but only partial recv, which only partly fulfilled - * the required read. - */ - -static -ERL_NIF_TERM recv_check_partial_part(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) -{ - ERL_NIF_TERM res; - int sres; - - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_byte, &descP->readByteCnt, descP->readResult); - - recv_init_current_reader(env, descP, recvRef); - - /* SELECT for more data */ - - sres = esock_select_read(env, descP->sock, descP, NULL, - sockRef, recvRef); - if (sres < 0) { - /* Unlikely that any next reader will have better luck, - * but why not give them a shot - the queue will be cleared - */ - recv_update_current_reader(env, descP, sockRef); - - res = enif_raise_exception(env, - MKT2(env, esock_atom_select_read, - MKI(env, sres))); - } else { - descP->readState |= ESOCK_STATE_SELECTED; - res = esock_atom_select; - } - /* Keep the buffer binary */; - - return res; -} - - /* *** recv_init_current_reader *** * * Initiate (maybe) the currentReader structure of the descriptor. diff --git a/erts/preloaded/ebin/prim_socket.beam b/erts/preloaded/ebin/prim_socket.beam index bb396bef140b39bc4d5b5632bc81b897e5406378..b7cdf9cf8905b77ba0791f7f16f3dc0996440978 100644 GIT binary patch delta 4296 zcmZXX33wDm7RRTiC!u;sq?3sWHywsx6`^rpz~vBah`7_ZVfj=HA}jK_s0AKeBA_^n zT$WrydliIm$YHo)KyJbn?jzi1g{i}Mqx|833X@C6U zKVPozay1W(!I|M+Y9FfJ{pVXP^Y;}%0UtOp!QkSVk>Kb)}TC8qY zcdPr2_K|_tQP}1U*I1 z()08dy-y!l$Wp8fE6FNn)vz+HfR%0et!J#pR#WRG>jmpstAo|a>TJDjG3!HXl-0)? 
zXbradSv{=+Yq~YnnrIbTv#q(-LTib&%qp_hSsSeIF>9-})7otnTNkX$)-~(4b;tU{ zdcYLsVTsJk(pUzo$}(9NqpS`Kum;RzEm?@QX0Nag>^1f-W9%c=gAHK+VMEz4_B9*B z#d%`@Cr*I#y#H;XXJc|=v zhd<5h^G3WeH+VDtEPsw${3ZSde}@m^BlvheiBIFx`CLAaFXR!voUh=kcoAR2*YWkd zm~Z5p`4+y7@8G+53E$24@O^wgKfn+2L;MIo%8&6*?$CU$hhLLc7rhv=L3Em8ecLX%?-c&Qc52 z8Fp9qdx>hoqeqYGMGm=NN=TaDtIPXc-tOJ0XP3_Zj`;B+C1ttG<XRK1JXf5ajl}3LVjCJ4ve!*KDo2Z#2S<1g-_OhzsFo5?#g^yQ9_$aeInp zz_lcLeHgvgO))BR)9o`Azv+RBPvbya5b#{0X4rO*lpv%}OZgIwvFoLlg5o3}0n*5~ z3w>)Rq?MxY?bB&9k+4TSIi%tp*3eXZrpIsAQl$7=0_S=H#&|nBy%0^b_osKunt<>i zf!l$yraHod6Ln)U!b1dp4M;N}LxKEgcge^^(;~w%UPNd{WM{=T(p0PbDnf;J&ng4q zdZ)@+R2Vr@^=UVnWqWHZfa}T{z0o|ozMkfrgYYSVhe5@5u#nTRkOg*oy$OogWAzSb zv3*tVik8^zGbt?Oi_AB(mLYsw;4zTj0rKxa{tATe3Op9b8=&bPXj)|_)a-|f>~PH@ zwAOy!pATb?`A4AjHmfzoSBywifu}+KI~pR@pu#4*de-Y`i`_TtEws%(oHdB-2q03& ztmT(Glom8!P!ddIM_WX)1)c)~yF$2bV!+sC53XGsiqES399%Eeo`-m3+*8ZlVA|CQ zp>Qc8?+CmXMsOXGcR|{IMBWp4ACRwruqMU;r71MMrQ-6pRJ35W@rmC%9Iko3HKa?sGg#%ZpOUsCc6~#dYJ4+%us{j3L+nC0d1-RzwhCS50O5mx4yKW7)D>eVSE~_YlJ_Qs{qzFU<`<}D9Sea<@t4Q zrWP=x1u}z>)GtXlO~1zsrp2C?{qkwaH$GPijQ-K*D9@uS@hO$GR2_!&mvctFPho~U ze+gWJNt$LV@WiDlVSx|CiiDxaXYnGYlB&Vj&!S_cHvoPD0}=UL;Qg&(S#p%L{}K2g z+^28gM>4Q%rJWSW9fHUg0w03dw-6gt7W*z98!Yf)h}G9nm<%Zk)`y4nNE_`2B0~i} z3gPeJ_7Bqnx?F!M1dp}RaXKPj3S0`&aZs4Y$I9q0Z8V&L$X5a%hiJYFk>L^3TU`NM z5s?uBpO6>^jdb8qu#%I&3xJPy@UI0v1$ZJ<8RNiX1wJh?WZMq>jlgFBPXfK)I`DS_ zpOx4|UL(P~f|OXTpex z11}Qz9^hGk7d!CJ0^bKb8)jY-FaK+-{8E8`gUB2xzbrN&9Q}1H7x;JJbAhjL6j~|p zAAslSh^%tp)dD{NJRj;8Iq({R{{*}sfXG@0UMKKFi9zIg2i_p?Bf#N>FlVuYZxqA@ zJOX%=18)`t0bT@Bwm9%sLEI9;~*jjz(M37l=r|Z zK-0BW9wl}P9g4jZ4mUK8$iwqU*?F-Xl$IcJRFHD5fgckDgR&K%thBLlDz5A#C_7oE zEUqeG90!qSe07cEdEp?Ou?5C2QJHx`Jx37rvAU?Z6Nvnxy%jr!vx3pX7>+99#o7(n zo#Rp6Ctz~WUFy_*5_H4c?NmeKwA6k2FS^eNQogk>jL2C*l0f)M5Pq(iVqDBHy)POU zyni>&BXT}T(@kIO+lVB~gF5;_q9o`>7nuBl>60gF3q&r02a{Y9Uol;H_m>U2FGwnsSqo);HOI<~-vsdiSqJ3z@(H+#f+YPzkTjqh zfIf)j{VB+kK#E~U9~Oc-Jvj%FM}njS+^C_D>wkkoIuaxU@Fo+5+)Z*lN>ga^QzHjH zHD1ZiRqW~wYlmZ-81f_ra;u_Hf*_S($mReF0VtUQig2<2QZ4iILexgxE 
zQUyV(1K18glb~~oD1rVC4TVyDDC84sptloxY0!I8kQ&h41>JP$W(cA~w?s#wim*Mf zs7iul0@)1-l@qn(bQG!*+nGH96sp=hw~FF&MFuyjfx>-&=55F55WY7 zzvM5z%$GUqp}AZ2?fxp5j^H${9srk1i?ywBP0C#1u z(LW{SdOA2I&|o5WMX*v7it}BuzFX!uUf7)oPJ$z_ zH5PCuft9hc3AV-g6JX0Z_?9?V!D0vB9_MnWWUQ>tCU8NBTZ;F(!jD(B0f5}P2Vl!X zD;rP@E+nIeV5O+Co!BV-_$DNSUzWc%#?h}}rPwmQF3x`kTcM1Hz*!9q6rpQirP{Iy bR>k>cuqh6{5_~D>99X%n(Fr1Yvj^zELT^9} delta 4236 zcmZ{m33wDm7RRTilTbY*(#eD*K!k|{SOxSlIJjI3HfGUjG%TyQS&wzu{nVxL0u)6= zM--3(L`dUN5F&EP4Kc`l1c=;_$bCaV?)y|kW&fR-NPq6P`BME}{obor|Eij)=|%ku z`_qU1@>mOxr+xZpniK7#_Er0-Z>#UCh3W)#k~&5GN}aCGRA;Gk)nc_oU8Js1SE_5& zGIf($p>9`qtCi|$^|E?Ry{_I+e^hUCh1cN8ycVy`HJ-!k^Ts^Lvw0?O${BCYTk{a_ zz#rw^cs_rLzsh^@KKxDIj}PR7`4B!L%183CypVsvr|=nkAz#dw@fCan-^9!L4*nfK z%q#i#`~<(uZ}3~D$Ml*RW=%86tZxR)=4KP~9`k;)mD$GZZ04Hn%qPuf&F<#E%^v2< z=0|2<^F8wev!B`99B)oBCz(a&G;^jo+blL0n2XF6<|=cI8QpJgFt?Z$W|?`?JY$|W zub5ZOpUhj9*HWxROSSw~hE>lBSPd;^ajUu2!g|2!XgzG1)??O_))UqXRuAhHtCuyv ziduuMA=by%C~K@W&ic}tZWUYet>xAVtJK~T(NeS$%|u(#PP7*h(OEnto)-hfFd;>em?S2PSz@-BCyK=) zu~;k>%f$*&DprX#qD-t6>%<1JNo)~YMY*UD+r)OUL+lj0#2&F%>=XONL2*bN7M0?t zI3_wB7bnCiaax=a=R|k$5An3f7k?Fxi#*XmbQDR}Dyz&|&Hv7y;m`8tcu(Gfx8$ij zi#Ol_-jH*3hB{51YWMW^NTN4y-@e_vWRLfe#FFN8zLGl2zd) z3T%vkh#n*DCQfn-`i z_Hi<;5tc3K6y(345q38!thJjodXlWS2Q}(W zHrmGu2xNDmu-RuT<;b63KeN;(?@dm`K{DWq?;M>TGY;tLyh!1G4q zxnyX`)O(kBVgA)5LZSx%ky zQ@vk)FyPD4LPk}AoG`NbB?k;6m|%p{4WAz$<(&zdKD1DOM=8?ZvM)8ujK1Z}kQP8{ ze-z9QR^S;<)-*#&FnsA4u`4#>U5t1;UkPOf@)HatO~Zh0G#l9guR3Mow0^@ zG0L$~Nq6HIYc2#3exHk%J5ca8yeOVPo$I94F~1&@VA%p(}a3q^BK>LF6rgiSOZqeLT-0CT!Yd{H1$bVFu+lp?%2rAGtHYsk^&QUENO~K%7;~1n z@VAnAfWHP_>%!|KBfuq)vfhO^Nal4g=G^GQn z9>OY&H0O&^hp_Kr3^KMk7dUW$)+KDaWQoXK2pv0G>wA=rN>C}(cL$5~-T6V>9lk~3 zLVceAFNV|POfC?@5CJ@?4Z^?cI7q-SKYk0s+f;zm#{mB zs&pU5$xj-7=R)mB*eQ6Zby~8!P-~e%*qQPA*>I*7 zRg9n$IjaRiop4xi&q-DbNz0LRzK?#%U#MRW7wH%C!#F4f8*?EzHZiCeI5j`AFKD4q z)uLAfCDW?5{37OjpRh~uyIJwa`j;h3?HnfTilmRC{z?eE+B&LVQ}AokQBe{gzvJ`- z1?H-IpRTpkuRF4?|89S7NS1~&r6}`*QQ&0!D48E*70At+iL{7Z*sZ^iO{ 
zmaH~N8FuuS2c|<_ASIWuUnR=`{T6w*e;Xa~NR|n@)*unmCQnh?qB(C8xpp~=w7|Kghw60Yfq!h&VH1uCsoz+YQ$50lGQ`%2Bg+(ljnmFpAtw8gz!6UA8eT&O_uC##5W?If*G9^_=(>+ z8O0x;_%6xnBV`j(YW<%SO;#XdvqmDReiBKOrRZ%z4<{W-m#hK0ThXnJZiZw5bjt%I zl9?1}oRUob1|StCUQ*zX6GUhPU|7(Y$DmKNgLk zIdAxdE+23Dzj4QR@|~R3j^C+YuWm5Uz`qae8#L4)+v2{{t-wRZ`B2#2F~6ikt2W-K z6T|>-9oVYB8tv*Ig;S1(iM*Mxjwll6TVs81?O5F7iSsRS?uS*-s`=(PPlZiz@pbT# z7xgB>aa#3&4Y7c?CTx<6Z;bO4*cvXrKF%qu)2i!iit}2qs*A6M4Q9e)f&dSyR zI9qoM)|tGzP#Ik81o;Iv8LfIJzIA;3--dNWRp*z&b@&gkccE4DRdIe3))7_3Be2WS mpopA@biRyd6S diff --git a/erts/preloaded/src/prim_socket.erl b/erts/preloaded/src/prim_socket.erl index 965262141606..2708902fcb7c 100644 --- a/erts/preloaded/src/prim_socket.erl +++ b/erts/preloaded/src/prim_socket.erl @@ -99,6 +99,7 @@ -define(ESOCK_OPT_OTP_FD, 1008). -define(ESOCK_OPT_OTP_META, 1009). -define(ESOCK_OPT_OTP_USE_REGISTRY, 1010). +-define(ESOCK_OPT_OTP_SELECT_READ, 1011). %% -define(ESOCK_OPT_OTP_DOMAIN, 1999). % INTERNAL %%-define(ESOCK_OPT_OTP_TYPE, 1998). % INTERNAL @@ -1134,6 +1135,7 @@ enc_sockopt({otp = Level, Opt}, 0 = _NativeValue) -> fd -> ?ESOCK_OPT_OTP_FD; meta -> ?ESOCK_OPT_OTP_META; use_registry -> ?ESOCK_OPT_OTP_USE_REGISTRY; + select_read -> ?ESOCK_OPT_OTP_SELECT_READ; domain -> ?ESOCK_OPT_OTP_DOMAIN; _ -> invalid end diff --git a/lib/kernel/src/inet_epmd_socket.erl b/lib/kernel/src/inet_epmd_socket.erl index 9f1a921542fb..e2fd38501c9b 100644 --- a/lib/kernel/src/inet_epmd_socket.erl +++ b/lib/kernel/src/inet_epmd_socket.erl @@ -349,7 +349,9 @@ output_data(Socket, Buffer) -> %% ------------------------------------------------------------ -spec input_handler_start(_, _) -> no_return(). 
% Server loop input_handler_start(Socket, DistHandle) -> - try input_handler(Socket, DistHandle) + try + ok = socket:setopt(Socket, {otp,select_read}, true), + input_handler(Socket, DistHandle) catch Class : Reason : Stacktrace when Class =:= error -> error_logger:error_report( diff --git a/lib/kernel/src/socket.erl b/lib/kernel/src/socket.erl index 973a05047d6d..2192dc05623a 100644 --- a/lib/kernel/src/socket.erl +++ b/lib/kernel/src/socket.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2020-2024. All Rights Reserved. +%% Copyright Ericsson AB 2020-2025. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -5083,6 +5083,8 @@ recv_zero(SockRef, Length, Flags, Buf) -> case prim_socket:recv(SockRef, Length, Flags, zero) of {more, Bin} -> % Type == stream, Length == 0, default buffer filled recv_zero(SockRef, Length, Flags, [Bin | Buf]); + {ok, Bin} -> % All requested data + {ok, condense_buffer([Bin | Buf])}; timeout when Buf =:= [] -> {error, timeout}; timeout -> @@ -5092,28 +5094,17 @@ recv_zero(SockRef, Length, Flags, Buf) -> {timeout, Bin} -> %% Stream socket with Length > 0 and not all data {error, {timeout, condense_buffer([Bin | Buf])}}; - {ok, Bin} -> % All requested data - {ok, condense_buffer([Bin | Buf])}; - {error, _} = Error when Buf =:= [] -> - Error; {error, Reason} -> - {error, {Reason, condense_buffer(Buf)}} + recv_error(Buf, Reason) end. -%% Condense buffer into a Binary --compile({inline, [condense_buffer/1]}). -condense_buffer([]) -> <<>>; -condense_buffer([Bin]) when is_binary(Bin) -> Bin; -condense_buffer(Buffer) -> - iolist_to_binary(lists:reverse(Buffer)). 
- recv_nowait(SockRef, Length, Flags, Handle) -> case prim_socket:recv(SockRef, Length, Flags, Handle) of {more, Bin} -> % Type = stream, Length = 0, default buffer filled recv_zero(SockRef, Length, Flags, [Bin]); - {select, Bin} -> - %% We got less than requested so the caller will - %% get a select message when there might be more to read + {ok, _} = OK -> % All requested data + OK; + {select, Bin} -> % All data, new recv operation in progress {select, {?SELECT_INFO(recv, Handle), Bin}}; select -> %% The caller will get a select message when there @@ -5124,8 +5115,6 @@ recv_nowait(SockRef, Length, Flags, Handle) -> %% result) when the data arrives. *No* further action %% is required. {completion, ?COMPLETION_INFO(recv, Handle)}; - {ok, _} = OK -> % All requested data - OK; {error, _} = Error -> Error end. @@ -5153,43 +5142,32 @@ recv_nowait(SockRef, Length, Flags, Handle) -> %% else read error -> {error, _} %% end -%% We will only recurse with Length == 0 if Length is 0, -%% so Length == 0 means to return all available data also when recursing - +%% Buf is [], for 'select' platforms; it is only used +%% for 'completion' platforms. 
+%% recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> Handle = make_ref(), case prim_socket:recv(SockRef, Length, Flags, Handle) of {more, Bin} -> % Type = stream, Length = 0, default buffer filled - 0 = Length, + 0 = Length, + [] = Buf, recv_zero(SockRef, Length, Flags, [Bin]); %% - {select, Bin} -> - %% We got less than requested on a stream socket - Timeout = timeout(Deadline), - receive - ?socket_msg(?socket(SockRef), select, Handle) -> - if - 0 < Timeout -> - %% Recv more - recv_deadline( - SockRef, Length - byte_size(Bin), Flags, - Deadline, [Bin | Buf]); - true -> - {error, {timeout, condense_buffer([Bin | Buf])}} - end; - ?socket_msg(_Socket, abort, {Handle, Reason}) -> - {error, {Reason, condense_buffer([Bin | Buf])}} - after Timeout -> - _ = cancel(SockRef, recv, Handle), - recv_error(Buf, timeout) - end; + {ok, _Bin} = OK -> %% All data + [] = Buf, + OK; + %% - select - when 0 < Length; % Requested a specific amount of data - Buf =:= [] -> % or Buf empty (and requested any amount of data) + {select, Bin} -> %% All data, new recv operation in progress + [] = Buf, + _ = cancel(SockRef, recv, Handle), + {ok, Bin}; + %% + select -> + [] = Buf, %% - %% There is nothing just now, but we will be notified when there - %% is something to read (a select message). 
+ %% There is nothing just now, but we will be notified + %% with a select message when there is something to recv Timeout = timeout(Deadline), receive ?socket_msg(?socket(SockRef), select, Handle) -> @@ -5199,23 +5177,15 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> recv_deadline( SockRef, Length, Flags, Deadline, Buf); true -> - recv_error(Buf, timeout) + {error, timeout} end; ?socket_msg(_Socket, abort, {Handle, Reason}) -> - recv_error(Buf, Reason) + {error, Reason} after Timeout -> _ = cancel(SockRef, recv, Handle), - recv_error(Buf, timeout) + {error, timeout} end; %% - select -> % Length is 0 (request any amount of data), Buf not empty - %% - %% We first got some data and are then asked to wait, - %% but what we already got will do just fine; - %% - cancel and return what we have - _ = cancel(SockRef, recv, Handle), - {ok, condense_buffer(Buf)}; - %% completion -> %% There is nothing just now, but we will be notified when the %% data has been read (with a completion message). @@ -5249,26 +5219,6 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> _ = cancel(SockRef, recv, Handle), recv_error(Buf, timeout) end; - - - %% All requested data - {ok, Bin} when (Length =:= 0) orelse - (Length =:= byte_size(Bin)) -> % All requested data - {ok, condense_buffer([Bin | Buf])}; - - {ok, Bin} -> % Only part of the requested data - Timeout = timeout(Deadline), - if - 0 < Timeout -> - %% Recv more - recv_deadline( - SockRef, Length - byte_size(Bin), Flags, - Deadline, [Bin | Buf]); - true -> - recv_error([Bin | Buf], timeout) - end; - - %% {error, Reason} -> recv_error(Buf, Reason) @@ -5279,6 +5229,12 @@ recv_error([], Reason) -> recv_error(Buf, Reason) when is_list(Buf) -> {error, {Reason, condense_buffer(Buf)}}. +%% Condense buffer into a Binary +-compile({inline, [condense_buffer/1]}). +condense_buffer([]) -> <<>>; +condense_buffer([Bin]) when is_binary(Bin) -> Bin; +condense_buffer(Buffer) -> + iolist_to_binary(lists:reverse(Buffer)). 
%% --------------------------------------------------------------------------- %% From 295ea9f8e98393a9da5df922502f6b6df35030c3 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Wed, 15 Jan 2025 17:57:06 +0100 Subject: [PATCH 2/6] Improve benchmark with select_read --- lib/kernel/src/inet_epmd_socket.erl | 412 ++++++++++++------- lib/ssl/test/ssl_dist_bench_SUITE.erl | 561 +++++++++++++++++--------- 2 files changed, 633 insertions(+), 340 deletions(-) diff --git a/lib/kernel/src/inet_epmd_socket.erl b/lib/kernel/src/inet_epmd_socket.erl index e2fd38501c9b..7cc64a4a54eb 100644 --- a/lib/kernel/src/inet_epmd_socket.erl +++ b/lib/kernel/src/inet_epmd_socket.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 1997-2024. All Rights Reserved. +%% Copyright Ericsson AB 1997-2025. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -267,11 +267,24 @@ tick(DistCtrl) -> ok. %% ------------------------------------------------------------ +-record(ohp, %% Output Handler Parameters + {socket, dist_handle, watermark}). + -spec output_handler_start(_, _) -> no_return(). % Server loop output_handler_start(Socket, DistHandle) -> try erlang:dist_ctrl_get_data_notification(DistHandle), - output_handler(Socket, DistHandle) + {ok, SndbufSize} = socket:getopt(Socket, {socket,sndbuf}), + OHP = + #ohp{ + socket = Socket, + dist_handle = DistHandle, + watermark = SndbufSize bsr 1}, + Buffer = [], + Size = 0, + DistData = false, + SelectInfo = undefined, + output_handler(OHP, Buffer, Size, DistData, SelectInfo) catch Class : Reason : Stacktrace when Class =:= error -> error_logger:error_report( @@ -282,76 +295,115 @@ output_handler_start(Socket, DistHandle) -> erlang:raise(Class, Reason, Stacktrace) end. 
-output_handler(Socket, DistHandle) -> - receive Msg -> - case Msg of - dist_tick -> - output_handler_tick(Socket, DistHandle); - dist_data -> - output_handler_data(Socket, DistHandle); - _ -> % Ignore - output_handler(Socket, DistHandle) - end +output_handler(OHP, Buffer, Size, DistData, SelectInfo) -> + if + DistData, OHP#ohp.watermark > Size -> + %% There is dist_data from the emulator, + %% and we have buffer space for it + output_handler_data(OHP, Buffer, Size, SelectInfo); + SelectInfo =:= undefined, Size > 0-> + %% We are not waiting for a send to complete, + %% and we have buffered data + output_handler_send(OHP, Buffer, Size, DistData); + true -> + output_handler_wait( + OHP, Buffer, Size, DistData, SelectInfo, infinity) end. -output_handler_tick(Socket, DistHandle) -> - receive Msg -> - case Msg of - dist_tick -> - output_handler_tick(Socket, DistHandle); - dist_data -> - output_handler_data(Socket, DistHandle); - _ -> % Ignore - output_handler_tick(Socket, DistHandle) - end - after 0 -> - output_data(Socket, [<<0:32>>]), - output_handler(Socket, DistHandle) +%% Wait for an external event (message) +output_handler_wait( + #ohp{socket = Socket} = OHP, Buffer, Size, DistData, SelectInfo, Tick) -> + receive + dist_data -> + output_handler(OHP, Buffer, Size, true, SelectInfo); + dist_tick + when SelectInfo =:= undefined, not DistData, Size == 0 -> + %% Tick only when we don't wait for a send to complete + %% and there is no dist_data to send, + %% but receive all dist_tick messages first + %% by looping with after timeout Tick = 0 + output_handler_wait(OHP, Buffer, Size, DistData, SelectInfo, 0); + {'$socket', Socket, select, SelectHandle} + when element(3, SelectInfo) =:= SelectHandle -> + %% Send no longer pending; try to send again + output_handler_send(OHP, Buffer, Size, DistData, SelectInfo); + _ -> % Ignore + output_handler_wait(OHP, Buffer, Size, DistData, SelectInfo, Tick) + after Tick -> + %% Send a tick + Buffer = [], % Assert + Size = 0, % Assert + 
DistData = false, % Assert + output_handler_send(OHP, [<<0:32>>], 4, false) end. -output_handler_data(Socket, DistHandle) -> - output_handler_data(Socket, DistHandle, [], 0). -%% -output_handler_data(Socket, DistHandle, Buffer, Size) - when 1 bsl 16 =< Size -> - output_data(Socket, Buffer), - output_handler_data(Socket, DistHandle); -output_handler_data(Socket, DistHandle, Buffer, Size) -> +%% Get dist_data from the emulator +output_handler_data( + #ohp{dist_handle = DistHandle, watermark = Watermark} = OHP, + Buffer, Size, SelectInfo) + when Watermark > Size -> + %% case erlang:dist_ctrl_get_data(DistHandle) of none -> - if - Size =:= 0 -> - [] = Buffer, % ASSERT - erlang:dist_ctrl_get_data_notification(DistHandle), - output_handler(Socket, DistHandle); - true -> - output_data(Socket, Buffer), - output_handler_data(Socket, DistHandle) - end; + erlang:dist_ctrl_get_data_notification(DistHandle), + output_handler(OHP, Buffer, Size, false, SelectInfo); {Len, Iovec} -> %% erlang:display({Len, '==>>'}), - output_handler_data( - Socket, DistHandle, - lists:reverse(Iovec, [<> | Buffer]), Len + 4 + Size) - end. + Buffer_1 = lists:reverse(Iovec, [<> | Buffer]), + Size_1 = Len + 4 + Size, + output_handler_data(OHP, Buffer_1, Size_1, SelectInfo) + end; +output_handler_data(OHP, Buffer, Size, SelectInfo) -> + output_handler(OHP, Buffer, Size, true, SelectInfo). %% Output data to socket -output_data(Socket, Buffer) -> - Iovec = lists:reverse(Buffer), - case socket:sendmsg(Socket, #{ iov => Iovec }) of +output_handler_send(OHP, Buffer, Size, DistData) -> + output_handler_send_result( + OHP, Buffer, Size, DistData, + socket:sendv(OHP#ohp.socket, lists:reverse(Buffer), nowait)). + +%% Output data to socket, continuation +output_handler_send(OHP, Buffer, Size, DistData, SelectInfo) -> + output_handler_send_result( + OHP, Buffer, Size, DistData, + socket:sendv(OHP#ohp.socket, lists:reverse(Buffer), SelectInfo, nowait)). 
+ +output_handler_send_result(OHP, Buffer, Size, DistData, Result) -> + case Result of ok -> - %% erlang:display({iolist_size(Iovec), '>>'}), - ok; + output_handler(OHP, [], 0, DistData, undefined); + {select, {SelectInfo, RestIOV}} -> + Size_1 = iolist_size(RestIOV), + Buffer_1 = lists:reverse(RestIOV), + output_handler(OHP, Buffer_1, Size_1, DistData, SelectInfo); + {select, SelectInfo} -> + output_handler(OHP, Buffer, Size, DistData, SelectInfo); + {error, {Reason, _RestIOV}} -> + exit(Reason); {error, Reason} -> exit(Reason) end. %% ------------------------------------------------------------ +-record(ihp, %% Input Handler Parameters + {socket, dist_handle, watermark}). + -spec input_handler_start(_, _) -> no_return(). % Server loop input_handler_start(Socket, DistHandle) -> try - ok = socket:setopt(Socket, {otp,select_read}, true), - input_handler(Socket, DistHandle) + ok = socket:setopt(Socket, {otp,select_read}, true), + {ok, RcvbufSize} = socket:getopt(Socket, {socket,rcvbuf}), + IHP = + #ihp{ + socket = Socket, + dist_handle = DistHandle, + watermark = RcvbufSize}, + Front = [], + Size = 0, + Rear = [], + SelectHandle = undefined, + %% erlang:display({?FUNCTION_NAME, Socket, DistHandle}), + input_handler(IHP, Front, Size, Rear, SelectHandle) catch Class : Reason : Stacktrace when Class =:= error -> error_logger:error_report( @@ -362,107 +414,189 @@ input_handler_start(Socket, DistHandle) -> erlang:raise(Class, Reason, Stacktrace) end. -input_handler(Socket, DistHandle) -> - input_handler(Socket, DistHandle, <<>>, [], 0). 
- -input_handler(Socket, DistHandle, First, Buffer, Size) -> - %% Size is size of First + Buffer - case First of - <> -> - put_data(DistHandle, PacketSize1, Packet1), - put_data(DistHandle, PacketSize2, Packet2), - DataSize = 4 + PacketSize1 + 4 + PacketSize2, - input_handler( - Socket, DistHandle, Rest, Buffer, Size - DataSize); +input_handler(IHP, Front, Size, Rear, SelectHandle) + when IHP#ihp.watermark > Size, SelectHandle =:= undefined -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size}), + input_handler_recv(IHP, Front, Size, Rear, SelectHandle); +input_handler(IHP, [] = Front, Size, [] = Rear, SelectHandle) -> + 0 = Size, % Assert + input_handler_recv(IHP, Front, Size, Rear, SelectHandle); +input_handler(IHP, [] = _Front, Size, Rear, SelectHandle) -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size}), + input_handler(IHP, lists:reverse(Rear), Size, [], SelectHandle); +input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, SelectHandle) -> + case Bin of + <> -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size, PacketSize}), + %% 4 complete packets in Bin + DistHandle = IHP#ihp.dist_handle, + put_data(DistHandle, Packet_1), + put_data(DistHandle, Packet_2), + put_data(DistHandle, Packet_3), + put_data(DistHandle, Packet_4), + Size_1 = + Size - + (16 + PacketSize_1 + PacketSize_2 + + PacketSize_3 + PacketSize_4), + if + byte_size(Rest) > 0 -> + input_handler( + IHP, [Rest | Front], Size_1, Rear, SelectHandle); + true -> % byte_size(Rest) == 0 + input_handler(IHP, Front, Size_1, Rear, SelectHandle) + end; + <> -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size, PacketSize}), + %% 2 complete packets in Bin + DistHandle = IHP#ihp.dist_handle, + put_data(DistHandle, Packet_1), + put_data(DistHandle, Packet_2), + Size_1 = Size - (8 + PacketSize_1 + PacketSize_2), + if + byte_size(Rest) > 0 -> + input_handler( + IHP, [Rest | Front], Size_1, Rear, SelectHandle); + true -> % byte_size(Rest) == 0 + input_handler(IHP, Front, Size_1, Rear, SelectHandle) + end; <> -> 
- DataSize = 4 + PacketSize, - put_data(DistHandle, PacketSize, Packet), - input_handler( - Socket, DistHandle, Rest, Buffer, Size - DataSize); - <> -> - input_handler( - Socket, DistHandle, PacketStart, Buffer, Size - 4, - PacketSize); - <> -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size, PacketSize}), + %% Complete packet in Bin + put_data(IHP#ihp.dist_handle, Packet), + Size_1 = Size - (4 + PacketSize), if - 4 =< Size -> - {First_1, Buffer_1, PacketSize} = - input_get_packet_size(Bin, lists:reverse(Buffer)), + byte_size(Rest) > 0 -> input_handler( - Socket, DistHandle, First_1, Buffer_1, Size - 4, - PacketSize); - true -> - Data = input_data(Socket), - Buffer_1 = [Data | Buffer], - DataSize = byte_size(Data), + IHP, [Rest | Front], Size_1, Rear, SelectHandle); + true -> % byte_size(Rest) == 0 + input_handler(IHP, Front, Size_1, Rear, SelectHandle) + end; + <> -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size, PacketSize}), + %% Incomplete packet in Bin + Size_1 = Size - (4 + PacketSize), + if + 0 > Size_1 -> + %% Incomplete packet in buffer + input_handler_recv( + IHP, Bin_Front, Size, Rear, SelectHandle); + Size_1 > 0-> + %% Complete packet is buffered, and some more + PacketStartSize = byte_size(PacketStart), + IOV = + if PacketStartSize > 0 -> [PacketStart]; + true -> [] + end, + {Packet, Front_1, Rear_1} = + collect_iov( + IOV, Front, PacketSize - PacketStartSize, Rear), + put_data(IHP#ihp.dist_handle, Packet), + input_handler(IHP, Front_1, Size_1, Rear_1, SelectHandle); + true -> % Size_1 == 0 + %% Exactly a packet is buffered + Packet = [PacketStart | Front] ++ lists:reverse(Rear), + put_data(IHP#ihp.dist_handle, Packet), + input_handler(IHP, [], 0, [], SelectHandle) + end; + <> -> + %% erlang:display({?FUNCTION_NAME, ?LINE, Size, byte_size(First)}), + %% Incomplete packet header in Bin + if + 4 > Size -> + %% Incomplete packet header in buffer + input_handler_recv( + IHP, Bin_Front, Size, Rear, SelectHandle); + Size > 4 -> + %% Complete packet
header is buffered, and some more + {Hdr, Front_1, Rear_1} = + collect_bin(First, Front, 4 - byte_size(First), Rear), input_handler( - Socket, DistHandle, First, Buffer_1, Size + DataSize) + IHP, [Hdr | Front_1], Size, Rear_1, SelectHandle); + true -> % Size == 4 + %% Exactly a packet header is buffered + Hdr = list_to_binary(Bin_Front ++ lists:reverse(Rear)), + input_handler(IHP, [Hdr], Size, [], SelectHandle) end end. -%% PacketSize has been matched in PacketStart -input_handler(Socket, DistHandle, PacketStart, Buffer, Size, PacketSize) -> - %% Size is size of PacketStart + Buffer - RestSize = Size - PacketSize, - if - RestSize < 0 -> - %% Incomplete packet received so far - More = input_data(Socket), - MoreSize = byte_size(More), - input_handler( - Socket, DistHandle, PacketStart, - [More | Buffer], Size + MoreSize, PacketSize); - 0 < RestSize, Buffer =:= [] -> - %% Rest data in PacketStart - <> = PacketStart, - put_data(DistHandle, PacketSize, Packet), - input_handler(Socket, DistHandle, Rest, [], RestSize); - Buffer =:= [] -> - %% No rest data - RestSize = 0, % ASSERT - put_data(DistHandle, PacketSize, PacketStart), - input_handler(Socket, DistHandle); - true -> - %% Split packet from rest data - Bin = hd(Buffer), - LastSize = byte_size(Bin) - RestSize, - <> = Bin, - Packet = [PacketStart|lists:reverse(tl(Buffer), [LastBin])], - put_data(DistHandle, PacketSize, Packet), - input_handler(Socket, DistHandle, Rest, [], RestSize) - end. - -%% There are enough bytes (4) in First + [Bin|Buffer] -%% to get the packet size, but not enough in First -input_get_packet_size(First, [Bin|Buffer]) -> - MissingSize = 4 - byte_size(First), - if - MissingSize =< byte_size(Bin) -> - <> = Bin, - <> = <>, - {Rest, lists:reverse(Buffer), PacketSize}; - true -> - input_get_packet_size(<>, Buffer) - end.
- -%% Input data from socket -input_data(Socket) -> - case socket:recv(Socket) of +input_handler_recv(IHP, Front, Size, Rear, undefined) -> + case socket:recv(IHP#ihp.socket, 0, [], nowait) of {ok, Data} -> - %% erlang:display({'<<', byte_size(Data)}), - Data; + %% erlang:display({?FUNCTION_NAME, ?LINE, '<<', byte_size(Data)}), + Size_1 = byte_size(Data) + Size, + Rear_1 = [Data | Rear], + input_handler(IHP, Front, Size_1, Rear_1, undefined); + {select, {{select_info, _, SelectHandle}, Data}} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, + %% select, {SelectHandle,byte_size(Data)}}), + Size_1 = byte_size(Data) + Size, + Rear_1 = [Data | Rear], + input_handler(IHP, Front, Size_1, Rear_1, SelectHandle); + {select, {select_info, _, SelectHandle}} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, select, SelectHandle}), + input_handler(IHP, Front, Size, Rear, SelectHandle); + {error, {Reason, _Data}} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, + %% error, {Reason, byte_size(_Data)}}), + exit(Reason); {error, Reason} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, error, Reason}), exit(Reason) + end; +input_handler_recv(IHP, Front, Size, Rear, SelectHandle) -> + input_handler_wait(IHP, Front, Size, Rear, SelectHandle). + +input_handler_wait(IHP, Front, Size, Rear, SelectHandle) -> + %% erlang:display({?FUNCTION_NAME, ?LINE, SelectHandle}), + Socket = IHP#ihp.socket, + receive + {'$socket', Socket, select, SelectHandle} -> + input_handler_recv(IHP, Front, Size, Rear, undefined); + _Ignore -> + %% erlang:display({?FUNCTION_NAME, ?LINE, _Ignore}), + input_handler_wait(IHP, Front, Size, Rear, SelectHandle) end. 
-%%% put_data(_DistHandle, 0, _) -> -%%% ok; +collect_bin(Collected, [Bin | Front], N, Rear) -> + BinSize = byte_size(Bin), + if + N > BinSize -> + collect_bin( + <>, Front, N - BinSize, Rear); + BinSize > N-> + <> = Bin, + {<>, [Rest | Front], Rear}; + true -> % BinSize == N + {<>, Front, Rear} + end; +collect_bin(Collected, [], N, [_|_] = Rear) -> + collect_bin(Collected, lists:reverse(Rear), N, []). + +collect_iov(Collected, [Bin | Front], N, Rear) -> + BinSize = byte_size(Bin), + if + N > BinSize -> + collect_iov([Bin | Collected], Front, N - BinSize, Rear); + BinSize > N -> + <> = Bin, + {lists:reverse(Collected, [First]), [Rest | Front], Rear}; + true -> % BinSize == N + {lists:reverse(Collected, [Bin]), Front, Rear} + end; +collect_iov(Collected, [], N, [_|_] = Rear) -> + collect_iov(Collected, lists:reverse(Rear), N, []). + %% We deliver ticks (packets size 0) to the VM, %% so that erlang:dist_get_stat(DistHandle) that %% dist_util:getstat/3 falls back to becomes good enough -put_data(DistHandle, _PacketSize, Packet) -> - %% erlang:display({'<<==', _PacketSize}), +put_data(DistHandle, Packet) -> + %% erlang:display({'<<==', iolist_size(Packet)}), erlang:dist_ctrl_put_data(DistHandle, Packet). %% ------------------------------------------------------------ diff --git a/lib/ssl/test/ssl_dist_bench_SUITE.erl b/lib/ssl/test/ssl_dist_bench_SUITE.erl index ef7eb6b9e5c7..a4f7ce3c462e 100644 --- a/lib/ssl/test/ssl_dist_bench_SUITE.erl +++ b/lib/ssl/test/ssl_dist_bench_SUITE.erl @@ -1,7 +1,7 @@ %%%------------------------------------------------------------------- %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2017-2024. All Rights Reserved. +%% Copyright Ericsson AB 2017-2025. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. @@ -48,7 +48,7 @@ throughput_1048576/1]). 
%% Debug --export([payload/1, roundtrip_runner/3, setup_runner/3, throughput_runner/4, +-export([payload/1, roundtrip_runner/2, setup_runner/2, throughput_runner/3, mem/0]). %%%------------------------------------------------------------------- @@ -59,14 +59,16 @@ all() -> [{group, smoketest}]. groups() -> - [{smoketest, protocols()}, - {benchmark, protocols()}, + [{smoketest, protocols()}, + {benchmark, protocols()}, + {perf_record, protocols()}, %% %% protocols() - {ssl, ssl_backends()}, - {cryptcookie, cryptcookie_backends()}, - {plain, categories()}, - {socket, categories()}, + {ssl, ssl_backends()}, + {cryptcookie, cryptcookie_backends()}, + {plain, categories()}, + {plain2, categories()}, + {socket, categories()}, %% %% ssl_backends() {tls, categories()}, @@ -101,6 +103,7 @@ protocols() -> [{group, ssl}, {group, cryptcookie}, {group, plain}, + {group, plain2}, {group, socket}]. ssl_backends() -> @@ -246,7 +249,9 @@ end_per_suite(Config) -> ssl_bench_test_lib:cleanup(ServerNode). init_per_group(benchmark, Config) -> - [{effort,10}|Config]; + [{effort,5}|Config]; +init_per_group(perf_record, Config) -> + [{perf_record,true}, {effort,10}|Config]; %% init_per_group(ssl, Config) -> [{ssl_dist, true}, {ssl_dist_prefix, "SSL"}|Config]; @@ -320,13 +325,26 @@ init_per_group(cryptcookie_inet_ktls_ih, Config) -> init_per_group(plain, Config) -> [{ssl_dist, false}, {ssl_dist_prefix, "Plain"}|Config]; %% +init_per_group(plain2, Config) -> + try inet_epmd_socket:supported() of + ok -> + [{ssl_dist, false}, + {ssl_dist_prefix, "Plain2"}, + {ssl_dist_args, "-proto_dist inet_epmd -inet_epmd dist"} + | Config]; + Problem -> + {skip, Problem} + catch + Class : Reason : Stacktrace -> + {fail, {Class, Reason, Stacktrace}} + end; +%% init_per_group(socket, Config) -> try inet_epmd_socket:supported() of ok -> [{ssl_dist, false}, {ssl_dist_prefix, "Socket"}, - {ssl_dist_args, - "-proto_dist inet_epmd -inet_epmd socket"} + {ssl_dist_args, "-proto_dist inet_epmd -inet_epmd socket"} | 
Config]; Problem -> {skip, Problem} @@ -452,6 +470,51 @@ write_node_conf( split_node(Node) -> string:split(atom_to_list(Node), "@"). + +-define(REPORT_FORMAT, "~s: ~w ~s"). +-define(REPORT_PAL(Name, Value, Info), + ?CT_PAL(?REPORT_FORMAT, + [begin Name end, begin Value end, begin Info end])). + +-define(REPORT(Name, Value, Info), + begin + report( + ?SSL_TEST_LIB_FORMAT, ?SSL_TEST_LIB_ARGS, + begin Name end, begin Value end, begin Info end) + end). +report(LFormat, LArgs, Name, Value, Info) -> + ct:pal(LFormat ++ ?REPORT_FORMAT, LArgs ++ [Name, Value, Info]), + ct_event:notify( + #event{ + name = benchmark_data, + data = [{value, Value}, {suite, "ssl_dist"}, {name, Name}]}), + {comment, term_to_string(Value) ++ " " ++ Info}. + +-define(REPORT(Name, Value, Info, Time), + begin + report( + ?SSL_TEST_LIB_FORMAT, ?SSL_TEST_LIB_ARGS, + begin Name end, begin Value end, begin Info end, begin Time end) + end). +report(LFormat, LArgs, Name, Value, Info, Time) -> % Time in microseconds + report( + LFormat, LArgs, Name, Value, + Info ++ + unicode:characters_to_list( + io_lib:fwrite(" [~.1f s]", [begin Time end/1000_000]))). + +term_to_string(Term) -> + unicode:characters_to_list( + io_lib:write(Term, [{encoding, unicode}])). + +per_s(Bytes, Microseconds) -> + round((Bytes * 1000_000) / Microseconds). +per_s(What) -> What ++ "/s". + +per_ks(Rounds, Microseconds) -> + round((Rounds*1000_000*1000) / Microseconds). +per_ks(What) -> What ++ "/ks". + %%%------------------------------------------------------------------- %%% Test cases @@ -462,7 +525,7 @@ setup(Config) -> run_nodepair_test(fun setup/6, Config). 
setup(A, B, Prefix, Effort, HA, HB) -> - Rounds = 100 * Effort, + Rounds = 1000 * Effort, [] = ssl_apply(HA, erlang, nodes, []), [] = ssl_apply(HB, erlang, nodes, []), pong = ssl_apply(HA, net_adm, ping, [B]), @@ -470,35 +533,43 @@ setup(A, B, Prefix, Effort, HA, HB) -> {Log, Before, After} = ssl_apply(HB, fun () -> set_cpu_affinity(server) end), ?CT_PAL("Server CPU affinity: ~w -> ~w~n~s", [Before, After, Log]), - MemStart = mem_start(HA, HB), ChildCountResult = - ssl_dist_test_lib:apply_on_ssl_node( - HA, supervisor, count_children, [tls_dist_connection_sup]), + catch ssl_apply( + HA, supervisor, count_children, [tls_dist_connection_sup]), ?CT_LOG("TLS Connection Child Count Result: ~p", [ChildCountResult]), - {SetupTime, CycleTime} = - ssl_apply(HA, fun () -> setup_runner(A, B, Rounds) end), - ok = ssl_apply(HB, fun () -> setup_wait_nodedown(A, 10000) end), - {MemA, MemB, MemSuffix} = mem_stop(HA, HB, MemStart), + io:format("~w(~p, ~p)~n", [setup_runner, B, Rounds]), + {AccSetupTime, TotalTime, {MemA, MemB, MemText}} = + ssl_apply(HA, fun () -> setup_runner(B, Rounds) end), + ok = ssl_apply(HB, fun () -> setup_wait_nodedown(A, 10_000) end), %% [] = ssl_apply(HA, erlang, nodes, []), %% [] = ssl_apply(HB, erlang, nodes, []), - SetupSpeed = round((Rounds*1000000*1000) / SetupTime), - CycleSpeed = round((Rounds*1000000*1000) / CycleTime), - _ = report(Prefix++" Setup Mem A", MemA, "KByte"), - _ = report(Prefix++" Setup Mem B", MemB, "KByte"), - _ = report(Prefix++" Setup", SetupSpeed, "setups/1000s"), - report(Prefix++" Setup Cycle", CycleSpeed, "cycles/1000s " ++ MemSuffix). 
- -%% Runs on node A against rex in node B -setup_runner(A, B, Rounds) -> + SetupSpeed = per_ks(Rounds, AccSetupTime), + CycleSpeed = per_ks(Rounds, TotalTime), + _ = ?REPORT(Prefix++" Setup Mem A", MemA, "KByte"), + _ = ?REPORT(Prefix++" Setup Mem B", MemB, "KByte"), + _ = ?REPORT(Prefix++" Setup", SetupSpeed, per_ks("setups")), + ?REPORT(Prefix++" Setup Cycle", + CycleSpeed, per_ks("cycles") ++ " " ++ MemText, + TotalTime). + +%% Runs on node A against rex in node B. +%% +%% Can be run on two manually started nodes outside CommonTest +%% +setup_runner(B, Rounds) -> + MemStart = mem_start(B), StartTime = start_time(), - SetupTime = setup_loop(A, B, 0, Rounds), - {microseconds(SetupTime), microseconds(elapsed_time(StartTime))}. + AccSetupTime = setup_loop(B, 0, Rounds), + TotalTime = elapsed_time(StartTime), + MemResult = mem_stop(MemStart), + true = net_kernel:disconnect(B), + {microseconds(AccSetupTime), microseconds(TotalTime), MemResult}. -setup_loop(_A, _B, T, 0) -> - T; -setup_loop(A, B, T, N) -> +setup_loop(_B, AccSetupTime, 0) -> + AccSetupTime; +setup_loop(B, AccSetupTime, N) -> StartTime = start_time(), - try erpc:call(B, net_adm, ping, [A]) of + try erpc:call(B, net_adm, ping, [node()]) of pong -> ok; Other -> error({N,Other}) @@ -506,14 +577,14 @@ setup_loop(A, B, T, N) -> Class : Reason : Stacktrace -> erlang:raise(Class, {N,Reason}, Stacktrace) end, - Time = elapsed_time(StartTime), + SetupTime = elapsed_time(StartTime), [N,B] = [N|erlang:nodes()], Mref = erlang:monitor(process, {rex,B}), true = net_kernel:disconnect(B), receive {'DOWN',Mref,process,_,_} -> [] = erlang:nodes(), - setup_loop(A, B, Time + T, N - 1) + setup_loop(B, AccSetupTime + SetupTime, N - 1) end. 
setup_wait_nodedown(A, Time) -> @@ -601,18 +672,20 @@ parallel_setup(Config, Clients, _0, HNs) -> try {Log, Before, After} = ssl_apply(ServerHandle, fun () -> set_cpu_affinity(server) end), - ?CT_PAL("Server CPU affinity: ~w -> ~w~n~s", [Before, After, Log]), + ?CT_PAL("~nClients: ~w" + "~nServer CPU affinity: ~w -> ~w~n~s", + [Clients, Before, After, Log]), ServerMemBefore = ssl_apply(ServerHandle, fun mem/0), parallel_setup_result( Config, TotalRounds, ServerHandle, ServerMemBefore, - [parallel_setup_runner(Handle, Node, ServerNode, Rounds) - || {Handle, Node} <- HNs]) + [parallel_setup_runner(Handle, ServerNode, Rounds) + || {Handle, _Node} <- HNs]) after stop_ssl_node(Key, ServerHandle, Config) end. -parallel_setup_runner(Handle, Node, ServerNode, Rounds) -> +parallel_setup_runner(Handle, ServerNode, Rounds) -> Collector = self(), Tag = make_ref(), _ = @@ -625,8 +698,15 @@ parallel_setup_runner(Handle, Node, ServerNode, Rounds) -> ssl_apply(Handle, fun mem/0), Result = ssl_apply( - Handle, ?MODULE, setup_runner, - [Node, ServerNode, Rounds]), + Handle, + fun () -> % See setup_runner/2 + StartTime = start_time(), + AccSetupTime = + setup_loop(ServerNode, 0, Rounds), + TotalTime = elapsed_time(StartTime), + {microseconds(AccSetupTime), + microseconds(TotalTime)} + end), MemAfter = ssl_apply(Handle, fun mem/0), {MemBefore, Result, MemAfter} @@ -644,33 +724,35 @@ parallel_setup_result( %% parallel_setup_result( Config, TotalRounds, ServerHandle, ServerMemBefore, [Tag | Tags], - SetupTime, CycleTime, Mem) -> + SumSetupTime, SumTotalTime, Mem) -> receive - {Tag, {Mem1, {ST, CT}, Mem2}} - when is_integer(ST), is_integer(CT) -> + {Tag, {Mem1, {AccSetupTime, TotalTime}, Mem2}} + when is_integer(AccSetupTime), is_integer(TotalTime) -> parallel_setup_result( Config, TotalRounds, ServerHandle, ServerMemBefore, Tags, - SetupTime + ST, CycleTime + CT, Mem + Mem2 - Mem1); + SumSetupTime + AccSetupTime, SumTotalTime + TotalTime, + Mem + Mem2 - Mem1); {Tag, Error} -> 
exit(Error) end; parallel_setup_result( Config, TotalRounds, ServerHandle, ServerMemBefore, [], - SetupTime, CycleTime, Mem) -> + SumSetupTime, SumTotalTime, Mem) -> ServerMemAfter = ssl_apply(ServerHandle, fun mem/0), ServerMem = ServerMemAfter - ServerMemBefore, Clients = proplists:get_value(clients, Config), Prefix = proplists:get_value(ssl_dist_prefix, Config), - SetupSpeed = 1000 * round(TotalRounds / (SetupTime/1000000)), - CycleSpeed = 1000 * round(TotalRounds / (CycleTime/1000000)), - {MemC, MemS, MemSuffix} = mem_result({Mem / Clients, ServerMem}), - _ = report(Prefix++" Parallel Setup Mem Clients", MemC, "KByte"), - _ = report(Prefix++" Parallel Setup Mem Server", MemS, "KByte"), - _ = report(Prefix++" Parallel Setup", SetupSpeed, "setups/1000s"), - report( - Prefix++" Parallel Setup Cycle", CycleSpeed, "cycles/1000s " - ++ MemSuffix). + SetupSpeed = per_ks(TotalRounds, SumSetupTime), + CycleSpeed = per_ks(TotalRounds, SumTotalTime), + {MemC, MemS, MemText} = mem_result({Mem / Clients, ServerMem}), + _ = ?REPORT(Prefix++" Parallel Setup Mem Clients", MemC, "KByte"), + _ = ?REPORT(Prefix++" Parallel Setup Mem Server", MemS, "KByte"), + _ = ?REPORT(Prefix++" Parallel Setup", SetupSpeed, per_ks("setups")), + ?REPORT( + Prefix++" Parallel Setup Cycle", + CycleSpeed, per_ks("cycles") ++ " " ++ MemText, + SumTotalTime / Clients). %%---------------- %% Roundtrip speed @@ -679,23 +761,27 @@ roundtrip(Config) -> run_nodepair_test(fun roundtrip/6, Config). 
roundtrip(A, B, Prefix, Effort, HA, HB) -> - Rounds = 4000 * Effort, + Rounds = 20_000 * Effort, [] = ssl_apply(HA, erlang, nodes, []), [] = ssl_apply(HB, erlang, nodes, []), - MemStart = mem_start(HA, HB), ok = ssl_apply(HA, net_kernel, allow, [[B]]), ok = ssl_apply(HB, net_kernel, allow, [[A]]), - Time = ssl_apply(HA, fun () -> roundtrip_runner(A, B, Rounds) end), + io:format("~w(~p, ~p)~n", [roundtrip_runner, B, Rounds]), + {Time, {MemA, MemB, MemText}} = + ssl_apply(HA, fun () -> roundtrip_runner(B, Rounds) end), [B] = ssl_apply(HA, erlang, nodes, []), [A] = ssl_apply(HB, erlang, nodes, []), - {MemA, MemB, MemSuffix} = mem_stop(HA, HB, MemStart), - Speed = round((Rounds*1000000) / Time), - _ = report(Prefix++" Roundtrip Mem A", MemA, "KByte"), - _ = report(Prefix++" Roundtrip Mem B", MemB, "KByte"), - report(Prefix++" Roundtrip", Speed, "pings/s " ++ MemSuffix). + _ = ?REPORT(Prefix++" Roundtrip Mem A", MemA, "KByte"), + _ = ?REPORT(Prefix++" Roundtrip Mem B", MemB, "KByte"), + ?REPORT(Prefix++" Roundtrip", + per_s(Rounds, Time), per_s("pings") ++ " " ++ MemText, Time). %% Runs on node A and spawns a server on node B -roundtrip_runner(A, B, Rounds) -> +%% +%% Can be run on two manually started nodes outside CommonTest +%% +roundtrip_runner(B, Rounds) -> + A = node(), ClientPid = self(), [A] = erpc:call(B, erlang, nodes, []), ServerPid = @@ -705,7 +791,8 @@ roundtrip_runner(A, B, Rounds) -> roundtrip_server(ClientPid, Rounds) end), ServerMon = erlang:monitor(process, ServerPid), - roundtrip_client(ServerPid, ServerMon, start_time(), Rounds). + MemStart = mem_start(B), + roundtrip_client(ServerPid, ServerMon, start_time(), MemStart, Rounds). roundtrip_server(_Pid, 0) -> exit(ok); @@ -716,19 +803,19 @@ roundtrip_server(Pid, N) -> roundtrip_server(Pid, N-1) end. 
-roundtrip_client(_Pid, Mon, StartTime, 0) -> - Time = elapsed_time(StartTime), +roundtrip_client(_Pid, Mon, StartTime, MemStart, 0) -> + Time = microseconds(elapsed_time(StartTime)), receive {'DOWN', Mon, _, _, ok} -> - Time; + {Time, mem_stop(MemStart)}; {'DOWN', Mon, _, _, Other} -> exit(Other) end; -roundtrip_client(Pid, Mon, StartTime, N) -> +roundtrip_client(Pid, Mon, StartTime, MemStart, N) -> Pid ! N, receive N -> - roundtrip_client(Pid, Mon, StartTime, N - 1) + roundtrip_client(Pid, Mon, StartTime, MemStart, N - 1) end. %%--------------------------------------- @@ -745,14 +832,13 @@ sched_utilization(A, B, Prefix, Effort, HA, HB, Config) -> SSL = proplists:get_value(ssl_dist, Config), [] = ssl_apply(HA, erlang, nodes, []), [] = ssl_apply(HB, erlang, nodes, []), - MemStart = mem_start(HA, HB), PidA = ssl_apply(HA, os, getpid, []), PidB = ssl_apply(HB, os, getpid, []), ?CT_PAL("Starting scheduler utilization run effort ~w:~n" " [~s] ~w~n" " [~s] ~w~n", [Effort, PidA, A, PidB, B]), - {ClientMsacc, ServerMsacc, BusyDistPortMsgs} = + {ClientMsacc, ServerMsacc, BusyDistPortMsgs, {MemA, MemB, MemText}} = ssl_apply( HA, fun () -> @@ -765,7 +851,6 @@ sched_utilization(A, B, Prefix, Effort, HA, HB, Config) -> ?CT_LOG("Got ~p busy_dist_port msgs",[tail(BusyDistPortMsgs)]), [B] = ssl_apply(HA, erlang, nodes, []), [A] = ssl_apply(HB, erlang, nodes, []), - {MemA, MemB, MemSuffix} = mem_stop(HA, HB, MemStart), ?CT_LOG("Microstate accounting for node ~w:", [A]), msacc:print(ClientMsacc), ?CT_LOG("Microstate accounting for node ~w:", [B]), @@ -775,11 +860,12 @@ sched_utilization(A, B, Prefix, Effort, HA, HB, Config) -> ?CT_LOG("Stats of A from B: ~p", [ssl_apply(HB, net_kernel, node_info, [A])]), SchedUtilClient = - round(10000 * msacc:stats(system_runtime,ClientMsacc) / + round(1000 * msacc:stats(system_runtime,ClientMsacc) / msacc:stats(system_realtime,ClientMsacc)), + ServerRealtime = msacc:stats(system_realtime,ServerMsacc), SchedUtilServer = - round(10000 * 
msacc:stats(system_runtime,ServerMsacc) / - msacc:stats(system_realtime,ServerMsacc)), + round(1000 * msacc:stats(system_runtime,ServerMsacc) / + ServerRealtime), Verdict = if BusyDistPortMsgs =:= 0 -> @@ -790,30 +876,34 @@ sched_utilization(A, B, Prefix, Effort, HA, HB, Config) -> ?CT_LOG("Stray Msgs: ~p", [BusyDistPortMsgs]), " ???" end, - _ = report(Prefix++" Sched Utilization Client Mem", MemA, "KByte"), - _ = report(Prefix++" Sched Utilization Server Mem", MemB, "KByte"), + _ = ?REPORT(Prefix++" Sched Utilization Client Mem", MemA, "KByte"), + _ = ?REPORT(Prefix++" Sched Utilization Server Mem", MemB, "KByte"), {comment, ClientComment} = - report(Prefix ++ " Sched Utilization Client" ++ Verdict, - SchedUtilClient, " %" ++ Verdict), + ?REPORT(Prefix ++ " Sched Utilization Client" ++ Verdict, + SchedUtilClient, Verdict ++ " | "), {comment, ServerComment} = - report(Prefix++" Sched Utilization Server" ++ Verdict, - SchedUtilServer, " %" ++ Verdict), - {comment, - "Client " ++ ClientComment ++ ", Server " ++ ServerComment ++ - " " ++ MemSuffix}. + ?REPORT(Prefix++" Sched Utilization Server" ++ Verdict, + SchedUtilServer, "per mille" ++ Verdict ++ " " ++ MemText, + round(ServerRealtime / length(ServerMsacc))), + {comment, ClientComment ++ ServerComment}. %% Runs on node A and spawns a server on node B %% We want to avoid getting busy_dist_port as it hides the true SU usage %% of the receiver and sender. 
-sched_util_runner(A, B, Effort, true, Config) -> - sched_util_runner(A, B, Effort, 100, Config); -sched_util_runner(A, B, Effort, false, Config) -> - sched_util_runner(A, B, Effort, 100, Config); -sched_util_runner(A, B, Effort, Senders, Config) -> +sched_util_runner(A, B, Effort, _SSL = true, Config) -> + sched_util_runner(A, B, Effort, 200, Config); +sched_util_runner(A, B, Effort, _SSL = false, Config) -> + %% We spawn 200 senders that send a message every 10 ms + %% which should produce a load of 20_000 msgs/s with + %% payload 1000 bytes each -> 20 MB/s + sched_util_runner(A, B, Effort, 200, Config); +sched_util_runner(A, B, Effort, Senders, Config) when is_integer(Senders) -> process_flag(trap_exit, true), - Payload = payload(100), + Delay = 2, + Payload = payload(1000), Time = 1000 * Effort, [A] = erpc:call(B, erlang, nodes, []), + MemStart = mem_start(B), ServerPids = [erlang:spawn_link( B, fun () -> throughput_server() end) @@ -849,13 +939,10 @@ sched_util_runner(A, B, Effort, Senders, Config) -> end end), erlang:system_monitor(self(),[busy_dist_port]), - %% We spawn 100 senders that send a message every 10 ms - %% which should produce a load of 10000 msgs/s with - %% payload 100 bytes each -> 1 MByte/s _Clients = [spawn_link( fun() -> - throughput_client(Pid, Payload) + throughput_client(Pid, Delay, Payload) end) || Pid <- ServerPids], %% receive after 1000 -> ok end, @@ -865,6 +952,7 @@ sched_util_runner(A, B, Effort, Senders, Config) -> fs_log(Config, "sched_util_runner.Client.msacc:start", ok), ClientMsaccStats = msacc:stats(), fs_log(Config, "sched_util_runner.Client.msacc.stats", ClientMsaccStats), + MemResult = mem_stop(MemStart), receive after 1000 -> ok end, ServerMsacc ! 
{done,Tag,self()}, ServerMsaccStats = @@ -874,9 +962,10 @@ sched_util_runner(A, B, Effort, Senders, Config) -> {'EXIT',ServerMsacc,Other} -> exit({other,ServerMsacc,Other}) end, + erlang:system_monitor(self(),[]), fs_log(Config, "sched_util_runner.ServerMsaccStats", ServerMsaccStats), %% - {ClientMsaccStats,ServerMsaccStats, busy_dist_port_msgs()}. + {ClientMsaccStats,ServerMsaccStats, busy_dist_port_msgs(), MemResult}. fs_log(Config, Name, Term) -> PrivDir = proplists:get_value(priv_dir, Config), @@ -886,7 +975,7 @@ fs_log(Config, Name, Term) -> io_lib:format( "~p~n", [{{erlang:unique_integer([positive,monotonic]), - os:system_time(1000000)}, + os:system_time(1000_000)}, Term}])), ok. @@ -925,9 +1014,9 @@ throughput_server() -> receive _ -> ok end, throughput_server(). -throughput_client(Pid, Payload) -> +throughput_client(Pid, Delay, Payload) -> Pid ! Payload, - receive after 10 -> throughput_client(Pid, Payload) end. + receive after Delay -> throughput_client(Pid, Delay, Payload) end. %%----------------- %% Mean load CPU margin @@ -944,39 +1033,52 @@ throughput_client(Pid, Payload) -> mean_load_cpu_margin(Config) -> run_nodepair_test(fun run_mlcm/6, Config). --define(MLCM_NO, 100). 
- run_mlcm(A, B, Prefix, Effort, HA, HB) -> + ClientServerPairs = 200, [] = ssl_apply(HA, erlang, nodes, []), [] = ssl_apply(HB, erlang, nodes, []), - MemStart = mem_start(HA, HB), pong = ssl_apply(HB, net_adm, ping, [A]), - Count = ssl_apply(HA, fun () -> mlcm(B, Effort) end), - {MemA, MemB, MemSuffix} = mem_stop(HA, HB, MemStart), - _ = report(Prefix++" CPU Margin Mem A", MemA, "KByte"), - _ = report(Prefix++" CPU Margin Mem B", MemB, "KByte"), - report( + {{Count, Bytes}, {MemA, MemB, MemText}, Time} = + ssl_apply(HA, fun () -> mlcm(B, Effort, ClientServerPairs) end), + ?REPORT_PAL("Data rate", per_s(Bytes bsr 10, Time), per_s("KB")), + _ = ?REPORT(Prefix++" CPU Margin Mem A", MemA, "KB"), + _ = ?REPORT(Prefix++" CPU Margin Mem B", MemB, "KB"), + ?REPORT( Prefix++" CPU Margin", - round(Count/?MLCM_NO/Effort), - "stones " ++ MemSuffix). - -mlcm(Node, Effort) -> - Payloads = mlcm_payloads(), + round(Count/ClientServerPairs/Effort), + "stones " ++ MemText, Time). + +mlcm(Node, Effort, ClientServerPairs) -> + Delay = 10, + BlockSize = 512, + Payloads = mlcm_payloads(BlockSize), + RunTime = 1000 * Effort, + MemStart = mem_start(Node), Clients = - [mlcm_client_start(Node, Payloads) || _ <- lists:seq(1, ?MLCM_NO)], - receive after 1000 * Effort -> ok end, + [mlcm_client_start(Node, Delay, Payloads) || + _ <- lists:seq(1, ClientServerPairs)], + receive after RunTime -> ok end, [Alias ! {Alias,stop} || {_Monitor, Alias} <- Clients], Counts = [receive - {'DOWN',Monitor,_,_,{Alias, Count}} -> - Count; + {'DOWN',Monitor,_,_,{Alias, Count, Bytes}} -> + {Count, Bytes}; {'DOWN',Monitor,_,_,Reason} -> exit(Reason) end || {Monitor, Alias} <- Clients], - lists:sum(Counts). + MemResult = mem_stop(MemStart), + {lists_sum_t2(Counts), MemResult, 1000 * RunTime}. -mlcm_payloads() -> - Bin = list_to_binary([rand:uniform(256) - 1 || _ <- lists:seq(1, 512)]), +lists_sum_t2(L) -> lists_sum_t2(L, 0, 0). 
+%% +lists_sum_t2([], Sa, Sb) -> + {Sa, Sb}; +lists_sum_t2([{A, B} | L], Sa, Sb) -> + lists_sum_t2(L, Sa + A, Sb + B). + +%% Returns #{ I := binary(size I * BlockSize) }, I = 0..255 +mlcm_payloads(BlockSize) -> + Bin = rand:bytes(BlockSize), lists:foldl( fun (N, Payloads) -> Payloads#{N => binary:copy(Bin, N)} @@ -984,7 +1086,7 @@ mlcm_payloads() -> %%------- -mlcm_client_start(Node, Payloads) -> +mlcm_client_start(Node, Delay, Payloads) -> Parent = self(), StartRef = make_ref(), {_,Monitor} = @@ -993,7 +1095,7 @@ mlcm_client_start(Node, Payloads) -> Alias = alias(), Parent ! {StartRef, Alias}, Server = mlcm_server_start(Node, Alias), - mlcm_client(Alias, Server, Payloads, 0) + mlcm_client(Alias, Server, Delay, Payloads, 0, 0) end), receive {StartRef, Alias} -> @@ -1002,27 +1104,29 @@ mlcm_client_start(Node, Payloads) -> exit(Reason) end. -mlcm_client(Alias, Server, Payloads, Seq) -> - {Time, Index} = mlcm_rand(), +mlcm_client(Alias, Server, Delay, Payloads, Seq, Bytes) -> + {Time, Index} = mlcm_rand(Delay), Payload = maps:get(Index, Payloads), + PayloadSize = byte_size(Payload), receive after Time -> ok end, Server ! {Alias, Seq, Payload}, receive - {Alias, Seq, Pl} when byte_size(Pl) =:= byte_size(Payload) -> - mlcm_client(Alias, Server, Payloads, Seq + 1); - {Alias, stop} = Msg -> - Server ! Msg, + {Alias, Seq, Pl} when byte_size(Pl) =:= PayloadSize -> + mlcm_client( + Alias, Server, Delay, Payloads, Seq + 1, Bytes + PayloadSize); + {Alias, stop} -> + Server ! {Alias, stop, Bytes}, receive after infinity -> ok end end. %% Approximate normal distribution Index with an average of 6 uniform bytes %% and use the 7:th byte for uniform Time -mlcm_rand() -> - mlcm_rand(6, rand:uniform(1 bsl (1+6)*8) - 1, 0). +mlcm_rand(Delay) -> + mlcm_rand(6, rand:uniform(Delay bsl (1 + 6*8)) - 1, 0). %% mlcm_rand(0, X, I) -> - Time = X + 1, % 1..256 - Index = abs((I - 3*256) div 3), % 0..255 upper half or normal distribution + Time = X + 1, % 1 .. 
2*Delay; average Delay + Index = abs((I - 3*255) div 3), % 0..255 upper half or normal distribution {Time, Index}; mlcm_rand(N, X, I) -> mlcm_rand(N - 1, X bsr 8, I + (X band 255)). @@ -1039,12 +1143,12 @@ mlcm_server_start(Node, Alias) -> mlcm_server(Alias, Seq, Compute) -> receive - {Alias, Seq, _Payload} = Msg -> + {Alias, Seq, _Payload} = Msg when is_integer(Seq) -> Alias ! Msg, - mlcm_server(Alias, Seq + 1, Compute); - {Alias, stop} = Msg -> + mlcm_server(Alias, Seq + 1, Compute); + {Alias, stop, _Bytes} = Msg -> Compute ! Msg, - receive after infinity -> om end + receive after infinity -> ok end end. %%------- @@ -1058,7 +1162,7 @@ mlcm_compute_start(Alias) -> [link, {priority,low}]). mlcm_compute(Alias, State, Count) -> - receive {Alias, stop} -> exit({Alias, Count}) + receive {Alias, stop, Bytes} -> exit({Alias, Count, Bytes}) after 0 -> ok end, mlcm_compute( @@ -1075,90 +1179,90 @@ mlcm_compute(Alias, State, Count) -> throughput_0(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 50000 * Effort, 0) + throughput(A, B, Prefix, HA, HB, 500_000 * Effort, 0) end, Config). throughput_64(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 50000 * Effort, 64) + throughput(A, B, Prefix, HA, HB, 500_000 * Effort, 64) end, Config). throughput_1024(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 10000 * Effort, 1024) + throughput(A, B, Prefix, HA, HB, 500_000 * Effort, 1024) end, Config). throughput_4096(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 5000 * Effort, 4096) + throughput(A, B, Prefix, HA, HB, 200_000 * Effort, 4096) end, Config). throughput_16384(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 1000 * Effort, 16384) + throughput(A, B, Prefix, HA, HB, 100_000 * Effort, 16384) end, Config). 
throughput_65536(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 200 * Effort, 65536) + throughput(A, B, Prefix, HA, HB, 50_000 * Effort, 65536) end, Config). throughput_262144(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 50 * Effort, 262144) + throughput(A, B, Prefix, HA, HB, 10_000 * Effort, 262144) end, Config). throughput_1048576(Config) -> run_nodepair_test( fun (A, B, Prefix, Effort, HA, HB) -> - throughput(A, B, Prefix, HA, HB, 20 * Effort, 1048576) + throughput(A, B, Prefix, HA, HB, 2_000 * Effort, 1048576) end, Config). throughput(A, B, Prefix, HA, HB, Packets, Size) -> [] = ssl_apply(HA, erlang, nodes, []), [] = ssl_apply(HB, erlang, nodes, []), - MemStart = mem_start(HA, HB), + io:format("~w(~p, ~p, ~p)~n", [throughput_runner, B, Packets, Size]), #{time := Time, client_msacc_stats := ClientMsaccStats, client_prof := ClientProf, server_msacc_stats := ServerMsaccStats, server_prof := ServerProf, server_gc_before := Server_GC_Before, - server_gc_after := Server_GC_After} = - ssl_apply(HA, fun () -> throughput_runner(A, B, Packets, Size) end), + server_gc_after := Server_GC_After, + mem_a := MemA, mem_b := MemB, mem_text := MemText} = + ssl_apply(HA, fun () -> throughput_runner(B, Packets, Size) end), [B] = ssl_apply(HA, erlang, nodes, []), [A] = ssl_apply(HB, erlang, nodes, []), - {MemA, MemB, MemSuffix} = mem_stop(HA, HB, MemStart), ClientMsaccStats =:= undefined orelse msacc:print(ClientMsaccStats), Overhead = 50 % Distribution protocol headers (empirical) (TLS+=54) + byte_size(erlang:term_to_binary([0|<<>>])), % Benchmark overhead Bytes = Packets * (Size + Overhead), - io:format("~w bytes, ~.4g s~n", [Bytes,Time/1000000]), + io:format("~w bytes, ~.4g s~n", [Bytes,Time/1000_000]), SizeString = integer_to_list(Size), - _ = report( - Prefix++" Throughput_" ++ SizeString ++ " Mem A", MemA, "KByte"), - _ = report( - Prefix++" Throughput_" ++ 
SizeString ++ " Mem B", MemB, "KByte"), + _ = ?REPORT( + Prefix++" Throughput_" ++ SizeString ++ " Mem A", MemA, "KB"), + _ = ?REPORT( + Prefix++" Throughput_" ++ SizeString ++ " Mem B", MemB, "KB"), ClientMsaccStats =:= undefined orelse - report( + ?REPORT( Prefix ++ " Sender_RelativeCoreLoad_" ++ SizeString, round(msacc:stats(system_runtime, ClientMsaccStats) - * 1000000 / Bytes), + * 1000_000 / Bytes), "ps/byte"), ServerMsaccStats =:= undefined orelse begin - report( + ?REPORT( Prefix ++ " Receiver_RelativeCoreLoad_" ++ SizeString, round(msacc:stats(system_runtime, ServerMsaccStats) - * 1000000 / Bytes), + * 1000_000 / Bytes), "ps/byte"), msacc:print(ServerMsaccStats) end, @@ -1166,13 +1270,18 @@ throughput(A, B, Prefix, HA, HB, Packets, Size) -> io:format("******* ServerProf:~n", []), prof_print(ServerProf), io:format("******* Server GC Before:~n~p~n", [Server_GC_Before]), io:format("******* Server GC After:~n~p~n", [Server_GC_After]), - Speed = round((Bytes * 1000000) / (1024 * Time)), - report( - Prefix ++ " Throughput_" ++ SizeString, Speed, "kB/s " ++ MemSuffix). + ?REPORT( + Prefix ++ " Throughput_" ++ SizeString, + per_s(Bytes bsr 10, Time), per_s("KB") ++ " " ++ MemText, + Time). 
%% Runs on node A and spawns a server on node B -throughput_runner(A, B, Rounds, Size) -> +%% +%% Can be run on two manually started nodes outside CommonTest +%% +throughput_runner(B, Rounds, Size) -> Payload = payload(Size), + A = node(), [A] = erpc:call(B, erlang, nodes, []), ClientPid = self(), ServerPid = @@ -1189,8 +1298,10 @@ throughput_runner(A, B, Rounds, Size) -> ok end, prof_start(), + MemStart = mem_start(B), #{time := Time} = Result = throughput_client(ServerPid, ServerMon, Payload, Rounds), + {MemA, MemB, MemText} = mem_stop(MemStart), prof_stop(), MsaccStats = case msacc_available() of @@ -1202,9 +1313,12 @@ throughput_runner(A, B, Rounds, Size) -> undefined end, Prof = prof_end(), - Result#{time := microseconds(Time), + Result#{time := Time, client_msacc_stats => MsaccStats, - client_prof => Prof}. + client_prof => Prof, + mem_a => MemA, + mem_b => MemB, + mem_text => MemText}. throughput_server(Pid, N) -> GC_Before = get_server_gc_info(), @@ -1266,7 +1380,7 @@ throughput_client(Pid, Mon, Payload, N) -> throughput_client_loop(_Pid, Mon, _Payload, 0, StartTime) -> receive {'DOWN', Mon, _, _, #{} = Result} -> - Result#{time => elapsed_time(StartTime)}; + Result#{time => microseconds(elapsed_time(StartTime))}; {'DOWN', Mon, _, _, Other} -> exit(Other) end; @@ -1341,7 +1455,8 @@ run_nodepair_test(TestFun, Config) -> HA = start_ssl_node({client,1}, Config), try HB = start_ssl_node(server, Config), - try TestFun(A, B, Prefix, Effort, HA, HB) + try + TestFun(A, B, Prefix, Effort, HA, HB) after stop_ssl_node(server, HB, Config) end @@ -1349,7 +1464,7 @@ run_nodepair_test(TestFun, Config) -> stop_ssl_node({client,1}, HA, Config) end. -ssl_apply(Handle, M, F, Args) -> +ssl_apply({Handle,_PerfTag}, M, F, Args) -> case ssl_dist_test_lib:apply_on_ssl_node(Handle, M, F, Args) of {'EXIT',Reason} -> error(Reason); @@ -1357,7 +1472,7 @@ ssl_apply(Handle, M, F, Args) -> Result end. 
-ssl_apply(Handle, Fun) -> +ssl_apply({Handle,_PerfTag}, Fun) -> case ssl_dist_test_lib:apply_on_ssl_node(Handle, Fun) of {'EXIT',Reason} -> error(Reason); @@ -1365,6 +1480,8 @@ ssl_apply(Handle, Fun) -> Result end. + + start_ssl_node(Spec, Config) -> start_ssl_node(Spec, Config, 0). %% @@ -1372,22 +1489,77 @@ start_ssl_node({client, N}, Config, Verbose) -> Name = proplists:get_value({client_name, N}, Config), Args = get_node_args({client_dist_args, N}, Config), Pa = filename:dirname(code:which(?MODULE)), - ssl_dist_test_lib:start_ssl_node( - Name, "-pa " ++ Pa ++ " +Muacul 0 " ++ Args, Verbose); + {EmuPerfArg, PerfStarter} = perf_starter(Name, Config), + Handle = + ssl_dist_test_lib:start_ssl_node( + Name, + "-pa " ++ Pa ++ EmuPerfArg ++ " +Muacul 0 +IOs false " ++ Args, + Verbose), + PerfStarter(Handle); start_ssl_node(server, Config, Verbose) -> Name = proplists:get_value(server_name, Config), Args = get_node_args(server_dist_args, Config), Pa = filename:dirname(code:which(?MODULE)), ServerNode = proplists:get_value(server_node, Config), - erpc:call( - ServerNode, ssl_dist_test_lib, start_ssl_node, - [Name, "-pa " ++ Pa ++ " +Muacul 0 " ++ Args, Verbose]). + {EmuPerfArg, PerfStarter} = perf_starter(Name, Config), + Handle = + erpc:call( + ServerNode, ssl_dist_test_lib, start_ssl_node, + [Name, + "-pa " ++ Pa ++ EmuPerfArg ++ " +Muacul 0 +IOs false " ++ Args, + Verbose]), + PerfStarter(Handle). 
+ +perf_starter(Name, Config) -> + Parent = self(), + PerfTag = make_ref(), + case proplists:lookup(perf_record, Config) of + {_, true} -> + {" +JPperf true", + fun (Handle) -> + NodeHandle = {Handle,PerfTag}, + NodePid = ssl_apply(NodeHandle, os, getpid, []), + %% The --output option is actually required since + %% it seems perf record, when facing a pipe as output + %% will per default write the collected data to it + PerfCmd = + "perf record -p " ++ NodePid ++ " " + "--output=" ++ Name ++ ".data --call-graph=fp", + ?CT_PAL("~nPerfCmd: ~s~n", [PerfCmd]), + _ = spawn_link( + fun () -> + Parent ! {PerfTag, os:cmd(PerfCmd)}, + void + end), + NodeHandle + end}; + _ -> + {"", + fun (Handle) -> + Parent ! {PerfTag, none}, + {Handle,PerfTag} + end} + end. -stop_ssl_node({client, _}, HA, _Config) -> - ssl_dist_test_lib:stop_ssl_node(HA); -stop_ssl_node(server, HB, Config) -> +stop_ssl_node({client, _}, {HA,PerfTag}, _Config) -> + Result = ssl_dist_test_lib:stop_ssl_node(HA), + perf_result(PerfTag), + Result; +stop_ssl_node(server, {HB,PerfTag}, Config) -> ServerNode = proplists:get_value(server_node, Config), - erpc:call(ServerNode, ssl_dist_test_lib, stop_ssl_node, [HB]). + Result = erpc:call(ServerNode, ssl_dist_test_lib, stop_ssl_node, [HB]), + perf_result(PerfTag), + Result. + +perf_result(PerfTag) -> + receive + {PerfTag, none} -> ok; + {PerfTag, PerfResult} -> + ?CT_PAL("~n" + "Perf CWD: ~s~n" + "Perf result:~n~s~n", + [element(2, file:get_cwd()), PerfResult]), ok + end. get_node_args(Tag, Config) -> case proplists:get_value(ssl_dist, Config) of @@ -1434,6 +1606,7 @@ create_binary(Size, Bin) -> NextSize = Size - 1, create_binary(NextSize, <>). + start_time() -> erlang:system_time(). @@ -1443,40 +1616,26 @@ elapsed_time(StartTime) -> microseconds(Time) -> erlang:convert_time_unit(Time, native, microsecond). 
-report(Name, Value, Suffix) -> - ?CT_PAL("~s: ~w ~s", [Name, Value, Suffix]), - ct_event:notify( - #event{ - name = benchmark_data, - data = [{value, Value}, {suite, "ssl_dist"}, {name, Name}]}), - {comment, term_to_string(Value) ++ " " ++ Suffix}. - -term_to_string(Term) -> - unicode:characters_to_list( - io_lib:write(Term, [{encoding, unicode}])). msacc_available() -> msacc:available(). +mem_start(B) -> + MemStartAB = {mem(), erpc:call(B, fun mem/0)}, + {MemStartAB, B}. -mem_start(HA, HB) -> - MemA = ssl_apply(HA, fun mem/0), - MemB = ssl_apply(HB, fun mem/0), - {MemA, MemB}. - -mem_stop(HA, HB, Mem1) -> - MemA2 = ssl_apply(HA, fun mem/0), - MemB2 = ssl_apply(HB, fun mem/0), - mem_result(mem_diff(Mem1, {MemA2, MemB2})). +mem_stop({MemStartAB, B}) -> + MemStopAB = {mem(), erpc:call(B, fun mem/0)}, + mem_result(mem_diff(MemStartAB, MemStopAB)). mem_diff({MemA1, MemB1}, {MemA2, MemB2}) -> {MemA2 - MemA1, MemB2 - MemB1}. mem_result({MemDiffA, MemDiffB}) -> - MemSuffix = - io_lib:format( - "~.5g|~.5g MByte", [MemDiffA / (1 bsl 20), MemDiffB / (1 bsl 20)]), - {round(MemDiffA / (1 bsl 10)), round(MemDiffB / (1 bsl 10)), MemSuffix}. + MemA = round(MemDiffA / (1 bsl 10)), + MemB = round(MemDiffB / (1 bsl 10)), + MemText = io_lib:format("~w|~w KB", [MemA, MemB]), + {MemA, MemB, MemText}. memory(Type) -> try erlang:memory(Type) From f97d35d3931e79d51922386d90461653fa4c455e Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Fri, 17 Jan 2025 16:19:13 +0100 Subject: [PATCH 3/6] Remove buffer optimization and clean up code The buffer optimization to retain a buffer in the NIF when a short receive on a stream happens complicates the code and may not give any important improvement. At least removing it made it possible to simplify the code a lot.
--- erts/emulator/nifs/common/prim_socket_int.h | 5 +- erts/emulator/nifs/common/prim_socket_nif.c | 6 +- erts/emulator/nifs/unix/unix_socket_syncio.c | 822 +++++++------------ 3 files changed, 285 insertions(+), 548 deletions(-) diff --git a/erts/emulator/nifs/common/prim_socket_int.h b/erts/emulator/nifs/common/prim_socket_int.h index 1343b7ff285a..f7d43727e86d 100644 --- a/erts/emulator/nifs/common/prim_socket_int.h +++ b/erts/emulator/nifs/common/prim_socket_int.h @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2022-2024. All Rights Reserved. + * Copyright Ericsson AB 2022-2025. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -477,9 +477,6 @@ typedef struct { #endif ESockRequestQueue readersQ; - ErlNifBinary readBuf; - ssize_t readResult; - ESockCounter readPkgCnt; ESockCounter readPkgMax; ESockCounter readPkgMaxCnt; diff --git a/erts/emulator/nifs/common/prim_socket_nif.c b/erts/emulator/nifs/common/prim_socket_nif.c index 76f1eb6cfb4d..3fd864362f51 100644 --- a/erts/emulator/nifs/common/prim_socket_nif.c +++ b/erts/emulator/nifs/common/prim_socket_nif.c @@ -1,7 +1,7 @@ /* * %CopyrightBegin% * - * Copyright Ericsson AB 2018-2024. All Rights Reserved. + * Copyright Ericsson AB 2018-2025. All Rights Reserved. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -12009,10 +12009,6 @@ ESockDescriptor* esock_alloc_descriptor(SOCKET sock) descP->readersQ.first = NULL; descP->readersQ.last = NULL; - descP->readBuf.size = 0; - descP->readBuf.data = NULL; - descP->readResult = 0; - descP->readPkgCnt = 0; descP->readPkgMax = 0; descP->readPkgMaxCnt = 0; diff --git a/erts/emulator/nifs/unix/unix_socket_syncio.c b/erts/emulator/nifs/unix/unix_socket_syncio.c index 9a7398616312..f200d37dae26 100644 --- a/erts/emulator/nifs/unix/unix_socket_syncio.c +++ b/erts/emulator/nifs/unix/unix_socket_syncio.c @@ -323,68 +323,68 @@ static ERL_NIF_TERM essio_sendfile_ok(ErlNifEnv* env, size_t count); #endif -static ERL_NIF_TERM recv_check_result(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t toRead, - int saveErrno, - ssize_t readResult, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); -static ERL_NIF_TERM recvfrom_check_result(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t read, - int saveErrno, - ErlNifBinary* bufP, - ESockAddress* fromAddrP, - SOCKLEN_T fromAddrLen, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); +static BOOLEAN_T recv_check_entry(ErlNifEnv *env, + ESockDescriptor *descP, + ERL_NIF_TERM recvRef, + ERL_NIF_TERM *retP); static BOOLEAN_T recv_check_reader(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM ref, ERL_NIF_TERM* checkResult); +static BOOLEAN_T recv_check_result(ErlNifEnv *env, + ESockDescriptor *descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef, + ssize_t readResult, + int saveErrno, + ERL_NIF_TERM *retP); static ERL_NIF_TERM recv_check_full(ErlNifEnv* env, ESockDescriptor* descP, - ssize_t toRead, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + ssize_t len, + ErlNifBinary *bufP); static ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + ErlNifBinary *bufP); static ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, - 
ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + ErlNifBinary *bufP); static ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + ErlNifBinary *bufP); static ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESockDescriptor* descP, - int saveErrno, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + int saveErrno); static ERL_NIF_TERM recv_check_fail_gen(ErlNifEnv* env, ESockDescriptor* descP, - int saveErrno, - ERL_NIF_TERM sockRef); + ERL_NIF_TERM sockRef, + int saveErrno); static ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM sockRef); static ERL_NIF_TERM recv_check_select(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, ERL_NIF_TERM recvRef); static ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ESockDescriptor* descP, - ssize_t toRead, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + ssize_t len, + ErlNifBinary *bufP); static ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, + ErlNifBinary* bufP, ERL_NIF_TERM returnTag); static void recv_init_current_reader(ErlNifEnv* env, ESockDescriptor* descP, @@ -397,24 +397,6 @@ static void recv_error_current_reader(ErlNifEnv* env, ERL_NIF_TERM sockRef, ERL_NIF_TERM reason); -static ERL_NIF_TERM recvmsg_check_result(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t read, - int saveErrno, - struct msghdr* msgHdrP, - ErlNifBinary* dataBufP, - ErlNifBinary* ctrlBufP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); -static ERL_NIF_TERM recvmsg_check_msg(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t read, - struct msghdr* msgHdrP, - ErlNifBinary* dataBufP, - ErlNifBinary* ctrlBufP, - ERL_NIF_TERM sockRef); - - static ERL_NIF_TERM essio_ioctl_gifconf(ErlNifEnv* env, ESockDescriptor* descP); /* esock_ioctl_fionread */ @@ 
-2753,199 +2735,64 @@ ERL_NIF_TERM essio_recv(ErlNifEnv* env, ssize_t len, int flags) { - ERL_NIF_TERM readerCheck; - int save_errno; + int saveErrno; + ErlNifBinary buf; ssize_t readResult; - /* Receive as much as requested, or if 0 is requested use - * the default size (configured) - */ - size_t recvLen = (len != 0 ? len : descP->rBufSz); + size_t bufSz = (len != 0 ? len : descP->rBufSz); // 0 means default + ERL_NIF_TERM ret; SSDBG( descP, ("UNIX-ESSIO", "essio_recv {%d} -> entry with" - "\r\n count,size: (%ld:%u:%lu)" + "\r\n bufSz: %lu x %u (%ld)" "\r\n", descP->sock, - (long) len, descP->rNumCnt, (unsigned long) recvLen) ); - - if (! IS_OPEN(descP->readState)) - return esock_make_error_closed(env); + (unsigned long) bufSz, descP->rNumCnt, (long) len) ); - /* Accept and Read uses the same select flag - * so they can not be simultaneous - */ - if (descP->currentAcceptorP != NULL) - return esock_make_error_invalid(env, esock_atom_state); - - /* Ensure that we either have no current reader or that we are it, - * or enqueue this process if there is a current reader */ - if (! recv_check_reader(env, descP, recvRef, &readerCheck)) { + /* Check basic state and current reader */ + if (! recv_check_entry(env, descP, recvRef, &ret)) { SSDBG( descP, - ("UNIX-ESSIO", "essio_recv {%d} -> reader check failed: " - "\r\n %T" - "\r\n", descP->sock, readerCheck) ); - return readerCheck; + ("UNIX-ESSIO", "essio_recv {%d} -> entry failed: " + "\r\n %T\r\n", descP->sock, ret) ); + return ret; } - /* Allocate the receive buffer */ - if (descP->readBuf.data == NULL) { - ESOCK_ASSERT( ALLOC_BIN(recvLen, &descP->readBuf) ); - descP->readResult = 0; - } else { - /* We already have a buffer. - * Happens for ERRNO_BLOCK when we return 'select' - we keep - * the empty buffer, and for SOCK_STREAM when a recv - * for a specified length didn't fill the buffer (partial recv). 
- */ - if ((len == 0) && (0 < descP->readResult)) { - /* The request is for any amount of data - * - deliver what we have - */ - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); - /* Return {ok|select, Bin} */ - return recv_check_full_done(env, descP, sockRef, recvRef); - } else if (descP->readBuf.size < recvLen) { - /* Our buffer is too small */ - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, recvLen) ); - } else if (recvLen < descP->readResult) { - /* We have more buffered than what is requested - * - deliver the start of the buffer - * and copy the rest to a new buffer */ - ErlNifBinary buf; - ERL_NIF_TERM ret; - size_t keepLen = descP->readResult - recvLen; - ESOCK_ASSERT( ALLOC_BIN(keepLen, &buf) ); - sys_memcpy(buf.data, descP->readBuf.data + recvLen, - keepLen); - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, recvLen) ); - /* Return {ok|select, Bin} */ - ret = recv_check_full_done(env, descP, sockRef, recvRef); - descP->readBuf = buf; - descP->readResult = keepLen; - return ret; - } else if (descP->readResult < recvLen) { - /* The request is for more data than we have buffered - * - we need to receive more data */ - if (recvLen < descP->readBuf.size) { - /* Our buffer is too large */ - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, recvLen) ); - } /* else our buffer is just the right size */ - } else { - /* Our buffer contains exactly what is requested - * - just deliver it all - */ - /* Return {ok|select, Bin} */ - return recv_check_full_done(env, descP, sockRef, recvRef); - } - } - ESOCK_ASSERT( recvLen == descP->readBuf.size ); - recvLen = descP->readBuf.size - descP->readResult; + ESOCK_ASSERT( ALLOC_BIN(bufSz, &buf) ); - // If it fails (read = -1), we need errno... 
SSDBG( descP, ("UNIX-ESSIO", "essio_recv {%d} -> try read (%lu)\r\n", - descP->sock, (unsigned long) recvLen) ); + descP->sock, (unsigned long) len) ); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_tries, &descP->readTries, 1); - readResult = - sock_recv(descP->sock, - descP->readBuf.data + descP->readResult, recvLen, - flags); - if (ESOCK_IS_ERROR(readResult)) { - save_errno = sock_errno(); - } else { - save_errno = 0; // The value does not actually matter in this case - } + /* recv() */ + readResult = sock_recv(descP->sock, buf.data, buf.size, flags); + saveErrno = ESOCK_IS_ERROR(readResult) ? sock_errno() : 0; SSDBG( descP, ("UNIX-ESSIO", "essio_recv {%d} -> readResult: %ld (%d)\r\n", - descP->sock, (long) readResult, save_errno) ); - - return recv_check_result(env, descP, len, save_errno, readResult, - sockRef, recvRef); -} - - -/* *** recv_check_result *** - * - * Process the result of a call to recv. - */ -static -ERL_NIF_TERM recv_check_result(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t toRead, - int saveErrno, - ssize_t readResult, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) -{ - ERL_NIF_TERM res; - - SSDBG( descP, - ("UNIX-ESSIO", "recv_check_result(%T) {%d} -> entry with" - "\r\n toRead: %ld" - "\r\n saveErrno: %d" - "\r\n readResult: %ld" - "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, - (long) toRead, saveErrno, (long) readResult, recvRef) ); - - - /* - * - * We need to handle readResult = 0 for other type(s) (DGRAM) when - * its actually valid to read 0 bytes. - * - * - */ - - if ((readResult == 0) && (descP->type == SOCK_STREAM)) { - ERL_NIF_TERM reason = esock_atom_closed; - - /* Stream closed from other side - * - * When a stream socket peer has performed an orderly shutdown, - * the return value will be 0 (the traditional "end-of-file" return). - * - * *We* do never actually try to read 0 bytes! - * - * We must also notify any waiting readers! 
- */ - - if (0 < descP->readResult) { - ERL_NIF_TERM data; - /* We had data buffered */ - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); - data = MKBIN(env, &descP->readBuf); - descP->readBuf.data = NULL; - /* {error, {closed, Bin}} */ - res = esock_make_error(env, MKT2(env, reason, data)); - } else { - /* {error, closed} */ - res = esock_make_error(env, reason); - } + descP->sock, (long) readResult, saveErrno) ); - recv_error_current_reader(env, descP, sockRef, reason); - - } else if (readResult < 0) { - - /* +++ Error handling +++ */ - - /* 'timeout' | {error, SaveErrno} */ - res = recv_check_fail(env, descP, saveErrno, sockRef, recvRef); + /* Check for errors and end of stream */ + if (! recv_check_result(env, descP, sockRef, recvRef, + readResult, saveErrno, &ret) ) { + FREE_BIN(&buf); + return ret; + } + /* readResult >= 0 */ + ESOCK_ASSERT( readResult <= buf.size ); - } else if ((descP->readResult += readResult) < descP->readBuf.size) { + if (readResult < buf.size) { /* +++ We did not fill the buffer +++ */ SSDBG( descP, ("UNIX-ESSIO", - "recv_check_result(%T) {%d} -> [%lu] " + "essio_recv {%d} -> [%lu] " "did not fill the buffer (%ld)\r\n", - sockRef, descP->sock, (unsigned long) descP->readBuf.size, - (long) descP->readResult) ); + descP->sock, (unsigned long) buf.size, + (long) readResult) ); - res = recv_check_partial(env, descP, toRead, sockRef, recvRef); + ESOCK_ASSERT( REALLOC_BIN(&buf, readResult) ); + return recv_check_partial(env, descP, sockRef, recvRef, len, &buf); } else { @@ -2953,18 +2800,14 @@ ERL_NIF_TERM recv_check_result(ErlNifEnv* env, SSDBG( descP, ("UNIX-ESSIO", - "recv_check_result(%T) {%d} -> [%lu] filled the buffer\r\n", - sockRef, descP->sock, - (unsigned long) descP->readBuf.size) ); + "essio_recv {%d} -> [%lu] filled the buffer\r\n", + descP->sock, (unsigned long) buf.size) ); - res = recv_check_full(env, descP, toRead, sockRef, recvRef); + return recv_check_full(env, descP, sockRef, recvRef, len, &buf); } - - 
return res; } - /* ======================================================================== * The (read) buffer handling *must* be optimized! * But for now we make it easy for ourselves by @@ -2980,152 +2823,76 @@ ERL_NIF_TERM essio_recvfrom(ErlNifEnv* env, int flags) { ESockAddress fromAddr; - SOCKLEN_T addrLen; - ssize_t read; - int save_errno; + SOCKLEN_T fromAddrLen; + ssize_t readResult; + int saveErrno; ErlNifBinary buf; - ERL_NIF_TERM readerCheck; - size_t bufSz = (len != 0 ? len : descP->rBufSz); + size_t bufSz = (len != 0 ? len : descP->rBufSz); // 0 means default + ERL_NIF_TERM ret; SSDBG( descP, ("UNIX-ESSIO", "essio_recvfrom {%d} -> entry with" - "\r\n bufSz: %d" - "\r\n", descP->sock, bufSz) ); - - if (! IS_OPEN(descP->readState)) - return esock_make_error_closed(env); - - /* Accept and Read uses the same select flag - * so they can not be simultaneous - */ - if (descP->currentAcceptorP != NULL) - return esock_make_error_invalid(env, esock_atom_state); + "\r\n bufSz: %lu (%ld)" + "\r\n", descP->sock, + (unsigned long) bufSz, (long) len) ); - /* Ensure that we either have no current reader or that we are it, - * or enqueue this process if there is a current reader */ - if (! recv_check_reader(env, descP, recvRef, &readerCheck)) { + /* Check basic state and current reader */ + if (! recv_check_entry(env, descP, recvRef, &ret)) { SSDBG( descP, - ("UNIX-ESSIO", "essio_recv {%d} -> reader check failed: " - "\r\n %T\r\n", descP->sock, readerCheck) ); - return readerCheck; + ("UNIX-ESSIO", "essio_recvfrom {%d} -> entry failed: " + "\r\n %T\r\n", descP->sock, ret) ); + return ret; } - /* Allocate a buffer: - * Either as much as we want to read or (if zero (0)) use the "default" - * size (what has been configured). 
- */ + /* Allocate the receive buffer */ ESOCK_ASSERT( ALLOC_BIN(bufSz, &buf) ); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_tries, &descP->readTries, 1); - addrLen = sizeof(fromAddr); - sys_memzero((char*) &fromAddr, addrLen); + fromAddrLen = sizeof(fromAddr); + sys_memzero((char*) &fromAddr, fromAddrLen); - read = sock_recvfrom(descP->sock, buf.data, buf.size, flags, - &fromAddr.sa, &addrLen); - if (ESOCK_IS_ERROR(read)) - save_errno = sock_errno(); - else - save_errno = 0; // The value does not actually matter in this case - - return recvfrom_check_result(env, descP, read, save_errno, - &buf, &fromAddr, addrLen, - sockRef, recvRef); -} + /* recvfrom() */ + readResult = sock_recvfrom(descP->sock, buf.data, buf.size, flags, + &fromAddr.sa, &fromAddrLen); + saveErrno = ESOCK_IS_ERROR(readResult) ? sock_errno() : 0; + /* Check for errors and end of stream */ + if (! recv_check_result(env, descP, sockRef, recvRef, + readResult, saveErrno, &ret) ) { + FREE_BIN(&buf); + return ret; + } + /* readResult >= 0 */ + ESOCK_ASSERT( readResult <= buf.size ); -/* The recvfrom function delivers one (1) message. If our buffer - * is too small, the message will be truncated. So, regardless - * if we filled the buffer or not, we have got what we are going - * to get regarding this message. - */ - -static -ERL_NIF_TERM recvfrom_check_result(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t read, - int saveErrno, - ErlNifBinary* bufP, - ESockAddress* fromAddrP, - SOCKLEN_T fromAddrLen, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) -{ - ERL_NIF_TERM data, res; - - SSDBG( descP, - ("UNIX-ESSIO", "recvfrom_check_result(%T) {%d} -> entry with" - "\r\n read: %ld" - "\r\n saveErrno: %d" - "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, - (long) read, saveErrno, recvRef) ); - - /* - * - * We need to handle read = 0 for non_stream socket type(s) when - * its actually valid to read 0 bytes. + /* The recvfrom function delivers one (1) message. 
If our buffer + * is too small, the message will be truncated. So, regardless + * if we filled the buffer or not, we have got what we are going + * to get regarding this message. * - * + * Encode the message and source address */ - if ((read == 0) && (descP->type == SOCK_STREAM)) { - - /* - * When a stream socket peer has performed an orderly shutdown, - * the return value will be 0 (the traditional "end-of-file" return). - * - * *We* do never actually try to read 0 bytes! - */ - - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_fails, &descP->readFails, 1); - - FREE_BIN(bufP); - - /* Return {error, closed} */ - return esock_make_error_closed(env); + if (readResult < buf.size) { + ESOCK_ASSERT( REALLOC_BIN(&buf, readResult) ); } - if (read < 0) { - - /* +++ Error handling +++ */ - - res = recv_check_fail(env, descP, saveErrno, sockRef, recvRef); - FREE_BIN(bufP); - - } else { - - /* +++ We successfully got a message - time to encode the address +++ */ - - ERL_NIF_TERM eSockAddr; - - esock_encode_sockaddr(env, - fromAddrP, fromAddrLen, - &eSockAddr); - - if (read != bufP->size) { - ESOCK_ASSERT( REALLOC_BIN(bufP, read) ); - } - data = MKBIN(env, bufP); - - ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, - &descP->readPkgCnt, 1); - ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, - &descP->readByteCnt, read); - - recv_update_current_reader(env, descP, sockRef); - - res = esock_make_ok2(env, MKT2(env, eSockAddr, data)); + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, + &descP->readPkgCnt, 1); + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, + &descP->readByteCnt, buf.size); - } + recv_update_current_reader(env, descP, sockRef); - return res; + esock_encode_sockaddr(env, + &fromAddr, fromAddrLen, + &ret); + return esock_make_ok2(env, MKT2(env, ret, MKBIN(env, &buf))); } - /* ======================================================================== * The (read) buffer handling *must* be optimized! 
* But for now we make it easy for ourselves by @@ -3142,15 +2909,15 @@ ERL_NIF_TERM essio_recvmsg(ErlNifEnv* env, int flags) { SOCKLEN_T addrLen; - ssize_t read; - int save_errno; + ssize_t readResult; + int saveErrno; size_t bufSz = (bufLen != 0 ? bufLen : descP->rBufSz); size_t ctrlSz = (ctrlLen != 0 ? ctrlLen : descP->rCtrlSz); struct msghdr msgHdr; SysIOVec iov[1]; // Shall we always use 1? ErlNifBinary data[1]; // Shall we always use 1? ErlNifBinary ctrl; - ERL_NIF_TERM readerCheck; + ERL_NIF_TERM ret; ESockAddress addr; SSDBG( descP, ("UNIX-ESSIO", "essio_recvmsg {%d} -> entry with" @@ -3160,22 +2927,12 @@ ERL_NIF_TERM essio_recvmsg(ErlNifEnv* env, (unsigned long) bufSz, (long) bufLen, (unsigned long) ctrlSz, (long) ctrlLen) ); - if (! IS_OPEN(descP->readState)) - return esock_make_error_closed(env); - - /* Accept and Read uses the same select flag - * so they can not be simultaneous - */ - if (descP->currentAcceptorP != NULL) - return esock_make_error_invalid(env, esock_atom_state); - - /* Ensure that we either have no current reader or that we are it, - * or enqueue this process if there is a current reader */ - if (! recv_check_reader(env, descP, recvRef, &readerCheck)) { + /* Check basic state and current reader */ + if (! 
recv_check_entry(env, descP, recvRef, &ret)) { SSDBG( descP, - ("UNIX-ESSIO", "essio_recvmsg {%d} -> reader check failed: " - "\r\n %T\r\n", descP->sock, readerCheck) ); - return readerCheck; + ("UNIX-ESSIO", "essio_recvmsg {%d} -> entry failed: " + "\r\n %T\r\n", descP->sock, ret) ); + return ret; } /* Allocate the (msg) data buffer: @@ -3203,110 +2960,24 @@ ERL_NIF_TERM essio_recvmsg(ErlNifEnv* env, msgHdr.msg_control = ctrl.data; msgHdr.msg_controllen = ctrl.size; - read = sock_recvmsg(descP->sock, &msgHdr, flags); - if (ESOCK_IS_ERROR(read)) - save_errno = sock_errno(); - else - save_errno = 0; // The value does not actually matter in this case - - return recvmsg_check_result(env, descP, read, save_errno, - &msgHdr, - data, // Needed for iov encode - &ctrl, // Needed for ctrl header encode - sockRef, recvRef); -} - - -/* *** recvmsg_check_result *** - * - * The recvmsg function delivers one (1) message. If our buffer - * is to small, the message will be truncated. So, regardless - * if we filled the buffer or not, we have got what we are going - * to get regarding this message. - */ -static -ERL_NIF_TERM recvmsg_check_result(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t read, - int saveErrno, - struct msghdr* msgHdrP, - ErlNifBinary* dataBufP, - ErlNifBinary* ctrlBufP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) -{ - ERL_NIF_TERM res; - - SSDBG( descP, - ("UNIX-ESSIO", "recvmsg_check_result(%T) {%d} -> entry with" - "\r\n read: %ld" - "\r\n saveErrno: %d" - "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, - (long) read, saveErrno, recvRef) ); - - - /* - * - * We need to handle read = 0 for non_stream socket type(s) when - * its actually valid to read 0 bytes. - * - * - */ - - if ((read == 0) && (descP->type == SOCK_STREAM)) { - - /* - * When a stream socket peer has performed an orderly shutdown, - * the return value will be 0 (the traditional "end-of-file" return). - * - * *We* do never actually try to read 0 bytes! 
- */ - - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_fails, &descP->readFails, 1); - - FREE_BIN(dataBufP); FREE_BIN(ctrlBufP); - - return esock_make_error_closed(env); - } - - - if (read < 0) { - - /* +++ Error handling +++ */ - - res = recv_check_fail(env, descP, saveErrno, sockRef, recvRef); - FREE_BIN(dataBufP); FREE_BIN(ctrlBufP); - - } else { - - /* +++ We successfully got a message - time to encode it +++ */ - - res = recvmsg_check_msg(env, descP, read, msgHdrP, - dataBufP, ctrlBufP, sockRef); + /* recvmsg() */ + readResult = sock_recvmsg(descP->sock, &msgHdr, flags); + saveErrno = ESOCK_IS_ERROR(readResult) ? sock_errno() : 0; + /* Check for errors and end of stream */ + if (! recv_check_result(env, descP, sockRef, recvRef, + readResult, saveErrno, &ret) ) { + FREE_BIN(&data[0]); + FREE_BIN(&ctrl); + return ret; } + /* readResult >= 0 */ - return res; - -} - - -/* *** recvmsg_check_msg *** - * - * We successfully read one message. Time to process. - */ -static -ERL_NIF_TERM recvmsg_check_msg(ErlNifEnv* env, - ESockDescriptor* descP, - ssize_t read, - struct msghdr* msgHdrP, - ErlNifBinary* dataBufP, - ErlNifBinary* ctrlBufP, - ERL_NIF_TERM sockRef) -{ - ERL_NIF_TERM eMsg; + /* The recvmsg function delivers one (1) message. If our buffer + * is too small, the message will be truncated. So, regardless + * if we filled the buffer or not, we have got what we are going + * to get regarding this message.
+ */ /* * @@ -3318,26 +2989,25 @@ ERL_NIF_TERM recvmsg_check_msg(ErlNifEnv* env, * */ - encode_msg(env, descP, - read, msgHdrP, dataBufP, ctrlBufP, - &eMsg); - SSDBG( descP, - ("UNIX-ESSIO", "recvmsg_check_result(%T) {%d} -> ok\r\n", - sockRef, descP->sock) ); + ("UNIX-ESSIO", "essio_recvmsg {%d} -> ok\r\n", + descP->sock) ); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, - &descP->readByteCnt, read); + &descP->readByteCnt, readResult); + + encode_msg(env, descP, + readResult, &msgHdr, &data[0], &ctrl, + &ret); recv_update_current_reader(env, descP, sockRef); - return esock_make_ok2(env, eMsg); + return esock_make_ok2(env, ret); } - /* ======================================================================== */ extern @@ -6859,11 +6529,6 @@ void essio_dtor(ErlNifEnv* env, descP->writeState |= (ESOCK_STATE_DTOR | ESOCK_STATE_CLOSED); descP->pattern = (ESOCK_DESC_PATTERN_DTOR | ESOCK_STATE_CLOSED); - if (descP->readBuf.data != NULL) { - FREE_BIN(&descP->readBuf); - descP->readBuf.data = NULL; - } - esock_free_env("dtor reader", descP->currentReader.env); descP->currentReader.env = NULL; @@ -7146,6 +6811,36 @@ void essio_down(ErlNifEnv* env, /* *** Recv/recvfrom/recvmsg utility functions *** */ +static +BOOLEAN_T recv_check_entry(ErlNifEnv *env, + ESockDescriptor *descP, + ERL_NIF_TERM recvRef, + ERL_NIF_TERM *retP) +{ + if (! IS_OPEN(descP->readState)) { + *retP = esock_make_error_closed(env); + return FALSE; + } + + /* Accept and Read uses the same select flag + * so they can not be simultaneous + */ + if (descP->currentAcceptorP != NULL) { + *retP = esock_make_error_invalid(env, esock_atom_state); + return FALSE; + } + + /* Ensure that we either have no current reader or that we are it, + * or enqueue this process if there is a current reader */ + if (! 
recv_check_reader(env, descP, recvRef, retP)) { + return FALSE; + } + + *retP = esock_atom_ok; /* Ignored */ + return TRUE; +} + + /* *** recv_check_reader *** * * Checks if we have a current reader and if that is us. If not, @@ -7163,7 +6858,7 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, { if (descP->currentReaderP != NULL) { ErlNifPid caller; - + ESOCK_ASSERT( enif_self(env, &caller) != NULL ); if (COMPARE_PIDS(&descP->currentReader.pid, &caller) != 0) { @@ -7187,7 +6882,7 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, /* Reader already in queue */ *checkResult = esock_raise_invalid(env, esock_atom_state); } - + SSDBG( descP, ("UNIX-ESSIO", "recv_check_reader {%d} -> queue (push) result: %T\r\n", @@ -7203,24 +6898,75 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, } + +/* *** recv_check_result *** + * + * Common for all recv* functions; check for end of stream + * and recv error, set the result term and return FALSE. + * If neither, return TRUE and let the caller handle the message. + */ +static +BOOLEAN_T recv_check_result(ErlNifEnv *env, + ESockDescriptor *descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef, + ssize_t readResult, + int saveErrno, + ERL_NIF_TERM *retP) +{ + if ((readResult == 0) && (descP->type == SOCK_STREAM)) { + ERL_NIF_TERM reason = esock_atom_closed; + + /* Stream closed from other side + * + * When a stream socket peer has performed an orderly shutdown, + * the return value will be 0 (the traditional "end-of-file" return). + * + * *We* never actually try to read 0 bytes! + * + * We must also notify any waiting readers!
+ */ + + ESOCK_CNT_INC(env, descP, sockRef, + esock_atom_read_fails, &descP->readFails, 1); + + recv_error_current_reader(env, descP, sockRef, reason); + /* Return {error, closed} */ + *retP = esock_make_error(env, reason); + return FALSE; + } + else if (readResult < 0) { + + /* +++ Error handling +++ */ + + /* 'timeout' | {error, SaveErrno} */ + *retP = recv_check_fail(env, descP, sockRef, recvRef, saveErrno); + return FALSE; + } + + return TRUE; +} + + /* *** recv_check_full *** * * This function is called if we filled the allocated buffer. * But are we done yet? * - * toRead = 0 means: Give me everything you have => maybe - * toRead > 0 means: Yes + * len = 0 means: Give me everything you have => maybe + * len > 0 means: Yes */ static ERL_NIF_TERM recv_check_full(ErlNifEnv* env, ESockDescriptor* descP, - ssize_t toRead, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ERL_NIF_TERM recvRef, + ssize_t len, + ErlNifBinary *bufP) { ERL_NIF_TERM res; - if ((toRead == 0) && + if ((len == 0) && (descP->type == SOCK_STREAM)) { /* +++ Give us everything you have got => * @@ -7229,15 +6975,15 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, SSDBG( descP, ("UNIX-ESSIO", "recv_check_full(%T) {%d} -> shall we continue reading?" 
- "\r\n readResult: %ld" - "\r\n rNum: %u" - "\r\n rNumCnt: %u" + "\r\n bufSz: %ld" + "\r\n rNum: %u" + "\r\n rNumCnt: %u" "\r\n", sockRef, descP->sock, - (unsigned long) descP->readResult, descP->rNum, + (unsigned long) bufP->size, descP->rNum, descP->rNumCnt) ); /* Res = {more|ok|select, Bin} */ - res = recv_check_full_maybe_done(env, descP, sockRef, recvRef); + res = recv_check_full_maybe_done(env, descP, sockRef, recvRef, bufP); } else { @@ -7247,10 +6993,10 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, ("UNIX-ESSIO", "recv_check_full(%T) {%d} -> [%ld] " "we got exactly what we could fit\r\n", - sockRef, descP->sock, (long) toRead) ); + sockRef, descP->sock, (long) len) ); /* Res = {ok|select, Bin} */ - res = recv_check_full_done(env, descP, sockRef, recvRef); + res = recv_check_full_done(env, descP, sockRef, recvRef, bufP); } return res; @@ -7270,20 +7016,21 @@ static ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ERL_NIF_TERM recvRef, + ErlNifBinary *bufP) { ERL_NIF_TERM ret; ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, &descP->readByteCnt, - descP->readResult); - descP->readPkgMaxCnt += descP->readResult; + bufP->size); + descP->readPkgMaxCnt += bufP->size; descP->rNumCnt++; if (descP->rNumCnt >= descP->rNum) { /* Ret = {ok|select, Bin} */ - ret = recv_check_full_done(env, descP, sockRef, recvRef); + ret = recv_check_full_done(env, descP, sockRef, recvRef, bufP); } else { @@ -7299,11 +7046,10 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, ("UNIX-ESSIO", "recv_check_full_maybe_done(%T) {%d} -> [%lu] " "we are done for now - read more\r\n", - sockRef, descP->sock, (unsigned long) descP->readBuf.size) ); + sockRef, descP->sock, (unsigned long) bufP->size) ); /* Ret = {more, Bin} */ - ret = MKT2(env, esock_atom_more, MKBIN(env, &descP->readBuf)); - descP->readBuf.data = NULL; + ret = MKT2(env, esock_atom_more, MKBIN(env, bufP)); } return ret; @@ -7314,29 
+7060,29 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, /* *** recv_check_full_done *** * * A successful recv and we filled the buffer. - * - return {ok, Bin} + * - return {ok|select, Bin} */ static ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ERL_NIF_TERM recvRef, + ErlNifBinary *bufP) { ERL_NIF_TERM data; if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) /* {select, Bin} */ - return recv_check_select_done(env, descP, sockRef, recvRef); + return recv_check_select_done(env, descP, sockRef, recvRef, bufP); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_byte, &descP->readByteCnt, - descP->readResult); + esock_atom_read_byte, &descP->readByteCnt, bufP->size); descP->rNumCnt = 0; - descP->readPkgMaxCnt += descP->readResult; + descP->readPkgMaxCnt += bufP->size; if (descP->readPkgMaxCnt > descP->readPkgMax) descP->readPkgMax = descP->readPkgMaxCnt; descP->readPkgMaxCnt = 0; @@ -7346,8 +7092,7 @@ ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, /* This transfers "ownership" of the *allocated* binary to an * erlang term (no need for an explicit free). 
*/ - data = MKBIN(env, &descP->readBuf); - descP->readBuf.data = NULL; + data = MKBIN(env, bufP); /* Return {ok, Bin} */ return esock_make_ok2(env, data); @@ -7361,9 +7106,10 @@ ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, */ static ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ESockDescriptor* descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef, + ErlNifBinary *bufP) { ERL_NIF_TERM res; int sres; @@ -7373,9 +7119,9 @@ ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_byte, &descP->readByteCnt, descP->readResult); + esock_atom_read_byte, &descP->readByteCnt, bufP->size); - descP->readPkgMaxCnt += descP->readResult; + descP->readPkgMaxCnt += bufP->size; if (descP->readPkgMaxCnt > descP->readPkgMax) descP->readPkgMax = descP->readPkgMaxCnt; descP->readPkgMaxCnt = 0; @@ -7401,13 +7147,12 @@ ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, /* This transfers "ownership" of the *allocated* binary to an * erlang term (no need for an explicit free). 
*/ - data = MKBIN(env, &descP->readBuf); - descP->readBuf.data = NULL; + data = MKBIN(env, bufP); SSDBG( descP, ("UNIX-ESSIO", "recv_check_select_done(%T) {%d} -> [%ld] done\r\n", - sockRef, descP->sock, (long) descP->readResult) ); + sockRef, descP->sock, (long) bufP->size) ); descP->readState |= ESOCK_STATE_SELECTED; /* Res = {select, Bin} */ @@ -7425,9 +7170,9 @@ ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, static ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESockDescriptor* descP, - int saveErrno, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ERL_NIF_TERM recvRef, + int saveErrno) { ERL_NIF_TERM res; @@ -7448,7 +7193,7 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, &descP->readFails, 1); /* Res = {error, econnreset} */ - res = recv_check_fail_econnreset(env, descP, sockRef, recvRef); + res = recv_check_fail_econnreset(env, descP, sockRef); } else if ((saveErrno == ERRNO_BLOCK) || (saveErrno == EAGAIN)) { @@ -7483,7 +7228,7 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, &descP->readFails, 1); /* Res = {error, SaveErrno} */ - res = recv_check_fail_gen(env, descP, saveErrno, sockRef); + res = recv_check_fail_gen(env, descP, sockRef, saveErrno); } return res; @@ -7497,8 +7242,8 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, static ERL_NIF_TERM recv_check_fail_gen(ErlNifEnv* env, ESockDescriptor* descP, - int saveErrno, - ERL_NIF_TERM sockRef) + ERL_NIF_TERM sockRef, + int saveErrno) { ERL_NIF_TERM reason = MKA(env, erl_errno_id(saveErrno)); @@ -7517,8 +7262,7 @@ ERL_NIF_TERM recv_check_fail_gen(ErlNifEnv* env, static ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ERL_NIF_TERM sockRef) { ERL_NIF_TERM reason = MKA(env, erl_errno_id(ECONNRESET)); ERL_NIF_TERM res = esock_make_error(env, reason); @@ -7596,9 +7340,10 @@ ERL_NIF_TERM recv_check_select(ErlNifEnv* env, static ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ESockDescriptor* descP, - ssize_t toRead, ERL_NIF_TERM 
sockRef, - ERL_NIF_TERM recvRef) + ERL_NIF_TERM recvRef, + ssize_t len, + ErlNifBinary *bufP) { ERL_NIF_TERM res; @@ -7606,7 +7351,7 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, /* Buffer not filled */ - if ((descP->type == SOCK_STREAM) && (toRead > 0)) { + if ((descP->type == SOCK_STREAM) && (len > 0)) { /* A stream socket with specified read size * - more data is needed @@ -7620,11 +7365,11 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ("UNIX-ESSIO", "recv_check_partial(%T) {%d} -> [%ld] split buffer time-out" "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, (long) toRead, + "\r\n", sockRef, descP->sock, (long) len, recvRef) ); /* Res = {timeout, Bin} */ - res = recv_check_partial_done(env, descP, sockRef, + res = recv_check_partial_done(env, descP, sockRef, bufP, esock_atom_timeout); } else { @@ -7635,14 +7380,13 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, "recv_check_partial(%T) {%d} -> [%ld]" " only part of message - expect more" "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, (long) toRead, + "\r\n", sockRef, descP->sock, (long) len, recvRef) ); - ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, - &descP->readByteCnt, descP->readResult); - /* Initiate select read, Res = 'select' */ - res = recv_check_select(env, descP, sockRef, recvRef); + /* Res = {select, Bin} */ + res = recv_check_select_done(env, descP, sockRef, recvRef, bufP); + } } else { @@ -7652,17 +7396,18 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ("UNIX-ESSIO", "recv_check_partial(%T) {%d} -> [%ld] split buffer" "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, (long) toRead, + "\r\n", sockRef, descP->sock, (long) len, recvRef) ); if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) { - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); /* Res = {select, Bin} */ - res = recv_check_select_done(env, descP, sockRef, recvRef); + res = recv_check_select_done(env, descP, sockRef, recvRef, bufP); } - else + else { /* Res = {ok, Bin} */ - 
res = recv_check_partial_done(env, descP, sockRef, esock_atom_ok); + res = recv_check_partial_done(env, descP, sockRef, bufP, + esock_atom_ok); + } } return res; @@ -7677,6 +7422,7 @@ static ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, + ErlNifBinary* bufP, ERL_NIF_TERM returnTag) { ERL_NIF_TERM data; @@ -7685,9 +7431,9 @@ ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_byte, &descP->readByteCnt, descP->readResult); + esock_atom_read_byte, &descP->readByteCnt, bufP->size); - descP->readPkgMaxCnt += descP->readResult; + descP->readPkgMaxCnt += bufP->size; if (descP->readPkgMaxCnt > descP->readPkgMax) descP->readPkgMax = descP->readPkgMaxCnt; descP->readPkgMaxCnt = 0; @@ -7697,13 +7443,11 @@ ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, /* This transfers "ownership" of the *allocated* binary to an * erlang term (no need for an explicit free). */ - ESOCK_ASSERT( REALLOC_BIN(&descP->readBuf, descP->readResult) ); - data = MKBIN(env, &descP->readBuf); - descP->readBuf.data = NULL; + data = MKBIN(env, bufP); SSDBG( descP, ("UNIX-ESSIO", "recv_check_partial_done(%T) {%d} -> [%ld] done\r\n", - sockRef, descP->sock, (long) descP->readResult) ); + sockRef, descP->sock, (long) bufP->size) ); /* {ReturnTag, Bin} */ return MKT2(env, returnTag, data); From dbea7aa28ade2a9e4cbd9b950c6e5ae5662a739a Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Fri, 17 Jan 2025 18:16:58 +0100 Subject: [PATCH 4/6] Implement `select_read` We need to distinguish between a complete read, e.g. for a stream socket with specified length during `socket:recv/*` that collects the requested amount, and an incomplete read where the OS returns partial data.
So `select_read` will mean that the read is complete but we are awaiting a `select` message due to the `select_read` option being active, while `select` means that the read is incomplete and we are waiting for a `select` message. --- erts/emulator/nifs/unix/unix_socket_syncio.c | 329 +++++++------------ lib/kernel/src/inet_epmd_socket.erl | 94 +++--- lib/kernel/src/socket.erl | 131 +++++--- 3 files changed, 253 insertions(+), 301 deletions(-) diff --git a/erts/emulator/nifs/unix/unix_socket_syncio.c b/erts/emulator/nifs/unix/unix_socket_syncio.c index f200d37dae26..1fefa14d0ea3 100644 --- a/erts/emulator/nifs/unix/unix_socket_syncio.c +++ b/erts/emulator/nifs/unix/unix_socket_syncio.c @@ -327,10 +327,6 @@ static BOOLEAN_T recv_check_entry(ErlNifEnv *env, ESockDescriptor *descP, ERL_NIF_TERM recvRef, ERL_NIF_TERM *retP); -static BOOLEAN_T recv_check_reader(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM ref, - ERL_NIF_TERM* checkResult); static BOOLEAN_T recv_check_result(ErlNifEnv *env, ESockDescriptor *descP, ERL_NIF_TERM sockRef, @@ -354,11 +350,6 @@ static ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, ERL_NIF_TERM sockRef, ERL_NIF_TERM recvRef, ErlNifBinary *bufP); -static ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef, - ErlNifBinary *bufP); static ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, @@ -374,18 +365,14 @@ static ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, static ERL_NIF_TERM recv_check_select(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef); + ERL_NIF_TERM recvRef, + ERL_NIF_TERM msg); static ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM sockRef, ERL_NIF_TERM recvRef, ssize_t len, ErlNifBinary *bufP); -static ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ErlNifBinary* bufP, 
- ERL_NIF_TERM returnTag); static void recv_init_current_reader(ErlNifEnv* env, ESockDescriptor* descP, ERL_NIF_TERM recvRef); @@ -2792,6 +2779,7 @@ ERL_NIF_TERM essio_recv(ErlNifEnv* env, (long) readResult) ); ESOCK_ASSERT( REALLOC_BIN(&buf, readResult) ); + /* Return {ok|timeout|select|select_read, Bin} */ return recv_check_partial(env, descP, sockRef, recvRef, len, &buf); } else { @@ -2803,6 +2791,7 @@ ERL_NIF_TERM essio_recv(ErlNifEnv* env, "essio_recv {%d} -> [%lu] filled the buffer\r\n", descP->sock, (unsigned long) buf.size) ); + /* Return {more|ok|select_read, Bin} */ return recv_check_full(env, descP, sockRef, recvRef, len, &buf); } } @@ -2878,18 +2867,35 @@ ERL_NIF_TERM essio_recvfrom(ErlNifEnv* env, ESOCK_ASSERT( REALLOC_BIN(&buf, readResult) ); } + descP->rNumCnt = 0; + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, &descP->readByteCnt, buf.size); - - recv_update_current_reader(env, descP, sockRef); + if (buf.size > descP->readPkgMax) + descP->readPkgMax = buf.size; esock_encode_sockaddr(env, &fromAddr, fromAddrLen, &ret); - return esock_make_ok2(env, MKT2(env, ret, MKBIN(env, &buf))); + /* MKBIN transfers "ownership" of the *allocated* binary to an + * erlang term in env (no need to free; it will be GC:ed). 
+ */ + /* {FromAddr, Bin} */ + ret = MKT2(env, ret, MKBIN(env, &buf)); + + if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) { + /* Return {select_read, {FromAddr, Bin}} */ + ret = MKT2(env, esock_atom_select_read, ret); + return recv_check_select(env, descP, sockRef, recvRef, ret); + } + else { + recv_update_current_reader(env, descP, sockRef); + /* Return {ok, {FromAddr, Bin}} */ + return esock_make_ok2(env, ret); + } } @@ -2993,18 +2999,29 @@ ERL_NIF_TERM essio_recvmsg(ErlNifEnv* env, ("UNIX-ESSIO", "essio_recvmsg {%d} -> ok\r\n", descP->sock) ); + descP->rNumCnt = 0; + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_byte, &descP->readByteCnt, readResult); + if (readResult > descP->readPkgMax) + descP->readPkgMax = readResult; encode_msg(env, descP, readResult, &msgHdr, &data[0], &ctrl, &ret); - recv_update_current_reader(env, descP, sockRef); - - return esock_make_ok2(env, ret); + if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) { + /* Return {select_read, Msg} */ + ret = MKT2(env, esock_atom_select_read, ret); + return recv_check_select(env, descP, sockRef, recvRef, ret); + } + else { + recv_update_current_reader(env, descP, sockRef); + /* Return {ok, Msg} */ + return esock_make_ok2(env, ret); + } } @@ -6830,32 +6847,13 @@ BOOLEAN_T recv_check_entry(ErlNifEnv *env, return FALSE; } - /* Ensure that we either have no current reader or that we are it, - * or enqueue this process if there is a current reader */ - if (! recv_check_reader(env, descP, recvRef, retP)) { - return FALSE; - } - - *retP = esock_atom_ok; /* Ignored */ - return TRUE; -} - - -/* *** recv_check_reader *** - * - * Checks if we have a current reader and if that is us. If not, - * then we must be made to wait for our turn. This is done by pushing - * us unto the reader queue. 
- * Note that we do *not* actually initiate the currentReader structure - * here, since we do not actually know yet if we need to! We do that in - * the [recv|recvfrom|recvmsg]_check_result function. - */ -static -BOOLEAN_T recv_check_reader(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM ref, - ERL_NIF_TERM* checkResult) -{ + /* Check if we have a current reader and if that is us. If not, + * then we must be made to wait for our turn. This is done by pushing + * us unto the reader queue. + * Note that we do *not* actually initiate the currentReader structure + * here, since we do not actually know yet if we need to! We do that in + * the [recv|recvfrom|recvmsg]_check_result function. + */ if (descP->currentReaderP != NULL) { ErlNifPid caller; @@ -6867,38 +6865,35 @@ BOOLEAN_T recv_check_reader(ErlNifEnv* env, SSDBG( descP, ("UNIX-ESSIO", "recv_check_reader {%d} -> not (current) reader" - "\r\n ref: %T" - "\r\n", descP->sock, ref) ); + "\r\n recvRef: %T" + "\r\n", descP->sock, recvRef) ); if (! 
esock_reader_search4pid(env, descP, &caller)) { - if (COMPARE(ref, esock_atom_zero) == 0) { - *checkResult = esock_atom_timeout; + if (COMPARE(recvRef, esock_atom_zero) == 0) { + *retP = esock_atom_timeout; } else { - esock_reader_push(env, descP, caller, ref, NULL); - *checkResult = esock_atom_select; + esock_reader_push(env, descP, caller, recvRef, NULL); + *retP = esock_atom_select; } } else { /* Reader already in queue */ - *checkResult = esock_raise_invalid(env, esock_atom_state); + *retP = esock_raise_invalid(env, esock_atom_state); } SSDBG( descP, ("UNIX-ESSIO", "recv_check_reader {%d} -> queue (push) result: %T\r\n", - descP->sock, *checkResult) ); + descP->sock, *retP) ); return FALSE; } } - - *checkResult = esock_atom_ok; - // *checkResult ignored by the caller that creates the actual result + *retP = esock_atom_ok; /* Ignored */ return TRUE; } - /* *** recv_check_result *** * * Common for all recv* functions; check for end of stream @@ -6955,6 +6950,8 @@ BOOLEAN_T recv_check_result(ErlNifEnv *env, * * len = 0 means: Give me everything you have => maybe * len > 0 means: Yes + * + * Return {more|ok|select_read, Bin} */ static ERL_NIF_TERM recv_check_full(ErlNifEnv* env, @@ -6982,7 +6979,7 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, (unsigned long) bufP->size, descP->rNum, descP->rNumCnt) ); - /* Res = {more|ok|select, Bin} */ + /* Res = {more|ok|select_read, Bin} */ res = recv_check_full_maybe_done(env, descP, sockRef, recvRef, bufP); } else { @@ -6995,7 +6992,7 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, "we got exactly what we could fit\r\n", sockRef, descP->sock, (long) len) ); - /* Res = {ok|select, Bin} */ + /* Res = {ok|select_read, Bin} */ res = recv_check_full_done(env, descP, sockRef, recvRef, bufP); } @@ -7009,7 +7006,7 @@ ERL_NIF_TERM recv_check_full(ErlNifEnv* env, * Increment and check rNumCnt. 
If it hasn't reached its max * (rNum); return {more, Bin}, * then more reads should be done, - * otherwise return {ok|select, Bin} + * otherwise return {ok|select_read, Bin} * depending on selectRead. */ static @@ -7029,7 +7026,7 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, descP->rNumCnt++; if (descP->rNumCnt >= descP->rNum) { - /* Ret = {ok|select, Bin} */ + /* Ret = {ok|select_read, Bin} */ ret = recv_check_full_done(env, descP, sockRef, recvRef, bufP); } else { @@ -7060,7 +7057,7 @@ ERL_NIF_TERM recv_check_full_maybe_done(ErlNifEnv* env, /* *** recv_check_full_done *** * * A successful recv and we filled the buffer. - * - return {ok|select, Bin} + * - return {ok|select_read, Bin} */ static ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, @@ -7068,54 +7065,13 @@ ERL_NIF_TERM recv_check_full_done(ErlNifEnv* env, ERL_NIF_TERM sockRef, ERL_NIF_TERM recvRef, ErlNifBinary *bufP) -{ - ERL_NIF_TERM data; - - if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) - /* {select, Bin} */ - return recv_check_select_done(env, descP, sockRef, recvRef, bufP); - - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_pkg, &descP->readPkgCnt, 1); - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_byte, &descP->readByteCnt, bufP->size); - - descP->rNumCnt = 0; - - descP->readPkgMaxCnt += bufP->size; - if (descP->readPkgMaxCnt > descP->readPkgMax) - descP->readPkgMax = descP->readPkgMaxCnt; - descP->readPkgMaxCnt = 0; - - recv_update_current_reader(env, descP, sockRef); - - /* This transfers "ownership" of the *allocated* binary to an - * erlang term (no need for an explicit free). 
- */ - data = MKBIN(env, bufP); - - /* Return {ok, Bin} */ - return esock_make_ok2(env, data); -} - - -/* *** recv_check_select_done *** - * - * Deliver the binary, and initiate select_read - * - return {select, Bin} - */ -static -ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef, - ErlNifBinary *bufP) { ERL_NIF_TERM res; - int sres; descP->rNumCnt = 0; + res = MKBIN(env, bufP); + ESOCK_CNT_INC(env, descP, sockRef, esock_atom_read_pkg, &descP->readPkgCnt, 1); ESOCK_CNT_INC(env, descP, sockRef, @@ -7126,40 +7082,21 @@ ERL_NIF_TERM recv_check_select_done(ErlNifEnv* env, descP->readPkgMax = descP->readPkgMaxCnt; descP->readPkgMaxCnt = 0; - recv_init_current_reader(env, descP, recvRef); - - /* SELECT for more data */ - - sres = esock_select_read(env, descP->sock, descP, NULL, - sockRef, recvRef); - if (sres < 0) { - /* Unlikely that any next reader will have better luck, - * but why not give them a shot - the queue will be cleared - */ + if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) { + /* {select_read, Bin} */ + res = MKT2(env, esock_atom_select_read, res); + return recv_check_select(env, descP, sockRef, recvRef, res); + } + else { recv_update_current_reader(env, descP, sockRef); - /* Res = error({select_read, SRes}) */ - res = enif_raise_exception(env, - MKT2(env, esock_atom_select_read, - MKI(env, sres))); - } else { - ERL_NIF_TERM data; /* This transfers "ownership" of the *allocated* binary to an * erlang term (no need for an explicit free). 
*/ - data = MKBIN(env, bufP); - - SSDBG( descP, - ("UNIX-ESSIO", - "recv_check_select_done(%T) {%d} -> [%ld] done\r\n", - sockRef, descP->sock, (long) bufP->size) ); - descP->readState |= ESOCK_STATE_SELECTED; - /* Res = {select, Bin} */ - res = MKT2(env, esock_atom_select, data); + /* Return {ok, Bin} */ + return esock_make_ok2(env, res); } - - return res; } @@ -7205,15 +7142,18 @@ ERL_NIF_TERM recv_check_fail(ErlNifEnv* env, "\r\n", sockRef, descP->sock, recvRef) ); if (COMPARE(recvRef, esock_atom_zero) == 0) { + + recv_update_current_reader(env, descP, sockRef); + /* Would block and zero time-out - this is a time-out * Res = 'timeout' */ res = esock_atom_timeout; } else { - descP->rNumCnt = 0; /* Res = 'select' */ - res = recv_check_select(env, descP, sockRef, recvRef); + res = recv_check_select(env, descP, sockRef, recvRef, + esock_atom_select); } } else { @@ -7292,24 +7232,18 @@ ERL_NIF_TERM recv_check_fail_econnreset(ErlNifEnv* env, /* *** recv_check_select *** * * The recv call should be retried - * - initiate and return 'select', keep the buffer binary + * - initiate select read and return Msg or an exception */ static ERL_NIF_TERM recv_check_select(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ERL_NIF_TERM recvRef) + ESockDescriptor* descP, + ERL_NIF_TERM sockRef, + ERL_NIF_TERM recvRef, + ERL_NIF_TERM msg) { int sres; - ERL_NIF_TERM res; - - recv_init_current_reader(env, descP, recvRef); - SSDBG( descP, - ("UNIX-ESSIO", - "recv_check_select(%T) {%d} -> SELECT for more" - "\r\n recvRef: %T" - "\r\n", sockRef, descP->sock, recvRef) ); + /* SELECT for more data */ if ((sres = esock_select_read(env, descP->sock, descP, NULL, sockRef, recvRef)) < 0) { @@ -7318,24 +7252,32 @@ ERL_NIF_TERM recv_check_select(ErlNifEnv* env, */ recv_update_current_reader(env, descP, sockRef); - /* Res = error({select_read, SRes}) */ - res = enif_raise_exception(env, - MKT2(env, esock_atom_select_read, - MKI(env, sres))); + /* Return error({select_read, SRes}) 
*/ + return enif_raise_exception(env, + MKT2(env, esock_atom_select_read, + MKI(env, sres))); } else { + + recv_init_current_reader(env, descP, recvRef); + + SSDBG( descP, + ("UNIX-ESSIO", + "recv_check_select(%T) {%d} -> SELECT for more" + "\r\n recvRef: %T" + "\r\n msg: %T" + "\r\n", sockRef, descP->sock, recvRef, msg) ); + descP->readState |= ESOCK_STATE_SELECTED; - /* Res = 'select' */ - res = esock_atom_select; + return msg; } - /* Keep the buffer binary */ - - return res; } /* *** recv_check_partial *** * * Handle a successful recv which only partly filled the specified buffer. + * + * Return {ok|timeout|select|select_read, Bin} */ static ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, @@ -7347,9 +7289,19 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, { ERL_NIF_TERM res; + /* Buffer not filled */ + descP->rNumCnt = 0; - /* Buffer not filled */ + ESOCK_CNT_INC(env, descP, sockRef, + esock_atom_read_byte, &descP->readByteCnt, bufP->size); + + descP->readPkgMaxCnt += bufP->size; + if (descP->readPkgMaxCnt > descP->readPkgMax) + descP->readPkgMax = descP->readPkgMaxCnt; + descP->readPkgMaxCnt = 0; + + res = MKBIN(env, bufP); if ((descP->type == SOCK_STREAM) && (len > 0)) { @@ -7361,6 +7313,8 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, /* Polling read */ + ESOCK_CNT_INC(env, descP, sockRef, + esock_atom_read_pkg, &descP->readPkgCnt, 1); SSDBG( descP, ("UNIX-ESSIO", "recv_check_partial(%T) {%d} -> [%ld] split buffer time-out" @@ -7368,9 +7322,9 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, "\r\n", sockRef, descP->sock, (long) len, recvRef) ); + recv_update_current_reader(env, descP, sockRef); /* Res = {timeout, Bin} */ - res = recv_check_partial_done(env, descP, sockRef, bufP, - esock_atom_timeout); + res = MKT2(env, esock_atom_timeout, res); } else { /* Incomplete data */ @@ -7385,13 +7339,17 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, /* Res = {select, Bin} */ - res = recv_check_select_done(env, descP, sockRef, recvRef, bufP); + res = 
MKT2(env, esock_atom_select, res); + res = recv_check_select(env, descP, sockRef, recvRef, res); } } else { /* No more data is needed */ + ESOCK_CNT_INC(env, descP, sockRef, + esock_atom_read_pkg, &descP->readPkgCnt, 1); + SSDBG( descP, ("UNIX-ESSIO", "recv_check_partial(%T) {%d} -> [%ld] split buffer" @@ -7400,13 +7358,14 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, recvRef) ); if (descP->selectRead && (COMPARE(recvRef, esock_atom_zero) != 0)) { - /* Res = {select, Bin} */ - res = recv_check_select_done(env, descP, sockRef, recvRef, bufP); + /* Res = {select_read, Bin} */ + res = MKT2(env, esock_atom_select_read, res); + res = recv_check_select(env, descP, sockRef, recvRef, res); } else { /* Res = {ok, Bin} */ - res = recv_check_partial_done(env, descP, sockRef, bufP, - esock_atom_ok); + recv_update_current_reader(env, descP, sockRef); + res = esock_make_ok2(env, res); } } @@ -7414,46 +7373,6 @@ ERL_NIF_TERM recv_check_partial(ErlNifEnv* env, } -/* *** recv_check_partial_done *** - * - * A successful but only partial recv, which fulfilled the required read. - */ -static -ERL_NIF_TERM recv_check_partial_done(ErlNifEnv* env, - ESockDescriptor* descP, - ERL_NIF_TERM sockRef, - ErlNifBinary* bufP, - ERL_NIF_TERM returnTag) -{ - ERL_NIF_TERM data; - - descP->rNumCnt = 0; - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_pkg, &descP->readPkgCnt, 1); - ESOCK_CNT_INC(env, descP, sockRef, - esock_atom_read_byte, &descP->readByteCnt, bufP->size); - - descP->readPkgMaxCnt += bufP->size; - if (descP->readPkgMaxCnt > descP->readPkgMax) - descP->readPkgMax = descP->readPkgMaxCnt; - descP->readPkgMaxCnt = 0; - - recv_update_current_reader(env, descP, sockRef); - - /* This transfers "ownership" of the *allocated* binary to an - * erlang term (no need for an explicit free). 
- */ - data = MKBIN(env, bufP); - - SSDBG( descP, - ("UNIX-ESSIO", "recv_check_partial_done(%T) {%d} -> [%ld] done\r\n", - sockRef, descP->sock, (long) bufP->size) ); - - /* {ReturnTag, Bin} */ - return MKT2(env, returnTag, data); -} - - /* *** recv_init_current_reader *** * * Initiate (maybe) the currentReader structure of the descriptor. diff --git a/lib/kernel/src/inet_epmd_socket.erl b/lib/kernel/src/inet_epmd_socket.erl index 7cc64a4a54eb..905bdd4b064e 100644 --- a/lib/kernel/src/inet_epmd_socket.erl +++ b/lib/kernel/src/inet_epmd_socket.erl @@ -401,9 +401,9 @@ input_handler_start(Socket, DistHandle) -> Front = [], Size = 0, Rear = [], - SelectHandle = undefined, + CSHandle = undefined, %% erlang:display({?FUNCTION_NAME, Socket, DistHandle}), - input_handler(IHP, Front, Size, Rear, SelectHandle) + input_handler(IHP, Front, Size, Rear, CSHandle) catch Class : Reason : Stacktrace when Class =:= error -> error_logger:error_report( @@ -414,17 +414,17 @@ input_handler_start(Socket, DistHandle) -> erlang:raise(Class, Reason, Stacktrace) end. 
-input_handler(IHP, Front, Size, Rear, SelectHandle) - when IHP#ihp.watermark > Size, SelectHandle =:= undefined -> +input_handler(IHP, Front, Size, Rear, CSHandle) + when IHP#ihp.watermark > Size, CSHandle =:= undefined -> %% erlang:display({?FUNCTION_NAME, ?LINE, Size}), - input_handler_recv(IHP, Front, Size, Rear, SelectHandle); -input_handler(IHP, [] = Front, Size, [] = Rear, SelectHandle) -> + input_handler_recv(IHP, Front, Size, Rear, CSHandle); +input_handler(IHP, [] = Front, Size, [] = Rear, CSHandle) -> 0 = Size, % Assert - input_handler_recv(IHP, Front, Size, Rear, SelectHandle); -input_handler(IHP, [] = _Front, Size, Rear, SelectHandle) -> + input_handler_recv(IHP, Front, Size, Rear, CSHandle); +input_handler(IHP, [] = _Front, Size, Rear, CSHandle) -> %% erlang:display({?FUNCTION_NAME, ?LINE, Size}), - input_handler(IHP, lists:reverse(Rear), Size, [], SelectHandle); -input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, SelectHandle) -> + input_handler(IHP, lists:reverse(Rear), Size, [], CSHandle); +input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, CSHandle) -> case Bin of < if byte_size(Rest) > 0 -> input_handler( - IHP, [Rest | Front], Size_1, Rear, SelectHandle); + IHP, [Rest | Front], Size_1, Rear, CSHandle); true -> % byte_size(Rest) == 0 - input_handler(IHP, Front, Size_1, Rear, SelectHandle) + input_handler(IHP, Front, Size_1, Rear, CSHandle) end; < if byte_size(Rest) > 0 -> input_handler( - IHP, [Rest | Front], Size_1, Rear, SelectHandle); + IHP, [Rest | Front], Size_1, Rear, CSHandle); true -> % byte_size(Rest) == 0 - input_handler(IHP, Front, Size_1, Rear, SelectHandle) + input_handler(IHP, Front, Size_1, Rear, CSHandle) end; <> -> %% erlang:display({?FUNCTION_NAME, ?LINE, Size, PacketSize}), @@ -473,9 +473,9 @@ input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, SelectHandle) -> if byte_size(Rest) > 0 -> input_handler( - IHP, [Rest | Front], Size_1, Rear, SelectHandle); + IHP, [Rest | Front], Size_1, Rear, CSHandle); true -> 
% byte_size(Rest) == 0 - input_handler(IHP, Front, Size_1, Rear, SelectHandle) + input_handler(IHP, Front, Size_1, Rear, CSHandle) end; <> -> %% erlang:display({?FUNCTION_NAME, ?LINE, Size, PacketSize}), @@ -485,7 +485,7 @@ input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, SelectHandle) -> 0 > Size_1 -> %% Incomplete packet in buffer input_handler_recv( - IHP, Bin_Front, Size, Rear, SelectHandle); + IHP, Bin_Front, Size, Rear, CSHandle); Size_1 > 0-> %% Complete packet is buffered, and some more PacketStartSize = byte_size(PacketStart), @@ -497,12 +497,12 @@ input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, SelectHandle) -> collect_iov( IOV, Front, PacketSize - PacketStartSize, Rear), put_data(IHP#ihp.dist_handle, Packet), - input_handler(IHP, Front_1, Size_1, Rear_1, SelectHandle); + input_handler(IHP, Front_1, Size_1, Rear_1, CSHandle); true -> % Size_1 == 0 %% Exactly a packet is buffered Packet = [PacketStart | Front] ++ lists:reverse(Rear), put_data(IHP#ihp.dist_handle, Packet), - input_handler(IHP, [], 0, [], SelectHandle) + input_handler(IHP, [], 0, [], CSHandle) end; <> -> %% erlang:display({?FUNCTION_NAME, ?LINE, Size, byte_size(First)}), @@ -511,56 +511,64 @@ input_handler(IHP, [Bin | Front] = Bin_Front, Size, Rear, SelectHandle) -> 4 > Size -> %% Incomplete packet header in buffer input_handler_recv( - IHP, Bin_Front, Size, Rear, SelectHandle); + IHP, Bin_Front, Size, Rear, CSHandle); Size > 4 -> %% Complete packet header is buffered, and some more {Hdr, Front_1, Rear_1} = collect_bin(First, Front, 4 - byte_size(First), Rear), input_handler( - IHP, [Hdr | Front_1], Size, Rear_1, SelectHandle); + IHP, [Hdr | Front_1], Size, Rear_1, CSHandle); true -> % Size == 4 %% Exacty a packet header is buffered Hdr = list_to_binary(Bin_Front ++ lists:reverse(Rear)), - input_handler(IHP, [Hdr], Size, [], SelectHandle) + input_handler(IHP, [Hdr], Size, [], CSHandle) end end. 
input_handler_recv(IHP, Front, Size, Rear, undefined) -> - case socket:recv(IHP#ihp.socket, 0, [], nowait) of + CSHandle = make_ref(), + case socket:recv(IHP#ihp.socket, 0, [], CSHandle) of + {select_read, {{select_info,_,_}, Data}} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, + %% select, {CSHandle,byte_size(Data)}}), + Size_1 = byte_size(Data) + Size, + Rear_1 = [Data | Rear], + input_handler(IHP, Front, Size_1, Rear_1, CSHandle); + {select, {select_info,_,_}} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, select, CSHandle}), + input_handler(IHP, Front, Size, Rear, CSHandle); + {completion, {completion_info,_,_}} -> + %% erlang:display({?FUNCTION_NAME, ?LINE, select, CSHandle}), + input_handler(IHP, Front, Size, Rear, CSHandle); + Result -> + input_handler_common(IHP, Front, Size, Rear, Result) + end; +input_handler_recv(IHP, Front, Size, Rear, CSHandle) -> + input_handler_wait(IHP, Front, Size, Rear, CSHandle). + +input_handler_common(IHP, Front, Size, Rear, Result) -> + case Result of {ok, Data} -> %% erlang:display({?FUNCTION_NAME, ?LINE, '<<', byte_size(Data)}), Size_1 = byte_size(Data) + Size, Rear_1 = [Data | Rear], input_handler(IHP, Front, Size_1, Rear_1, undefined); - {select, {{select_info, _, SelectHandle}, Data}} -> - %% erlang:display({?FUNCTION_NAME, ?LINE, - %% select, {SelectHandle,byte_size(Data)}}), - Size_1 = byte_size(Data) + Size, - Rear_1 = [Data | Rear], - input_handler(IHP, Front, Size_1, Rear_1, SelectHandle); - {select, {select_info, _, SelectHandle}} -> - %% erlang:display({?FUNCTION_NAME, ?LINE, select, SelectHandle}), - input_handler(IHP, Front, Size, Rear, SelectHandle); - {error, {Reason, _Data}} -> - %% erlang:display({?FUNCTION_NAME, ?LINE, - %% error, {Reason, byte_size(_Data)}}), - exit(Reason); {error, Reason} -> %% erlang:display({?FUNCTION_NAME, ?LINE, error, Reason}), exit(Reason) - end; -input_handler_recv(IHP, Front, Size, Rear, SelectHandle) -> - input_handler_wait(IHP, Front, Size, Rear, SelectHandle). + end. 
-input_handler_wait(IHP, Front, Size, Rear, SelectHandle) -> - %% erlang:display({?FUNCTION_NAME, ?LINE, SelectHandle}), +input_handler_wait(IHP, Front, Size, Rear, CSHandle) -> + %% erlang:display({?FUNCTION_NAME, ?LINE, CSHandle}), Socket = IHP#ihp.socket, receive - {'$socket', Socket, select, SelectHandle} -> + {'$socket', Socket, select, CSHandle} -> input_handler_recv(IHP, Front, Size, Rear, undefined); + {'$socket', Socket, completion, {CSHandle, Result}} -> + input_handler_common(IHP, Front, Size, Rear, Result); _Ignore -> %% erlang:display({?FUNCTION_NAME, ?LINE, _Ignore}), - input_handler_wait(IHP, Front, Size, Rear, SelectHandle) + input_handler_wait(IHP, Front, Size, Rear, CSHandle) end. collect_bin(Collected, [Bin | Front], N, Rear) -> diff --git a/lib/kernel/src/socket.erl b/lib/kernel/src/socket.erl index 2192dc05623a..63c75ec3e28e 100644 --- a/lib/kernel/src/socket.erl +++ b/lib/kernel/src/socket.erl @@ -5084,7 +5084,9 @@ recv_zero(SockRef, Length, Flags, Buf) -> {more, Bin} -> % Type == stream, Length == 0, default buffer filled recv_zero(SockRef, Length, Flags, [Bin | Buf]); {ok, Bin} -> % All requested data - {ok, condense_buffer([Bin | Buf])}; + {ok, condense_buffer(Bin, Buf)}; + {error, Reason} -> + recv_error(Buf, Reason); timeout when Buf =:= [] -> {error, timeout}; timeout -> @@ -5093,9 +5095,7 @@ recv_zero(SockRef, Length, Flags, Buf) -> {ok, condense_buffer(Buf)}; {timeout, Bin} -> %% Stream socket with Length > 0 and not all data - {error, {timeout, condense_buffer([Bin | Buf])}}; - {error, Reason} -> - recv_error(Buf, Reason) + {error, {timeout, condense_buffer(Bin, Buf)}} end. 
recv_nowait(SockRef, Length, Flags, Handle) -> @@ -5104,19 +5104,21 @@ recv_nowait(SockRef, Length, Flags, Handle) -> recv_zero(SockRef, Length, Flags, [Bin]); {ok, _} = OK -> % All requested data OK; - {select, Bin} -> % All data, new recv operation in progress - {select, {?SELECT_INFO(recv, Handle), Bin}}; - select -> - %% The caller will get a select message when there - %% might be data to read - {select, ?SELECT_INFO(recv, Handle)}; + {error, _} = Error -> + Error; completion -> %% The caller will get a completion message (with the %% result) when the data arrives. *No* further action %% is required. {completion, ?COMPLETION_INFO(recv, Handle)}; - {error, _} = Error -> - Error + {Select, Bin} % New recv operation in progress + when Select =:= select; % Incomplete data + Select =:= select_read -> % Final data + {Select, {?SELECT_INFO(recv, Handle), Bin}}; + select -> %% No data + %% The caller will get a select message when there + %% might be data to read + {select, ?SELECT_INFO(recv, Handle)} end. %% prim_socket:recv(_, AskedFor, _, zero|Handle) @@ -5142,49 +5144,19 @@ recv_nowait(SockRef, Length, Flags, Handle) -> %% else read error -> {error, _} %% end -%% Buf is [], for 'select' platforms; it is only used -%% for 'completion' platforms. 
-%% recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> Handle = make_ref(), case prim_socket:recv(SockRef, Length, Flags, Handle) of {more, Bin} -> % Type = stream, Length = 0, default buffer filled 0 = Length, - [] = Buf, recv_zero(SockRef, Length, Flags, [Bin]); %% - {ok, _Bin} = OK -> %% All data - [] = Buf, + {ok, _Bin} = OK when Buf =:= [] -> %% All data OK; - - %% - {select, Bin} -> %% All data, new recv operation in progress - [] = Buf, - _ = cancel(SockRef, recv, Handle), - {ok, Bin}; - %% - select -> - [] = Buf, - %% - %% There is nothing just now, but we will be notified - %% with a select message when there is something to recv - Timeout = timeout(Deadline), - receive - ?socket_msg(?socket(SockRef), select, Handle) -> - if - 0 < Timeout -> - %% Retry - recv_deadline( - SockRef, Length, Flags, Deadline, Buf); - true -> - {error, timeout} - end; - ?socket_msg(_Socket, abort, {Handle, Reason}) -> - {error, Reason} - after Timeout -> - _ = cancel(SockRef, recv, Handle), - {error, timeout} - end; + {ok, Bin} -> + {ok, condense_buffer(Bin, Buf)}; + {error, _Reason} = ERROR -> + ERROR; %% completion -> %% There is nothing just now, but we will be notified when the @@ -5194,11 +5166,11 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> ?socket_msg(?socket(SockRef), completion, {Handle, {ok, Bin}}) when Length =:= 0 -> - {ok, condense_buffer([Bin | Buf])}; + {ok, condense_buffer(Bin, Buf)}; ?socket_msg(?socket(SockRef), completion, {Handle, {ok, Bin}}) when Length =:= byte_size(Bin) -> - {ok, condense_buffer([Bin | Buf])}; + {ok, condense_buffer(Bin, Buf)}; ?socket_msg(?socket(SockRef), completion, {Handle, {ok, Bin}}) -> if @@ -5208,7 +5180,7 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> SockRef, Length - byte_size(Bin), Flags, Deadline, [Bin | Buf]); true -> - recv_error([Bin | Buf], timeout) + recv_error(Bin, Buf, timeout) end; ?socket_msg(?socket(SockRef), completion, {Handle, {error, Reason}}) -> @@ -5220,8 +5192,41 @@ 
recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> recv_error(Buf, timeout) end; %% - {error, Reason} -> - recv_error(Buf, Reason) + {select_read, Bin} -> %% All data, new recv operation in progress + _ = cancel(SockRef, recv, Handle), + {ok, condense_buffer(Bin, Buf)}; + %% + Select %% select | {select, Bin} %% No data or incomplete + when Select =:= select; + tuple_size(Select) =:= 2, element(1, Select) =:= select -> + {Length_1, Buf_1} = + if + Select =:= select -> + {Length, Buf}; + true -> + Bin = element(2, Select), + {Length - byte_size(Bin), [Bin | Buf]} + end, + %% + %% There is nothing just now, but we will be notified + %% with a select message when there is something to recv + Timeout = timeout(Deadline), + receive + ?socket_msg(?socket(SockRef), select, Handle) -> + if + 0 < Timeout -> + %% Retry + recv_deadline( + SockRef, Length_1, Flags, Deadline, Buf_1); + true -> + recv_error(Buf_1, timeout) + end; + ?socket_msg(_Socket, abort, {Handle, Reason}) -> + recv_error(Buf_1, Reason) + after Timeout -> + _ = cancel(SockRef, recv, Handle), + recv_error(Buf_1, timeout) + end end. recv_error([], Reason) -> @@ -5229,13 +5234,21 @@ recv_error([], Reason) -> recv_error(Buf, Reason) when is_list(Buf) -> {error, {Reason, condense_buffer(Buf)}}. +recv_error(Bin, Buf, Reason) when is_list(Buf) -> + {error, {Reason, condense_buffer(Bin, Buf)}}. + %% Condense buffer into a Binary --compile({inline, [condense_buffer/1]}). +%% +-compile({inline, [condense_buffer/1, condense_buffer/2]}). condense_buffer([]) -> <<>>; condense_buffer([Bin]) when is_binary(Bin) -> Bin; condense_buffer(Buffer) -> iolist_to_binary(lists:reverse(Buffer)). +condense_buffer(Bin, []) -> Bin; +condense_buffer(Bin, Buffer) when is_binary(Bin) -> + iolist_to_binary(lists:reverse(Buffer, [Bin])). 
+ %% --------------------------------------------------------------------------- %% %% With recvfrom we get messages, which means that regardless of how @@ -5429,6 +5442,8 @@ recvfrom(Socket, BufSz, Flags, Timeout) -> recvfrom_nowait(SockRef, BufSz, Handle, Flags) -> case prim_socket:recvfrom(SockRef, BufSz, Flags, Handle) of + {select_read = Tag, Source_Data} -> + {Tag, {?SELECT_INFO(recvfrom, Handle), Source_Data}}; select = Tag -> {Tag, ?SELECT_INFO(recvfrom, Handle)}; completion = Tag -> @@ -5440,6 +5455,10 @@ recvfrom_nowait(SockRef, BufSz, Handle, Flags) -> recvfrom_deadline(SockRef, BufSz, Flags, Deadline) -> Handle = make_ref(), case prim_socket:recvfrom(SockRef, BufSz, Flags, Handle) of + {select_read, Source_Data} -> + _ = cancel(SockRef, recvfrom, Handle), + {ok, Source_Data}; + select -> %% There is nothing just now, but we will be notified when there %% is something to read (a select message). @@ -5677,6 +5696,8 @@ recvmsg(Socket, BufSz, CtrlSz, Flags, Timeout) -> recvmsg_nowait(SockRef, BufSz, CtrlSz, Flags, Handle) -> case prim_socket:recvmsg(SockRef, BufSz, CtrlSz, Flags, Handle) of + {select_read = Tag, Msg} -> + {Tag, {?SELECT_INFO(recvmsg, Handle), Msg}}; select = Tag -> {Tag, ?SELECT_INFO(recvmsg, Handle)}; completion = Tag -> @@ -5688,6 +5709,10 @@ recvmsg_nowait(SockRef, BufSz, CtrlSz, Flags, Handle) -> recvmsg_deadline(SockRef, BufSz, CtrlSz, Flags, Deadline) -> Handle = make_ref(), case prim_socket:recvmsg(SockRef, BufSz, CtrlSz, Flags, Handle) of + {select_read, Msg} -> + _ = cancel(SockRef, recvmsg, Handle), + {ok, Msg}; + select = Tag -> %% There is nothing just now, but we will be notified when there %% is something to read (a select message). 
From 33a1fa05add633dd21fedca77fb120b3fbfbdb4d Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Fri, 24 Jan 2025 15:55:26 +0100 Subject: [PATCH 5/6] Document option `{otp,select_read}` --- lib/kernel/src/socket.erl | 228 +++++++++++++++++++++++++------------- 1 file changed, 150 insertions(+), 78 deletions(-) diff --git a/lib/kernel/src/socket.erl b/lib/kernel/src/socket.erl index 63c75ec3e28e..472505ecb283 100644 --- a/lib/kernel/src/socket.erl +++ b/lib/kernel/src/socket.erl @@ -27,8 +27,10 @@ create, delete and manipulate the sockets as well as sending and receiving data on them. The intent is that it shall be as "close as possible" to the OS level socket -interface. The only significant addition is that some of the functions, e.g. -`recv/3`, have a time-out argument. +interface. The only significant additions are that some of the functions, e.g. +`recv/3`, have a time-out argument, and that [`recv/*`](`recv/1`) for a +[`stream`](`t:type/0`) socket iterates until the requested amount of data +has been received. [](){: #asynchronous-calls } @@ -50,29 +52,48 @@ interface. The only significant addition is that some of the functions, e.g. > > #### The `completion` and `select` Return Values > -> For instance, if calling `recv/3` like this; -> [`recv(Socket, 0, nowait)`](#recv-nowait), when there is -> no data available for reading, it will return one of: +> For instance, the call [`recv(Socket, 0, nowait)`](#recv-nowait), +> when there is no data available for reading, will, +> depending on the operating system, return one of: > > - `{completion, `[`CompletionInfo`](`t:completion_info/0`)`}` > - `{select, `[`SelectInfo`](`t:select_info/0`)`}` > -> `CompletionInfo` contains a [CompletionHandle](`t:completion_handle/0`) and -> `SelectInfo` contains a [SelectHandle](`t:select_handle/0`). -> Both are types are aliases to `t:reference/0`. 
+> Where `CompletionInfo` is +> `{completion_info, _, `[`CompletionHandle`](`t:completion_handle/0`)`}` +> and `SelectInfo` is +> `{select_info, _, `[`SelectHandle`](`t:select_handle/0`)`}`. +> Both the `CompletionHandle` and the `SelectHandle` +> are of type `t:reference/0`. +> > When the operation can continue, a `completion` message containing > the `CompletionHandle` or a `select` message containing > the `SelectHandle` is sent to the calling process. > -> On `select` systems some functions may also return: +> On `select` systems, [`recv/2,3,4`](`recv/2`) may also return: +> +> - `{select, {`[`SelectInfo`](`t:select_info/0`)`, Data}}` +> +> This may happen for sockets of type [`stream`](`t:type/0`) +> when `Length > 1` since the OS may split a data stream at any point +> and deliver just the first part of the requested data. +> For the next [`recv/2,3,4`](`recv/2`) call, the `Length` to receive +> will probably have to be adjusted due to the already delivered data +> in this return value. > -> - `{select, {`[`SelectInfo`](`t:select_info/0`)`, _}` +> On `select` systems, when the `{otp, select_read}` option is `true`, +> the asynchronous [`recv/3,4`](#recv-nowait), +> [`recvfrom/3,4`](#recvfrom-nowait), and +> [`recvmsg/3,4,5`](#recvmsg-nowait) functions may also return: > -> This may happen for sockets of type [`stream`](`t:type/0`) where -> the stream handling can split the data stream at any point. -> See the respective function's type specification's return type. +> - `{select_read, {`[`SelectInfo`](`t:select_info/0`)`, Data}}` > -> #### The `completion` and `select` Messages +> This indicates that the receive operation was completed; +> all requested data has been delivered, and that the calling process +> will get a `select` message when there is data available +> for the next receive operation. +> +> #### The `completion` and `select` Messages [](){: #async-messages } > > The `completion` message has the format: > @@ -85,11 +106,11 @@ interface.
The only significant addition is that some of the functions, e.g. > `[`SelectHandle`](`t:select_handle/0`)`}` > > When a `completion` message is received (which contains the _result_ -> of the operation), it means that the operation has been _completed_ with -> `CompletionStatus :: ok | {error, Reason}`. -> See the respective function's documentation for possible values -> of `Reason`, which are the same `{error, Reason}` values -> that can be returned by the function itself. +> of the operation), it means that the operation has been _completed_ and +> `CompletionStatus` is the return value for the operation, +> which is what the function that initiated the operation +> could have returned, with the `nowait` argument, +> except for the `completion` and `select` return values. > > When a `select` message is received, it only means that the operation > _may now continue_, by retrying the operation (which may return @@ -102,15 +123,19 @@ interface. The only significant addition is that some of the functions, e.g. > On `select` systems, all other processes are _locked out_ until the > current process has completed the operation as in a continuation > call has returned a value indicating success or failure -> (not a `select` return). Other processes are queued and get -> a `select` return which makes them wait for their turn. +> (not a `select` or `select_read` return). Other processes are queued +> and get a `select` return which makes them wait for their turn. +> +> Note that receiving data from parallel processes is only suitable +> for some protocols. For a [`stream`](`t:type/0`) socket +> it is in general a recipe for disaster. > -> #### Canceling an operation +> #### Cancelling an operation > > An operation that is in progress (not completed) may be canceled > using `cancel/2` both on `completion` and `select` systems. 
> -> Canceling an operation ensures that there is no `completion`, +> Cancelling an operation ensures that there is no `completion`, > `select`, nor `abort` message in the inbox after the `cancel/2` call. > > #### Using a `Handle` @@ -126,22 +151,23 @@ interface. The only significant addition is that some of the functions, e.g. > to only scan the messages that arrive after the `t:reference/0` > is created. If the message queue is large this is a big optimization. > -> The `t:reference/0` has to be unique for the call. +> It is not possible to have more than one operation in progress +> with the same `t:reference/0`. > > #### Repeating an Operation on a `select` Systems > > On`select` systems, if a call would be repeated _before_ the `select` -> message has been received it replaces the call in progress: +> message has been received it replaces the operation in progress: > > ```erlang -> {select, {select_info, Handle}} = socket:accept(LSock, nowait), -> {error, timeout} = socket:accept(LSock, 500), +> {select, {select_info, Handle}} = socket:accept(LSock, nowait), +> {ok, Socket} = socket:accept(LSock, 1000), > : > ``` > Above, `Handle` is _no longer_ valid once the second `accept/2`, call > has been made (the first call is automatically canceled). -> After the second `accept/2` call returns `{error, timeout}`, -> the accept operation has completed. +> After the second `accept/2` call returns, the accept operation +> has completed. > > Note that there is a race here; there is no way to know if the call > is repeated _before_ the `select` message is sent since it _may_ @@ -887,6 +913,19 @@ hence above all OS protocol levels. See [sendmsg](`sendmsg/2`) and also the `ctrl` field of the `t:msg_send/0` type. +- **`select_read`** - `t:boolean/0` \- + On `select` implementations, see [Asynchronous Calls](#asynchronous-calls), + automatically activate select after a completed read. 
+ + Instead of `{ok, Data}` the receive operation returns + [`{select_read, {SelectInfo, Data}}`](`t:select_info/0`), + and the calling process can wait for a [`select` message](#async-messages) + containing `SelectInfo` when there is data available again. + + Setting this option locks out other processes from receiving any data + since the current process continues its operation, so it effectively + disables receive operation queuing. + - **`fd`** - `t:integer/0` \- Only valid to _get_. The OS protocol levels' socket descriptor. Functions [`open/1,2`](`open/1`) can be used to create a socket according to this module from an existing OS socket descriptor. @@ -904,6 +943,7 @@ internal use only. rcvbuf | % sndbuf | rcvctrlbuf | sndctrlbuf | + select_read | meta | use_registry | fd | @@ -1093,14 +1133,14 @@ _Options for protocol level_ [_`ip`_:](`t:level/0`) [control message](`t:cmsg_recv/0`) `#{level := ip, type := recverr}`. A working strategy should be to first poll the error queue using - [`recvmsg/2,3,4` ](`m:socket#recvmsg-timeout`)with `Timeout =:= 0` and `Flags` + [`recvmsg/2,3,4` ](#recvmsg-timeout)with `Timeout =:= 0` and `Flags` containing `errqueue` (ignore the return value `{error, timeout}`) before reading the actual data to ensure that the error queue gets cleared. And read the data using one of the `nowait |` [`select_handle()` ](`t:select_handle/0`)recv functions: - [`recv/3,4`](`m:socket#recv-nowait`), - [`recvfrom/3,4`](`m:socket#recvfrom-nowait`) or - [`recvmsg/3,4,5`](`m:socket#recvmsg-nowait`). Otherwise you might accidentally + [`recv/3,4`](#recv-nowait), + [`recvfrom/3,4`](#recvfrom-nowait) or + [`recvmsg/3,4,5`](#recvmsg-nowait). Otherwise you might accidentally cause a busy loop in and out of 'select' for the socket. - **`{ip, recvif}`** - `Value = boolean()` @@ -1737,7 +1777,16 @@ contained in the returned `t:completion_info/0`. [Select operation](#asynchronous-calls) info. 
Returned by an operation that requires the caller to wait for a -[select message](`m:socket#asynchronous-calls`) containing the +[`select` message](#async-messages) containing the +[`SelectHandle`](`t:select_handle/0`). + +On `select` systems, if the option +[`{otp, select_read}`](`t:otp_socket_option/0`) is set, +[`{select_read, {select_info(), _}}`](`t:select_info/0`) +is returned instead of `{ok, _}` to indicate that a new +asynchronous receive operation has been initiated +and the caller should wait for a +[`select` message](#async-messages) containing the [`SelectHandle`](`t:select_handle/0`). """. -type select_info() :: @@ -1750,7 +1799,7 @@ Returned by an operation that requires the caller to wait for a [Completion operation](#asynchronous-calls) info. Returned by an operation that requires the caller to wait for a -[completion message](`m:socket#asynchronous-calls`) containing the +[`completion` message](#async-messages) containing the [`CompletionHandle`](`t:completion_handle/0`) _and_ the result of the operation; the `CompletionStatus`. """. @@ -2945,7 +2994,7 @@ See the note [Asynchronous Calls](#asynchronous-calls) at the start of this module reference manual page. On `select` systems this function finalizes a connection setup -on a socket, after receiving a `select` message +on a socket, after receiving a [`select` message](#async-messages) `{'$socket',` [`Socket`](`t:socket/0`)`, select, `[`SelectHandle`](`t:select_handle/0`)`}`, and returns whether the connection setup was successful or not. @@ -3021,7 +3070,7 @@ if the connection hasn't been established within `Timeout` milliseconds. > > The safe play is to close the socket and start over. > -> Also note that this applies to canceling a `nowait` connect call +> Also note that this applies to cancelling a `nowait` connect call > described below. [](){: #connect-nowait } @@ -3038,10 +3087,10 @@ start an [asynchronous call](#asynchronous-calls) like for `nowait`. 
See the note [Asynchronous Calls](#asynchronous-calls) at the start of this module reference manual page. -After receiving a `select` message call `connect/1` +After receiving a [`select` message](#async-messages); call `connect/1` to complete the operation. -If canceling the operation with `cancel/2` see the note above +If cancelling the operation with `cancel/2` see the note above about [connection time-out](#connect-timeout). """. -spec connect(Socket, SockAddr, Timeout :: 'infinity') -> @@ -3851,8 +3900,9 @@ an [asynchronous call](#asynchronous-calls) like for `nowait`. See the note [Asynchronous Calls](#asynchronous-calls) at the start of this module reference manual page. -After receiving a `select` message call [`sendto/3,4`](`sendto/3`) -with `SelectInfo` as the `Cont` argument, to complete the operation. +After receiving a [`select` message](#async-messages); +call [`sendto/3,4`](`sendto/3`) with `SelectInfo` as the `Cont` argument, +to complete the operation. """. -spec sendto(Socket, Data, Dest, Flags, Timeout :: 'infinity') -> 'ok' | @@ -4078,8 +4128,9 @@ an [asynchronous call](#asynchronous-calls) like for `nowait`. See the note [Asynchronous Calls](#asynchronous-calls) at the start of this module reference manual page. -After receiving a `select` message call [`sendmsg/3,4`](`sendmsg/3`) -with `SelectInfo` as the `Cont` argument, to complete the operation. +After receiving a [`select` message](#async-messages); +call [`sendmsg/3,4`](`sendmsg/3`) with `SelectInfo` as the `Cont` argument, +to complete the operation. [](){: #sendmsg-cont } @@ -4635,8 +4686,10 @@ an [asynchronous call](#asynchronous-calls) like for `nowait`. See the note [Asynchronous Calls](#asynchronous-calls) at the start of this module reference manual page. -After receiving a `select` message call [`sendfile/2,3,4,5`](`sendfile/2`) -with `SelectInfo` as the `Continuation` argument, to complete the operation. 
+After receiving a [`select` message](#async-messages); +call [`sendfile/2,3,4,5`](`sendfile/2`) +with `SelectInfo` as the `Continuation` argument, +to complete the operation. [](){: #sendfile-cont } @@ -4927,12 +4980,23 @@ recv(Socket, Length, TimeoutOrHandle) -> -doc """ Receive data on a connected socket. -The argument `Length` specifies how many bytes to receive, -with the special case `0` meaning "all available". +The argument `Length` specifies the size of the receive buffer. +Packet oriented sockets truncate the packet if the size is too small. -When `Length` is `0`, a default buffer size is used, which can be set by +If `Length == 0`; a default buffer size is used, which can be set by [`socket:setopt(Socket, {otp,recvbuf}, BufSz)`](`setopt/3`). +For a socket of [type `stream`](`t:type/0`), when a `Timeout` argument +is used, the operation iterates until `Length` bytes have been received, +or the operation times out. If `Length == 0` all readily available +data is returned. + +On a `select` system, when the default receive buffer size option +[`{otp,rcvbuf}`](`t:otp_socket_option/0`) special value `{N,BufSize}` +is used, `N` limits how many `BufSize` buffers may be received +in a tight loop before the receive operation returns. The option value +`{1,BufSize}` is equivalent to just specifying a size value `BufSize`. + The message `Flags` may be symbolic `t:msg_flag/0`s and/or `t:integer/0`s as in the platform's appropriate header files. The values of all symbolic flags and integers are or:ed together. @@ -4954,8 +5018,8 @@ or if the OS reports an error for the operation. If the `Timeout` argument is a time-out value (`t:non_neg_integer/0`); return `{error, timeout}` if no data has arrived after `Timeout` milliseconds, -or `{error, {timeout, Data}}` if some but not enough data -has been received on a socket of [type `stream`](`t:type/0`).
+or `{error, {timeout, Data}}` if some but not enough data has been received +(on a socket of [type `stream`](`t:type/0`) with `Length > 0`). `Timeout = 0` only polls the OS receive call and doesn't engage the Asynchronous Calls mechanisms. If no data @@ -4978,10 +5042,18 @@ See the note [Asynchronous Calls](#asynchronous-calls) at the start of this module reference manual page. On `select` systems, for a socket of type [`stream`](`t:type/0`), -if `Length > 0` and there isn't enough data available, this function -will return [`{select, {SelectInfo, Data}}`](`t:select_info/0`) +if `Length > 0` and there is some but not enough data available, +this function will return [`{select, {SelectInfo, Data}}`](`t:select_info/0`) with partial `Data`. A repeated call to complete the operation will probably need an updated `Length` argument. + +On `select` systems, if the option +[`{otp, select_read}`](`t:otp_socket_option/0`) is set, +[`{select_read, {SelectInfo, Data}}`](`t:select_info/0`) +is returned instead of `{ok, Data}` and a new asynchronous +receive operation has been initiated, which can be seen +as an automatic [nowait](#recv-nowait) call whenever +a receive operation is completed. """. -spec recv(Socket, Length, Flags, Timeout :: 'infinity') -> {'ok', Data} | @@ -5007,9 +5079,9 @@ will probably need an updated `Length` argument. 
{'ok', Data} | {'select', SelectInfo} | {'select', {SelectInfo, Data}} | + {'select_read', {SelectInfo, Data}} | {'completion', CompletionInfo} | - {'error', Reason} | - {'error', {Reason, Data}} when + {'error', Reason} when Socket :: socket(), Length :: non_neg_integer(), Flags :: [msg_flag() | integer()], @@ -5086,7 +5158,7 @@ recv_zero(SockRef, Length, Flags, Buf) -> {ok, Bin} -> % All requested data {ok, condense_buffer(Bin, Buf)}; {error, Reason} -> - recv_error(Buf, Reason); + recv_error(Reason, Buf); timeout when Buf =:= [] -> {error, timeout}; timeout -> @@ -5095,7 +5167,7 @@ recv_zero(SockRef, Length, Flags, Buf) -> {ok, condense_buffer(Buf)}; {timeout, Bin} -> %% Stream socket with Length > 0 and not all data - {error, {timeout, condense_buffer(Bin, Buf)}} + recv_error(timeout, [Bin | Buf]) end. recv_nowait(SockRef, Length, Flags, Handle) -> @@ -5155,8 +5227,8 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> OK; {ok, Bin} -> {ok, condense_buffer(Bin, Buf)}; - {error, _Reason} = ERROR -> - ERROR; + {error, Reason} -> + recv_error(Reason, Buf); %% completion -> %% There is nothing just now, but we will be notified when the @@ -5180,16 +5252,16 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> SockRef, Length - byte_size(Bin), Flags, Deadline, [Bin | Buf]); true -> - recv_error(Bin, Buf, timeout) + recv_error(timeout, [Bin | Buf]) end; ?socket_msg(?socket(SockRef), completion, {Handle, {error, Reason}}) -> - recv_error(Buf, Reason); + recv_error(Reason, Buf); ?socket_msg(_Socket, abort, {Handle, Reason}) -> {error, Reason} after Timeout -> _ = cancel(SockRef, recv, Handle), - recv_error(Buf, timeout) + recv_error(timeout, Buf) end; %% {select_read, Bin} -> %% All data, new recv operation in progress @@ -5219,24 +5291,21 @@ recv_deadline(SockRef, Length, Flags, Deadline, Buf) -> recv_deadline( SockRef, Length_1, Flags, Deadline, Buf_1); true -> - recv_error(Buf_1, timeout) + recv_error(timeout, Buf_1) end; ?socket_msg(_Socket, abort, 
{Handle, Reason}) -> - recv_error(Buf_1, Reason) + recv_error(Reason, Buf_1) after Timeout -> _ = cancel(SockRef, recv, Handle), - recv_error(Buf_1, timeout) + recv_error(timeout, Buf_1) end end. -recv_error([], Reason) -> +recv_error(Reason, []) -> {error, Reason}; -recv_error(Buf, Reason) when is_list(Buf) -> +recv_error(Reason, Buf) when is_list(Buf) -> {error, {Reason, condense_buffer(Buf)}}. -recv_error(Bin, Buf, Reason) when is_list(Buf) -> - {error, {Reason, condense_buffer(Bin, Buf)}}. - %% Condense buffer into a Binary %% -compile({inline, [condense_buffer/1, condense_buffer/2]}). @@ -5301,10 +5370,10 @@ With arguments `BufSz` and `Flags`; equivalent to [`recvfrom(Socket, BufSz, Flags, infinity)`](`recvfrom/4`). With arguments `BufSz` and `TimeoutOrHandle`; equivalent to -[`recv(Socket, BufSz, [], TimeoutOrHandle)`](`recvfrom/4`). +[`recvfrom(Socket, BufSz, [], TimeoutOrHandle)`](`recvfrom/4`). With arguments `Flags` and `TimeoutOrHandle`; equivalent to -[`recv(Socket, 0, Flags, TimeoutOrHandle)`](`recvfrom/4`) +[`recvfrom(Socket, 0, Flags, TimeoutOrHandle)`](`recvfrom/4`) `TimeoutOrHandle :: 'nowait'` has been allowed *since OTP 22.1*. @@ -5330,9 +5399,9 @@ recvfrom(Socket, BufSz, TimeoutOrHandle) -> -doc """ Receive a message on a socket. -This function is intended for sockets that are not connection +This function is intended primarily for sockets that are not connection oriented such as type [`dgram`](`t:type/0`) or [`seqpacket`](`t:type/0`) -where it may arrive messages from different source addresses. +where messages may arrive from different source addresses. Argument `BufSz` specifies the number of bytes for the receive buffer. If the buffer size is too small, the message will be truncated. @@ -5402,6 +5471,7 @@ at the start of this module reference manual page. 
(Socket, BufSz, Flags, 'nowait' | Handle) -> {'ok', {Source, Data}} | {'select', SelectInfo} | + {'select_read', {SelectInfo, {Source, Data}}} | {'completion', CompletionInfo} | {'error', Reason} when Socket :: socket(), @@ -5582,7 +5652,8 @@ recvmsg(Socket, BufSz, CtrlSz, TimeoutOrHandle) -> -doc """ Receive a message on a socket. -This function receives both data and control messages. +This function receives a data message with control messages +as well as its source address. Arguments `BufSz` and `CtrlSz` specifies the number of bytes for the receive buffer and the control message buffer. If the buffer size(s) @@ -5655,6 +5726,7 @@ at the start of this module reference manual page. (Socket, BufSz, CtrlSz, Flags, 'nowait' | Handle) -> {'ok', Msg} | {'select', SelectInfo} | + {'select_read', {SelectInfo, Msg}} | {'completion', CompletionInfo} | {'error', Reason} when Socket :: socket(), @@ -6621,10 +6693,10 @@ at the start of this module reference manual page. If another process tries an operation of the same basic type (`accept/1` | `send/2` | `recv/2`) it will be enqueued and notified -through a `select` or `completion` message when the current operation -and all enqueued before it has been completed. If the current operation -is canceled by this function it is treated as a completed operation; -the process first in queue is notified. +through a [`select` or `completion` message](#async-messages) +when the current operation and all enqueued before it has been completed. +If the current operation is canceled by this function it is treated +as a completed operation; the process first in queue is notified. 
If [`SelectInfo`](`t:select_info/0`) `|` [`CompletionInfo`](`t:completion_info/0`) does not match From 77fa667646b6f231d8bd58ed0f4478feb56bfdd1 Mon Sep 17 00:00:00 2001 From: Raimo Niskanen Date: Mon, 3 Feb 2025 16:19:23 +0100 Subject: [PATCH 6/6] Fix corner case for sendv --- erts/preloaded/ebin/prim_socket.beam | Bin 15672 -> 15700 bytes erts/preloaded/src/prim_socket.erl | 4 ++-- lib/kernel/src/socket.erl | 27 +++++++-------------------- 3 files changed, 9 insertions(+), 22 deletions(-) diff --git a/erts/preloaded/ebin/prim_socket.beam b/erts/preloaded/ebin/prim_socket.beam index b7cdf9cf8905b77ba0791f7f16f3dc0996440978..bd4d490809f97f37e1bed139dea7a03987900440 100644 GIT binary patch delta 576 zcmdl{b)|~K%|FPHfx*^iBgbuS#@(BraI1+hxi8qvqp+Qsal_^#$|WrP2bkFubp07M zoE?~j9o#QWcGt*fd@y;thAZQT$$vF;nK%SCYipM9Y+hjcg1MgQ$AKRV3=9{5m}ocV`2l+tVrVZr=Su% zKpG?oGw3u_oDWE|BZ;4cigN;K4kYn2P;mhu4U&VIa~#M7I)n-6IR-8uhI#A=h|R!I z&-e$(;09uthNB=RQ0^a)2003*;TVVwmSSMw1+ro44?@HlL7@n8EKK|mRQwl^=0_6W z2V?^s#P|+K3oy7NEZ7T`cnzclff#1-9v~a!pl3iD6s%wid@@TyfMQocd>{~<>|=dQ kT&y{Rshq7iH77N>q@1%fFC{fCGcPq|GQZ8o$^UGu0I=A1t^fc4 delta 551 zcmcaowWEr|%|FPHfx*^bBgbuS#$B7AaI1+haoKL>QP|GRxM1@U|@y%+-n!v!E_0wM_@ z%?QF!@$(Qq6G#nEo`C@>ehw-w0un+JKMNHX0@5r<;wOM?pgJZtAkB&-UVjQI!2_g0 zk}wNSL&f=kG&_>`NvJp{kmf)VKLZsP0MZ~im^sIROrS%UK#u1EVwlH{fY=NS^^AXj z3~nHXX*ddG0u?j<1JWQz!89BLv7v#$3uMF8AB2cA{s7V-$HK%9LB)RoX?`T}eLyzQ zL5%N!v;c!U!h*d}iPu0{5Qt$G?*Xzw4tfTpLBR^Pz$ddL1Sqx}#0LU_$v)P{ChOR| Jn=E5%0RZmoPw@Z% diff --git a/erts/preloaded/src/prim_socket.erl b/erts/preloaded/src/prim_socket.erl index 2708902fcb7c..1256518db338 100644 --- a/erts/preloaded/src/prim_socket.erl +++ b/erts/preloaded/src/prim_socket.erl @@ -1,7 +1,7 @@ %% %% %CopyrightBegin% %% -%% Copyright Ericsson AB 2018-2024. All Rights Reserved. +%% Copyright Ericsson AB 2018-2025. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. 
@@ -726,7 +726,7 @@ sendv_result(SockRef, IOV, SendRef, HasWritten, Result) -> %% Cont is not used for sendv {select, IOV, undefined}; true -> - select + {select, undefined} end; {select, Written} -> RestIOV = rest_iov(Written, IOV), diff --git a/lib/kernel/src/socket.erl b/lib/kernel/src/socket.erl index 472505ecb283..67557cc095b4 100644 --- a/lib/kernel/src/socket.erl +++ b/lib/kernel/src/socket.erl @@ -3674,17 +3674,18 @@ send_common_deadline_result( Op, Fun, SendResult) -> %% case SendResult of - select -> + completion -> %% Would block, wait for continuation Timeout = timeout(Deadline), receive - ?socket_msg(_Socket, select, Handle) -> - Fun(SockRef, Data, undefined, Deadline, HasWritten); + ?socket_msg(_Socket, completion, {Handle, CompletionStatus}) -> + CompletionStatus; ?socket_msg(_Socket, abort, {Handle, Reason}) -> - send_common_error(Reason, Data, HasWritten) + send_common_error(Reason, Data, false) after Timeout -> + %% ?DBG(['completion send timeout - cancel']), _ = cancel(SockRef, Op, Handle), - send_common_error(timeout, Data, HasWritten) + send_common_error(timeout, Data, false) end; {select, Cont} -> @@ -3712,20 +3713,6 @@ send_common_deadline_result( send_common_error(timeout, Data_1, true) end; - completion -> - %% Would block, wait for continuation - Timeout = timeout(Deadline), - receive - ?socket_msg(_Socket, completion, {Handle, CompletionStatus}) -> - CompletionStatus; - ?socket_msg(_Socket, abort, {Handle, Reason}) -> - send_common_error(Reason, Data, false) - after Timeout -> - %% ?DBG(['completion send timeout - cancel']), - _ = cancel(SockRef, Op, Handle), - send_common_error(timeout, Data, false) - end; - %% {error, {_Reason, RestIOV}} = Error when is_list(RestIOV) -> Error; @@ -4562,7 +4549,7 @@ sendv_deadline(SockRef, IOV, Deadline) -> sendv, fun sendv_deadline_cont/5, prim_socket:sendv(SockRef, IOV, Handle)). 
-sendv_deadline_cont(SockRef, IOV, _, Deadline, HasWritten) -> +sendv_deadline_cont(SockRef, IOV, _undefined, Deadline, HasWritten) -> SelectHandle = make_ref(), send_common_deadline_result( SockRef, IOV, SelectHandle, Deadline, HasWritten,