From 1e39a7843a8ac3927ab007a98c76043768c6b972 Mon Sep 17 00:00:00 2001
From: Nichamon Naksinehaboon
Date: Thu, 7 Dec 2023 14:09:28 -0600
Subject: [PATCH] Add a configuration command to reset all statistics

---
 ldms/python/ldmsd/ldmsd_communicator.py |  29 +++++++
 ldms/python/ldmsd/ldmsd_controller      |  24 ++++++
 ldms/src/core/ldms.h                    |  11 ++-
 ldms/src/core/ldms_stream.c             |  57 ++++++++++++++
 ldms/src/core/ldms_xprt.c               |  17 +++--
 ldms/src/ldmsd/ldmsd_request.c          | 101 +++++++++++++++++++++++++
 ldms/src/ldmsd/ldmsd_request.h          |   1 +
 7 files changed, 232 insertions(+), 8 deletions(-)

diff --git a/ldms/python/ldmsd/ldmsd_communicator.py b/ldms/python/ldmsd/ldmsd_communicator.py
index bad637ab0..3c26b092f 100644
--- a/ldms/python/ldmsd/ldmsd_communicator.py
+++ b/ldms/python/ldmsd/ldmsd_communicator.py
@@ -160,6 +160,7 @@
                       'metric_sets_default_authz': {'req_attr':[], 'opt_attr': ['uid', 'gid', 'perm']},
                       'set_sec_mod' : {'req_attr': ['regex'], 'opt_attr': ['uid', 'gid', 'perm']},
                       'log_status' : {'req_attr' : [], 'opt_attr' : ['name']},
+                      'stats_reset' : {'req_attr' : [], 'opt_attr' : ['list']},
                       ##### Failover. #####
                       'failover_config': {
                                 'req_attr': [
@@ -529,6 +530,7 @@ class LDMSD_Request(object):
     SET_DEFAULT_AUTHZ = 0x600 + 17
     SET_SEC_MOD = 0x600 + 19
     LOG_STATUS = 0x600 + 20
+    STATS_RESET = 0x600 + 21
 
     FAILOVER_CONFIG = 0x700
     FAILOVER_PEERCFG_START = 0x700 + 1
@@ -1774,6 +1776,33 @@ def log_level(self, level, name = None, regex = None):
             self.close()
             return errno.ENOTCONN, str(e)
 
+    def stats_reset(self, s = None):
+        """
+        Reset the statistics counters
+
+        Parameters:
+        - s: A comma-separated string naming the statistics to reset
+             (e.g. "thread,xprt"). When it is None or empty, the
+             request is sent without any attribute.
+
+        Returns:
+        - status is the error code in the response, or errno.ENOTCONN
+          if sending or receiving raised an exception
+        - data is the response message, or the exception text
+        """
+        attr_list = []
+        if s:
+            attr_list.append(LDMSD_Req_Attr(attr_id = LDMSD_Req_Attr.STRING, value = s))
+        req = LDMSD_Request(command_id = LDMSD_Request.STATS_RESET, attrs = attr_list)
+
+        try:
+            req.send(self)
+            resp = req.receive(self)
+            return resp['errcode'], resp['msg']
+        except Exception as e:
+            self.close()
+            return errno.ENOTCONN, str(e)
+
     def getCfgCntr(self):
         req = LDMSD_Request(command_id=LDMSD_Request.CFG_CNTR)
         try:
diff --git a/ldms/python/ldmsd/ldmsd_controller b/ldms/python/ldmsd/ldmsd_controller
index 92e2151ab..b8eb78c2c 100755
--- a/ldms/python/ldmsd/ldmsd_controller
+++ b/ldms/python/ldmsd/ldmsd_controller
@@ -2777,6 +2777,30 @@ class LdmsdCmdParser(cmd.Cmd):
               "log level as the default logger (ldmsd). When the default log "
               "level changes, their log levels change accordingly.")
 
+    def complete_stats_reset(self, text, line, begidx, endidx):
+        return self.__complete_attr_list('stats_reset', text)
+
+    def do_stats_reset(self, arg):
+        """
+        Reset the statistics counters
+
+        Parameters:
+        [list=]   A comma-separated list of statistics to be reset.
+                  All statistics are reset if it is not given.
+                     thread - reset the thread statistics.
+                     xprt   - reset the transport statistics.
+                     update - reset the update time statistics and skipped and over-sampled counters.
+                     store  - reset the store time statistics.
+                     stream - reset the stream and stream client statistics
+        """
+        arg = self.handle_args('stats_reset', arg)
+        # NOTE(review): presumably handle_args() returns None after reporting a parse error -- confirm
+        if arg is None:
+            return
+        rc, msg = self.comm.stats_reset(s = arg['list'])
+        if rc:
+            print(f"Failed to reset the statistics: {msg}")
+
     def do_option(self, arg):
         """
         ONLY SUPPORTED IN CONFIGURATION FILES
diff --git a/ldms/src/core/ldms.h b/ldms/src/core/ldms.h
index 1261b4131..f9422e25b 100644
--- a/ldms/src/core/ldms.h
+++ b/ldms/src/core/ldms.h
@@ -1339,7 +1339,8 @@ char *ldms_stream_stats_tq_to_str(struct ldms_stream_stats_tq_s *tq);
  *
  * \param match    The stream name or a regular expression.
  * \param is_regex 1 if \c match is a regular expression; otherwise, 0.
- * \param is_reset 1 means to reset the streams' statistics
+ * \param is_reset 0 means not to reset the statistics.
+ *                 A non-zero value means to reset the statistics.
  *
  * \retval str The string describing the stats.
  *
@@ -1388,6 +1389,14 @@ char *ldms_stream_client_stats_tq_to_str(struct ldms_stream_client_stats_tq_s *t
  */
 char *ldms_stream_client_stats_str(int is_reset);
 
+/**
+ * \brief Reset the statistics of streams and their clients
+ *
+ * The rx counters of every stream and of its sources, and the tx
+ * and drops counters of every stream client, are re-initialized.
+ */
+void ldms_stream_n_client_stats_reset(void);
+
 /** \} */
 
 /**
diff --git a/ldms/src/core/ldms_stream.c b/ldms/src/core/ldms_stream.c
index 54293fe73..ad688faf4 100644
--- a/ldms/src/core/ldms_stream.c
+++ b/ldms/src/core/ldms_stream.c
@@ -1741,6 +1741,63 @@ char *ldms_stream_stats_tq_to_str(struct ldms_stream_stats_tq_s *tq)
 	return ret;
 }
 
+/* Re-initialize the counters of every stream, stream source, and stream client. */
+void ldms_stream_n_client_stats_reset(void)
+{
+	struct rbn *rbn, *srbn;
+	struct ldms_stream_s *s;
+	struct ldms_stream_client_entry_s *sce;
+	struct ldms_stream_src_stats_s *src;
+	ldms_stream_client_t cli;
+
+	/*
+	 * There is a possibility of racing because the readlock is used.
+	 * However, the reset logic does not change the tree or list's structures.
+	 */
+	__STREAM_RDLOCK();
+
+	/* Reset regex clients first */
+	TAILQ_FOREACH(cli, &__regex_client_tq, entry) {
+		pthread_rwlock_rdlock(&cli->rwlock);
+		LDMS_STREAM_COUNTERS_INIT(&cli->tx);
+		LDMS_STREAM_COUNTERS_INIT(&cli->drops);
+		TAILQ_FOREACH(sce, &cli->stream_tq, client_stream_entry) {
+			LDMS_STREAM_COUNTERS_INIT(&sce->tx);
+			LDMS_STREAM_COUNTERS_INIT(&sce->drops);
+		}
+		pthread_rwlock_unlock(&cli->rwlock);
+	}
+
+	RBT_FOREACH(rbn, &__stream_rbt) {
+		s = container_of(rbn, struct ldms_stream_s, rbn);
+		pthread_rwlock_rdlock(&s->rwlock);
+
+		/* Per-source receive counters of this stream */
+		RBT_FOREACH(srbn, &s->src_stats_rbt) {
+			src = container_of(srbn, struct ldms_stream_src_stats_s, rbn);
+			LDMS_STREAM_COUNTERS_INIT(&src->rx);
+		}
+
+		TAILQ_FOREACH(sce, &s->client_tq, stream_client_entry) {
+			/* reset client's stats */
+			cli = sce->client;
+			if (!cli)
+				continue;
+			if (cli->is_regex)
+				continue; /* Already reset above */
+
+			LDMS_STREAM_COUNTERS_INIT(&sce->tx);
+			LDMS_STREAM_COUNTERS_INIT(&sce->drops);
+			LDMS_STREAM_COUNTERS_INIT(&cli->tx);
+			LDMS_STREAM_COUNTERS_INIT(&cli->drops);
+		}
+		LDMS_STREAM_COUNTERS_INIT(&s->rx);
+		pthread_rwlock_unlock(&s->rwlock);
+	}
+
+	__STREAM_UNLOCK();
+}
+
 char *ldms_stream_stats_str(const char *match, int is_regex, int is_reset)
 {
 	struct ldms_stream_stats_tq_s *tq = NULL;
diff --git a/ldms/src/core/ldms_xprt.c b/ldms/src/core/ldms_xprt.c
index c47453f17..2c8ac1a29 100644
--- a/ldms/src/core/ldms_xprt.c
+++ b/ldms/src/core/ldms_xprt.c
@@ -220,13 +220,16 @@ void ldms_xprt_rate_data(struct ldms_xprt_rate_data *data, int reset)
 	struct timespec now;
 	double dur_s;
 	(void)clock_gettime(CLOCK_REALTIME, &now);
-	dur_s = ldms_timespec_diff_s(&xprt_start, &now);
-	data->connect_rate_s = (double)xprt_connect_count / dur_s;
-	data->connect_request_rate_s = (double)xprt_connect_request_count / dur_s;
-	data->disconnect_rate_s = (double)xprt_disconnect_count / dur_s;
-	data->reject_rate_s = (double)xprt_reject_count / dur_s;
-	data->auth_fail_rate_s = (double)xprt_auth_fail_count / dur_s;
-	data->duration = dur_s;
+	/* data may be NULL when the caller only wants the counters reset */
+	if (data) {
+		dur_s = ldms_timespec_diff_s(&xprt_start, &now);
+		data->connect_rate_s = (double)xprt_connect_count / dur_s;
+		data->connect_request_rate_s = (double)xprt_connect_request_count / dur_s;
+		data->disconnect_rate_s = (double)xprt_disconnect_count / dur_s;
+		data->reject_rate_s = (double)xprt_reject_count / dur_s;
+		data->auth_fail_rate_s = (double)xprt_auth_fail_count / dur_s;
+		data->duration = dur_s;
+	}
 	if (reset) {
 		struct ldms_xprt *x;
 		pthread_mutex_lock(&xprt_list_lock);
diff --git a/ldms/src/ldmsd/ldmsd_request.c b/ldms/src/ldmsd/ldmsd_request.c
index b5bbcd98f..ab2c8f5b7 100644
--- a/ldms/src/ldmsd/ldmsd_request.c
+++ b/ldms/src/ldmsd/ldmsd_request.c
@@ -262,6 +262,7 @@ static int prdcr_hint_tree_status_handler(ldmsd_req_ctxt_t reqc);
 static int update_time_stats_handler(ldmsd_req_ctxt_t reqc);
 static int set_sec_mod_handler(ldmsd_req_ctxt_t reqc);
 static int log_status_handler(ldmsd_req_ctxt_t reqc);
+static int stats_reset_handler(ldmsd_req_ctxt_t reqc);
 
 /* these are implemented in ldmsd_failover.c */
 int failover_config_handler(ldmsd_req_ctxt_t req_ctxt);
@@ -510,6 +511,9 @@ static struct request_handler_entry request_handler[] = {
 	[LDMSD_LOG_STATUS_REQ] = {
 		LDMSD_LOG_STATUS_REQ, log_status_handler, XUG
 	},
+	[LDMSD_STATS_RESET_REQ] = {
+		LDMSD_STATS_RESET_REQ, stats_reset_handler, XALL
+	},
 
 	/* Transport Stats Request */
 	[LDMSD_XPRT_STATS_REQ] = {
@@ -8926,3 +8930,100 @@ static int store_time_stats_handler(ldmsd_req_ctxt_t reqc)
 	json_entity_free(strgp_dict);
 	return rc;
 }
+
+/*
+ * Reset the update and/or store statistics of every producer set.
+ *
+ * \param now       Time stamped as the start of each statistic that is reset
+ * \param is_update Non-zero to reset the update statistics and the
+ *                  oversampled and skipped-update counters
+ * \param is_store  Non-zero to reset the store statistics
+ *
+ * NOTE(review): the producer list is walked here without any lock held
+ * around ldmsd_prdcr_first()/ldmsd_prdcr_next() -- confirm that none is
+ * required.
+ */
+static void __prdset_stats_reset(struct timespec *now, int is_update, int is_store)
+{
+	ldmsd_prdcr_t prdcr;
+	ldmsd_prdcr_set_t prdset;
+	struct rbn *rbn;
+
+	for (prdcr = ldmsd_prdcr_first(); prdcr; prdcr = ldmsd_prdcr_next(prdcr)) {
+		ldmsd_prdcr_lock(prdcr);
+		RBT_FOREACH(rbn, &prdcr->set_tree) {
+			prdset = container_of(rbn, struct ldmsd_prdcr_set, rbn);
+			if (is_update) {
+				memset(&prdset->updt_stat, 0, sizeof(struct ldmsd_stat));
+				prdset->updt_stat.start = *now;
+				prdset->oversampled_cnt = prdset->skipped_upd_cnt = 0;
+			}
+			if (is_store) {
+				/* Stamp the start after the memset so it is not wiped */
+				memset(&prdset->store_stat, 0, sizeof(struct ldmsd_stat));
+				prdset->store_stat.start = *now;
+			}
+		}
+		ldmsd_prdcr_unlock(prdcr);
+	}
+}
+
+/*
+ * Handle the stats_reset request.
+ *
+ * The optional LDMSD_ATTR_STRING attribute is a comma-separated list of
+ * the statistics to reset: update, store, thread, xprt, and stream
+ * (case-insensitive). Names that are not recognized are ignored. When
+ * the attribute is absent, all statistics are reset.
+ */
+static int stats_reset_handler(ldmsd_req_ctxt_t reqc)
+{
+	struct timespec now;
+	char *s;
+	char *tok, *ptr;
+	int is_update, is_store, is_thread, is_xprt, is_stream;
+
+	is_update = is_store = is_thread = is_xprt = is_stream = 0;
+
+	s = ldmsd_req_attr_str_value_get_by_id(reqc, LDMSD_ATTR_STRING);
+	if (s) {
+		/*
+		 * s is released with free() below, so it is tokenized in
+		 * place; a second copy is not needed.
+		 */
+		tok = strtok_r(s, ",", &ptr);
+		while (tok) {
+			if (0 == strcasecmp(tok, "update"))
+				is_update = 1;
+			else if (0 == strcasecmp(tok, "store"))
+				is_store = 1;
+			else if (0 == strcasecmp(tok, "thread"))
+				is_thread = 1;
+			else if (0 == strcasecmp(tok, "xprt"))
+				is_xprt = 1;
+			else if (0 == strcasecmp(tok, "stream"))
+				is_stream = 1;
+			tok = strtok_r(NULL, ",", &ptr);
+		}
+	} else {
+		is_update = is_store = is_thread = is_xprt = is_stream = 1;
+	}
+
+	clock_gettime(CLOCK_REALTIME, &now);
+	if (is_thread)
+		zap_thrstat_reset_all();
+
+	if (is_xprt)
+		ldms_xprt_rate_data(NULL, 1);
+
+	/* Skip walking the producer sets when neither statistic is selected */
+	if (is_update || is_store)
+		__prdset_stats_reset(&now, is_update, is_store);
+
+	if (is_stream)
+		ldms_stream_n_client_stats_reset();
+
+	free(s);
+	ldmsd_send_req_response(reqc, reqc->line_buf);
+	return 0;
+}
diff --git a/ldms/src/ldmsd/ldmsd_request.h b/ldms/src/ldmsd/ldmsd_request.h
index fd0dffabf..b0933cc06 100644
--- a/ldms/src/ldmsd/ldmsd_request.h
+++ b/ldms/src/ldmsd/ldmsd_request.h
@@ -145,6 +145,7 @@ enum ldmsd_request {
 	LDMSD_CMDLINE_OPTIONS_SET_REQ,
 	LDMSD_SET_SEC_MOD_REQ,
 	LDMSD_LOG_STATUS_REQ,
+	LDMSD_STATS_RESET_REQ,
 
 	/* failover requests by user */
 	LDMSD_FAILOVER_CONFIG_REQ = 0x700, /* "failover_config" user command */