Skip to content

Commit

Permalink
Merge fixes/NFSE-4953-replace-timer-calibration-with-read-from-file
Browse files Browse the repository at this point in the history
  • Loading branch information
beckerg committed Aug 27, 2021
2 parents 3be5200 + d3fdeb6 commit b15eb5b
Show file tree
Hide file tree
Showing 12 changed files with 91 additions and 256 deletions.
22 changes: 0 additions & 22 deletions lib/c0/c0sk.c
Original file line number Diff line number Diff line change
Expand Up @@ -454,26 +454,6 @@ c0sk_pfx_probe(
return err;
}

/**
 * c0sk_calibrate() - record the overhead of calling nanosleep()
 * @self: ptr to c0sk
 *
 * Seeds c0sk_nslpmin from the global timer_nslpmin, capped at one tenth
 * of a jiffy, and uses the result as the throttle's minimum sleep time
 * unless throttle_sleep_min_ns has already been given a non-zero value.
 */
static void
c0sk_calibrate(struct c0sk_impl *self)
{
    int nslpmin = timer_nslpmin;

    /* A large measured value most likely means high resolution timers
     * are disabled, although a heavily loaded machine could also be the
     * cause.  Either way, bound it to a reasonable amount.
     */
    if (nslpmin > (NSEC_PER_SEC / HZ) / 10)
        nslpmin = (NSEC_PER_SEC / HZ) / 10;

    self->c0sk_nslpmin = nslpmin;

    /* A non-zero throttle_sleep_min_ns is left untouched. */
    if (self->c0sk_kvdb_rp->throttle_sleep_min_ns != 0)
        return;

    self->c0sk_kvdb_rp->throttle_sleep_min_ns = nslpmin;
}

merr_t
c0sk_open(
struct kvdb_rparams *kvdb_rp,
Expand Down Expand Up @@ -536,8 +516,6 @@ c0sk_open(
for (int i = 0; i < NELEM(c0sk->c0sk_ingest_refv); ++i)
atomic_set(&c0sk->c0sk_ingest_refv[i].refcnt, 0);

c0sk_calibrate(c0sk);

tdmax = clamp_t(uint, kvdb_rp->c0_ingest_threads, 1, HSE_C0_INGEST_THREADS_MAX);

c0sk->c0sk_wq_ingest = alloc_workqueue("c0sk_ingest", 0, tdmax);
Expand Down
1 change: 0 additions & 1 deletion lib/c0/c0sk_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,6 @@ struct c0sk_impl {
u32 c0sk_ingest_width_max HSE_ALIGNED(SMP_CACHE_BYTES);
u32 c0sk_ingest_width;
int c0sk_boost;
int c0sk_nslpmin;
char *c0sk_kvdbhome;
void *c0sk_stash;

Expand Down
1 change: 0 additions & 1 deletion lib/include/hse_ikvdb/kvdb_rparams.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,6 @@ struct kvdb_rparams {
uint32_t throttle_debug;
uint32_t throttle_debug_intvl_s;
uint32_t throttle_c0_hi_th;
uint64_t throttle_sleep_min_ns;
uint64_t throttle_burst;
uint64_t throttle_rate;
char throttle_init_policy[THROTTLE_INIT_POLICY_NAME_LEN_MAX];
Expand Down
2 changes: 0 additions & 2 deletions lib/include/hse_ikvdb/throttle.h
Original file line number Diff line number Diff line change
Expand Up @@ -128,7 +128,6 @@ struct throttle_mavg {
* @thr_next: time at which to recompute %thr_pct (nsecs)
* @thr_pct: percentage of requests not to throttle
* @thr_delay_raw: raw throttle delay amount
* @thr_nslpmin: fixed overhead of nanosleep() (nsecs)
* @thr_lock: lock for updating %thr_pct
* @thr_mavg: struct to compute mavg
* @thr_reduce_sum: sum to compute cumulative mavg while reducing sleep
Expand Down Expand Up @@ -160,7 +159,6 @@ struct throttle {
atomic_t thr_pct;
atomic64_t thr_next;
uint thr_delay_raw;
int thr_nslpmin;
spinlock_t thr_lock;

HSE_ALIGNED(SMP_CACHE_BYTES)
Expand Down
19 changes: 0 additions & 19 deletions lib/kvdb/kvdb_rparams.c
Original file line number Diff line number Diff line change
Expand Up @@ -1003,25 +1003,6 @@ static const struct param_spec pspecs[] = {
},
},
},
{
.ps_name = "throttle_sleep_min_ns",
.ps_description = "nanosleep time overhead (nsecs)",
.ps_flags = PARAM_FLAG_EXPERIMENTAL,
.ps_type = PARAM_TYPE_U64,
.ps_offset = offsetof(struct kvdb_rparams, throttle_sleep_min_ns),
.ps_size = sizeof(((struct kvdb_rparams *) 0)->throttle_sleep_min_ns),
.ps_convert = param_default_converter,
.ps_validate = param_default_validator,
.ps_default_value = {
.as_uscalar = 0,
},
.ps_bounds = {
.as_uscalar = {
.ps_min = 0,
.ps_max = UINT64_MAX,
},
},
},
{
.ps_name = "throttle_c0_hi_th",
.ps_description = "c0 ingest backlog sensor high-watermark (x10)",
Expand Down
2 changes: 1 addition & 1 deletion lib/kvs/kvs_rparams.c
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ static const struct param_spec pspecs[] = {
.ps_bounds = {
.as_uscalar = {
.ps_min = 0,
.ps_max = 1,
.ps_max = 128,
},
},
},
Expand Down
2 changes: 1 addition & 1 deletion lib/util/include/hse_util/perfc.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
#ifndef HSE_PLATFORM_PERFC_H
#define HSE_PLATFORM_PERFC_H

#include <hse_util/arch.h>
#include <hse_util/platform.h>
#include <hse_util/assert.h>
#include <hse_util/atomic.h>
#include <hse_util/timing.h>
Expand Down
33 changes: 33 additions & 0 deletions lib/util/include/hse_util/platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,39 @@ hse_getcpu(uint *cpu, uint *node, uint *core)
*core = hse_cpu2core(cpuid);
}

/*
* hse_tsc_freq is the measured frequency of the time stamp counter.
*
* hse_tsc_mult and hse_tsc_shift are used to quickly convert from
* cycles to nanoseconds by avoiding division.
*
* hse_tsc_shift determines the number of significant digits in the
* conversion performed by cycles_to_nsecs().
*
* hse_tsc_mult represents nanoseconds-per-cycle multiplied by 2^hse_tsc_shift to
* scale it up to an integer with a reasonable number of significant digits.
* Conversion from cycles to nanoseconds then requires only a multiplication
* by hse_tsc_mult and a division by 2^hse_tsc_shift (i.e., the division reduces
* to a simple shift by hse_tsc_shift). The multiplication by hse_tsc_mult therefore
* limits the magnitude of the value that can be converted to 2^(64 - hse_tsc_shift)
* in order to avoid overflow. For example, given a TSC frequency of 2.6GHz,
* the range of cycles_to_nsecs() is limited to 2^43, or about 3383 seconds,
* which should be good enough for typical latency measurement purposes.
* To convert values larger than 2^43 simply divide by hse_tsc_freq, which is
* slower but will not overflow.
*/
extern unsigned long hse_tsc_freq;
extern unsigned int hse_tsc_mult;
extern unsigned int hse_tsc_shift;

/**
 * cycles_to_nsecs() - convert a time stamp counter delta to nanoseconds
 * @cycles: number of cycles to convert
 *
 * Multiplies by hse_tsc_mult and shifts right by hse_tsc_shift, so no
 * division is performed.
 */
static HSE_ALWAYS_INLINE u64
cycles_to_nsecs(u64 cycles)
{
    /* The product must fit in 64 bits, which limits cycles to
     * 2^(64 - hse_tsc_shift).
     */
    const u64 scaled = cycles * hse_tsc_mult;

    return scaled >> hse_tsc_shift;
}

extern merr_t hse_platform_init(void);
extern void hse_platform_fini(void);

Expand Down
48 changes: 1 addition & 47 deletions lib/util/include/hse_util/timer.h
Original file line number Diff line number Diff line change
@@ -1,19 +1,11 @@
/* SPDX-License-Identifier: Apache-2.0 */
/*
* Copyright (C) 2015-2020 Micron Technology, Inc. All rights reserved.
* Copyright (C) 2015-2021 Micron Technology, Inc. All rights reserved.
*/

#ifndef HSE_PLATFORM_TIMER_H
#define HSE_PLATFORM_TIMER_H

/**
* This file and its source code peer reproduces the kernel's basic
* timer functionality, i.e.:
* - add_timer
* - del_timer
* - init_timer
*/

#include <hse_util/arch.h>
#include <hse_util/atomic.h>
#include <hse_util/hse_err.h>
Expand Down Expand Up @@ -44,51 +36,13 @@ struct timer_list {
*
* jclock_ns is updated HSE_HZ times per second and reflects
* the time of CLOCK_MONOTONIC in nanoseconds.
*
* timer_slack is the timer thread's TIMERSLACK (see prctl(2)).
*
* timer_nslpmin is the timer thread's measured timer slack
* of clock_nanosleep().
*
* tsc_freq is the measured frequency of the time stamp counter.
*
* tsc_mult and tsc_shift are employed by cycles_to_nsecs() to very
* quickly convert from cycles to nanoseconds by avoiding division.
*
* tsc_shift determines the number of significant digits in the conversion
* as performed by cycles_to_nsecs().
*
* tsc_mult represents nanoseconds-per-cycle multiplied by 2^tsc_shift to
* scale it up to an integer with a reasonable number of significant digits.
* Conversion from cycles to nanoseconds then requires only a multiplication
* by tsc_mult and a division by 2^tsc_shift (i.e., the division reduces to
* a simple shift by tsc_shift). The multiplication by tsc_mult therefore
* limits the magnitude of the value that can be converted to 2^(64 - tsc_shift)
* in order to avoid overflow. For example, given a TSC frequency of 2.6GHz,
* the range of cycles_to_nsecs() is limited to 2^43, or about 3383 seconds,
* which should be good enough for typical latency measurement purposes.
* To convert values larger than 2^43 simply divide by tsc_freq, which is
* slower but will not overflow.
*/
extern struct timer_jclock timer_jclock;

#define jclock_ns atomic64_read(&timer_jclock.jc_jclock_ns)
#define jiffies atomic64_read(&timer_jclock.jc_jiffies)

extern unsigned long timer_nslpmin;
extern unsigned long timer_slack;
extern unsigned long tsc_freq;
extern unsigned long tsc_mult;
extern unsigned int tsc_shift;

/**
 * cycles_to_nsecs() - convert a time stamp counter delta to nanoseconds
 * @cycles: number of cycles to convert
 *
 * Multiplies by tsc_mult and shifts right by tsc_shift, so no division
 * is performed.
 */
static HSE_ALWAYS_INLINE u64
cycles_to_nsecs(u64 cycles)
{
    /* The product must not overflow, which limits cycles to
     * 2^(64 - tsc_shift) (see the note in timer.h regarding tsc_mult
     * and tsc_shift).
     */
    const u64 scaled = cycles * tsc_mult;

    return scaled >> tsc_shift;
}

static HSE_ALWAYS_INLINE unsigned long
msecs_to_jiffies(const unsigned int m)
Expand Down
38 changes: 38 additions & 0 deletions lib/util/src/platform.c
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,10 @@
struct hse_cputopo *hse_cputopov HSE_READ_MOSTLY;
uint hse_cputopoc HSE_READ_MOSTLY;

unsigned long hse_tsc_freq HSE_READ_MOSTLY;
unsigned int hse_tsc_mult HSE_READ_MOSTLY;
unsigned int hse_tsc_shift HSE_READ_MOSTLY;

rest_get_t kmc_rest_get;

static inline int
Expand Down Expand Up @@ -344,6 +348,38 @@ hse_cputopo_fini(void)
free(hse_cputopov);
}

/**
 * hse_cpufreq_init() - initialize the cycles-to-nanoseconds conversion factors
 *
 * Scans /proc/cpuinfo for the first line of the form "bogomips ... <number>"
 * and takes half of that value, in MHz, as the time stamp counter frequency.
 * If the file cannot be opened, no such line is found, or the value parsed
 * is outside the range the fixed-point conversion can represent, a warning
 * is logged and a default of 1000 bogomips is used instead.
 *
 * Sets hse_tsc_freq, hse_tsc_shift, and hse_tsc_mult, then logs the result.
 */
void
hse_cpufreq_init(void)
{
    char linebuf[1024];
    double bogomips = 0;
    int n = EOF;
    FILE *fp;

    fp = fopen("/proc/cpuinfo", "r");
    if (fp) {
        while (fgets(linebuf, sizeof(linebuf), fp)) {
            n = sscanf(linebuf, "bogomips%*[^0-9]%lf", &bogomips);
            if (n == 1)
                break;
        }

        fclose(fp);
    }

    /* Reject values the conversion below cannot handle:
     *  - below 1 the frequency drops under 500KHz, at which point
     *    hse_tsc_mult no longer fits in 32 bits (and a value of zero
     *    would cause a division by zero);
     *  - above 1e6 (i.e., 500GHz) the value is not credible and
     *    hse_tsc_mult loses nearly all of its precision.
     */
    if (n != 1 || bogomips < 1.0 || bogomips > 1e6) {
        hse_log(HSE_WARNING "%s: unable to determine cpu frequency", __func__);
        bogomips = 1000;
    }

    hse_tsc_freq = (bogomips * 1000000) / 2;
    hse_tsc_shift = 21;

    /* Widen before shifting so the intermediate cannot overflow
     * regardless of the type NSEC_PER_SEC happens to have.
     */
    hse_tsc_mult = ((unsigned long long)NSEC_PER_SEC << hse_tsc_shift) / hse_tsc_freq;

    hse_log(HSE_NOTICE "%s: freq %lu, shift %u, mult %u",
            __func__, hse_tsc_freq, hse_tsc_shift, hse_tsc_mult);
}

merr_t
hse_platform_init(void)
{
Expand Down Expand Up @@ -374,6 +410,8 @@ hse_platform_init(void)
if (err)
goto errout;

hse_cpufreq_init();

err = vlb_init();
if (err)
goto errout;
Expand Down
Loading

0 comments on commit b15eb5b

Please sign in to comment.