Skip to content

Commit

Permalink
Rename stcd -> statsd_client. Extract the Telegraf mem_limit into the TELEGRAF_MEM_LIMIT config constant
Browse files (browse the repository at this point in the history)
  • Loading branch information
badrogger committed May 16, 2024
1 parent 9995399 commit 5bacac8
Show file tree
Hide file tree
Showing 8 changed files with 33 additions and 28 deletions.
5 changes: 3 additions & 2 deletions core/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,8 @@
TELEGRAF,
TELEGRAF_CONTAINER_NAME, TELEGRAF_IMAGE,
TELEGRAF_TEMPLATE_PATH,
TELEGRAF_CONFIG_PATH
TELEGRAF_CONFIG_PATH,
TELEGRAF_MEM_LIMIT
)

logger = logging.getLogger(__name__)
Expand Down Expand Up @@ -79,7 +80,7 @@ def ensure_telegraf_running(dutils: Optional[DockerUtils] = None) -> None:
f'{SKALE_DIR_HOST}/node_data/telegraf': {'bind': '/var/lib/telegraf', 'mode': 'rw'},
'/var/run/skale/': {'bind': '/var/run/skale', 'mode': 'rw'}
},
mem_limit='1GB'
mem_limit=TELEGRAF_MEM_LIMIT
)


Expand Down
10 changes: 5 additions & 5 deletions core/schains/checks.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def get_all(self,
logger.debug('Running check %s', name)
checks_status[name] = getattr(self, name).status
if expose:
send_to_statsd(self.stcd, self.get_name(), checks_status)
send_to_statsd(self.statsd_client, self.get_name(), checks_status)

Check warning on line 130 in core/schains/checks.py

View check run for this annotation

Codecov / codecov/patch

core/schains/checks.py#L130

Added line #L130 was not covered by tests
if log:
log_checks_dict(self.get_name(), checks_status)
if save:
Expand Down Expand Up @@ -171,7 +171,7 @@ def __init__(self,
self.cfm: ConfigFileManager = ConfigFileManager(
schain_name=schain_name
)
self.stcd = get_statsd_client()
self.statsd_client = get_statsd_client()

def get_name(self) -> str:
return self.name
Expand Down Expand Up @@ -262,7 +262,7 @@ def __init__(
self.cfm: ConfigFileManager = ConfigFileManager(
schain_name=schain_name
)
self.stcd = get_statsd_client()
self.statsd_client = get_statsd_client()

def get_name(self) -> str:
return self.name
Expand Down Expand Up @@ -522,7 +522,7 @@ def log_checks_dict(schain_name, checks_dict):
)


def send_to_statsd(stcd: statsd.StatsClient, schain_name: str, checks_dict: dict) -> None:
def send_to_statsd(statsd_client: statsd.StatsClient, schain_name: str, checks_dict: dict) -> None:
for check, result in checks_dict.items():
mname = f'admin.checks.{schain_name}.{check}'
stcd.gauge(mname, int(result))
statsd_client.gauge(mname, int(result))

Check warning on line 528 in core/schains/checks.py

View check run for this annotation

Codecov / codecov/patch

core/schains/checks.py#L526-L528

Added lines #L526 - L528 were not covered by tests
4 changes: 2 additions & 2 deletions core/schains/dkg/client.py
Original file line number Diff line number Diff line change
Expand Up @@ -172,7 +172,7 @@ def __init__(
self.complaint_error_event_hash = self.skale.web3.to_hex(self.skale.web3.keccak(
text="ComplaintError(string)"
))
self.stcd = get_statsd_client()
self.statsd_client = get_statsd_client()
self._last_completed_step = step # last step
logger.info(f'sChain: {self.schain_name}. DKG timeout is {self.dkg_timeout}')

Expand All @@ -182,7 +182,7 @@ def last_completed_step(self) -> DKGStep:

@last_completed_step.setter
def last_completed_step(self, value: DKGStep):
self.stcd.gauge(f'admin.dkg.last_completed_step.{self.schain_name}', value)
self.statsd_client.gauge(f'admin.dkg.last_completed_step.{self.schain_name}', value)
self._last_completed_step = value

def is_channel_opened(self):
Expand Down
15 changes: 9 additions & 6 deletions core/schains/monitor/action.py
Original file line number Diff line number Diff line change
Expand Up @@ -166,7 +166,7 @@ def __init__(
self.cfm: ConfigFileManager = ConfigFileManager(
schain_name=self.schain['name']
)
self.stcd = get_statsd_client()
self.statsd_client = get_statsd_client()
super().__init__(name=schain['name'])

@BaseActionManager.monitor_block
Expand All @@ -178,7 +178,7 @@ def config_dir(self) -> bool:
@BaseActionManager.monitor_block
def dkg(self) -> bool:
initial_status = self.checks.dkg.status
with self.stcd.timer(f'admin.dkg.{self.name}'):
with self.statsd_client.timer(f'admin.dkg.{self.name}'):
if not initial_status:
logger.info('Initing dkg client')
dkg_client = get_dkg_client(

Check warning on line 184 in core/schains/monitor/action.py

View check run for this annotation

Codecov / codecov/patch

core/schains/monitor/action.py#L181-L184

Added lines #L181 - L184 were not covered by tests
Expand Down Expand Up @@ -212,7 +212,7 @@ def dkg(self) -> bool:

@BaseActionManager.monitor_block
def upstream_config(self) -> bool:
with self.stcd.timer(f'admin.upstream_config.{self.name}'):
with self.statsd_client.timer(f'admin.upstream_config.{self.name}'):
logger.info(
'Creating new upstream_config rotation_id: %s, stream: %s',
self.rotation_data.get('rotation_id'), self.stream_version
Expand Down Expand Up @@ -317,7 +317,7 @@ def __init__(

self.esfm = ExitScheduleFileManager(schain['name'])
self.dutils = dutils or DockerUtils()
self.stcd = get_statsd_client()
self.statsd_client = get_statsd_client()

self.node_options = node_options or NodeOptions()

Expand Down Expand Up @@ -348,14 +348,17 @@ def firewall_rules(self, upstream: bool = False) -> bool:

ranges = self.econfig.ranges
logger.info('Adding ranges %s', ranges)
with self.stcd.timer(f'admin.firewall.{self.name}'):
with self.statsd_client.timer(f'admin.firewall.{self.name}'):
self.rc.configure(
base_port=base_port,
own_ip=own_ip,
node_ips=node_ips,
sync_ip_ranges=ranges
)
self.stcd.gauge(f'admin.expected_rules.{self.name}', len(self.rc.expected_rules()))
self.statsd_client.gauge(
f'admin.expected_rules.{self.name}',
len(self.rc.expected_rules())
)
self.rc.sync()
return initial_status

Expand Down
20 changes: 10 additions & 10 deletions core/schains/monitor/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -121,11 +121,11 @@ def run_config_pipeline(
else:
logger.info('Regular node mode, running config monitor')
mon = RegularConfigMonitor(config_am, config_checks)
stcd = get_statsd_client()
statsd_client = get_statsd_client()

Check warning on line 124 in core/schains/monitor/main.py

View check run for this annotation

Codecov / codecov/patch

core/schains/monitor/main.py#L124

Added line #L124 was not covered by tests

stcd.incr(f'admin.config.pipeline.{name}.{mon.__class__.__name__}')
stcd.gauge(f'admin.schain.rotation_id.{name}', rotation_data['rotation_id'])
with stcd.timer(f'admin.config.pipeline.{name}.duration'):
statsd_client.incr(f'admin.config.pipeline.{name}.{mon.__class__.__name__}')
statsd_client.gauge(f'admin.schain.rotation_id.{name}', rotation_data['rotation_id'])
with statsd_client.timer(f'admin.config.pipeline.{name}.duration'):
mon.run()

Check warning on line 129 in core/schains/monitor/main.py

View check run for this annotation

Codecov / codecov/patch

core/schains/monitor/main.py#L126-L129

Added lines #L126 - L129 were not covered by tests


Expand Down Expand Up @@ -177,9 +177,9 @@ def run_skaled_pipeline(
automatic_repair=automatic_repair
)

stcd = get_statsd_client()
stcd.incr(f'schain.skaled.pipeline.{name}.{mon.__name__}')
with stcd.timer(f'admin.skaled.pipeline.{name}.duration'):
statsd_client = get_statsd_client()
statsd_client.incr(f'schain.skaled.pipeline.{name}.{mon.__name__}')
with statsd_client.timer(f'admin.skaled.pipeline.{name}.duration'):
mon(skaled_am, skaled_checks).run()

Check warning on line 183 in core/schains/monitor/main.py

View check run for this annotation

Codecov / codecov/patch

core/schains/monitor/main.py#L180-L183

Added lines #L180 - L183 were not covered by tests


Expand Down Expand Up @@ -219,10 +219,10 @@ def create_and_execute_tasks(
schain_record.sync_config_run, schain_record.config_version, stream_version
)

stcd = get_statsd_client()
statsd_client = get_statsd_client()
monitor_last_seen_ts = schain_record.monitor_last_seen.timestamp()
stcd.incr(f'admin.schain.monitor.{name}')
stcd.gauge(f'admin.schain.monitor_last_seen.{name}', monitor_last_seen_ts)
statsd_client.incr(f'admin.schain.monitor.{name}')
statsd_client.gauge(f'admin.schain.monitor_last_seen.{name}', monitor_last_seen_ts)

tasks = []
if not leaving_chain:
Expand Down
2 changes: 1 addition & 1 deletion core/schains/monitor/skaled_monitor.py
Original file line number Diff line number Diff line change
Expand Up @@ -44,7 +44,7 @@ def __init__(
) -> None:
self.am = action_manager
self.checks = checks
self.stcd = get_statsd_client()
self.statsd_client = get_statsd_client()

@abstractmethod
def execute(self) -> None:
Expand Down
1 change: 1 addition & 0 deletions tools/configs/monitoring.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,3 +19,4 @@
TELEGRAF_CONTAINER_NAME = 'skale_telegraf'
TELEGRAF_SERVICE_NAME = 'telegraf'
TELEGRAF_IMAGE = 'telegraf:1.27.4'
TELEGRAF_MEM_LIMIT = os.getenv('TELEGRAF_MEM_LIMIT', '1GB')
4 changes: 2 additions & 2 deletions tools/resources.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,12 @@
db = SqliteDatabase(DB_FILE, DB_PRAGMAS)
cpool: redis.ConnectionPool = redis.ConnectionPool.from_url(REDIS_URI)
rs: redis.Redis = redis.Redis(connection_pool=cpool)
stcd = statsd.StatsClient(STATSD_HOST, STATSD_PORT)
statsd_client = statsd.StatsClient(STATSD_HOST, STATSD_PORT)


def get_database():
    """Return the module-level ``db`` handle created when this module is imported."""
    return db


def get_statsd_client():
return stcd
return statsd_client

0 comments on commit 5bacac8

Please sign in to comment.