Skip to content

Commit 5da93e3

Browse files
committed
prometheus - fix elapsed warning and improve start msgs
Signed-off-by: Paul Cuzner <pcuzner@ibm.com>
1 parent 284e639 commit 5da93e3

File tree

1 file changed

+9
-6
lines changed

1 file changed

+9
-6
lines changed

control/prometheus.py

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -14,9 +14,12 @@
1414
import spdk.rpc as rpc
1515

1616
from prometheus_client.core import REGISTRY, GaugeMetricFamily, CounterMetricFamily, InfoMetricFamily
17-
from prometheus_client import start_http_server
17+
from prometheus_client import start_http_server, GC_COLLECTOR
1818
from typing import NamedTuple
1919

20+
COLLECTION_ELAPSED_WARNING = 0.8 # Fraction of the refresh interval (0.8 = 80%) that a collection may take before a warning message is issued
21+
REGISTRY.unregister(GC_COLLECTOR) # Turn off the garbage collector metrics
22+
2023

2124
class RBD(NamedTuple):
2225
pool: str
@@ -42,10 +45,11 @@ def start_exporter(spdk_rpc_client, port, config):
4245
key_filepath = config.get('mtls', 'server_key')
4346
logger = logging.getLogger(__name__)
4447
if os.path.exists(cert_filepath) and os.path.exists(key_filepath):
48+
logger.info("Prometheus exporter endpoint mode is https")
4549
start_http_server(port=port, certfile=cert_filepath, keyfile=key_filepath)
4650
else:
4751
# fallback to http if the cert and key are unavailable
48-
logger.warning("TLS cert and key files not found, falling back to HTTP support")
52+
logger.warning("Prometheus exporter endpoint mode is http. TLS cert and key files not found.")
4953
start_http_server(port)
5054
REGISTRY.register(NVMeOFCollector(spdk_rpc_client, config))
5155

@@ -116,11 +120,10 @@ def _collect(self):
116120
self._get_subsystems
117121
elapsed = time.time() - start_time
118122

119-
interval_used = elapsed / self.interval
120-
if interval_used > 1:
123+
if elapsed > self.interval:
121124
self.logger.error(f"Stats refresh time > interval time of {self.interval} secs")
122-
elif interval_used > 0.8:
123-
self.logger.warning("Stats refresh is close to exceeding the interval (>80%)")
125+
elif elapsed > self.interval * COLLECTION_ELAPSED_WARNING:
126+
self.logger.warning(f"Stats refresh of {elapsed:.2f}s is close to exceeding the interval {self.interval}s")
124127
else:
125128
self.logger.debug(f"Stats refresh completed in {elapsed:.3f} secs.")
126129

0 commit comments

Comments
 (0)