Skip to content

Commit a9bb40f

Browse files
committed
Implement prometheus metrics
1 parent 46205b1 commit a9bb40f

File tree

12 files changed

+245
-8
lines changed

12 files changed

+245
-8
lines changed

Cargo.lock

Lines changed: 49 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Cargo.toml

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,10 @@ getrandom = "0.2"
2929
hkd32 = { version = "0.7", default-features = false, features = ["mnemonic"] }
3030
hkdf = "0.12"
3131
k256 = { version = "0.13", features = ["ecdsa", "sha256"] }
32-
ledger = { version = "0.2", optional = true }
32+
lazy_static = { version = "1.5.0" }
33+
ledger = { version = "0.2" }
3334
once_cell = "1.5"
35+
prometheus = { version = "0.13.4", default-features = false, features = [] }
3436
prost = "0.12"
3537
prost-derive = "0.12"
3638
rand_core = { version = "0.6", features = ["std"] }
@@ -48,6 +50,7 @@ tendermint-config = "0.35"
4850
tendermint-p2p = "0.35"
4951
tendermint-proto = "0.35"
5052
thiserror = "1"
53+
tiny_http = { version = "0.12.0" }
5154
url = { version = "2.2.2", features = ["serde"], optional = true }
5255
uuid = { version = "1", features = ["serde"], optional = true }
5356
wait-timeout = "0.2"

src/client.rs

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ use crate::{
1010
config::ValidatorConfig,
1111
error::{Error, ErrorKind},
1212
prelude::*,
13+
prometheus::initialize_consensus_metrics,
1314
session::Session,
1415
};
1516
use std::{panic, process::exit, thread, time::Duration};
@@ -88,7 +89,7 @@ fn main_loop(config: ValidatorConfig) -> Result<(), Error> {
8889
/// Ensure chain with given ID is properly registered
8990
pub fn register_chain(chain_id: &chain::Id) {
9091
let registry = chain::REGISTRY.get();
91-
92+
initialize_consensus_metrics(chain_id);
9293
debug!("registering chain: {}", chain_id);
9394
registry.get_chain(chain_id).unwrap_or_else(|| {
9495
status_err!(

src/commands/init/config_builder.rs

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,7 @@ impl ConfigBuilder {
4343
self.add_chain_config();
4444
self.add_provider_config();
4545
self.add_validator_config();
46+
self.add_metrics_config();
4647

4748
self.contents
4849
}
@@ -148,6 +149,12 @@ impl ConfigBuilder {
148149
self.add_template_with_chain_id(include_str!("templates/keyring/fortanixdsm.toml"));
149150
}
150151

152+
/// Add `[metrics]` configurations
153+
fn add_metrics_config(&mut self) {
154+
self.add_section_comment("Metrics exporter configuration");
155+
self.add_template(include_str!("templates/metrics.toml"));
156+
}
157+
151158
/// Append a template to the config file, substituting `$KMS_HOME`
152159
fn add_template(&mut self, template: &str) {
153160
self.add_str(&format_template(
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
[metrics]
2+
bind_address = "localhost:3333"

src/commands/start.rs

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,6 @@
11
//! Start the KMS
22
3-
use crate::{chain, client::Client, prelude::*};
3+
use crate::{chain, client::Client, prelude::*, prometheus};
44
use abscissa_core::Command;
55
use clap::Parser;
66
use std::{path::PathBuf, process};
@@ -40,6 +40,16 @@ impl StartCommand {
4040
process::exit(1);
4141
});
4242

43+
if let Some(config) = &APP.config().metrics {
44+
let address = config.bind_address.clone();
45+
info!("Starting up prometheus server on {}", address);
46+
let thread_name = abscissa_core::thread::Name::new("prometheus-thread").unwrap();
47+
APP.state()
48+
.threads_mut()
49+
.spawn(thread_name, move || prometheus::serve(&address))
50+
.expect("Unable to start prometheus exporter thread");
51+
}
52+
4353
// Spawn the validator client threads
4454
config
4555
.validator

src/config.rs

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,13 @@
11
//! Configuration file structures (with serde-derived parser)
22
33
pub mod chain;
4+
pub mod metrics;
45
pub mod provider;
56
pub mod validator;
67

78
pub use self::validator::*;
89

9-
use self::{chain::ChainConfig, provider::ProviderConfig};
10+
use self::{chain::ChainConfig, metrics::MetricsConfig, provider::ProviderConfig};
1011
use serde::Deserialize;
1112

1213
/// Environment variable containing path to config file
@@ -29,4 +30,7 @@ pub struct KmsConfig {
2930
/// Addresses of validator nodes
3031
#[serde(default)]
3132
pub validator: Vec<ValidatorConfig>,
33+
34+
/// Configuration for metrics exporter
35+
pub metrics: Option<MetricsConfig>,
3236
}

src/config/metrics.rs

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,11 @@
1+
//! Metrics configuration
2+
3+
use serde::Deserialize;
4+
5+
/// Metrics configuration
6+
#[derive(Deserialize, Debug, Clone, Default)]
7+
#[serde(deny_unknown_fields)]
8+
pub struct MetricsConfig {
9+
/// Address on which to bind metrics exporter
10+
pub bind_address: String,
11+
}

src/lib.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,7 @@ pub mod key_utils;
2525
pub mod keyring;
2626
pub mod prelude;
2727
pub mod privval;
28+
pub mod prometheus;
2829
pub mod rpc;
2930
pub mod session;
3031

src/prometheus.rs

Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
//! Prometheus metrics collection and serving functionality
2+
use crate::rpc;
3+
4+
use lazy_static::lazy_static;
5+
use prometheus::{
6+
default_registry, register_histogram_vec, register_int_counter_vec, Encoder, HistogramVec,
7+
IntCounterVec, TextEncoder,
8+
};
9+
use std::time::Duration;
10+
use tendermint::chain;
11+
12+
lazy_static! {
13+
static ref METRIC_CONSENSUS_UPDATES: IntCounterVec = register_int_counter_vec!(
14+
"consensus_updates_total",
15+
"Number of consensus updates by status.",
16+
&["chain", "status"]
17+
)
18+
.unwrap();
19+
static ref METRIC_REQUEST_DURATION: HistogramVec = register_histogram_vec!(
20+
"request_duration_seconds",
21+
"Duration of request",
22+
&["chain", "message_type"],
23+
)
24+
.unwrap();
25+
}
26+
27+
/// Status outcomes for consensus updates
28+
pub enum ConsensusUpdateStatus {
29+
/// Update completed successfully
30+
Success,
31+
/// Generic error occurred during update
32+
Error,
33+
/// Double signing attempt detected
34+
DoubleSign,
35+
}
36+
37+
/// Type of RPC request received from validator node
38+
pub enum RpcRequestType {
39+
/// Sign proposal or vote request
40+
Sign,
41+
/// Ping/heartbeat request
42+
Ping,
43+
/// Request for public key
44+
Pubkey,
45+
}
46+
47+
impl From<&rpc::Request> for RpcRequestType {
48+
fn from(value: &rpc::Request) -> Self {
49+
match value {
50+
rpc::Request::SignProposal(_) | rpc::Request::SignVote(_) => RpcRequestType::Sign,
51+
rpc::Request::PingRequest => RpcRequestType::Ping,
52+
rpc::Request::ShowPublicKey => RpcRequestType::Pubkey,
53+
}
54+
}
55+
}
56+
impl ToString for ConsensusUpdateStatus {
57+
fn to_string(&self) -> String {
58+
match self {
59+
ConsensusUpdateStatus::Success => "success".into(),
60+
ConsensusUpdateStatus::Error => "error".into(),
61+
ConsensusUpdateStatus::DoubleSign => "double_sign".into(),
62+
}
63+
}
64+
}
65+
66+
/// Initialize metrics counters for a given chain ID
67+
///
68+
/// Creates and resets counters for all possible status outcomes
69+
pub fn initialize_consensus_metrics(chain_id: &chain::Id) {
70+
METRIC_CONSENSUS_UPDATES
71+
.with_label_values(&[
72+
chain_id.as_str(),
73+
&ConsensusUpdateStatus::Success.to_string(),
74+
])
75+
.reset();
76+
METRIC_CONSENSUS_UPDATES
77+
.with_label_values(&[chain_id.as_str(), &ConsensusUpdateStatus::Error.to_string()])
78+
.reset();
79+
METRIC_CONSENSUS_UPDATES
80+
.with_label_values(&[
81+
chain_id.as_str(),
82+
&ConsensusUpdateStatus::DoubleSign.to_string(),
83+
])
84+
.reset();
85+
}
86+
87+
/// Increment counter for a consensus update with given status
88+
pub fn increment_consensus_counter(chain_id: &chain::Id, status: ConsensusUpdateStatus) {
89+
METRIC_CONSENSUS_UPDATES
90+
.with_label_values(&[chain_id.as_str(), &status.to_string()])
91+
.inc();
92+
}
93+
94+
impl ToString for RpcRequestType {
95+
fn to_string(&self) -> String {
96+
match self {
97+
RpcRequestType::Sign => "sign",
98+
RpcRequestType::Ping => "ping",
99+
RpcRequestType::Pubkey => "pubkey",
100+
}
101+
.into()
102+
}
103+
}
104+
105+
/// Record duration of an RPC request
106+
pub fn record_request_duration(
107+
chain_id: &chain::Id,
108+
request_type: &RpcRequestType,
109+
duration: Duration,
110+
) {
111+
METRIC_REQUEST_DURATION
112+
.with_label_values(&[chain_id.as_str(), &request_type.to_string()])
113+
.observe(duration.as_secs_f64());
114+
}
115+
116+
/// Start HTTP server to expose Prometheus metrics
117+
pub fn serve(address: &str) {
118+
let encoder = TextEncoder::new();
119+
let registry = default_registry();
120+
let server = tiny_http::Server::http(address).expect("Unable to bind to address");
121+
for request in server.incoming_requests() {
122+
let mut response = Vec::<u8>::new();
123+
let metric_families = registry.gather();
124+
// TODO
125+
encoder.encode(&metric_families, &mut response).unwrap();
126+
request
127+
.respond(tiny_http::Response::from_data(response))
128+
.unwrap();
129+
}
130+
}

0 commit comments

Comments
 (0)