Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
43 changes: 20 additions & 23 deletions crates/blockchain/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ use tokio::sync::mpsc;
use tracing::{error, info, warn};

pub mod key_manager;
mod metrics;
pub mod store;

/// Messages sent from the blockchain to the P2P layer for publishing.
Expand Down Expand Up @@ -90,6 +91,9 @@ impl BlockChainServer {
let slot = time_since_genesis / SECONDS_PER_SLOT;
let interval = time_since_genesis % SECONDS_PER_SLOT;

// Update current slot metric
metrics::update_current_slot(slot);

// Produce attestations at interval 1
if interval == 1 {
self.produce_attestations(slot);
Expand All @@ -99,17 +103,14 @@ impl BlockChainServer {
let has_proposal = false;

self.store.on_tick(timestamp, has_proposal);

// Update safe target slot metric (updated by store.on_tick at interval 2)
metrics::update_safe_target_slot(self.store.safe_target_slot());
}

fn produce_attestations(&mut self, slot: u64) {
// Get the head state to determine number of validators
let head_state = match self.store.head_state() {
Some(state) => state,
None => {
warn!(%slot, "Cannot produce attestations: no head state");
return;
}
};
let head_state = self.store.head_state();

let num_validators = head_state.validators.len() as u64;

Expand Down Expand Up @@ -149,14 +150,16 @@ impl BlockChainServer {
};

// Publish to gossip network
if let Err(err) = self
let Ok(_) = self
.p2p_tx
.send(OutboundGossip::PublishAttestation(signed_attestation))
{
error!(%slot, %validator_id, %err, "Failed to publish attestation");
} else {
info!(%slot, %validator_id, "Published attestation");
}
.inspect_err(
|err| error!(%slot, %validator_id, %err, "Failed to publish attestation"),
)
else {
continue;
};
info!(%slot, %validator_id, "Published attestation");
}
}

Expand All @@ -166,7 +169,10 @@ impl BlockChainServer {
warn!(%slot, %err, "Failed to process block");
return;
}
update_head_slot(slot);
metrics::update_head_slot(slot);
metrics::update_latest_justified_slot(self.store.latest_justified().slot);
metrics::update_latest_finalized_slot(self.store.latest_finalized().slot);
metrics::update_validators_count(self.store.head_state().validators.len() as u64);
}

fn on_gossip_attestation(&mut self, attestation: SignedAttestation) {
Expand Down Expand Up @@ -228,12 +234,3 @@ impl GenServer for BlockChainServer {
CastResponse::NoReply
}
}

/// Records `slot` in the `lean_head_slot` gauge ("Latest slot of the lean chain").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A slot that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range slot cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
fn update_head_slot(slot: u64) {
    static LEAN_HEAD_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!("lean_head_slot", "Latest slot of the lean chain")
                .expect("failed to register the lean_head_slot gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the slot is a u64.
    LEAN_HEAD_SLOT.set(i64::try_from(slot).unwrap_or(i64::MAX));
}
57 changes: 57 additions & 0 deletions crates/blockchain/src/metrics.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
//! Prometheus metrics for the blockchain module.

/// Records `slot` in the `lean_head_slot` gauge ("Latest slot of the lean chain").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A slot that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range slot cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
pub fn update_head_slot(slot: u64) {
    static LEAN_HEAD_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!("lean_head_slot", "Latest slot of the lean chain")
                .expect("failed to register the lean_head_slot gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the slot is a u64.
    LEAN_HEAD_SLOT.set(i64::try_from(slot).unwrap_or(i64::MAX));
}

/// Records `slot` in the `lean_latest_justified_slot` gauge ("Latest justified slot").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A slot that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range slot cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
pub fn update_latest_justified_slot(slot: u64) {
    static LEAN_LATEST_JUSTIFIED_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!("lean_latest_justified_slot", "Latest justified slot")
                .expect("failed to register the lean_latest_justified_slot gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the slot is a u64.
    LEAN_LATEST_JUSTIFIED_SLOT.set(i64::try_from(slot).unwrap_or(i64::MAX));
}

/// Records `slot` in the `lean_latest_finalized_slot` gauge ("Latest finalized slot").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A slot that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range slot cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
pub fn update_latest_finalized_slot(slot: u64) {
    static LEAN_LATEST_FINALIZED_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!("lean_latest_finalized_slot", "Latest finalized slot")
                .expect("failed to register the lean_latest_finalized_slot gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the slot is a u64.
    LEAN_LATEST_FINALIZED_SLOT.set(i64::try_from(slot).unwrap_or(i64::MAX));
}

/// Records `slot` in the `lean_current_slot` gauge ("Current slot of the lean chain").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A slot that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range slot cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
pub fn update_current_slot(slot: u64) {
    static LEAN_CURRENT_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!("lean_current_slot", "Current slot of the lean chain")
                .expect("failed to register the lean_current_slot gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the slot is a u64.
    LEAN_CURRENT_SLOT.set(i64::try_from(slot).unwrap_or(i64::MAX));
}

/// Records `count` in the `lean_validators_count` gauge
/// ("Number of validators managed by a node").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A count that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range count cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
pub fn update_validators_count(count: u64) {
    static LEAN_VALIDATORS_COUNT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!(
                "lean_validators_count",
                "Number of validators managed by a node"
            )
            .expect("failed to register the lean_validators_count gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the count is a u64.
    LEAN_VALIDATORS_COUNT.set(i64::try_from(count).unwrap_or(i64::MAX));
}

/// Records `slot` in the `lean_safe_target_slot` gauge ("Safe target slot").
///
/// The gauge is created and registered lazily on the first call and reused afterwards.
/// A slot that does not fit in the gauge's `i64` value is reported as `i64::MAX`
/// instead of panicking, so an out-of-range slot cannot crash the caller via metrics.
///
/// # Panics
///
/// Panics on first use if the gauge cannot be registered.
pub fn update_safe_target_slot(slot: u64) {
    static LEAN_SAFE_TARGET_SLOT: std::sync::LazyLock<prometheus::IntGauge> =
        std::sync::LazyLock::new(|| {
            prometheus::register_int_gauge!("lean_safe_target_slot", "Safe target slot")
                .expect("failed to register the lean_safe_target_slot gauge")
        });
    // Saturate rather than unwrap: the gauge stores an i64, the slot is a u64.
    LEAN_SAFE_TARGET_SLOT.set(i64::try_from(slot).unwrap_or(i64::MAX));
}
11 changes: 9 additions & 2 deletions crates/blockchain/src/store.rs
Original file line number Diff line number Diff line change
Expand Up @@ -676,8 +676,15 @@ impl Store {
}

/// Returns a reference to the head state if it exists.
pub fn head_state(&self) -> Option<&State> {
self.states.get(&self.head)
/// Returns a reference to the state stored under the current head.
///
/// # Panics
///
/// Panics with "head state is always available" if `states` has no entry for `head`.
pub fn head_state(&self) -> &State {
    let head_key = &self.head;
    let maybe_state = self.states.get(head_key);
    maybe_state.expect("head state is always available")
}

/// Looks up the block stored under `safe_target` and returns its slot.
///
/// NOTE(review): the lookup uses index syntax on `blocks`, which presumably panics
/// when no block is stored for `safe_target` — confirm against the map type.
pub fn safe_target_slot(&self) -> u64 {
    let safe_target_block = &self.blocks[&self.safe_target];
    safe_target_block.slot
}
}

Expand Down
68 changes: 68 additions & 0 deletions docs/metrics.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,68 @@
# Metrics

We collect various metrics and serve them via a Prometheus-compatible HTTP endpoint at `http://<metrics_address>:<metrics_port>/metrics` (default: `http://127.0.0.1:5054/metrics`).

The exposed metrics follow [the leanMetrics specification](https://github.com/leanEthereum/leanMetrics/blob/3b32b300cca5ed7a7a2b3f142273fae9dbc171bf/metrics.md), although some metrics are not yet implemented. The tables below list every metric in the specification; the "Supported" column indicates whether each metric is currently implemented (✅) or not (□).

## Node Info Metrics

| Name | Type | Usage | Sample collection event | Labels | Supported |
|--------|-------|-------|-------------------------|--------|---------------|
| `lean_node_info` | Gauge | Node information (always 1) | On node start | name, version | □ |
| `lean_node_start_time_seconds` | Gauge | Start timestamp | On node start | | □ |


## PQ Signature Metrics

| Name | Type | Usage | Sample collection event | Labels | Buckets | Supported |
|--------|-------|-------|-------------------------|--------|---------|-----------|
| `lean_pq_sig_attestation_signing_time_seconds` | Histogram | Time taken to sign an attestation | On each attestation signing | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
| `lean_pq_sig_attestation_verification_time_seconds` | Histogram | Time taken to verify an attestation signature | On each `signature.verify()` on an attestation | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
| `lean_pq_sig_aggregated_signatures_total` | Counter | Total number of aggregated signatures | On `build_attestation_signatures()` | | | □ |
| `lean_pq_sig_attestations_in_aggregated_signatures_total` | Counter | Total number of attestations included into aggregated signatures | On `build_attestation_signatures()` | | | □ |
| `lean_pq_sig_attestation_signatures_building_time_seconds` | Histogram | Time taken to build aggregated attestation signatures | On `build_attestation_signatures()` | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
| `lean_pq_sig_aggregated_signatures_verification_time_seconds` | Histogram | Time taken to verify an aggregated attestation signature | On validate aggregated signature | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
| `lean_pq_sig_aggregated_signatures_valid_total`| Counter | Total number of valid aggregated signatures | On validate aggregated signature | | | □ |
| `lean_pq_sig_aggregated_signatures_invalid_total`| Counter | Total number of invalid aggregated signatures | On validate aggregated signature | | | □ |

## Fork-Choice Metrics

| Name | Type | Usage | Sample collection event | Labels | Buckets | Supported |
|--------|-------|-------|-------------------------|--------|---------|-----------|
| `lean_head_slot` | Gauge | Latest slot of the lean chain | On get fork choice head | | | ✅ |
| `lean_current_slot` | Gauge | Current slot of the lean chain | On scrape | | | ✅ |
| `lean_safe_target_slot` | Gauge | Safe target slot | On safe target update | | | ✅ |
|`lean_fork_choice_block_processing_time_seconds`| Histogram | Time taken to process block | On fork choice process block | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
|`lean_attestations_valid_total`| Counter | Total number of valid attestations | On validate attestation | source=block,gossip | | □ |
|`lean_attestations_invalid_total`| Counter | Total number of invalid attestations | On validate attestation | source=block,gossip | | □ |
|`lean_attestation_validation_time_seconds`| Histogram | Time taken to validate attestation | On validate attestation | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
| `lean_fork_choice_reorgs_total` | Counter | Total number of fork choice reorgs | On fork choice reorg | | | □ |
| `lean_fork_choice_reorg_depth` | Histogram | Depth of fork choice reorgs (in blocks) | On fork choice reorg | | 1, 2, 3, 5, 7, 10, 20, 30, 50, 100 | □ |

## State Transition Metrics

| Name | Type | Usage | Sample collection event | Labels | Buckets | Supported |
|--------|-------|-------|-------------------------|--------|---------|-----------|
| `lean_latest_justified_slot` | Gauge | Latest justified slot | On state transition | | | ✅ |
| `lean_latest_finalized_slot` | Gauge | Latest finalized slot | On state transition | | | ✅ |
| `lean_finalizations_total` | Counter | Total number of finalization attempts | On finalization attempt | result=success,error | | □ |
|`lean_state_transition_time_seconds`| Histogram | Time to process state transition | On state transition | | 0.25, 0.5, 0.75, 1, 1.25, 1.5, 2, 2.5, 3, 4 | □ |
|`lean_state_transition_slots_processed_total`| Counter | Total number of processed slots | On state transition process slots | | | □ |
|`lean_state_transition_slots_processing_time_seconds`| Histogram | Time taken to process slots | On state transition process slots | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
|`lean_state_transition_block_processing_time_seconds`| Histogram | Time taken to process block | On state transition process block | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |
|`lean_state_transition_attestations_processed_total`| Counter | Total number of processed attestations | On state transition process attestations | | | □ |
|`lean_state_transition_attestations_processing_time_seconds`| Histogram | Time taken to process attestations | On state transition process attestations | | 0.005, 0.01, 0.025, 0.05, 0.1, 1 | □ |

## Validator Metrics

| Name | Type | Usage | Sample collection event | Labels | Supported |
|--------|-------|-------|-------------------------|--------|-----------|
|`lean_validators_count`| Gauge | Number of validators managed by a node | On scrape | | ✅ |

## Network Metrics

| Name | Type | Usage | Sample collection event | Labels | Supported |
|--------|-------|-------|-------------------------|--------|-----------|
|`lean_connected_peers`| Gauge | Number of connected peers | On scrape | client=lantern,qlean,ream,zeam | □ |
|`lean_peer_connection_events_total`| Counter | Total number of peer connection events | On peer connection | direction=inbound,outbound<br>result=success,timeout,error | □ |
|`lean_peer_disconnection_events_total`| Counter | Total number of peer disconnection events | On peer disconnection | direction=inbound,outbound<br>reason=timeout,remote_close,local_close,error | □ |