Skip to content

Commit

Permalink
more prometheus metrics (#1612)
Browse files Browse the repository at this point in the history
* add rpc metrics

* update metrics as batches to reduce ocalls

* count all rpc requests and TC submissions

* add histogram metric for stf execution duration

* add histogram metric for stf execution call counts for success and failure

* add total issuance metric

* add parentchains processed block metrics

* measure time to load and write state

* add metric for remaining duration in AURA slots

* fmt

* taplo fmt

* clippy

* fix teeracle build

* add metric for state size

* add account balances for enclave on all parentchains and shard vault

* balance metrics with decimals applied

* add enclave fingerprint and version as labels

* add sidechain peer count metric

* add metric for last finalized sidechain block

* refactor to use more labels

* cosmetics

* fix and simplify bogus top-pool-size metric

* clippy

* fix tests

* review fixes

* nicen up

* change top pool size to a priori

* clippy
  • Loading branch information
brenzi authored Oct 15, 2024
1 parent 2dbbfb4 commit c4143d8
Show file tree
Hide file tree
Showing 48 changed files with 817 additions and 257 deletions.
5 changes: 5 additions & 0 deletions Cargo.lock
Original file line number Diff line number Diff line change
Expand Up @@ -3235,6 +3235,7 @@ dependencies = [
name = "itp-enclave-metrics"
version = "0.9.0"
dependencies = [
"itp-types",
"parity-scale-codec",
"sgx_tstd",
"substrate-fixed",
Expand Down Expand Up @@ -3453,6 +3454,7 @@ version = "0.9.0"
dependencies = [
"hex",
"itc-parentchain-test",
"itp-enclave-metrics",
"itp-node-api",
"itp-ocall-api",
"itp-sgx-crypto",
Expand All @@ -3461,6 +3463,7 @@ dependencies = [
"itp-stf-primitives",
"itp-stf-state-handler",
"itp-stf-state-observer",
"itp-storage",
"itp-test",
"itp-time-utils",
"itp-top-pool",
Expand Down Expand Up @@ -3848,7 +3851,9 @@ dependencies = [
name = "its-rpc-handler"
version = "0.9.0"
dependencies = [
"itp-enclave-metrics",
"itp-import-queue",
"itp-ocall-api",
"itp-rpc",
"itp-stf-primitives",
"itp-top-pool-author",
Expand Down
22 changes: 12 additions & 10 deletions app-libs/oracle/src/metrics_exporter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ use crate::types::{ExchangeRate, TradingPair};
use itp_enclave_metrics::{EnclaveMetric, ExchangeRateOracleMetric, OracleMetric};
use itp_ocall_api::EnclaveMetricsOCallApi;
use log::error;
use std::{string::String, sync::Arc, time::Instant};
use std::{string::String, sync::Arc, time::Instant, vec, vec::Vec};

/// Trait to export metrics for any Teeracle.
pub trait ExportMetrics<MetricsInfo> {
Expand All @@ -38,7 +38,7 @@ pub trait ExportMetrics<MetricsInfo> {
}

pub trait UpdateMetric<MetricInfo> {
fn update_metric(&self, metric: OracleMetric<MetricInfo>);
fn update_metrics(&self, metric: OracleMetric<MetricInfo>);
}

/// Metrics exporter implementation.
Expand All @@ -50,7 +50,7 @@ impl<OCallApi, MetricInfo> UpdateMetric<MetricInfo> for MetricsExporter<OCallApi
where
OCallApi: EnclaveMetricsOCallApi,
{
fn update_metric(&self, _metric: OracleMetric<MetricInfo>) {
fn update_metrics(&self, _metric: OracleMetric<MetricInfo>) {
// TODO: Implement me
}
}
Expand All @@ -63,8 +63,10 @@ where
MetricsExporter { ocall_api }
}

fn update_metric(&self, metric: ExchangeRateOracleMetric) {
if let Err(e) = self.ocall_api.update_metric(EnclaveMetric::ExchangeRateOracle(metric)) {
fn update_metrics(&self, metrics: Vec<ExchangeRateOracleMetric>) {
let wrapped_metrics =
metrics.iter().map(|m| EnclaveMetric::ExchangeRateOracle(m.clone())).collect();
if let Err(e) = self.ocall_api.update_metrics(wrapped_metrics) {
error!("Failed to update enclave metric, sgx_status_t: {}", e)
}
}
Expand All @@ -75,14 +77,14 @@ where
OCallApi: EnclaveMetricsOCallApi,
{
fn increment_number_requests(&self, source: String) {
self.update_metric(ExchangeRateOracleMetric::NumberRequestsIncrement(source));
self.update_metrics(vec![ExchangeRateOracleMetric::NumberRequestsIncrement(source)]);
}

fn record_response_time(&self, source: String, timer: Instant) {
self.update_metric(ExchangeRateOracleMetric::ResponseTime(
self.update_metrics(vec![ExchangeRateOracleMetric::ResponseTime(
source,
timer.elapsed().as_millis(),
));
)]);
}

fn update_exchange_rate(
Expand All @@ -91,11 +93,11 @@ where
exchange_rate: ExchangeRate,
trading_pair: TradingPair,
) {
self.update_metric(ExchangeRateOracleMetric::ExchangeRate(
self.update_metrics(vec![ExchangeRateOracleMetric::ExchangeRate(
source,
trading_pair.key(),
exchange_rate,
));
)]);
}

fn update_weather(&self, _source: String, _metrics_info: MetricsInfo) {
Expand Down
1 change: 1 addition & 0 deletions app-libs/stf/src/stf_sgx.rs
Original file line number Diff line number Diff line change
Expand Up @@ -171,6 +171,7 @@ where
}
fn on_finalize(_state: &mut State) -> Result<(), Self::Error> {
trace!("on_finalize called");

Ok(())
}
}
Expand Down
2 changes: 1 addition & 1 deletion core-primitives/enclave-metrics/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@ edition = "2021"

[dependencies]
# sgx
itp-types = { path = "../types", default-features = false }
sgx_tstd = { branch = "master", git = "https://github.com/apache/teaclave-sgx-sdk.git", optional = true }

# no-std dependencies
codec = { package = "parity-scale-codec", version = "3.0.0", default-features = false, features = ["derive", "full"] }
substrate-fixed = { default-features = false, git = "https://github.com/encointer/substrate-fixed", tag = "v0.5.9" }
Expand Down
23 changes: 17 additions & 6 deletions core-primitives/enclave-metrics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,23 +24,34 @@ compile_error!("feature \"std\" and feature \"sgx\" cannot be enabled at the sam
extern crate sgx_tstd as std;

use codec::{Decode, Encode};
use core::time::Duration;
use itp_types::{
parentchain::{BlockNumber, ParentchainId},
ShardIdentifier,
};
use std::string::String;
use substrate_fixed::types::U32F32;

// FIXME: Copied from ita-oracle because of cyclic deps. Should be removed after integritee-network/pallets#71
pub type ExchangeRate = U32F32;

#[derive(Encode, Decode, Debug)]
#[derive(Encode, Decode, Debug, Clone)]
pub enum EnclaveMetric {
SetSidechainBlockHeight(u64),
TopPoolSizeSet(u64),
TopPoolSizeIncrement,
TopPoolSizeDecrement,
TopPoolAPrioriSizeSet(u64),
RpcRequestsIncrement,
RpcTrustedCallsIncrement,
SidechainAuraSlotRemainingTimes(String, Duration),
StfStateUpdateExecutionDuration(Duration),
StfStateUpdateExecutedCallsCount(bool, u64),
StfStateSizeSet(ShardIdentifier, u64),
StfRuntimeTotalIssuanceSet(f64),
StfRuntimeParentchainProcessedBlockNumberSet(ParentchainId, BlockNumber),
ExchangeRateOracle(ExchangeRateOracleMetric),
// OracleMetric(OracleMetric<MetricsInfo>),
}

#[derive(Encode, Decode, Debug)]
#[derive(Encode, Decode, Debug, Clone)]
pub enum ExchangeRateOracleMetric {
/// Exchange Rate from CoinGecko - (Source, TradingPair, ExchangeRate)
ExchangeRate(String, String, ExchangeRate),
Expand All @@ -50,7 +61,7 @@ pub enum ExchangeRateOracleMetric {
NumberRequestsIncrement(String),
}

#[derive(Encode, Decode, Debug)]
#[derive(Encode, Decode, Debug, Clone)]
pub enum OracleMetric<MetricsInfo> {
OracleSpecificMetric(MetricsInfo),
ResponseTime(String, u128),
Expand Down
1 change: 1 addition & 0 deletions core-primitives/node-api/api-client-extensions/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ pub use substrate_api_client::{api::Error as ApiClientError, rpc::TungsteniteRpc

pub mod account;
pub mod chain;
pub mod pallet_sidechain;
pub mod pallet_teeracle;
pub mod pallet_teerex;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
use crate::ApiResult;
use itp_api_client_types::{traits::GetStorage, Api, Config, Request};
use itp_types::{parentchain::SidechainBlockConfirmation, ShardIdentifier};

/// Name handed to `get_storage_map` as its first argument when reading sidechain storage.
/// NOTE(review): presumably the on-chain pallet name — confirm against the runtime metadata.
pub const SIDECHAIN: &str = "Sidechain";

/// Read access to the sidechain confirmation data exposed through a node API client.
pub trait PalletSidechainApi {
/// Hash type used to pin a query to a specific block via `at_block`.
type Hash;

/// Fetches the latest sidechain block confirmation recorded for `shard`.
///
/// `at_block` optionally selects the block whose state is queried.
/// NOTE(review): `None` presumably means "latest state" — confirm against the
/// underlying storage API.
///
/// The inner `Option` is `None` when the query yields no value; errors of the
/// underlying client are returned through `ApiResult`.
fn latest_sidechain_block_confirmation(
&self,
shard: &ShardIdentifier,
at_block: Option<Self::Hash>,
) -> ApiResult<Option<SidechainBlockConfirmation>>;
}

/// `PalletSidechainApi` for the generic node `Api`, answered by a storage-map lookup
/// keyed by the shard identifier.
impl<RuntimeConfig, Client> PalletSidechainApi for Api<RuntimeConfig, Client>
where
    RuntimeConfig: Config,
    Client: Request,
{
    type Hash = RuntimeConfig::Hash;

    /// Reads the `LatestSidechainBlockConfirmation` map entry for `shard`,
    /// optionally at the state of `at_block`.
    fn latest_sidechain_block_confirmation(
        &self,
        shard: &ShardIdentifier,
        at_block: Option<Self::Hash>,
    ) -> ApiResult<Option<SidechainBlockConfirmation>> {
        // Storage item queried under the `SIDECHAIN` prefix.
        const STORAGE_ITEM: &str = "LatestSidechainBlockConfirmation";

        self.get_storage_map(SIDECHAIN, STORAGE_ITEM, shard, at_block)
    }
}
2 changes: 1 addition & 1 deletion core-primitives/ocall-api/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ pub trait EnclaveOnChainOCallApi: Clone + Send + Sync {

/// Trait for sending metric updates.
pub trait EnclaveMetricsOCallApi: Clone + Send + Sync {
fn update_metric<Metric: Encode>(&self, metric: Metric) -> SgxResult<()>;
fn update_metrics<Metric: Encode>(&self, metric: Vec<Metric>) -> SgxResult<()>;
}

pub trait EnclaveSidechainOCallApi: Clone + Send + Sync {
Expand Down
2 changes: 2 additions & 0 deletions core-primitives/stf-executor/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ sgx_tstd = { branch = "master", git = "https://github.com/apache/teaclave-sgx-sd
sgx_types = { branch = "master", git = "https://github.com/apache/teaclave-sgx-sdk.git" }

# local dependencies
itp-enclave-metrics = { path = "../enclave-metrics", default-features = false }
itp-node-api = { path = "../node-api", default-features = false }
itp-ocall-api = { path = "../ocall-api", default-features = false }
itp-sgx-crypto = { path = "../sgx/crypto", default-features = false }
Expand All @@ -20,6 +21,7 @@ itp-stf-interface = { path = "../stf-interface", default-features = false }
itp-stf-primitives = { path = "../stf-primitives", default-features = false }
itp-stf-state-handler = { path = "../stf-state-handler", default-features = false }
itp-stf-state-observer = { path = "../stf-state-observer", default-features = false }
itp-storage = { path = "../storage", default-features = false }
itp-time-utils = { path = "../time-utils", default-features = false }
itp-top-pool-author = { path = "../top-pool-author", default-features = false }
itp-types = { path = "../types", default-features = false }
Expand Down
82 changes: 76 additions & 6 deletions core-primitives/stf-executor/src/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,9 @@ use crate::{
BatchExecutionResult, ExecutedOperation,
};
use codec::{Decode, Encode};
use itp_enclave_metrics::EnclaveMetric;
use itp_node_api::metadata::{provider::AccessNodeMetadata, NodeMetadataTrait};
use itp_ocall_api::{EnclaveAttestationOCallApi, EnclaveOnChainOCallApi};
use itp_ocall_api::{EnclaveAttestationOCallApi, EnclaveMetricsOCallApi, EnclaveOnChainOCallApi};
use itp_sgx_externalities::{SgxExternalitiesTrait, StateHash};
use itp_stf_interface::{
parentchain_pallet::ParentchainPalletInstancesInterface, StateCallInterface, UpdateState,
Expand All @@ -32,11 +33,12 @@ use itp_stf_primitives::{
types::{ShardIdentifier, TrustedOperation, TrustedOperationOrHash},
};
use itp_stf_state_handler::{handle_state::HandleState, query_shard_state::QueryShardState};
use itp_storage::keys::storage_value_key;
use itp_time_utils::{duration_now, now_as_millis};
use itp_types::{
parentchain::{Header as ParentchainHeader, ParentchainCall, ParentchainId},
parentchain::{BlockNumber, Header as ParentchainHeader, ParentchainCall, ParentchainId},
storage::StorageEntryVerified,
H256,
Balance, H256,
};
use log::*;
use sp_runtime::traits::Header as HeaderTrait;
Expand All @@ -59,7 +61,7 @@ where
impl<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G>
StfExecutor<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G>
where
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi,
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi + EnclaveMetricsOCallApi,
StateHandler: HandleState<HashType = H256>,
StateHandler::StateT: SgxExternalitiesTrait + Encode,
NodeMetadataRepository: AccessNodeMetadata,
Expand Down Expand Up @@ -278,7 +280,7 @@ where
impl<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G> StateUpdateProposer<TCS, G>
for StfExecutor<OCallApi, StateHandler, NodeMetadataRepository, Stf, TCS, G>
where
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi,
OCallApi: EnclaveAttestationOCallApi + EnclaveOnChainOCallApi + EnclaveMetricsOCallApi,
StateHandler: HandleState<HashType = H256>,
StateHandler::StateT: SgxExternalitiesTrait + Encode + StateHash,
<StateHandler::StateT as SgxExternalitiesTrait>::SgxExternalitiesType: Encode,
Expand Down Expand Up @@ -310,7 +312,8 @@ where
PH: HeaderTrait<Hash = H256>,
F: FnOnce(Self::Externalities) -> Self::Externalities,
{
let ends_at = duration_now() + max_exec_duration;
let started_at = duration_now();
let ends_at = started_at + max_exec_duration;

let (state, state_hash_before_execution) = self.state_handler.load_cloned(shard)?;

Expand Down Expand Up @@ -350,6 +353,33 @@ where
error!("on_finalize failed: {:?}", e);
});

let state_size_bytes = state.size();
let runtime_metrics = gather_runtime_metrics(&state);
let successful_call_count =
executed_and_failed_calls.iter().filter(|call| call.is_success()).count();
let failed_call_count = executed_and_failed_calls.len() - successful_call_count;
self.ocall_api
.update_metrics(vec![
EnclaveMetric::StfStateUpdateExecutionDuration(duration_now() - started_at),
EnclaveMetric::StfStateUpdateExecutedCallsCount(true, successful_call_count as u64),
EnclaveMetric::StfStateUpdateExecutedCallsCount(false, failed_call_count as u64),
EnclaveMetric::TopPoolAPrioriSizeSet(trusted_calls.len() as u64),
EnclaveMetric::StfStateSizeSet(*shard, state_size_bytes as u64),
EnclaveMetric::StfRuntimeTotalIssuanceSet(runtime_metrics.total_issuance),
EnclaveMetric::StfRuntimeParentchainProcessedBlockNumberSet(
ParentchainId::Integritee,
runtime_metrics.parentchain_integritee_processed_block_number,
),
EnclaveMetric::StfRuntimeParentchainProcessedBlockNumberSet(
ParentchainId::TargetA,
runtime_metrics.parentchain_target_a_processed_block_number,
),
EnclaveMetric::StfRuntimeParentchainProcessedBlockNumberSet(
ParentchainId::TargetB,
runtime_metrics.parentchain_target_b_processed_block_number,
),
])
.unwrap_or_else(|e| error!("failed to update prometheus metric: {:?}", e));
Ok(BatchExecutionResult {
executed_operations: executed_and_failed_calls,
state_hash_before_execution,
Expand All @@ -374,3 +404,43 @@ pub fn shards_key_hash() -> Vec<u8> {
// ShardIdentifiers the enclave uses this to autosubscribe to no shards
vec![]
}

/// Pulls a few runtime figures directly out of raw STF state storage.
///
/// The storage keys assume the common sgx_runtime layout (`Balances`,
/// `ParentchainIntegritee`, `ParentchainTargetA`, `ParentchainTargetB`). Reading raw
/// storage may not be the best abstraction, but it avoids circular dependencies with
/// app-libs and is expected to fit nearly all cases.
fn gather_runtime_metrics<State>(state: &State) -> RuntimeMetrics
where
    State: SgxExternalitiesTrait + Encode,
{
    // Last processed block number stored by the given parentchain pallet.
    // A missing or undecodable entry falls back to zero, which is fine here.
    let processed_block_number = |pallet: &str| -> u32 {
        state
            .get(&storage_value_key(pallet, "Number"))
            .and_then(|raw| BlockNumber::decode(&mut raw.as_slice()).ok())
            .unwrap_or_default()
    };

    // -1.0 is the sentinel for a missing or undecodable total issuance; it is used
    // instead of NaN for the Prometheus exporter.
    let total_issuance = state
        .get(&storage_value_key("Balances", "TotalIssuance"))
        .and_then(|raw| Balance::decode(&mut raw.as_slice()).ok())
        .map_or(-1.0, |issuance| issuance as f64);

    RuntimeMetrics {
        total_issuance,
        parentchain_integritee_processed_block_number: processed_block_number(
            "ParentchainIntegritee",
        ),
        parentchain_target_a_processed_block_number: processed_block_number(
            "ParentchainTargetA",
        ),
        parentchain_target_b_processed_block_number: processed_block_number(
            "ParentchainTargetB",
        ),
    }
}

/// Runtime figures read from STF state by `gather_runtime_metrics`.
struct RuntimeMetrics {
// Total issuance converted to `f64`; `-1.0` when the value is missing or cannot be decoded.
total_issuance: f64,
// Value read from the `ParentchainIntegritee` pallet's `Number` storage; `0` when unavailable.
parentchain_integritee_processed_block_number: u32,
// Value read from the `ParentchainTargetA` pallet's `Number` storage; `0` when unavailable.
parentchain_target_a_processed_block_number: u32,
// Value read from the `ParentchainTargetB` pallet's `Number` storage; `0` when unavailable.
parentchain_target_b_processed_block_number: u32,
}
Loading

0 comments on commit c4143d8

Please sign in to comment.