Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

chore: add Open Telemetry attributes to grpc spans #698

Merged
Merged
Show file tree
Hide file tree
Changes from 17 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
320 changes: 319 additions & 1 deletion Cargo.lock

Large diffs are not rendered by default.

3 changes: 3 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ version = "0.8.0"

[workspace.dependencies]
assert_matches = { version = "1.5" }
http = { version = "1.2" }
itertools = { version = "0.14" }
miden-air = { version = "0.12" }
miden-lib = { git = "https://github.com/0xPolygonMiden/miden-base", branch = "next" }
Expand All @@ -44,6 +45,8 @@ thiserror = { version = "2.0", default-features = false }
tokio = { version = "1.40", features = ["rt-multi-thread"] }
tokio-stream = { version = "0.1" }
tonic = { version = "0.12" }
tower = { version = "0.5" }
tower-http = { version = "0.6", features = ["trace"] }
tracing = { version = "0.1" }
tracing-subscriber = { version = "0.3", features = ["env-filter", "fmt", "json"] }
url = { version = "2.5", features = ["serde"] }
Expand Down
4 changes: 2 additions & 2 deletions bin/faucet/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@ thiserror = { workspace = true }
tokio = { workspace = true, features = ["fs"] }
toml = { version = "0.8" }
tonic = { workspace = true }
tower = "0.5"
tower-http = { version = "0.6", features = ["cors", "set-header", "trace"] }
tower = { workspace = true }
tower-http = { workspace = true, features = ["cors", "set-header", "trace"] }
tracing = { workspace = true }
url = { workspace = true }

Expand Down
4 changes: 2 additions & 2 deletions crates/block-producer/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@ itertools = { workspace = true }
miden-block-prover = { git = "https://github.com/0xPolygonMiden/miden-base.git", branch = "next" }
miden-lib = { workspace = true }
miden-node-proto = { workspace = true }
miden-node-utils = { workspace = true }
miden-objects = { workspace = true }
miden-processor = { workspace = true }
miden-tx = { workspace = true }
Expand All @@ -34,7 +33,8 @@ serde = { version = "1.0", features = ["derive"] }
thiserror = { workspace = true }
tokio = { workspace = true, features = ["macros", "net", "rt-multi-thread", "sync", "time"] }
tokio-stream = { workspace = true, features = ["net"] }
tonic = { workspace = true }
tonic = { workspace = true, features = ["transport"] }
tower-http = { workspace = true, features = ["util"] }
tracing = { workspace = true }
url = { workspace = true }

Expand Down
6 changes: 4 additions & 2 deletions crates/block-producer/src/server.rs
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,15 @@ use miden_node_proto::generated::{
use miden_node_utils::{
errors::ApiError,
formatting::{format_input_notes, format_output_notes},
tracing::grpc::OtelInterceptor,
tracing::grpc::{block_producer_trace_fn, OtelInterceptor},
};
use miden_objects::{
block::BlockNumber, transaction::ProvenTransaction, utils::serde::Deserializable,
};
use tokio::{net::TcpListener, sync::Mutex};
use tokio_stream::wrappers::TcpListenerStream;
use tonic::Status;
use tower_http::trace::TraceLayer;
use tracing::{debug, info, instrument};

use crate::{
Expand Down Expand Up @@ -211,8 +212,9 @@ impl BlockProducerRpcServer {
}

async fn serve(self, listener: TcpListener) -> Result<(), tonic::transport::Error> {
// Build the gRPC server with the API service and trace layer.
tonic::transport::Server::builder()
.trace_fn(miden_node_utils::tracing::grpc::block_producer_trace_fn)
.layer(TraceLayer::new_for_grpc().make_span_with(block_producer_trace_fn))
.add_service(api_server::ApiServer::new(self))
.serve_with_incoming(TcpListenerStream::new(listener))
.await
Expand Down
1 change: 1 addition & 0 deletions crates/store/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ thiserror = { workspace = true }
tokio = { workspace = true, features = ["fs", "macros", "net", "rt-multi-thread"] }
tokio-stream = { workspace = true, features = ["net"] }
tonic = { workspace = true }
tower-http = { workspace = true, features = ["util"] }
tracing = { workspace = true }
url = { workspace = true }

Expand Down
6 changes: 4 additions & 2 deletions crates/store/src/server/mod.rs
Original file line number Diff line number Diff line change
@@ -1,9 +1,10 @@
use std::sync::Arc;

use miden_node_proto::generated::store::api_server;
use miden_node_utils::errors::ApiError;
use miden_node_utils::{errors::ApiError, tracing::grpc::store_trace_fn};
use tokio::net::TcpListener;
use tokio_stream::wrappers::TcpListenerStream;
use tower_http::trace::TraceLayer;
use tracing::info;

use crate::{blocks::BlockStore, config::StoreConfig, db::Db, state::State, COMPONENT};
Expand Down Expand Up @@ -61,8 +62,9 @@ impl Store {
///
/// Note: this blocks until the server dies.
pub async fn serve(self) -> Result<(), ApiError> {
// Build the gRPC server with the API service and trace layer.
tonic::transport::Server::builder()
.trace_fn(miden_node_utils::tracing::grpc::store_trace_fn)
.layer(TraceLayer::new_for_grpc().make_span_with(store_trace_fn))
.add_service(self.api_service)
.serve_with_incoming(TcpListenerStream::new(self.listener))
.await
Expand Down
5 changes: 3 additions & 2 deletions crates/utils/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -21,12 +21,12 @@ vergen = ["dep:vergen", "dep:vergen-gitcl"]
[dependencies]
anyhow = { version = "1.0" }
figment = { version = "0.10", features = ["env", "toml"] }
http = { version = "1.2" }
http = { workspace = true }
itertools = { workspace = true }
miden-objects = { workspace = true }
opentelemetry = { version = "0.28" }
opentelemetry-otlp = { version = "0.28", default-features = false, features = ["grpc-tonic", "tls-roots", "trace"] }
opentelemetry_sdk = { version = "0.28", features = ["rt-tokio"] }
opentelemetry_sdk = { version = "0.28", features = ["rt-tokio", "testing"] }
rand = { workspace = true }
serde = { version = "1.0", features = ["derive"] }
thiserror = { workspace = true }
Expand All @@ -35,6 +35,7 @@ tracing = { workspace = true }
tracing-forest = { version = "0.1", optional = true, features = ["chrono"] }
tracing-opentelemetry = { version = "0.29" }
tracing-subscriber = { workspace = true }

# Optional dependencies enabled by `vergen` feature.
# This must match the version expected by `vergen-gitcl`.
vergen = { "version" = "9.0", optional = true }
Expand Down
24 changes: 15 additions & 9 deletions crates/utils/src/logging.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ use std::str::FromStr;
use anyhow::Result;
use opentelemetry::trace::TracerProvider as _;
use opentelemetry_otlp::WithTonicConfig;
use opentelemetry_sdk::propagation::TraceContextPropagator;
use opentelemetry_sdk::{propagation::TraceContextPropagator, trace::SpanExporter};
use tracing::subscriber::Subscriber;
use tracing_opentelemetry::OpenTelemetryLayer;
use tracing_subscriber::{
Expand Down Expand Up @@ -39,25 +39,31 @@ pub fn setup_tracing(otel: OpenTelemetry) -> Result<()> {
// Note: open-telemetry requires a tokio-runtime, so this _must_ be lazily evaluated (aka not
// `then_some`) to avoid crashing sync callers (with OpenTelemetry::Disabled set). Examples of
// such callers are tests with logging enabled.
let otel_layer = otel.is_enabled().then(open_telemetry_layer);
let otel_layer = {
if otel.is_enabled() {
let exporter = opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_tls_config(tonic::transport::ClientTlsConfig::new().with_native_roots())
.build()?;
Some(open_telemetry_layer(exporter))
} else {
None
}
};

let subscriber = Registry::default()
.with(stdout_layer().with_filter(env_or_default_filter()))
.with(otel_layer.with_filter(env_or_default_filter()));
tracing::subscriber::set_global_default(subscriber).map_err(Into::into)
}

fn open_telemetry_layer<S>() -> Box<dyn tracing_subscriber::Layer<S> + Send + Sync + 'static>
fn open_telemetry_layer<S>(
exporter: impl SpanExporter + 'static,
) -> Box<dyn tracing_subscriber::Layer<S> + Send + Sync + 'static>
where
S: Subscriber + Sync + Send,
for<'a> S: tracing_subscriber::registry::LookupSpan<'a>,
{
let exporter = opentelemetry_otlp::SpanExporter::builder()
.with_tonic()
.with_tls_config(tonic::transport::ClientTlsConfig::new().with_native_roots())
.build()
.unwrap();

let tracer = opentelemetry_sdk::trace::SdkTracerProvider::builder()
.with_batch_exporter(exporter)
.build();
Expand Down
81 changes: 58 additions & 23 deletions crates/utils/src/tracing/grpc.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,25 @@
use tracing_opentelemetry::OpenTelemetrySpanExt;
/// Creates a [`tracing::Span`] based on RPC service and method name.
macro_rules! rpc_span {
($service:expr, $method:expr) => {
tracing::info_span!(
concat!($service, "/", $method),
rpc.service = $service,
rpc.method = $method
)
};
}

/// A [`trace_fn`](tonic::transport::server::Server) implementation for the block producer which
/// adds open-telemetry information to the span.
///
/// Creates an `info` span following the open-telemetry standard: `block-producer.rpc/{method}`.
/// Additionally also pulls in remote tracing context which allows the server trace to be connected
/// to the client's origin trace.
pub fn block_producer_trace_fn(request: &http::Request<()>) -> tracing::Span {
pub fn block_producer_trace_fn<T>(request: &http::Request<T>) -> tracing::Span {
let span = if let Some("SubmitProvenTransaction") = request.uri().path().rsplit('/').next() {
tracing::info_span!("block-producer.rpc/SubmitProvenTransaction")
rpc_span!("block-producer.rpc", "SubmitProvenTransaction")
} else {
tracing::info_span!("block-producer.rpc/Unknown")
rpc_span!("block-producer.rpc", "Unknown")
};

add_otel_span_attributes(span, request)
Expand All @@ -22,23 +31,23 @@ pub fn block_producer_trace_fn(request: &http::Request<()>) -> tracing::Span {
/// Creates an `info` span following the open-telemetry standard: `store.rpc/{method}`. Additionally
/// also pulls in remote tracing context which allows the server trace to be connected to the
/// client's origin trace.
pub fn store_trace_fn(request: &http::Request<()>) -> tracing::Span {
pub fn store_trace_fn<T>(request: &http::Request<T>) -> tracing::Span {
let span = match request.uri().path().rsplit('/').next() {
Some("ApplyBlock") => tracing::info_span!("store.rpc/ApplyBlock"),
Some("CheckNullifiers") => tracing::info_span!("store.rpc/CheckNullifiers"),
Some("CheckNullifiersByPrefix") => tracing::info_span!("store.rpc/CheckNullifiersByPrefix"),
Some("GetAccountDetails") => tracing::info_span!("store.rpc/GetAccountDetails"),
Some("GetAccountProofs") => tracing::info_span!("store.rpc/GetAccountProofs"),
Some("GetAccountStateDelta") => tracing::info_span!("store.rpc/GetAccountStateDelta"),
Some("GetBlockByNumber") => tracing::info_span!("store.rpc/GetBlockByNumber"),
Some("GetBlockHeaderByNumber") => tracing::info_span!("store.rpc/GetBlockHeaderByNumber"),
Some("GetBlockInputs") => tracing::info_span!("store.rpc/GetBlockInputs"),
Some("GetBatchInputs") => tracing::info_span!("store.rpc/GetBatchInputs"),
Some("GetNotesById") => tracing::info_span!("store.rpc/GetNotesById"),
Some("GetTransactionInputs") => tracing::info_span!("store.rpc/GetTransactionInputs"),
Some("SyncNotes") => tracing::info_span!("store.rpc/SyncNotes"),
Some("SyncState") => tracing::info_span!("store.rpc/SyncState"),
_ => tracing::info_span!("store.rpc/Unknown"),
Some("ApplyBlock") => rpc_span!("store.rpc", "ApplyBlock"),
Some("CheckNullifiers") => rpc_span!("store.rpc", "CheckNullifiers"),
Some("CheckNullifiersByPrefix") => rpc_span!("store.rpc", "CheckNullifiersByPrefix"),
Some("GetAccountDetails") => rpc_span!("store.rpc", "GetAccountDetails"),
Some("GetAccountProofs") => rpc_span!("store.rpc", "GetAccountProofs"),
Some("GetAccountStateDelta") => rpc_span!("store.rpc", "GetAccountStateDelta"),
Some("GetBlockByNumber") => rpc_span!("store.rpc", "GetBlockByNumber"),
Some("GetBlockHeaderByNumber") => rpc_span!("store.rpc", "GetBlockHeaderByNumber"),
Some("GetBlockInputs") => rpc_span!("store.rpc", "GetBlockInputs"),
Some("GetBatchInputs") => rpc_span!("store.rpc", "GetBatchInputs"),
Some("GetNotesById") => rpc_span!("store.rpc", "GetNotesById"),
Some("GetTransactionInputs") => rpc_span!("store.rpc", "GetTransactionInputs"),
Some("SyncNotes") => rpc_span!("store.rpc", "SyncNotes"),
Some("SyncState") => rpc_span!("store.rpc", "SyncState"),
_ => rpc_span!("store.rpc", "Unknown"),
};

add_otel_span_attributes(span, request)
Expand All @@ -47,19 +56,44 @@ pub fn store_trace_fn(request: &http::Request<()>) -> tracing::Span {
/// Adds remote tracing context to the span.
///
/// Could be expanded in the future by adding in more open-telemetry properties.
fn add_otel_span_attributes(span: tracing::Span, request: &http::Request<()>) -> tracing::Span {
fn add_otel_span_attributes<T>(span: tracing::Span, request: &http::Request<T>) -> tracing::Span {
use super::OpenTelemetrySpanExt;
// Pull the open-telemetry parent context using the HTTP extractor. We could make a more
// generic gRPC extractor by utilising the gRPC metadata. However that
// (a) requires cloning headers,
// (b) we would have to write this ourselves, and
// (c) gRPC metadata is transferred using HTTP headers in any case.
use tracing_opentelemetry::OpenTelemetrySpanExt;
let otel_ctx = opentelemetry::global::get_text_map_propagator(|propagator| {
propagator.extract(&MetadataExtractor(&tonic::metadata::MetadataMap::from_headers(
request.headers().clone(),
)))
});
span.set_parent(otel_ctx);
tracing_opentelemetry::OpenTelemetrySpanExt::set_parent(&span, otel_ctx);

// Set HTTP attributes.
// See https://opentelemetry.io/docs/specs/semconv/rpc/rpc-spans/#server-attributes.
span.set_attribute("rpc.system", "grpc");
if let Some(host) = request.uri().host() {
span.set_attribute("server.address", host);
}
if let Some(host_port) = request.uri().port() {
span.set_attribute("server.port", host_port.as_str());
}
let remote_addr = request
.extensions()
.get::<tonic::transport::server::TcpConnectInfo>()
.and_then(tonic::transport::server::TcpConnectInfo::remote_addr);
if let Some(addr) = remote_addr {
span.set_attribute("client.address", addr.ip());
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is the one that the spec says may be the DNS name if no DNS lookup is required.

Client address - domain name if available without reverse DNS lookup; otherwise, IP address or Unix domain socket name.

I'm wondering if we shouldn't use the text_map_propagator to cheat the system a bit, and on the client side inject block-producer and rpc as an additional property.

We could then use that here, and otherwise default to the IP if its missing.

Could also be done as a follow-up PR - I imagine you might be very much over this back-and-forth :D

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could also be done as a follow-up PR - I imagine you might be very much over this back-and-forth :D

Not at all - happy to iterate on this PR as much as required.

I'm wondering if we shouldn't use the text_map_propagator to cheat the system a bit, and on the client side inject block-producer and rpc as an additional property.
We could then use that here, and otherwise default to the IP if its missing.

I'm a bit dubious as to the utility of that functionality but happy to add that to this PR if you think it is worthwhile.

Copy link
Contributor

@Mirko-von-Leipzig Mirko-von-Leipzig Mar 3, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The main reason I'm considering it is because our components can (and currently do) run on the same server instance (and same process). Which means for the inter-component comms, the client and server IP address will just be the same since they're the same for all three components.

So I think it would help having the "caller" identifiable at least. Though maybe instead we should be hosting different api endpoints for this internal communication in any case e.g. store.block-producer/get_block_inputs and store.rpc/get_state, which would do the same job and provide some additional decoupling.

I'm happy to just punt this to a separate issue instead.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ah ok yea that makes sense. Thanks for elaborating

span.set_attribute("client.port", addr.port());
span.set_attribute("network.peer.address", addr.ip());
span.set_attribute("network.peer.port", addr.port());
span.set_attribute("network.transport", "tcp");
match addr.ip() {
std::net::IpAddr::V4(_) => span.set_attribute("network.type", "ipv4"),
std::net::IpAddr::V6(_) => span.set_attribute("network.type", "ipv6"),
}
}

span
}
Expand All @@ -73,6 +107,7 @@ impl tonic::service::Interceptor for OtelInterceptor {
&mut self,
mut request: tonic::Request<()>,
) -> Result<tonic::Request<()>, tonic::Status> {
use tracing_opentelemetry::OpenTelemetrySpanExt;
let ctx = tracing::Span::current().context();
opentelemetry::global::get_text_map_propagator(|propagator| {
propagator.inject_context(&ctx, &mut MetadataInjector(request.metadata_mut()));
Expand Down
53 changes: 39 additions & 14 deletions crates/utils/src/tracing/span_ext.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
use core::time::Duration;
use std::net::IpAddr;

use miden_objects::{block::BlockNumber, Digest};
use opentelemetry::{trace::Status, Key, Value};
Expand All @@ -20,29 +21,53 @@ impl ToValue for Digest {
}
}

impl ToValue for f64 {
fn to_value(&self) -> Value {
(*self).into()
}
}

impl ToValue for BlockNumber {
fn to_value(&self) -> Value {
i64::from(self.as_u32()).into()
}
}

impl ToValue for u32 {
fn to_value(&self) -> Value {
i64::from(*self).into()
}
/// Generates `impl ToValue` blocks for types that are `ToString`.
macro_rules! impl_to_string_to_value {
($($t:ty),*) => {
$(
impl ToValue for $t {
fn to_value(&self) -> Value {
self.to_string().into()
}
}
)*
};
}
impl_to_string_to_value!(IpAddr, &str);

impl ToValue for i64 {
fn to_value(&self) -> Value {
(*self).into()
}
/// Generates `impl ToValue` blocks for integer types.
macro_rules! impl_int_to_value {
($($t:ty),*) => {
$(
impl ToValue for $t {
fn to_value(&self) -> Value {
i64::from(*self).into()
}
}
)*
};
}
impl_int_to_value!(u16, u32, i64);

/// Generates `impl ToValue` blocks for types that are `Into<Value>`.
macro_rules! impl_to_value {
($($t:ty),*) => {
$(
impl ToValue for $t {
fn to_value(&self) -> Value {
(*self).into()
}
}
)*
};
}
impl_to_value!(f64);

/// Utility functions based on [`tracing_opentelemetry::OpenTelemetrySpanExt`].
///
Expand Down
3 changes: 2 additions & 1 deletion docs/operator.md
Original file line number Diff line number Diff line change
Expand Up @@ -90,6 +90,7 @@ block_builder.build_block
┕━ mempool.revert_expired_transactions
┕━ mempool.revert_transactions
```

</details>

#### Batch building
Expand Down Expand Up @@ -143,7 +144,7 @@ The exporter can be configured using environment variables as specified in the o
> [setup guide](https://docs.honeycomb.io/send-data/opentelemetry/#using-the-honeycomb-opentelemetry-endpoint).

```sh
OTEL_EXPORTER_OTLP_ENDPOINT=api.honeycomb.io:443 \
OTEL_EXPORTER_OTLP_ENDPOINT=https://api.honeycomb.io:443 \
OTEL_EXPORTER_OTLP_HEADERS="x-honeycomb-team=your-api-key" \
miden-node start --open-telemetry node
```
Expand Down