From f87f0fc7d465f6ee9dbe72e2c6213f90f5cca264 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jorge=20Ant=C3=B3nio?= Date: Tue, 6 May 2025 09:06:32 +0100 Subject: [PATCH 01/61] feat: ppcie verifier logic (#441) * chore(deps): bump atoma-utils from `d59ea1f` to `dfe0830` Bumps [atoma-utils](https://github.com/atoma-network/atoma-node) from `d59ea1f` to `dfe0830`. - [Release notes](https://github.com/atoma-network/atoma-node/releases) - [Commits](https://github.com/atoma-network/atoma-node/compare/d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5...dfe0830ecffc5429b4c131bc88b18fb8016cd5b9) --- updated-dependencies: - dependency-name: atoma-utils dependency-version: dfe0830ecffc5429b4c131bc88b18fb8016cd5b9 dependency-type: direct:production ... Signed-off-by: dependabot[bot] * chore(deps): bump atoma-p2p from `d59ea1f` to `dfe0830` Bumps [atoma-p2p](https://github.com/atoma-network/atoma-node) from `d59ea1f` to `dfe0830`. - [Release notes](https://github.com/atoma-network/atoma-node/releases) - [Commits](https://github.com/atoma-network/atoma-node/compare/d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5...dfe0830ecffc5429b4c131bc88b18fb8016cd5b9) --- updated-dependencies: - dependency-name: atoma-p2p dependency-version: dfe0830ecffc5429b4c131bc88b18fb8016cd5b9 dependency-type: direct:production ... Signed-off-by: dependabot[bot] * chore(deps): bump atoma-sui from `d59ea1f` to `dfe0830` Bumps [atoma-sui](https://github.com/atoma-network/atoma-node) from `d59ea1f` to `dfe0830`. - [Release notes](https://github.com/atoma-network/atoma-node/releases) - [Commits](https://github.com/atoma-network/atoma-node/compare/d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5...dfe0830ecffc5429b4c131bc88b18fb8016cd5b9) --- updated-dependencies: - dependency-name: atoma-sui dependency-version: dfe0830ecffc5429b4c131bc88b18fb8016cd5b9 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] * ppcie verifier logic * add tables for fiat (#442) * fix: ensure 400 is returned on cancelled streams * chore: removed unnecessary 400 response * chore: remove unnecessary comment and clone * feat: add metrics to track latency from proxy to node (#444) * feat: add metrics to track latency from proxy to node * fix: move network ticker to seperate thread * chore: added docs + removed unnecessary port * fix: remove unnecessary prepend * fix: add model names to stream cancellations (#447) * handle PR comments --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Co-authored-by: Chad Nehemiah --- Cargo.lock | 46 ++- Cargo.toml | 135 ++++---- atoma-proxy-service/src/handlers/auth.rs | 2 +- .../src/server/handlers/chat_completions.rs | 6 +- atoma-proxy/src/server/handlers/metrics.rs | 19 + atoma-proxy/src/server/streamer.rs | 9 +- atoma-state/Cargo.toml | 50 +-- atoma-state/src/errors.rs | 13 +- atoma-state/src/handlers.rs | 327 +++++++++++++++++- atoma-state/src/lib.rs | 1 + .../20250424111320_fiat_payments.sql | 18 + atoma-state/src/network.rs | 110 ++++++ atoma-state/src/state_manager.rs | 127 +++++-- 13 files changed, 700 insertions(+), 163 deletions(-) create mode 100644 atoma-state/src/migrations/20250424111320_fiat_payments.sql create mode 100644 atoma-state/src/network.rs diff --git a/Cargo.lock b/Cargo.lock index 31a23ff4..b67b6df8 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -719,7 +719,7 @@ dependencies = [ [[package]] name = "atoma-p2p" version = "0.1.0" -source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5" +source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#dfe0830ecffc5429b4c131bc88b18fb8016cd5b9" dependencies = [ "blake3", "bytes", @@ -731,7 +731,6 @@ dependencies = [ "futures", "isocountry", 
"libp2p", - "once_cell", "opentelemetry", "rand 0.8.5", "reqwest", @@ -832,6 +831,7 @@ dependencies = [ "atoma-p2p", "atoma-sui", "atoma-utils", + "base64 0.22.1", "blake3", "chrono", "config", @@ -844,7 +844,7 @@ dependencies = [ "opentelemetry", "prometheus", "proptest", - "remote-attestation", + "remote-attestation-verifier", "reqwest", "serde", "serde_json", @@ -852,6 +852,7 @@ dependencies = [ "sqlx", "thiserror 2.0.12", "tokio", + "topology", "tracing", "url", "utoipa", @@ -861,7 +862,7 @@ dependencies = [ [[package]] name = "atoma-sui" version = "0.1.0" -source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5" +source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#dfe0830ecffc5429b4c131bc88b18fb8016cd5b9" dependencies = [ "anyhow", "config", @@ -880,7 +881,7 @@ dependencies = [ [[package]] name = "atoma-utils" version = "0.1.0" -source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5" +source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#dfe0830ecffc5429b4c131bc88b18fb8016cd5b9" dependencies = [ "aes-gcm", "anyhow", @@ -1662,7 +1663,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "117725a109d387c937a1533ce01b450cbde6b88abceea8473c4d7a85853cda3c" dependencies = [ "lazy_static", - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -1671,7 +1672,7 @@ version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fde0e0ec90c9dfb3b4b1a0891a7dcd0e2bffde2f7efed5fe7c9bb00e5bfb915e" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] @@ -2136,7 +2137,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8d162beedaa69905488a8da94f5ac3edb4dd4788b732fadb7bd120b2625c1976" dependencies = [ "data-encoding", - "syn 2.0.100", + "syn 1.0.109", ] [[package]] @@ -5786,6 +5787,14 @@ version = 
"0.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "38bf9645c8b145698bb0b18a4637dcacbc421ea49bef2317e4fd8065a387cf21" +[[package]] +name = "nscq" +version = "0.1.0" +source = "git+https://github.com/atoma-network/nvrust?branch=main#1add98df60e9702f74d267132af0635139aeb7f6" +dependencies = [ + "libloading", +] + [[package]] name = "ntapi" version = "0.4.1" @@ -6875,7 +6884,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.100", @@ -7269,14 +7278,14 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2b15c43186be67a4fd63bee50d0303afffcef381492ebe2c5d87f324e1b8815c" [[package]] -name = "remote-attestation" +name = "remote-attestation-verifier" version = "0.1.0" -source = "git+https://github.com/atoma-network/nvrust?branch=main#4418eccd685c75ea8a47dfa9f25b9777643ddeb4" +source = "git+https://github.com/atoma-network/nvrust?branch=main#1add98df60e9702f74d267132af0635139aeb7f6" dependencies = [ "base64 0.22.1", "hex", "jsonwebtoken", - "nvml-wrapper", + "nscq", "once_cell", "rand 0.8.5", "reqwest", @@ -9863,6 +9872,17 @@ dependencies = [ "tracing", ] +[[package]] +name = "topology" +version = "0.1.0" +source = "git+https://github.com/atoma-network/nvrust?branch=main#1add98df60e9702f74d267132af0635139aeb7f6" +dependencies = [ + "nvml-wrapper", + "once_cell", + "thiserror 2.0.12", + "tracing", +] + [[package]] name = "tower" version = "0.4.13" @@ -10713,7 +10733,7 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "cf221c93e13a30d793f7645a0e7762c55d169dbb0a49671918a2319d289b10bb" dependencies = [ - "windows-sys 0.59.0", + "windows-sys 0.48.0", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index c0cb1f02..f997df6a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -8,70 
+8,71 @@ license = "Apache-2.0" version = "0.1.0" [workspace.dependencies] -anyhow = "1.0.98" -async-trait = "0.1.88" -atoma-auth = { path = "./atoma-auth" } -atoma-p2p = { git = "https://github.com/atoma-network/atoma-node.git", package = "atoma-p2p", branch = "main" } -atoma-proxy-service = { path = "./atoma-proxy-service" } -atoma-state = { path = "./atoma-state" } -atoma-sui = { git = "https://github.com/atoma-network/atoma-node.git", package = "atoma-sui", branch = "main" } -atoma-utils = { git = "https://github.com/atoma-network/atoma-node.git", package = "atoma-utils", branch = "main" } -axum = "0.8.3" -base64 = "0.22.1" -bcs = "0.1.6" -blake2 = "0.10.6" -blake3 = "1.8.2" -chrono = "=0.4.39" -clap = "4.5.37" -config = "0.14.1" -dashmap = "6.1.0" -fastcrypto = { git = "https://github.com/MystenLabs/fastcrypto", rev = "69d496c71fb37e3d22fe85e5bbfd4256d61422b9", package = "fastcrypto" } -fastcrypto-zkp = { git = "https://github.com/MystenLabs/fastcrypto", rev = "69d496c71fb37e3d22fe85e5bbfd4256d61422b9", package = "fastcrypto-zkp" } -fastrand = "2.3.0" -flume = "0.11.1" -futures = "0.3.31" -hex = "0.4.3" -hf-hub = "0.3.2" -isocountry = "0.3.2" -itertools = "0.14.0" -jsonwebtoken = "9.3.0" -mockito = "1.6.1" -opentelemetry = "0.27.1" -opentelemetry-otlp = "0.27.0" -opentelemetry_sdk = "0.27.1" -pem = "3.0.5" -prometheus = "0.13.4" -proptest = "1.6.0" -rand = "0.8.5" -regex = "1.11.1" -remote-attestation = { git = "https://github.com/atoma-network/nvrust", branch = "main" } -reqwest = "0.12.12" -rsa = "0.9.7" -sentry = { version = "0.37.0", features = [ "tracing" ] } -serde = "1.0.214" -serde_json = "1.0.140" -serde_yaml = "0.9.34" -serial_test = "3.1.1" -shared-crypto = { git = "https://github.com/mystenlabs/sui", package = "shared-crypto", tag = "testnet-v1.47.0" } -sqlx = { version = "0.8.5", features = [ "postgres", "runtime-tokio-native-tls" ] } -sui-keys = { git = "https://github.com/mystenlabs/sui", package = "sui-keys", tag = "testnet-v1.47.0" } -sui-sdk 
= { git = "https://github.com/mystenlabs/sui", package = "sui-sdk", tag = "testnet-v1.47.0" } -sui-sdk-types = "0.0.2" -thiserror = "2.0.12" -tokenizers = "0.21.0" -tokio = "1.44.2" -toml = "0.8.19" -tonic = "0.12" -tower = "0.5.1" -tower-http = "0.6.2" -tracing = "0.1.40" -tracing-appender = "0.2.3" -tracing-loki = "0.2.6" -tracing-opentelemetry = "0.28.0" -tracing-subscriber = "0.3.18" -url = "2.5.4" -utoipa = "5.3.1" -utoipa-swagger-ui = "9.0.1" -uuid = "1.15.1" -x25519-dalek = "2.0.1" -zeroize = "1.8.1" +anyhow = "1.0.98" +async-trait = "0.1.88" +atoma-auth = { path = "./atoma-auth" } +atoma-p2p = { git = "https://github.com/atoma-network/atoma-node.git", package = "atoma-p2p", branch = "main" } +atoma-proxy-service = { path = "./atoma-proxy-service" } +atoma-state = { path = "./atoma-state" } +atoma-sui = { git = "https://github.com/atoma-network/atoma-node.git", package = "atoma-sui", branch = "main" } +atoma-utils = { git = "https://github.com/atoma-network/atoma-node.git", package = "atoma-utils", branch = "main" } +axum = "0.8.3" +base64 = "0.22.1" +bcs = "0.1.6" +blake2 = "0.10.6" +blake3 = "1.8.2" +chrono = "=0.4.39" +clap = "4.5.37" +config = "0.14.1" +dashmap = "6.1.0" +fastcrypto = { git = "https://github.com/MystenLabs/fastcrypto", rev = "69d496c71fb37e3d22fe85e5bbfd4256d61422b9", package = "fastcrypto" } +fastcrypto-zkp = { git = "https://github.com/MystenLabs/fastcrypto", rev = "69d496c71fb37e3d22fe85e5bbfd4256d61422b9", package = "fastcrypto-zkp" } +fastrand = "2.3.0" +flume = "0.11.1" +futures = "0.3.31" +hex = "0.4.3" +hf-hub = "0.3.2" +isocountry = "0.3.2" +itertools = "0.14.0" +jsonwebtoken = "9.3.0" +mockito = "1.6.1" +opentelemetry = "0.27.1" +opentelemetry-otlp = "0.27.0" +opentelemetry_sdk = "0.27.1" +pem = "3.0.5" +prometheus = "0.13.4" +proptest = "1.6.0" +rand = "0.8.5" +regex = "1.11.1" +remote-attestation-verifier = { git = "https://github.com/atoma-network/nvrust", package = "remote-attestation-verifier", branch = "main" } +reqwest = 
"0.12.12" +rsa = "0.9.7" +sentry = { version = "0.37.0", features = [ "tracing" ] } +serde = "1.0.214" +serde_json = "1.0.140" +serde_yaml = "0.9.34" +serial_test = "3.1.1" +shared-crypto = { git = "https://github.com/mystenlabs/sui", package = "shared-crypto", tag = "testnet-v1.47.0" } +sqlx = { version = "0.8.5", features = [ "postgres", "runtime-tokio-native-tls" ] } +sui-keys = { git = "https://github.com/mystenlabs/sui", package = "sui-keys", tag = "testnet-v1.47.0" } +sui-sdk = { git = "https://github.com/mystenlabs/sui", package = "sui-sdk", tag = "testnet-v1.47.0" } +sui-sdk-types = "0.0.2" +thiserror = "2.0.12" +tokenizers = "0.21.0" +tokio = "1.44.2" +toml = "0.8.19" +tonic = "0.12" +topology = { git = "https://github.com/atoma-network/nvrust", package = "topology", branch = "main" } +tower = "0.5.1" +tower-http = "0.6.2" +tracing = "0.1.40" +tracing-appender = "0.2.3" +tracing-loki = "0.2.6" +tracing-opentelemetry = "0.28.0" +tracing-subscriber = "0.3.18" +url = "2.5.4" +utoipa = "5.3.1" +utoipa-swagger-ui = "9.0.1" +uuid = "1.15.1" +x25519-dalek = "2.0.1" +zeroize = "1.8.1" diff --git a/atoma-proxy-service/src/handlers/auth.rs b/atoma-proxy-service/src/handlers/auth.rs index c9b89548..73d44b08 100644 --- a/atoma-proxy-service/src/handlers/auth.rs +++ b/atoma-proxy-service/src/handlers/auth.rs @@ -586,7 +586,7 @@ pub async fn get_balance( Ok(Json( proxy_service_state .atoma_state - .get_balance_for_user(user_id) + .get_crypto_balance_for_user(user_id) .await .map_err(|e| { error!("Failed to get balance: {:?}", e); diff --git a/atoma-proxy/src/server/handlers/chat_completions.rs b/atoma-proxy/src/server/handlers/chat_completions.rs index 0f7893d9..501359f2 100644 --- a/atoma-proxy/src/server/handlers/chat_completions.rs +++ b/atoma-proxy/src/server/handlers/chat_completions.rs @@ -49,7 +49,8 @@ use super::metrics::{ CHAT_COMPLETIONS_INPUT_TOKENS, CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, CHAT_COMPLETIONS_LATENCY_METRICS, CHAT_COMPLETIONS_NUM_REQUESTS, 
CHAT_COMPLETIONS_TOTAL_TOKENS, CHAT_COMPLETIONS_TOTAL_TOKENS_PER_USER, CHAT_COMPLETION_REQUESTS_PER_USER, - TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, + INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_COMPLETED_REQUESTS, + TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, }; use super::request_model::{ComputeUnitsEstimate, RequestModel}; @@ -174,7 +175,6 @@ pub async fn chat_completions_create( ) -> Result> { let endpoint = metadata.endpoint.clone(); tokio::spawn(async move { - // TODO: We should allow cancelling the request if the client disconnects let is_streaming = payload .get(STREAM) .and_then(serde_json::Value::as_bool) @@ -1074,6 +1074,7 @@ async fn handle_streaming_response( } } Ok(()) = kill_signal_receiver.recv_async() => { + INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS.add(1, &[KeyValue::new("user_id", user_id)]); tracing::info!(target = "atoma-service-streamer", "Received kill signal, stopping streamer"); let stop_response = client_clone .post(format!("{node_address_clone}{STOP_STREAMER_PATH}")) @@ -1238,7 +1239,6 @@ impl RequestModel for RequestModelChatCompletions { MessageContentPart::Image { .. } => { // TODO: Ensure that for image content parts, we have a way to estimate the number of tokens, // which can depend on the size of the image and the output description. - continue; } } } diff --git a/atoma-proxy/src/server/handlers/metrics.rs b/atoma-proxy/src/server/handlers/metrics.rs index 63349036..43d14d21 100644 --- a/atoma-proxy/src/server/handlers/metrics.rs +++ b/atoma-proxy/src/server/handlers/metrics.rs @@ -110,6 +110,24 @@ pub static CHAT_COMPLETIONS_STREAMING_LATENCY_METRICS: LazyLock> .build() }); +/// Counter metric that tracks the total number of intentionally cancelled chat completion streaming requests. 
+/// +/// # Metric Details +/// - Name: `atoma_intentionally_cancelled_chat_completion_streaming_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS: LazyLock> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_intentionally_cancelled_chat_completion_streaming_requests") + .with_description( + "The number of intentionally cancelled chat completion streaming requests", + ) + .with_unit("requests") + .build() + }); + /// Histogram metric that tracks the latency of image generation requests. /// /// This metric measures the time taken to generate images, broken down by model type. @@ -520,6 +538,7 @@ pub static CHAT_COMPLETIONS_COMPLETIONS_TOKENS_PER_USER: LazyLock> /// - Name: `atoma_cancelled_stream_chat_completion_requests_per_user` /// - Type: Counter /// - Labels: `user_id` +/// - Model `model` /// - Unit: requests (count) pub static CANCELLED_STREAM_CHAT_COMPLETION_REQUESTS_PER_USER: LazyLock> = LazyLock::new(|| { diff --git a/atoma-proxy/src/server/streamer.rs b/atoma-proxy/src/server/streamer.rs index 0c4221a8..94d258c8 100644 --- a/atoma-proxy/src/server/streamer.rs +++ b/atoma-proxy/src/server/streamer.rs @@ -555,8 +555,13 @@ impl Drop for Streamer { CHAT_COMPLETION_REQUESTS_PER_USER.add(1, &[KeyValue::new("user_id", self.user_id)]); return; } - CANCELLED_STREAM_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", self.user_id)]); + CANCELLED_STREAM_CHAT_COMPLETION_REQUESTS_PER_USER.add( + 1, + &[ + KeyValue::new("user_id", self.user_id), + KeyValue::new("model", self.model_name.clone()), + ], + ); match self.stack_small_id { Some(stack_small_id) => { if let Err(e) = update_state_manager( diff --git a/atoma-state/Cargo.toml b/atoma-state/Cargo.toml index 7a99f05a..cbf4fa0d 100644 --- a/atoma-state/Cargo.toml +++ b/atoma-state/Cargo.toml @@ -5,30 +5,32 @@ name = "atoma-state" version.workspace = true [dependencies] -atoma-p2p = { 
workspace = true } -atoma-sui = { workspace = true } -atoma-utils = { workspace = true } -blake3 = { workspace = true } -chrono.workspace = true -config = { workspace = true } -fastrand = { workspace = true } -flume = { workspace = true } -futures = { workspace = true } -hex = { workspace = true } -isocountry = { workspace = true } -mockito = { workspace = true } -opentelemetry = { workspace = true } -prometheus = { workspace = true } -remote-attestation = { workspace = true } -reqwest = { workspace = true } -serde = { workspace = true, features = [ "derive" ] } -serde_json = { workspace = true } -sqlx = { workspace = true, features = [ "chrono", "runtime-tokio-native-tls", "sqlite" ] } -thiserror = { workspace = true } -tokio = { workspace = true, features = [ "full" ] } -tracing = { workspace = true } -url = { workspace = true } -utoipa = { workspace = true } +atoma-p2p = { workspace = true } +atoma-sui = { workspace = true } +atoma-utils = { workspace = true } +base64 = { workspace = true } +blake3 = { workspace = true } +chrono.workspace = true +config = { workspace = true } +fastrand = { workspace = true } +flume = { workspace = true } +futures = { workspace = true } +hex = { workspace = true } +isocountry = { workspace = true } +mockito = { workspace = true } +opentelemetry = { workspace = true } +prometheus = { workspace = true } +remote-attestation-verifier = { workspace = true } +reqwest = { workspace = true } +serde = { workspace = true, features = [ "derive" ] } +serde_json = { workspace = true } +sqlx = { workspace = true, features = [ "chrono", "runtime-tokio-native-tls", "sqlite" ] } +thiserror = { workspace = true } +tokio = { workspace = true, features = [ "full" ] } +topology = { workspace = true } +tracing = { workspace = true } +url = { workspace = true } +utoipa = { workspace = true } [dev-dependencies] futures = { workspace = true } diff --git a/atoma-state/src/errors.rs b/atoma-state/src/errors.rs index 8aa8f0b3..4e355ce6 100644 --- 
a/atoma-state/src/errors.rs +++ b/atoma-state/src/errors.rs @@ -1,5 +1,6 @@ use atoma_utils::compression::CompressionError; use thiserror::Error; +use topology::error::NvidiaRemoteAttestationError; use tracing::error; #[derive(Debug, thiserror::Error)] @@ -100,7 +101,17 @@ pub enum AtomaStateManagerError { #[derive(Error, Debug)] pub enum AtomaStateRemoteAttestationError { #[error("Failed to attest remote: {0}")] - FailedToAttestRemote(#[from] remote_attestation::AttestError), + FailedToAttestRemote(#[from] remote_attestation_verifier::AttestError), #[error("Failed to retrieve contract nonce")] FailedToRetrieveContractNonce, + #[error("Failed to decode {evidence_type} evidence data")] + FailedToDecode { + evidence_type: String, + error: Box, + }, + #[error("{check_type} topology check failed")] + TopologyCheckFailed { + check_type: String, + error: Box, + }, } diff --git a/atoma-state/src/handlers.rs b/atoma-state/src/handlers.rs index f1b8e5df..d9f08140 100644 --- a/atoma-state/src/handlers.rs +++ b/atoma-state/src/handlers.rs @@ -8,7 +8,7 @@ use atoma_sui::events::{ }; use atoma_utils::compression::decompress_bytes; use chrono::{DateTime, Utc}; -use remote_attestation::DeviceEvidence; +use remote_attestation_verification::CombinedEvidence; use tokio::sync::oneshot; use tracing::{error, info, instrument, trace}; @@ -1388,7 +1388,10 @@ pub async fn handle_state_manager_event( .map_err(|_| AtomaStateManagerError::ChannelSendError)?; } AtomaAtomaStateManagerEvent::TopUpBalance { user_id, amount } => { - state_manager.state.top_up_balance(user_id, amount).await?; + state_manager + .state + .top_up_crypto_balance(user_id, amount) + .await?; } AtomaAtomaStateManagerEvent::DeductFromUsdc { user_id, @@ -1570,16 +1573,69 @@ pub async fn handle_node_key_rotation_event( device_type, evidence_bytes, } = event; + let original_evidence_bytes = decompress_bytes(&evidence_bytes)?; - let evidence_data = serde_json::from_slice::>(&original_evidence_bytes)?; - let is_valid = 
remote_attestation_verification::attest_nvidia_evidence_list( - state_manager, - &evidence_data, - &new_public_key, - device_type, - ) - .await - .is_ok(); + let evidence_data = serde_json::from_slice::>(&original_evidence_bytes)?; + + let (gpu_evidence_data, nvswitch_evidence_data): (Vec<_>, Vec<_>) = evidence_data + .into_iter() + .partition(|evidence| matches!(evidence, CombinedEvidence::Device(_))); + + let gpu_evidence_data = gpu_evidence_data + .into_iter() + .filter_map(|e| { + if let CombinedEvidence::Device(evidence) = e { + Some(evidence) + } else { + None + } + }) + .collect::>(); + + let nvswitch_evidence_data = nvswitch_evidence_data + .into_iter() + .filter_map(|e| { + if let CombinedEvidence::NvSwitch(evidence) = e { + Some(evidence) + } else { + None + } + }) + .collect::>(); + + // Check if ppcie mode is enabled for current attestation + let is_ppcie_mode = gpu_evidence_data.len() == 8 && nvswitch_evidence_data.len() == 4; + + let is_valid = if is_ppcie_mode { + remote_attestation_verification::attest_nvidia_gpu_evidence_list( + state_manager, + &gpu_evidence_data, + &new_public_key, + device_type, + ) + .await?; + remote_attestation_verification::attest_nvidia_nvswitch_evidence_list( + state_manager, + &nvswitch_evidence_data, + &new_public_key, + device_type, + ) + .await?; + remote_attestation_verification::verify_topology( + &gpu_evidence_data, + &nvswitch_evidence_data, + )?; + true + } else { + remote_attestation_verification::attest_nvidia_gpu_evidence_list( + state_manager, + &gpu_evidence_data, + &new_public_key, + device_type, + ) + .await + .is_ok() + }; state_manager .state .update_node_public_key( @@ -1746,20 +1802,61 @@ pub(crate) async fn handle_node_small_id_ownership_verification_event( pub mod remote_attestation_verification { use crate::{errors::AtomaStateRemoteAttestationError, AtomaStateManager}; - use remote_attestation::{attest_remote, AttestError, DeviceEvidence}; + use base64::{engine::general_purpose::STANDARD, Engine}; + 
use remote_attestation_verifier::{ + remote_gpu_attestation::AttestRemoteOptions, verify_gpu_attestation, + verify_nvswitch_attestation, AttestError, DeviceEvidence, NvSwitchEvidence, + }; + use serde::{Deserialize, Serialize}; use tracing::instrument; type Result = std::result::Result; + /// Combined evidence from a device and an NVSwitch + /// + /// This enum represents the evidence from a device and an NVSwitch, which is used to verify the integrity and authenticity of the GPU hardware and its execution environment. + #[derive(Debug, Serialize, Deserialize)] + #[serde(tag = "evidence_type")] + pub enum CombinedEvidence { + /// Evidence from a device + #[serde(rename = "device")] + Device(DeviceEvidence), + + /// Evidence from an NVSwitch + #[serde(rename = "nvswitch")] + NvSwitch(NvSwitchEvidence), + } + + /// Attests the NVIDIA GPU evidence list + /// + /// This function attests the NVIDIA GPU evidence list by verifying the evidence data and the topology. + /// + /// # Arguments + /// + /// * `state_manager` - A reference to the `AtomaStateManager` that provides access to the state database + /// * `evidence_data` - A reference to the GPU evidence data + /// * `new_public_key` - The new public key + /// * `device_type` - The device type + /// + /// # Returns + /// + /// * `Result<()>` - Ok(()) if the attestation is successful, or an error if the operation failed + /// + /// # Errors + /// + /// This function will return an error if: + /// * The GPU evidence data cannot be decoded + /// * The topology check fails #[instrument( level = "info", skip_all, fields( device_type = device_type, new_public_key = hex::encode(new_public_key), - ) + ), + err )] - pub async fn attest_nvidia_evidence_list( + pub async fn attest_nvidia_gpu_evidence_list( state_manager: &AtomaStateManager, evidence_data: &[DeviceEvidence], new_public_key: &[u8], @@ -1787,10 +1884,10 @@ pub mod remote_attestation_verification { device_type = device_type, "Attesting NVIDIA evidence list, with 
should_be_nonce: {should_be_nonce_hex}" ); - let result = match attest_remote( + let result = match verify_gpu_attestation( evidence_data, &should_be_nonce_hex, - attest_remote::AttestRemoteOptions::default(), + AttestRemoteOptions::default(), ) .await { @@ -1827,4 +1924,202 @@ pub mod remote_attestation_verification { )) } } + + /// Attests the NVIDIA NVSwitch evidence list + /// + /// This function attests the NVIDIA NVSwitch evidence list by verifying the evidence data and the topology. + /// + /// # Arguments + /// + /// * `state_manager` - A reference to the `AtomaStateManager` that provides access to the state database + /// * `nvswitch_evidence_data` - A reference to the NVSwitch evidence data + /// * `new_public_key` - The new public key + /// * `device_type` - The device type + /// + /// # Returns + /// + /// * `Result<()>` - Ok(()) if the attestation is successful, or an error if the operation failed + /// + /// # Errors + /// + /// This function will return an error if: + /// * The NVSwitch evidence data cannot be decoded + /// * The topology check fails + #[instrument( + level = "info", + skip_all, + fields( + device_type = device_type, + new_public_key = hex::encode(new_public_key), + ), + err, + )] + pub async fn attest_nvidia_nvswitch_evidence_list( + state_manager: &AtomaStateManager, + nvswitch_evidence_data: &[NvSwitchEvidence], + new_public_key: &[u8], + device_type: u16, + ) -> Result<()> { + let contract_nonce = state_manager + .state + .get_contract_key_rotation_nonce() + .await + .map_err(|_| AtomaStateRemoteAttestationError::FailedToRetrieveContractNonce)?; + let should_be_nonce = blake3::hash( + &[ + &contract_nonce.to_le_bytes()[..], + new_public_key, + &device_type.to_le_bytes()[..], + ] + .concat(), + ); + let should_be_nonce_hex = hex::encode(should_be_nonce.as_bytes()); + let (result, _) = verify_nvswitch_attestation( + nvswitch_evidence_data, + &should_be_nonce_hex, + AttestRemoteOptions::default(), + ).await.map_err(|e| { + 
tracing::error!( + target = "atoma-state-handlers", + event = "attest-nvidia-evidence-list", + "NVSwitch attestation verification failed for device type: {device_type} and public key: {}, with error: {e}", + hex::encode(new_public_key), + ); + AtomaStateRemoteAttestationError::FailedToAttestRemote( + AttestError::RemoteAttestationFailed, + ) + })?; + if result { + tracing::info!( + target = "atoma-state-handlers", + event = "attest-nvidia-evidence-list", + "Attestation successful for device type: {device_type} and public key: {}", + hex::encode(new_public_key), + ); + Ok(()) + } else { + tracing::error!( + target = "atoma-state-handlers", + event = "attest-nvidia-evidence-list", + "Attestation failed for device type: {device_type} and public key: {}", + hex::encode(new_public_key), + ); + Err(AtomaStateRemoteAttestationError::FailedToAttestRemote( + AttestError::RemoteAttestationFailed, + )) + } + } + + /// Verifies the topology of the GPU and NVSwitch evidence data + /// + /// This function verifies the topology of the GPU and NVSwitch evidence data by decoding the evidence data and checking the topology. 
+ /// + /// # Arguments + /// + /// * `gpu_evidence_data` - A reference to the GPU evidence data + /// * `nvswitch_evidence_data` - A reference to the NVSwitch evidence data + /// + /// # Returns + /// + /// * `Result<()>` - Ok(()) if the topology is valid, or an error if the operation failed + /// + /// # Errors + /// + /// This function will return an error if: + /// * The GPU evidence data cannot be decoded + /// * The NVSwitch evidence data cannot be decoded + /// * The topology check fails + #[instrument(level = "info", skip_all, fields(topology_check = "true",), err)] + pub fn verify_topology( + gpu_evidence_data: &[DeviceEvidence], + nvswitch_evidence_data: &[NvSwitchEvidence], + ) -> Result<()> { + let gpu_evidence_data_refs = decode_evidence( + &gpu_evidence_data + .iter() + .map(|e| &e.evidence) + .collect::>(), + "GPU", + )?; + let nvswitch_evidence_data_refs = decode_evidence( + &nvswitch_evidence_data + .iter() + .map(|e| &e.evidence) + .collect::>(), + "NVSwitch", + )?; + let unique_switch_pdis_set = topology::topology::gpu_topology_check( + &gpu_evidence_data_refs + .iter() + .map(Vec::as_slice) + .collect::>(), + ) + .map_err(|e| { + tracing::error!( + target = "atoma-state-handlers", + event = "verify-topology", + "Failed to check GPU topology: {e}", + ); + AtomaStateRemoteAttestationError::TopologyCheckFailed { + check_type: "GPU topology check".to_string(), + error: Box::new(e), + } + })?; + topology::topology::switch_topology_check( + &nvswitch_evidence_data_refs + .iter() + .map(Vec::as_slice) + .collect::>(), + gpu_evidence_data.len(), + unique_switch_pdis_set, + ) + .map_err(|e| { + tracing::error!( + target = "atoma-state-handlers", + event = "verify-topology", + "Failed to check NVSwitch topology: {e}", + ); + AtomaStateRemoteAttestationError::TopologyCheckFailed { + check_type: "NVSwitch topology check".to_string(), + error: Box::new(e), + } + })?; + + tracing::info!( + target = "atoma-state-handlers", + event = "verify-topology", + 
"Topology verification completed successfully", + ); + Ok(()) + } + + /// Decodes the evidence data from base64 to a vector of bytes + /// + /// This function decodes the evidence data from base64 to a vector of bytes. + /// + /// # Arguments + /// + /// * `evidence_list` - A reference to the evidence data + /// * `evidence_type` - The type of evidence + /// + /// # Returns + /// + /// * `Result>>` - Ok(()) if the evidence data is decoded successfully, or an error if the operation failed + fn decode_evidence>( + evidence_list: &[T], + evidence_type: &str, + ) -> Result>> { + evidence_list + .iter() + .map(|evidence| { + STANDARD.decode(evidence.as_ref()).map_err(|e| { + tracing::error!("Failed to decode {} evidence data: {}", evidence_type, e); + AtomaStateRemoteAttestationError::FailedToDecode { + evidence_type: evidence_type.to_string(), + error: Box::new(e), + } + }) + }) + .collect() + } } diff --git a/atoma-state/src/lib.rs b/atoma-state/src/lib.rs index 02986826..a919d749 100644 --- a/atoma-state/src/lib.rs +++ b/atoma-state/src/lib.rs @@ -7,6 +7,7 @@ pub mod config; pub mod errors; pub mod handlers; pub mod metrics; +pub mod network; pub mod state_manager; #[cfg(test)] pub mod tests; diff --git a/atoma-state/src/migrations/20250424111320_fiat_payments.sql b/atoma-state/src/migrations/20250424111320_fiat_payments.sql new file mode 100644 index 00000000..08c0344d --- /dev/null +++ b/atoma-state/src/migrations/20250424111320_fiat_payments.sql @@ -0,0 +1,18 @@ +CREATE TABLE + IF NOT EXISTS fiat_balance ( + user_id BIGINT NOT NULL PRIMARY KEY, + usd_balance BIGINT NOT NULL DEFAULT 0, + already_debited_amount BIGINT NOT NULL DEFAULT 0, + overcharged_unsettled_amount BIGINT NOT NULL DEFAULT 0, + num_requests BIGINT NOT NULL DEFAULT 0 + ); + +CREATE TABLE + IF NOT EXISTS usage_per_model ( + user_id BIGINT NOT NULL, + model TEXT NOT NULL, + total_number_processed_tokens BIGINT NOT NULL DEFAULT 0, + PRIMARY KEY (user_id, model) + ); + + ALTER TABLE IF EXISTS balance 
RENAME TO crypto_balances; diff --git a/atoma-state/src/network.rs b/atoma-state/src/network.rs new file mode 100644 index 00000000..b4a1fb60 --- /dev/null +++ b/atoma-state/src/network.rs @@ -0,0 +1,110 @@ +use opentelemetry::{ + global, + metrics::{Counter, Histogram, Meter}, + KeyValue, +}; +use reqwest::Client; +use std::sync::LazyLock; +use std::time::{Duration, Instant}; +use tracing::{debug, error, instrument, warn}; + +static GLOBAL_METER: LazyLock = LazyLock::new(|| global::meter("atoma-proxy")); + +/// Histogram metric that tracks the latency of node connections from the proxy to the node. +/// +/// This metric tracks the latency of node connections from the proxy to the node. +/// +/// # Metric Details +/// - Name: `atoma_node_connection_latency` +/// - Type: Histogram +/// - Labels: `node_ip_address` +/// - Unit: seconds (s) +/// +pub static NODE_CONNECTION_LATENCY: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .f64_histogram("atoma_node_connection_latency") + .with_description("The latency of node connections from the proxy to the node") + .with_unit("s") + .build() +}); + +/// Counter metric that tracks the number of failed node connections from the proxy to the node. +/// +/// This metric tracks the number of failed node connections from the proxy to the node. 
+/// +/// # Metric Details +/// - Name: `atoma_failed_node_connections` +/// - Type: Counter +/// - Labels: `node_ip_address` +/// - Unit: count +/// +pub static FAILED_NODE_CONNECTIONS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_failed_node_connections") + .with_description("The number of failed node connections from the proxy to the node") + .with_unit("s") + .build() +}); + +#[derive(Default)] +pub struct NetworkMetrics { + client: Client, +} + +impl NetworkMetrics { + #[must_use] + pub fn new() -> Self { + Self { + client: Client::new(), + } + } + #[instrument(level = "trace", name = "update_metrics", skip(self))] + pub async fn update_metrics(&mut self, node_addresses: Vec) { + for address in node_addresses { + let url = format!("{address}/health"); + let start = Instant::now(); + match self + .client + .get(&url) + .timeout(Duration::from_secs(5)) + .send() + .await + { + Ok(response) => { + let latency = start.elapsed(); + if response.status().is_success() { + NODE_CONNECTION_LATENCY.record( + latency.as_secs_f64(), + &[KeyValue::new("node_ip_address", address.clone())], + ); + debug!( + target = "network_metrics", + node_address = %address, + latency_ms = latency.as_millis(), + "Successfully pinged node" + ); + } else { + FAILED_NODE_CONNECTIONS + .add(1, &[KeyValue::new("node_ip_address", address.clone())]); + warn!( + target = "network_metrics", + node_address = %address, + status = %response.status(), + "Node health check returned non-success status" + ); + } + } + Err(e) => { + FAILED_NODE_CONNECTIONS + .add(1, &[KeyValue::new("node_ip_address", address.clone())]); + error!( + target = "network_metrics", + node_address = %address, + error = %e, + "Failed to ping node" + ); + } + } + } + } +} diff --git a/atoma-state/src/state_manager.rs b/atoma-state/src/state_manager.rs index c9cd4730..5c926c49 100644 --- a/atoma-state/src/state_manager.rs +++ b/atoma-state/src/state_manager.rs @@ -1,6 +1,7 @@ use std::time::Duration; use 
crate::handlers::{handle_atoma_event, handle_p2p_event, handle_state_manager_event}; +use crate::network::NetworkMetrics; use crate::types::{ AtomaAtomaStateManagerEvent, CheapestNode, ComputedUnitsProcessedResponse, LatencyResponse, NodeDistribution, NodePublicKey, NodeSubscription, Stack, StackAttestationDispute, @@ -135,8 +136,38 @@ impl AtomaStateManager { /// ``` #[instrument(level = "trace", skip_all)] pub async fn run(self, mut shutdown_signal: Receiver) -> Result<()> { + let mut network_metrics = NetworkMetrics::new(); + let interval = std::time::Duration::from_secs(15); + + // Create a channel for interval-based updates + let (interval_tx, interval_rx) = flume::unbounded(); + + // Spawn a task that sends a message every interval + tokio::spawn(async move { + loop { + tokio::time::sleep(interval).await; + if interval_tx.send(()).is_err() { + break; + } + } + }); + loop { tokio::select! { + _ = interval_rx.recv_async() => { + match self.state.retrieve_node_public_addresses().await { + Ok(node_addresses) => { + network_metrics.update_metrics(node_addresses).await; + } + Err(e) => { + tracing::error!( + target = "network_metrics", + error = %e, + "Failed to retrieve node addresses" + ); + } + } + } atoma_event = self.event_subscriber_receiver.recv_async() => { match atoma_event { Ok(atoma_event) => { @@ -152,7 +183,6 @@ impl AtomaStateManager { error = %e, "Error handling Atoma event" ); - continue; } } Err(e) => { @@ -176,7 +206,6 @@ impl AtomaStateManager { error = %e, "Error handling state manager event" ); - continue; } } Err(e) => { @@ -189,7 +218,6 @@ impl AtomaStateManager { // NOTE: We continue the loop, as the inference service might be shutting down, // but we want to keep the state manager running // for event synchronization with the Atoma Network protocol. 
- continue; } } } @@ -208,7 +236,6 @@ impl AtomaStateManager { // NOTE: We continue the loop, as the inference service might be shutting down, // but we want to keep the state manager running // for event synchronization with the Atoma Network protocol. - continue; } } } @@ -422,13 +449,13 @@ impl AtomaState { let stack = sqlx::query( " WITH selected_stack AS ( - SELECT stack_small_id + SELECT stack_small_id FROM stacks - WHERE task_small_id = $1 - AND num_compute_units - already_computed_units - locked_compute_units >= $2 - AND user_id = $3 - AND is_claimed = false - AND is_locked = false + WHERE task_small_id = $1 + AND num_compute_units - already_computed_units - locked_compute_units >= $2 + AND user_id = $3 + AND is_claimed = false + AND is_locked = false AND in_settle_period = false LIMIT 1 FOR UPDATE @@ -782,7 +809,7 @@ impl AtomaState { GROUP BY npk.node_small_id, npk.public_key HAVING bool_and(npk.is_valid) = true ) - SELECT node_small_id, public_key + SELECT node_small_id, public_key FROM valid_node " ) @@ -2999,6 +3026,32 @@ impl AtomaState { Ok(()) } + /// Retrieves all node public addresses from the database. + /// + /// This method fetches all public addresses of nodes from the `nodes` table. + /// + /// # Returns + /// + /// - `Result>`: A result containing either: + /// - `Ok(Vec)`: A vector of all node public addresses. + /// - `Err(AtomaStateManagerError)`: An error if the database query fails. + /// + /// # Errors + /// + /// This function will return an error if: + /// - The database query fails to execute. + #[instrument(level = "trace", skip_all)] + pub async fn retrieve_node_public_addresses(&self) -> Result> { + let addresses = sqlx::query_scalar( + "SELECT public_address + FROM nodes + WHERE public_address IS NOT NULL", + ) + .fetch_all(&self.db) + .await?; + Ok(addresses) + } + /// Retrieves the public address of a node from the database. /// /// This method fetches the public address of a node from the `nodes` table. 
@@ -3288,16 +3341,16 @@ impl AtomaState { ) -> Result<()> { sqlx::query( "INSERT INTO node_public_keys ( - node_small_id, - epoch, - key_rotation_counter, - public_key, - evidence_bytes, - device_type, + node_small_id, + epoch, + key_rotation_counter, + public_key, + evidence_bytes, + device_type, is_valid ) VALUES ($1, $2, $3, $4, $5, $6, $7) ON CONFLICT (node_small_id, device_type) - DO UPDATE SET + DO UPDATE SET epoch = EXCLUDED.epoch, key_rotation_counter = EXCLUDED.key_rotation_counter, public_key = EXCLUDED.public_key, @@ -3898,7 +3951,7 @@ impl AtomaState { /// Get balance for a user. /// - /// This method fetches the balance for a user from the `balance` table. + /// This method fetches the balance for a user from the `crypto_balances` table. /// /// # Arguments /// @@ -3921,13 +3974,13 @@ impl AtomaState { /// ```rust,ignore /// use atoma_node::atoma_state::AtomaStateManager; /// - /// async fn get_balance(state_manager: &AtomaStateManager, user_id: i64) -> Result { - /// state_manager.get_balance_for_user(user_id).await + /// async fn get_crypto_balance(state_manager: &AtomaStateManager, user_id: i64) -> Result { + /// state_manager.get_crypto_balance_for_user(user_id).await /// } /// ``` #[instrument(level = "trace", skip(self))] - pub async fn get_balance_for_user(&self, user_id: i64) -> Result { - let balance = sqlx::query("SELECT usdc_balance FROM balance WHERE user_id = $1") + pub async fn get_crypto_balance_for_user(&self, user_id: i64) -> Result { + let balance = sqlx::query("SELECT usdc_balance FROM crypto_balances WHERE user_id = $1") .bind(user_id) .fetch_optional(&self.db) .await? @@ -4306,7 +4359,7 @@ impl AtomaState { /// Update the balance for the user. /// - /// This method updates the `balance` field for the user in the `users` table. + /// This method updates the `usdc_balance` field for the user in the `users` table. 
/// /// # Arguments /// @@ -4328,18 +4381,18 @@ impl AtomaState { /// ```rust,ignore /// use atoma_node::atoma_state::AtomaStateManager; /// - /// async fn update_balance(state_manager: &AtomaStateManager, user_id: i64, balance: i64) -> Result<(), AtomaStateManagerError> { - /// state_manager.update_balance(user_id, balance).await + /// async fn top_up_crypto_balance(state_manager: &AtomaStateManager, user_id: i64, balance: i64) -> Result<(), AtomaStateManagerError> { + /// state_manager.top_up_crypto_balance(user_id, balance).await /// } /// ``` #[instrument(level = "trace", skip(self))] - pub async fn top_up_balance(&self, user_id: i64, balance: i64) -> Result<()> { + pub async fn top_up_crypto_balance(&self, user_id: i64, balance: i64) -> Result<()> { sqlx::query( - "INSERT INTO balance (user_id, usdc_balance) + "INSERT INTO crypto_balances (user_id, usdc_balance) VALUES ($1, $2) ON CONFLICT (user_id) DO UPDATE SET - usdc_balance = balance.usdc_balance + EXCLUDED.usdc_balance", + usdc_balance = crypto_balances.usdc_balance + EXCLUDED.usdc_balance", ) .bind(user_id) .bind(balance) @@ -4368,7 +4421,7 @@ impl AtomaState { /// - The database query fails to execute (that could mean the balance is not available) #[instrument(level = "trace", skip(self))] pub async fn deduct_from_usdc(&self, user_id: i64, balance: i64) -> Result<()> { - let result = sqlx::query("UPDATE balance SET usdc_balance = usdc_balance - $2 WHERE user_id = $1 AND usdc_balance >= $2") + let result = sqlx::query("UPDATE crypto_balances SET usdc_balance = usdc_balance - $2 WHERE user_id = $1 AND usdc_balance >= $2") .bind(user_id) .bind(balance) .execute(&self.db) @@ -4381,7 +4434,7 @@ impl AtomaState { /// Refunds a USDC payment. /// - /// This method refunds a USDC payment to the user in the `balance` table. + /// This method refunds a USDC payment to the user in the `crypto_balances` table. 
/// /// # Arguments /// @@ -4399,11 +4452,13 @@ impl AtomaState { /// - The database query fails to execute. #[instrument(level = "trace", skip(self))] pub async fn refund_usdc(&self, user_id: i64, amount: i64) -> Result<()> { - sqlx::query("UPDATE balance SET usdc_balance = usdc_balance + $2 WHERE user_id = $1") - .bind(user_id) - .bind(amount) - .execute(&self.db) - .await?; + sqlx::query( + "UPDATE crypto_balances SET usdc_balance = usdc_balance + $2 WHERE user_id = $1", + ) + .bind(user_id) + .bind(amount) + .execute(&self.db) + .await?; Ok(()) } From ae211700097854825e2e4572dc105084231be05b Mon Sep 17 00:00:00 2001 From: chad Date: Thu, 8 May 2025 17:57:04 -0500 Subject: [PATCH 02/61] feat: integrate helm setup --- helm/atoma-proxy/Chart.yaml | 29 ++ helm/atoma-proxy/README.md | 206 ++++++++++++++ helm/atoma-proxy/scripts/deploy.sh | 186 ++++++++++++ helm/atoma-proxy/scripts/setup-cluster.sh | 316 +++++++++++++++++++++ helm/atoma-proxy/templates/configmap.yaml | 23 ++ helm/atoma-proxy/templates/deployment.yaml | 72 +++++ helm/atoma-proxy/templates/ingress.yaml | 43 +++ helm/atoma-proxy/templates/namespace.yaml | 8 + helm/atoma-proxy/templates/service.yaml | 23 ++ helm/atoma-proxy/values-dev.yaml | 95 +++++++ helm/atoma-proxy/values-prod.yaml | 95 +++++++ helm/atoma-proxy/values.yaml | 138 +++++++++ 12 files changed, 1234 insertions(+) create mode 100644 helm/atoma-proxy/Chart.yaml create mode 100644 helm/atoma-proxy/README.md create mode 100755 helm/atoma-proxy/scripts/deploy.sh create mode 100755 helm/atoma-proxy/scripts/setup-cluster.sh create mode 100644 helm/atoma-proxy/templates/configmap.yaml create mode 100644 helm/atoma-proxy/templates/deployment.yaml create mode 100644 helm/atoma-proxy/templates/ingress.yaml create mode 100644 helm/atoma-proxy/templates/namespace.yaml create mode 100644 helm/atoma-proxy/templates/service.yaml create mode 100644 helm/atoma-proxy/values-dev.yaml create mode 100644 helm/atoma-proxy/values-prod.yaml create mode 100644 
helm/atoma-proxy/values.yaml diff --git a/helm/atoma-proxy/Chart.yaml b/helm/atoma-proxy/Chart.yaml new file mode 100644 index 00000000..14498b75 --- /dev/null +++ b/helm/atoma-proxy/Chart.yaml @@ -0,0 +1,29 @@ +apiVersion: v2 +name: atoma-proxy +description: A Helm chart for Atoma Proxy +type: application +version: 0.1.0 +appVersion: "1.0.0" +maintainers: + - name: Atoma Network +dependencies: + - name: postgresql + version: "12.x.x" + repository: https://charts.bitnami.com/bitnami + condition: postgresql.enabled + - name: prometheus + version: "15.x.x" + repository: https://prometheus-community.github.io/helm-charts + condition: prometheus.enabled + - name: grafana + version: "6.x.x" + repository: https://grafana.github.io/helm-charts + condition: grafana.enabled + - name: loki + version: "5.x.x" + repository: https://grafana.github.io/helm-charts + condition: loki.enabled + - name: tempo + version: "1.x.x" + repository: https://grafana.github.io/helm-charts + condition: tempo.enabled diff --git a/helm/atoma-proxy/README.md b/helm/atoma-proxy/README.md new file mode 100644 index 00000000..a63495f9 --- /dev/null +++ b/helm/atoma-proxy/README.md @@ -0,0 +1,206 @@ +# Atoma Proxy Helm Chart + +This Helm chart deploys the Atoma Proxy application along with its dependencies, including PostgreSQL, Prometheus, Grafana, Loki, and Tempo. + +## Prerequisites + +- Kubernetes cluster (v1.19+) +- Helm 3.x +- cert-manager (for SSL certificates) +- nginx-ingress-controller +- kubectl configured to communicate with your cluster + +## Quick Start + +1. Add the required Helm repositories: +```bash +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add grafana https://grafana.github.io/helm-charts +helm repo update +``` + +2. 
Install the chart: +```bash +# For development +./scripts/deploy.sh -e dev -p your-password + +# For production +./scripts/deploy.sh -e prod -p your-secure-password +``` + +## Configuration + +The chart can be configured using values files or command-line arguments. The main configuration files are: + +- `values.yaml`: Default configuration +- `values-dev.yaml`: Development environment configuration +- `values-prod.yaml`: Production environment configuration + +### Key Configuration Parameters + +#### Main Application +```yaml +atomaProxy: + image: + repository: ghcr.io/atoma-network/atoma-proxy + tag: latest + replicas: 1 + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" +``` + +#### PostgreSQL +```yaml +postgresql: + enabled: true + auth: + database: atoma_proxy + username: atoma_proxy + password: "" # Set via --set or secrets management + primary: + persistence: + size: 10Gi +``` + +#### Monitoring Stack +```yaml +prometheus: + enabled: true + server: + persistentVolume: + size: 10Gi + +grafana: + enabled: true + persistence: + size: 10Gi + admin: + password: "" # Set via --set or secrets management + +loki: + enabled: true + persistence: + size: 10Gi + +tempo: + enabled: true + persistence: + size: 10Gi +``` + +## Deployment Script + +The `scripts/deploy.sh` script provides an easy way to deploy the application: + +```bash +./scripts/deploy.sh [options] + +Options: + -e, --env ENV Environment to deploy (dev/prod) [default: dev] + -n, --namespace NS Kubernetes namespace [default: atoma-proxy-{env}] + -r, --release NAME Helm release name [default: atoma-proxy-{env}] + -p, --password PASS Password for PostgreSQL and Grafana + -h, --help Show this help message +``` + +## Environment-Specific Configurations + +### Development +- Uses staging SSL certificates +- Lower resource limits +- Debug logging +- Single replica +- Smaller storage volumes +- Dev-specific hostnames + +### Production +- Uses production SSL 
certificates +- Higher resource limits +- Info logging +- Multiple replicas +- Larger storage volumes +- Production hostnames +- Secure password management + +## Monitoring and Logging + +The chart includes a complete monitoring stack: + +- **Prometheus**: Metrics collection +- **Grafana**: Metrics visualization +- **Loki**: Log aggregation +- **Tempo**: Distributed tracing + +Access the monitoring tools at: +- Grafana: `https://grafana.{domain}` +- Prometheus: `https://prometheus.{domain}` +- Loki: `https://loki.{domain}` +- Tempo: `https://tempo.{domain}` + +## Security Considerations + +1. **Passwords**: Always set secure passwords for production using: + ```bash + --set postgresql.auth.password=your-secure-password \ + --set grafana.admin.password=your-secure-password + ``` + +2. **SSL Certificates**: The chart uses cert-manager for SSL certificate management: + - Development: `letsencrypt-staging` + - Production: `letsencrypt-prod` + +3. **Resource Limits**: Adjust resource limits based on your cluster capacity and application needs. + +## Troubleshooting + +1. **Pod Issues**: + ```bash + kubectl get pods -n atoma-proxy-{env} + kubectl describe pod -n atoma-proxy-{env} + kubectl logs -n atoma-proxy-{env} + ``` + +2. **Ingress Issues**: + ```bash + kubectl get ingress -n atoma-proxy-{env} + kubectl describe ingress -n atoma-proxy-{env} + ``` + +3. **Database Issues**: + ```bash + kubectl get pods -n atoma-proxy-{env} -l app.kubernetes.io/name=postgresql + kubectl logs -n atoma-proxy-{env} + ``` + +## Maintenance + +1. **Updating the Chart**: + ```bash + helm repo update + helm upgrade atoma-proxy-{env} . -f values-{env}.yaml + ``` + +2. **Backup**: + - PostgreSQL data is stored in persistent volumes + - Regular backups should be configured for production + +3. **Scaling**: + - Adjust replicas in values file + - Scale horizontally: `kubectl scale deployment atoma-proxy-{env} --replicas=N` + +## Contributing + +1. Fork the repository +2. 
Create a feature branch +3. Make your changes +4. Submit a pull request + +## License + +This chart is licensed under the same license as the Atoma Proxy application. \ No newline at end of file diff --git a/helm/atoma-proxy/scripts/deploy.sh b/helm/atoma-proxy/scripts/deploy.sh new file mode 100755 index 00000000..56e1594b --- /dev/null +++ b/helm/atoma-proxy/scripts/deploy.sh @@ -0,0 +1,186 @@ +#!/bin/bash + +set -e + +# Default values +ENV="dev" +NAMESPACE="" +VALUES_FILE="" +RELEASE_NAME="" +PASSWORD="" + +# Check for required tools +check_prerequisites() { + local missing_tools=() + + # Check for kubectl + if ! command -v kubectl &> /dev/null; then + missing_tools+=("kubectl") + fi + + # Check for helm + if ! command -v helm &> /dev/null; then + missing_tools+=("helm") + fi + + # If any tools are missing, show error and exit + if [ ${#missing_tools[@]} -ne 0 ]; then + echo "Error: The following required tools are not installed:" + for tool in "${missing_tools[@]}"; do + echo " - $tool" + done + echo "" + echo "Please install the missing tools and try again." + echo "You can install them using:" + echo " - kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl/" + echo " - helm: https://helm.sh/docs/intro/install/" + exit 1 + fi + + # Check kubectl configuration + echo "Checking kubectl configuration..." + + # Check if kubeconfig exists + if [ -z "$KUBECONFIG" ]; then + if [ ! -f "$HOME/.kube/config" ]; then + echo "Error: No kubeconfig file found." + echo "Please ensure you have a valid kubeconfig file at ~/.kube/config" + echo "You can get this file from your cluster administrator or cloud provider." + exit 1 + fi + fi + + # Check current context + if ! kubectl config current-context &> /dev/null; then + echo "Error: No current context set in kubectl configuration." 
+ echo "Available contexts:" + kubectl config get-contexts + echo "" + echo "Please set a context using:" + echo " kubectl config use-context " + exit 1 + fi + + # Check cluster connection + if ! kubectl cluster-info &> /dev/null; then + echo "Error: Cannot connect to the Kubernetes cluster." + echo "Current context: $(kubectl config current-context)" + echo "" + echo "Please check:" + echo " 1. Your cluster is running" + echo " 2. Your kubeconfig file is correct" + echo " 3. You have network access to the cluster" + echo " 4. Your credentials are valid" + echo "" + echo "You can verify your configuration with:" + echo " kubectl config view" + echo " kubectl cluster-info" + exit 1 + fi + + echo "✓ kubectl is properly configured" +} + +# Help message +show_help() { + echo "Usage: $0 [options]" + echo "Options:" + echo " -e, --env ENV Environment to deploy (dev/prod) [default: dev]" + echo " -n, --namespace NS Kubernetes namespace [default: atoma-proxy-{env}]" + echo " -r, --release NAME Helm release name [default: atoma-proxy-{env}]" + echo " -p, --password PASS Password for PostgreSQL and Grafana" + echo " -h, --help Show this help message" + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -e|--env) + ENV="$2" + shift 2 + ;; + -n|--namespace) + NAMESPACE="$2" + shift 2 + ;; + -r|--release) + RELEASE_NAME="$2" + shift 2 + ;; + -p|--password) + PASSWORD="$2" + shift 2 + ;; + -h|--help) + show_help + ;; + *) + echo "Unknown option: $1" + show_help + ;; + esac +done + +# Check prerequisites +echo "Checking prerequisites..." 
+check_prerequisites + +# Validate environment +if [[ "$ENV" != "dev" && "$ENV" != "prod" ]]; then + echo "Error: Environment must be either 'dev' or 'prod'" + exit 1 +fi + +# Set default values if not provided +if [ -z "$NAMESPACE" ]; then + NAMESPACE="atoma-proxy-$ENV" +fi + +if [ -z "$RELEASE_NAME" ]; then + RELEASE_NAME="atoma-proxy-$ENV" +fi + +# Get the directory where the script is located +SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" +CHART_DIR="$(dirname "$SCRIPT_DIR")" + +# Set values file based on environment +VALUES_FILE="$CHART_DIR/values-$ENV.yaml" + +# Check if values file exists +if [ ! -f "$VALUES_FILE" ]; then + echo "Error: Values file $VALUES_FILE not found" + exit 1 +fi + +# Add Helm repositories if not already added +echo "Adding Helm repositories..." +helm repo add bitnami https://charts.bitnami.com/bitnami +helm repo add prometheus-community https://prometheus-community.github.io/helm-charts +helm repo add grafana https://grafana.github.io/helm-charts +helm repo update + +# Update dependencies +echo "Updating Helm dependencies..." +cd "$CHART_DIR" +helm dependency update + +# Prepare password arguments +PASSWORD_ARGS="" +if [ ! -z "$PASSWORD" ]; then + PASSWORD_ARGS="--set postgresql.auth.password=$PASSWORD --set grafana.admin.password=$PASSWORD" +fi + +# Deploy the chart +echo "Deploying $RELEASE_NAME to namespace $NAMESPACE..." +helm upgrade --install $RELEASE_NAME . \ + --namespace $NAMESPACE \ + --create-namespace \ + -f "$VALUES_FILE" \ + $PASSWORD_ARGS + +echo "Deployment completed successfully!" 
+echo "Release: $RELEASE_NAME" +echo "Namespace: $NAMESPACE" +echo "Environment: $ENV" \ No newline at end of file diff --git a/helm/atoma-proxy/scripts/setup-cluster.sh b/helm/atoma-proxy/scripts/setup-cluster.sh new file mode 100755 index 00000000..f68d4fa7 --- /dev/null +++ b/helm/atoma-proxy/scripts/setup-cluster.sh @@ -0,0 +1,316 @@ +#!/bin/bash + +set -e + +# Default values +CLUSTER_TYPE="minikube" +CLOUD_PROVIDER="" +CLUSTER_NAME="atoma-proxy" +REGION="us-west-2" + +# Help message +show_help() { + echo "Usage: $0 [options]" + echo "Options:" + echo " -t, --type TYPE Cluster type (minikube/kind/eks/gke/aks) [default: minikube]" + echo " -p, --provider PROV Cloud provider (aws/gcp/azure) [required for cloud clusters]" + echo " -n, --name NAME Cluster name [default: atoma-proxy]" + echo " -r, --region REGION Region for cloud clusters [default: us-west-2]" + echo " -h, --help Show this help message" + exit 1 +} + +# Parse command line arguments +while [[ $# -gt 0 ]]; do + case $1 in + -t|--type) + CLUSTER_TYPE="$2" + shift 2 + ;; + -p|--provider) + CLOUD_PROVIDER="$2" + shift 2 + ;; + -n|--name) + CLUSTER_NAME="$2" + shift 2 + ;; + -r|--region) + REGION="$2" + shift 2 + ;; + -h|--help) + show_help + ;; + *) + echo "Unknown option: $1" + show_help + ;; + esac +done + +# Check for required tools +check_prerequisites() { + local missing_tools=() + + # Check for kubectl + if ! command -v kubectl &> /dev/null; then + missing_tools+=("kubectl") + fi + + # Check for helm + if ! command -v helm &> /dev/null; then + missing_tools+=("helm") + fi + + # Check for cluster-specific tools + case $CLUSTER_TYPE in + minikube) + if ! command -v minikube &> /dev/null; then + missing_tools+=("minikube") + fi + ;; + kind) + if ! command -v kind &> /dev/null; then + missing_tools+=("kind") + fi + ;; + eks) + if ! command -v aws &> /dev/null; then + missing_tools+=("aws-cli") + fi + if ! command -v eksctl &> /dev/null; then + missing_tools+=("eksctl") + fi + ;; + gke) + if ! 
command -v gcloud &> /dev/null; then + missing_tools+=("gcloud") + fi + ;; + aks) + if ! command -v az &> /dev/null; then + missing_tools+=("azure-cli") + fi + ;; + esac + + # If any tools are missing, show error and exit + if [ ${#missing_tools[@]} -ne 0 ]; then + echo "Error: The following required tools are not installed:" + for tool in "${missing_tools[@]}"; do + echo " - $tool" + done + echo "" + echo "Please install the missing tools and try again." + echo "You can install them using:" + echo " - kubectl: https://kubernetes.io/docs/tasks/tools/install-kubectl/" + echo " - helm: https://helm.sh/docs/intro/install/" + echo " - minikube: https://minikube.sigs.k8s.io/docs/start/" + echo " - kind: https://kind.sigs.k8s.io/docs/user/quick-start/#installation" + echo " - aws-cli: https://aws.amazon.com/cli/" + echo " - eksctl: https://eksctl.io/introduction/installation/" + echo " - gcloud: https://cloud.google.com/sdk/docs/install" + echo " - azure-cli: https://docs.microsoft.com/en-us/cli/azure/install-azure-cli" + exit 1 + fi +} + +# Setup local development cluster +setup_local_cluster() { + case $CLUSTER_TYPE in + minikube) + echo "Setting up Minikube cluster..." + # Start minikube with profile name instead of --name flag + minikube start -p $CLUSTER_NAME --driver=docker --cpus=4 --memory=8g --disk-size=50g + + # Wait for minikube to be ready + echo "Waiting for minikube to be ready..." + minikube status -p $CLUSTER_NAME + + # Enable addons + echo "Enabling minikube addons..." + minikube addons enable ingress -p $CLUSTER_NAME + minikube addons enable metrics-server -p $CLUSTER_NAME + + # Update kubeconfig + echo "Updating kubeconfig..." + minikube update-context -p $CLUSTER_NAME + + # Set up kubectl to use minikube + echo "Setting up kubectl configuration..." + export KUBECONFIG=~/.kube/config + minikube kubectl -- config use-context $CLUSTER_NAME + + # Verify the context + if ! 
kubectl config current-context | grep -q "$CLUSTER_NAME"; then + echo "Error: Failed to set minikube context" + exit 1 + fi + + # Test kubectl connection + echo "Testing kubectl connection..." + kubectl cluster-info + ;; + kind) + echo "Setting up Kind cluster..." + cat < /dev/null; do + echo "Still waiting for ingress-nginx namespace to be deleted..." + sleep 5 + done + + # Additional cleanup of any remaining resources + echo "Cleaning up any remaining ingress-nginx resources..." + kubectl delete clusterrole ingress-nginx --ignore-not-found + kubectl delete clusterrolebinding ingress-nginx --ignore-not-found + kubectl delete serviceaccount ingress-nginx -n ingress-nginx --ignore-not-found + kubectl delete service ingress-nginx-controller -n ingress-nginx --ignore-not-found + kubectl delete service ingress-nginx-controller-admission -n ingress-nginx --ignore-not-found + kubectl delete deployment ingress-nginx-controller -n ingress-nginx --ignore-not-found + kubectl delete validatingwebhookconfiguration ingress-nginx-admission --ignore-not-found + kubectl delete ingressclass nginx --ignore-not-found + kubectl delete ingressclass nginx-ingress --ignore-not-found + kubectl delete ingressclass ingress-nginx --ignore-not-found + + # Install nginx-ingress + echo "Installing nginx-ingress..." + helm repo add ingress-nginx https://kubernetes.github.io/ingress-nginx + helm repo update + helm install ingress-nginx ingress-nginx/ingress-nginx \ + --namespace ingress-nginx \ + --create-namespace \ + --set controller.service.type=LoadBalancer \ + --set controller.service.externalTrafficPolicy=Local \ + --set controller.service.enableHttps=false \ + --set controller.ingressClassResource.name=nginx \ + --set controller.ingressClassResource.enabled=true \ + --set controller.ingressClassResource.default=true + + # Install metrics-server if not using minikube + if [ "$CLUSTER_TYPE" != "minikube" ]; then + echo "Installing metrics-server..." 
+ kubectl apply -f https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml + fi +} + +# Main execution +echo "Starting cluster setup..." + +# Check prerequisites +check_prerequisites + +# Setup cluster +if [[ "$CLUSTER_TYPE" == "minikube" || "$CLUSTER_TYPE" == "kind" ]]; then + setup_local_cluster +else + setup_cloud_cluster +fi + +# Install components +install_components + +echo "Cluster setup completed successfully!" +echo "Cluster type: $CLUSTER_TYPE" +echo "Cluster name: $CLUSTER_NAME" +echo "" +echo "You can now deploy the application using:" +echo "./deploy.sh -e dev -p your-password" \ No newline at end of file diff --git a/helm/atoma-proxy/templates/configmap.yaml b/helm/atoma-proxy/templates/configmap.yaml new file mode 100644 index 00000000..d398f763 --- /dev/null +++ b/helm/atoma-proxy/templates/configmap.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-config + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + config.toml: | + # This will be populated from your existing config.toml + # You should replace this with your actual configuration + prometheus.yml: | + # This will be populated from your existing prometheus.yml + # You should replace this with your actual configuration + loki.yaml: | + # This will be populated from your existing loki.yaml + # You should replace this with your actual configuration + tempo.yaml: | + # This will be populated from your existing tempo.yaml + # You should replace this with your actual configuration + environment: {{ .Values.atomaProxy.config.environment | quote }} + log_level: {{ .Values.atomaProxy.config.logLevel | quote }} \ No newline at end of file diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml new file mode 100644 index 00000000..4ed88338 --- /dev/null +++ 
b/helm/atoma-proxy/templates/deployment.yaml @@ -0,0 +1,72 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ .Release.Name }} + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +spec: + replicas: {{ .Values.atomaProxy.replicas }} + selector: + matchLabels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + template: + metadata: + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + spec: + containers: + - name: {{ .Chart.Name }} + image: "{{ .Values.atomaProxy.image.repository }}:{{ .Values.atomaProxy.image.tag }}" + imagePullPolicy: {{ .Values.atomaProxy.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.atomaProxy.service.http.port }} + - name: credentials + containerPort: {{ .Values.atomaProxy.service.credentials.port }} + - name: p2p + containerPort: {{ .Values.atomaProxy.service.p2p.port }} + env: + - name: RUST_LOG + value: {{ .Values.atomaProxy.config.logLevel | quote }} + - name: OTEL_EXPORTER_OTLP_ENDPOINT + value: "http://{{ .Release.Name }}-otel-collector:4317" + volumeMounts: + - name: config + mountPath: /app/config.toml + subPath: config.toml + - name: open-router + mountPath: /app/open_router.json + subPath: open_router.json + - name: logs + mountPath: /app/logs + - name: data + mountPath: /app/data + resources: + {{- toYaml .Values.atomaProxy.resources | nindent 12 }} + livenessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 30 + periodSeconds: 10 + readinessProbe: + httpGet: + path: /health + port: http + initialDelaySeconds: 5 + periodSeconds: 5 + volumes: + - name: config + configMap: + name: {{ .Release.Name }}-config + - name: open-router + secret: + secretName: {{ .Release.Name }}-open-router + - name: logs + emptyDir: {} + - name: data + emptyDir: {} \ No newline at end of file diff --git 
a/helm/atoma-proxy/templates/ingress.yaml b/helm/atoma-proxy/templates/ingress.yaml new file mode 100644 index 00000000..23e1cf8d --- /dev/null +++ b/helm/atoma-proxy/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.atomaProxy.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ .Release.Name }}-ingress + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + {{- with .Values.atomaProxy.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + ingressClassName: {{ .Values.atomaProxy.ingress.className }} + tls: + - hosts: + {{- range .Values.atomaProxy.ingress.hosts }} + - {{ .host }} + {{- end }} + secretName: {{ .Release.Name }}-tls + rules: + {{- range .Values.atomaProxy.ingress.hosts }} + - host: {{ .host }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ $.Release.Name }} + port: + {{- if eq .service "http" }} + name: http + {{- else if eq .service "credentials" }} + name: credentials + {{- else if eq .service "prover" }} + number: 8080 + {{- end }} + {{- end }} + {{- end }} +{{- end }} \ No newline at end of file diff --git a/helm/atoma-proxy/templates/namespace.yaml b/helm/atoma-proxy/templates/namespace.yaml new file mode 100644 index 00000000..d59a6be4 --- /dev/null +++ b/helm/atoma-proxy/templates/namespace.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: Namespace +metadata: + name: {{ .Release.Namespace }} + labels: + name: {{ .Release.Namespace }} + app.kubernetes.io/part-of: atoma-proxy + app.kubernetes.io/managed-by: helm diff --git a/helm/atoma-proxy/templates/service.yaml b/helm/atoma-proxy/templates/service.yaml new file mode 100644 index 00000000..7beca7bd --- /dev/null +++ b/helm/atoma-proxy/templates/service.yaml @@ -0,0 +1,23 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ .Release.Name }} + 
namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +spec: + selector: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} + ports: + - name: http + port: {{ .Values.atomaProxy.service.http.port }} + targetPort: {{ .Values.atomaProxy.service.http.port }} + - name: credentials + port: {{ .Values.atomaProxy.service.credentials.port }} + targetPort: {{ .Values.atomaProxy.service.credentials.port }} + - name: p2p + port: {{ .Values.atomaProxy.service.p2p.port }} + targetPort: {{ .Values.atomaProxy.service.p2p.port }} + type: ClusterIP diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml new file mode 100644 index 00000000..7578f296 --- /dev/null +++ b/helm/atoma-proxy/values-dev.yaml @@ -0,0 +1,95 @@ +# Development environment settings +global: + environment: development + domain: atoma.network + +atomaProxy: + image: + repository: ghcr.io/atoma-network/atoma-proxy + tag: dev-latest + pullPolicy: Always + replicas: 1 + resources: + requests: + memory: "256Mi" + cpu: "100m" + limits: + memory: "512Mi" + cpu: "200m" + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - host: api-dev.atoma.network + paths: + - path: / + pathType: Prefix + service: http + - host: credentials-dev.atoma.network + paths: + - path: / + pathType: Prefix + service: credentials + - host: prover-dev.atoma.network + paths: + - path: / + pathType: Prefix + service: prover + config: + logLevel: "debug" + environment: "development" + +# PostgreSQL settings +postgresql: + enabled: true + auth: + database: atoma_proxy_dev + username: atoma_proxy_dev + password: "dev_password" # Change this in production + primary: + persistence: + size: 5Gi + service: + port: 5432 + +# Monitoring stack settings
+prometheus: + enabled: true + server: + persistentVolume: + size: 5Gi + alertmanager: + persistentVolume: + size: 1Gi + +grafana: + enabled: true + persistence: + size: 5Gi + admin: + password: "admin" # Change this in production + ingress: + enabled: true + hosts: + - grafana-dev.atoma.network + +loki: + enabled: true + persistence: + size: 5Gi + ingress: + enabled: true + hosts: + - loki-dev.atoma.network + +tempo: + enabled: true + persistence: + size: 5Gi + ingress: + enabled: true + hosts: + - tempo-dev.atoma.network diff --git a/helm/atoma-proxy/values-prod.yaml b/helm/atoma-proxy/values-prod.yaml new file mode 100644 index 00000000..16bef508 --- /dev/null +++ b/helm/atoma-proxy/values-prod.yaml @@ -0,0 +1,95 @@ +# Production environment settings +global: + environment: production + domain: atoma.network + +atomaProxy: + image: + repository: ghcr.io/atoma-network/atoma-proxy + tag: latest + pullPolicy: IfNotPresent + replicas: 3 + resources: + requests: + memory: "1Gi" + cpu: "500m" + limits: + memory: "2Gi" + cpu: "1000m" + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - host: api.atoma.network + paths: + - path: / + pathType: Prefix + service: http + - host: credentials.atoma.network + paths: + - path: / + pathType: Prefix + service: credentials + - host: prover.atoma.network + paths: + - path: / + pathType: Prefix + service: prover + config: + logLevel: "info" + environment: "production" + +# PostgreSQL settings +postgresql: + enabled: true + auth: + database: atoma_proxy + username: atoma_proxy + password: "" # Set this via --set or secrets management + primary: + persistence: + size: 50Gi + service: + port: 5432 + +# Monitoring stack settings +prometheus: + enabled: true + server: + persistentVolume: + size: 50Gi + alertmanager: + persistentVolume: + size: 10Gi + +grafana: + enabled: true + persistence: + size: 20Gi + admin: 
+ password: "" # Set this via --set or secrets management + ingress: + enabled: true + hosts: + - grafana.atoma.network + +loki: + enabled: true + persistence: + size: 50Gi + ingress: + enabled: true + hosts: + - loki.atoma.network + +tempo: + enabled: true + persistence: + size: 50Gi + ingress: + enabled: true + hosts: + - tempo.atoma.network diff --git a/helm/atoma-proxy/values.yaml b/helm/atoma-proxy/values.yaml new file mode 100644 index 00000000..0f3994b4 --- /dev/null +++ b/helm/atoma-proxy/values.yaml @@ -0,0 +1,138 @@ +# Global settings +global: + environment: production + domain: atoma.network + +# Main application settings +atomaProxy: + image: + repository: ghcr.io/atoma-network/atoma-proxy + tag: latest + pullPolicy: IfNotPresent + replicas: 1 + resources: + requests: + memory: "512Mi" + cpu: "250m" + limits: + memory: "1Gi" + cpu: "500m" + service: + http: + port: 8080 + credentials: + port: 8081 + p2p: + port: 8083 + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-prod" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - host: api.atoma.network + paths: + - path: / + pathType: Prefix + service: http + - host: credentials.atoma.network + paths: + - path: / + pathType: Prefix + service: credentials + - host: prover.atoma.network + paths: + - path: / + pathType: Prefix + service: prover + config: + logLevel: "info" + environment: "production" + +# PostgreSQL settings +postgresql: + enabled: true + auth: + database: atoma_proxy + username: atoma_proxy + password: "" + primary: + persistence: + size: 10Gi + service: + port: 5432 + +# Monitoring stack settings +prometheus: + enabled: true + server: + persistentVolume: + size: 10Gi + alertmanager: + persistentVolume: + size: 2Gi + +grafana: + enabled: true + persistence: + size: 10Gi + admin: + password: "" + ingress: + enabled: true + hosts: + - grafana.atoma.network + +loki: + enabled: true + persistence: + size: 10Gi + ingress: + 
enabled: true + hosts: + - loki.atoma.network + +tempo: + enabled: true + persistence: + size: 10Gi + ingress: + enabled: true + hosts: + - tempo.atoma.network + +# OpenTelemetry Collector settings +otelCollector: + enabled: true + config: + receivers: + otlp: + protocols: + grpc: + endpoint: 0.0.0.0:4317 + http: + endpoint: 0.0.0.0:4318 + processors: + batch: {} + exporters: + prometheus: + endpoint: prometheus-server:9090 + loki: + endpoint: loki:3100 + otlp: + endpoint: tempo:4317 + service: + pipelines: + traces: + receivers: [otlp] + processors: [batch] + exporters: [otlp] + metrics: + receivers: [otlp] + processors: [batch] + exporters: [prometheus] + logs: + receivers: [otlp] + processors: [batch] + exporters: [loki] From c8c0d5b517e6545f6d5e6dda0a27feda264a12b5 Mon Sep 17 00:00:00 2001 From: Jorge Antonio Date: Wed, 14 May 2025 09:15:17 +0100 Subject: [PATCH 03/61] debug tracing for sending chunk for model --- atoma-proxy/src/server/handlers/chat_completions.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/atoma-proxy/src/server/handlers/chat_completions.rs b/atoma-proxy/src/server/handlers/chat_completions.rs index a5d23874..417f86a9 100644 --- a/atoma-proxy/src/server/handlers/chat_completions.rs +++ b/atoma-proxy/src/server/handlers/chat_completions.rs @@ -831,7 +831,10 @@ async fn handle_streaming_response( event = streamer.next() => { match event { Some(Ok(maybe_chunk)) => { - tracing::info!(target = "atoma-service-chat-completions", "Sending chunk to event sender"); + tracing::debug!( + target = "atoma-service-chat-completions", + "Sending chunk to event sender" + ); if let Err(e) = event_sender.send(maybe_chunk) { tracing::error!( target = "atoma-service-chat-completions", From efcab4a44ffcde0b8ae5a0132cd49703515e3836 Mon Sep 17 00:00:00 2001 From: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Date: Wed, 14 May 2025 16:31:08 +0200 Subject: [PATCH 04/61] fix: update fiat (#471) * fix: update fiat * million --- 
.../src/server/handlers/chat_completions.rs | 23 +++++---- .../src/server/handlers/completions.rs | 23 +++++---- atoma-proxy/src/server/handlers/embeddings.rs | 26 ++++++---- .../src/server/handlers/image_generations.rs | 17 +++++-- atoma-proxy/src/server/handlers/mod.rs | 22 ++++++++- atoma-proxy/src/server/handlers/nodes.rs | 1 - atoma-proxy/src/server/middleware.rs | 46 +++++++---------- atoma-proxy/src/server/streamer.rs | 22 ++++----- atoma-state/src/handlers.rs | 7 +++ atoma-state/src/state_manager.rs | 49 +++++++++++++++++++ atoma-state/src/types.rs | 2 + 11 files changed, 161 insertions(+), 77 deletions(-) diff --git a/atoma-proxy/src/server/handlers/chat_completions.rs b/atoma-proxy/src/server/handlers/chat_completions.rs index 417f86a9..7cded040 100644 --- a/atoma-proxy/src/server/handlers/chat_completions.rs +++ b/atoma-proxy/src/server/handlers/chat_completions.rs @@ -191,8 +191,10 @@ pub async fn chat_completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + metadata.max_total_num_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -278,7 +280,6 @@ async fn handle_chat_completions_request( &payload, metadata.num_input_tokens.map(|v| v as i64), metadata.max_total_num_compute_units as i64, - metadata.fiat_estimated_amount, metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), @@ -293,7 +294,7 @@ async fn handle_chat_completions_request( headers, &payload, metadata.max_total_num_compute_units as i64, - metadata.fiat_estimated_amount, + metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), metadata.model_name.clone(), @@ -460,8 +461,10 @@ pub async fn confidential_chat_completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + 
metadata.max_total_num_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -567,7 +570,7 @@ async fn handle_non_streaming_response( headers: HeaderMap, payload: &Value, estimated_total_tokens: i64, - fiat_estimated_amount: Option, + price_per_million: i64, selected_stack_small_id: Option, endpoint: String, model_name: String, @@ -656,7 +659,7 @@ async fn handle_non_streaming_response( .send( AtomaAtomaStateManagerEvent::UpdateNodeThroughputPerformance { timestamp: DateTime::::from(std::time::SystemTime::now()), - model_name, + model_name: model_name.clone(), input_tokens, output_tokens, time: time.elapsed().as_secs_f64(), @@ -690,8 +693,10 @@ async fn handle_non_streaming_response( if let Err(e) = update_state_manager_fiat( &state.state_manager_sender, user_id, - fiat_estimated_amount.unwrap_or(0), + estimated_total_tokens, total_tokens, + price_per_million, + model_name, &endpoint, ) { return Err(AtomaProxyError::InternalError { @@ -765,8 +770,7 @@ async fn handle_streaming_response( payload: &Value, num_input_tokens: Option, estimated_total_tokens: i64, - fiat_estimated_amount: Option, - price_per_million: Option, + price_per_million: i64, selected_stack_small_id: Option, endpoint: String, model_name: String, @@ -819,7 +823,6 @@ async fn handle_streaming_response( selected_stack_small_id, num_input_tokens.unwrap_or(0), estimated_total_tokens, - fiat_estimated_amount, price_per_million, start, user_id, diff --git a/atoma-proxy/src/server/handlers/completions.rs b/atoma-proxy/src/server/handlers/completions.rs index d3747e85..1ee9dd75 100644 --- a/atoma-proxy/src/server/handlers/completions.rs +++ b/atoma-proxy/src/server/handlers/completions.rs @@ -151,8 +151,10 @@ pub async fn completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + metadata.max_total_num_compute_units as i64, 0, + metadata.price_per_million, + 
metadata.model_name, &metadata.endpoint, )?; } @@ -221,7 +223,6 @@ async fn handle_completions_request( &payload, metadata.num_input_tokens.map(|v| v as i64), metadata.max_total_num_compute_units as i64, - metadata.fiat_estimated_amount, metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), @@ -236,7 +237,7 @@ async fn handle_completions_request( headers, &payload, metadata.max_total_num_compute_units as i64, - metadata.fiat_estimated_amount, + metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), metadata.model_name.clone(), @@ -416,8 +417,10 @@ pub async fn confidential_completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + metadata.max_total_num_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -525,7 +528,7 @@ async fn handle_non_streaming_response( headers: HeaderMap, payload: &Value, estimated_total_tokens: i64, - fiat_estimated_amount: Option, + price_per_million: i64, selected_stack_small_id: Option, endpoint: String, model_name: String, @@ -614,7 +617,7 @@ async fn handle_non_streaming_response( .send( AtomaAtomaStateManagerEvent::UpdateNodeThroughputPerformance { timestamp: DateTime::::from(std::time::SystemTime::now()), - model_name, + model_name: model_name.clone(), input_tokens, output_tokens, time: time.elapsed().as_secs_f64(), @@ -648,8 +651,10 @@ async fn handle_non_streaming_response( if let Err(e) = update_state_manager_fiat( &state.state_manager_sender, user_id, - fiat_estimated_amount.unwrap_or(0), + estimated_total_tokens, total_tokens, + price_per_million, + model_name, &endpoint, ) { return Err(AtomaProxyError::InternalError { @@ -718,8 +723,7 @@ async fn handle_streaming_response( payload: &Value, num_input_tokens: Option, estimated_total_tokens: i64, - fiat_estimated_amount: Option, - price_per_million: Option, + 
price_per_million: i64, selected_stack_small_id: Option, endpoint: String, model_name: String, @@ -772,7 +776,6 @@ async fn handle_streaming_response( selected_stack_small_id, num_input_tokens.unwrap_or(0), estimated_total_tokens, - fiat_estimated_amount, price_per_million, start, user_id, diff --git a/atoma-proxy/src/server/handlers/embeddings.rs b/atoma-proxy/src/server/handlers/embeddings.rs index f41855f3..5f3e3937 100644 --- a/atoma-proxy/src/server/handlers/embeddings.rs +++ b/atoma-proxy/src/server/handlers/embeddings.rs @@ -21,7 +21,7 @@ use crate::server::{ http_server::ProxyState, middleware::RequestMetadataExtension, types::{ConfidentialComputeRequest, ConfidentialComputeResponse}, - MODEL, ONE_MILLION, + MODEL, }; use super::{ @@ -192,7 +192,8 @@ pub async fn embeddings_create( .await { Ok(response) => { - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", metadata.model_name)]); + TOTAL_COMPLETED_REQUESTS + .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); SUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER .add(1, &[KeyValue::new("user_id", metadata.user_id)]); match metadata.selected_stack_small_id { @@ -209,8 +210,10 @@ pub async fn embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), - metadata.fiat_estimated_amount.unwrap_or_default(), + num_input_compute_units as i64, + num_input_compute_units as i64, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -237,8 +240,10 @@ pub async fn embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + num_input_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -347,9 +352,10 @@ pub async fn confidential_embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - 
metadata.fiat_estimated_amount.unwrap_or_default(), - total_tokens * metadata.price_per_million.unwrap_or_default() - / ONE_MILLION as i64, + num_input_compute_units as i64, + total_tokens, + metadata.price_per_million, + metadata.model_name.clone(), &metadata.endpoint, )?; } @@ -380,8 +386,10 @@ pub async fn confidential_embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + num_input_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } diff --git a/atoma-proxy/src/server/handlers/image_generations.rs b/atoma-proxy/src/server/handlers/image_generations.rs index 144f5296..76f95ac3 100644 --- a/atoma-proxy/src/server/handlers/image_generations.rs +++ b/atoma-proxy/src/server/handlers/image_generations.rs @@ -185,7 +185,8 @@ pub async fn image_generations_create( .await { Ok(response) => { - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", metadata.model_name)]); + TOTAL_COMPLETED_REQUESTS + .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); match metadata.selected_stack_small_id { Some(stack_small_id) => { update_state_manager( @@ -200,8 +201,10 @@ pub async fn image_generations_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), - metadata.fiat_estimated_amount.unwrap_or_default(), + metadata.max_total_num_compute_units as i64, + metadata.max_total_num_compute_units as i64, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -231,8 +234,10 @@ pub async fn image_generations_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + metadata.max_total_num_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } @@ -343,8 +348,10 @@ pub async fn 
confidential_image_generations_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.fiat_estimated_amount.unwrap_or_default(), + metadata.max_total_num_compute_units as i64, 0, + metadata.price_per_million, + metadata.model_name, &metadata.endpoint, )?; } diff --git a/atoma-proxy/src/server/handlers/mod.rs b/atoma-proxy/src/server/handlers/mod.rs index 187ad8cf..58be018c 100644 --- a/atoma-proxy/src/server/handlers/mod.rs +++ b/atoma-proxy/src/server/handlers/mod.rs @@ -14,7 +14,7 @@ use reqwest::StatusCode; use sui_sdk::types::crypto::{PublicKey, Signature, SignatureScheme, SuiSignature}; use tracing::instrument; -use super::error::AtomaProxyError; +use super::{error::AtomaProxyError, ONE_MILLION}; use crate::server::Result; pub mod chat_completions; @@ -137,14 +137,34 @@ pub fn update_state_manager_fiat( user_id: i64, estimated_amount: i64, amount: i64, + price_per_one_million_compute_units: i64, + model_name: String, endpoint: &str, ) -> Result<()> { + let estimated_amount = i64::try_from( + estimated_amount as u128 * price_per_one_million_compute_units as u128 + / u128::from(ONE_MILLION), + ) + .map_err(|e| AtomaProxyError::InternalError { + message: format!("Error converting estimated amount: {e}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + let amount = i64::try_from( + amount as u128 * price_per_one_million_compute_units as u128 / u128::from(ONE_MILLION), + ) + .map_err(|e| AtomaProxyError::InternalError { + message: format!("Error converting amount: {e}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; // Update stack num tokens state_manager_sender .send(AtomaAtomaStateManagerEvent::UpdateStackNumTokensFiat { user_id, estimated_amount, amount, + model_name, }) .map_err(|e| AtomaProxyError::InternalError { message: format!("Error updating fiat balance: {e}"), diff --git a/atoma-proxy/src/server/handlers/nodes.rs b/atoma-proxy/src/server/handlers/nodes.rs index 
0e4c1eb9..69af59c6 100644 --- a/atoma-proxy/src/server/handlers/nodes.rs +++ b/atoma-proxy/src/server/handlers/nodes.rs @@ -373,7 +373,6 @@ pub async fn nodes_create_lock( stack_small_id, selected_node_id, tx_digest, - fiat_locked_amount: _, price_per_million: _, } = if let Some(lock_guard) = acquire_stack_lock::LockGuard::try_lock( &state.users_buy_stack_lock_map, diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs index f8ab9f19..5834effe 100644 --- a/atoma-proxy/src/server/middleware.rs +++ b/atoma-proxy/src/server/middleware.rs @@ -81,11 +81,8 @@ pub struct RequestMetadataExtension { /// Selected stack small id for this request. pub selected_stack_small_id: Option, - /// Estimated amount for fiat currency. - pub fiat_estimated_amount: Option, - /// Price per million tokens for this request. - pub price_per_million: Option, + pub price_per_million: i64, /// The endpoint path for this request. pub endpoint: String, @@ -341,7 +338,6 @@ pub async fn authenticate_middleware( stack_small_id, selected_node_id, tx_digest, - fiat_locked_amount, price_per_million, } = auth::get_selected_node(GetSelectedNodeArgs { model: &model, @@ -371,6 +367,7 @@ pub async fn authenticate_middleware( stack_small_id, num_input_compute_units, max_total_compute_units, + price_per_million, tx_digest, user_id, &endpoint, @@ -396,8 +393,7 @@ pub async fn authenticate_middleware( &body_json, &mut req_parts, selected_node_id, - fiat_locked_amount.unwrap(), - price_per_million.unwrap(), + price_per_million, num_input_compute_units, max_total_compute_units, user_id, @@ -410,8 +406,10 @@ pub async fn authenticate_middleware( update_state_manager_fiat( &state.state_manager_sender, user_id, - fiat_locked_amount.unwrap(), + max_total_compute_units as i64, 0, + price_per_million, + model, &endpoint, )?; return Err(e); @@ -795,8 +793,7 @@ pub async fn handle_locked_stack_middleware( selected_node_id: stack.selected_node_id, stack_small_id: stack.stack_small_id, 
tx_digest: None, - fiat_locked_amount: None, - price_per_million: None, + price_per_million: stack.price_per_million, }, None => { // 2. Acquire a new stack for the request, this will also lock compute units for the new acquired stack @@ -847,6 +844,7 @@ pub async fn handle_locked_stack_middleware( })?, request_metadata.num_input_tokens.unwrap_or_default(), max_total_num_compute_units, + selected_node_metadata.price_per_million, selected_node_metadata.tx_digest, user_id, &endpoint, @@ -1262,8 +1260,7 @@ pub mod auth { stack_small_id: Some(stack.stack_small_id), selected_node_id: stack.selected_node_id, tx_digest: None, - fiat_locked_amount: None, - price_per_million: None, + price_per_million: stack.price_per_one_million_compute_units, })) } else { Ok(None) @@ -1373,10 +1370,8 @@ pub mod auth { pub selected_node_id: i64, /// The transaction digest of the stack entry creation transaction pub tx_digest: Option, - /// The amount locked for fiat request - pub fiat_locked_amount: Option, /// The price per million compute units (this is used for the fiat request) - pub price_per_million: Option, + pub price_per_million: i64, } /// Acquires a new stack entry for the cheapest node. 
@@ -1620,8 +1615,7 @@ pub mod auth { stack_small_id: Some(stack_small_id), selected_node_id, tx_digest: Some(tx_digest), - fiat_locked_amount: None, - price_per_million: None, + price_per_million: price_per_million_compute_units as i64, }) } @@ -1937,8 +1931,7 @@ pub mod auth { stack_small_id: Some(stack.stack_small_id), selected_node_id: stack.selected_node_id, tx_digest: None, - fiat_locked_amount: None, - price_per_million: None, + price_per_million: stack.price_per_one_million_compute_units, }); } // WARN: This temporary check is to prevent users from trying to buy more compute units than the allowed stack size, @@ -2084,8 +2077,7 @@ pub mod auth { stack_small_id: None, selected_node_id: node.node_small_id, tx_digest: None, - fiat_locked_amount: Some(fiat_locked_amount), - price_per_million: Some(node.price_per_one_million_compute_units), + price_per_million: node.price_per_one_million_compute_units, }) } else { // NOTE: At this point, we have an acquired stack lock, so we can safely acquire a new stack. 
@@ -2170,8 +2162,7 @@ pub mod auth { selected_node_id: stack.selected_node_id, stack_small_id: Some(stack.stack_small_id), tx_digest: None, - fiat_locked_amount: None, - price_per_million: None, + price_per_million: stack.price_per_one_million_compute_units, })) } } @@ -2277,6 +2268,7 @@ pub mod utils { selected_stack_small_id: i64, num_input_tokens: u64, total_compute_units: u64, + price_per_million: i64, tx_digest: Option, user_id: i64, endpoint: &str, @@ -2341,8 +2333,7 @@ pub mod utils { max_total_num_compute_units: total_compute_units, user_id, selected_stack_small_id: Some(selected_stack_small_id), - fiat_estimated_amount: None, - price_per_million: None, + price_per_million, endpoint: endpoint.to_string(), model_name: request_model.to_string(), }); @@ -2414,7 +2405,6 @@ pub mod utils { /// * Model name #[instrument(level = "info", skip_all, fields( %endpoint, - %fiat_estimated_amount, %user_id ), err)] #[allow(clippy::too_many_arguments)] @@ -2423,7 +2413,6 @@ pub mod utils { body_json: &Value, req_parts: &mut Parts, selected_node_id: i64, - fiat_estimated_amount: i64, price_per_million: i64, num_input_tokens: u64, total_compute_units: u64, @@ -2468,8 +2457,7 @@ pub mod utils { max_total_num_compute_units: total_compute_units, user_id, selected_stack_small_id: None, - fiat_estimated_amount: Some(fiat_estimated_amount), - price_per_million: Some(price_per_million), + price_per_million, endpoint: endpoint.to_string(), model_name: request_model.to_string(), }); diff --git a/atoma-proxy/src/server/streamer.rs b/atoma-proxy/src/server/streamer.rs index b6725553..d6996d5e 100644 --- a/atoma-proxy/src/server/streamer.rs +++ b/atoma-proxy/src/server/streamer.rs @@ -32,7 +32,6 @@ use super::handlers::metrics::{ use super::handlers::{ update_state_manager_fiat, verify_response_hash_and_signature, RESPONSE_HASH_KEY, }; -use super::ONE_MILLION; /// The chunk that indicates the end of a streaming response const DONE_CHUNK: &str = "[DONE]"; @@ -60,10 +59,8 @@ pub struct 
Streamer { status: StreamStatus, /// Estimated total tokens for the stream estimated_total_tokens: i64, - /// Estimated amount for fiat currency. - fiat_estimated_amount: Option, /// Price per million tokens for this request. - price_per_million: Option, + price_per_million: i64, /// Stack small id stack_small_id: Option, /// State manager sender @@ -119,8 +116,7 @@ impl Streamer { stack_small_id: Option, num_input_tokens: i64, estimated_total_tokens: i64, - fiat_estimated_amount: Option, - price_per_million: Option, + price_per_million: i64, start: Instant, user_id: i64, model_name: String, @@ -142,7 +138,6 @@ impl Streamer { inter_stream_token_latency_timer: None, is_final_chunk_handled: false, num_generated_tokens: num_input_tokens, - fiat_estimated_amount, price_per_million, } } @@ -271,8 +266,10 @@ impl Streamer { if let Err(e) = update_state_manager_fiat( &self.state_manager_sender, self.user_id, - self.fiat_estimated_amount.unwrap_or_default(), - total_tokens * self.price_per_million.unwrap_or_default() / ONE_MILLION as i64, + self.estimated_total_tokens, + total_tokens, + self.price_per_million, + self.model_name.clone(), &self.endpoint, ) { error!( @@ -618,9 +615,10 @@ impl Drop for Streamer { if let Err(e) = update_state_manager_fiat( &self.state_manager_sender, self.user_id, - self.fiat_estimated_amount.unwrap_or_default(), - self.num_generated_tokens * self.price_per_million.unwrap_or_default() - / ONE_MILLION as i64, + self.estimated_total_tokens, + self.num_generated_tokens, + self.price_per_million, + self.model_name.clone(), &self.endpoint, ) { error!( diff --git a/atoma-state/src/handlers.rs b/atoma-state/src/handlers.rs index 629dc401..e56f20c5 100644 --- a/atoma-state/src/handlers.rs +++ b/atoma-state/src/handlers.rs @@ -1444,6 +1444,7 @@ pub async fn handle_state_manager_event( } AtomaAtomaStateManagerEvent::UpdateStackNumTokensFiat { user_id, + model_name, estimated_amount, amount, } => { @@ -1451,6 +1452,12 @@ pub async fn 
handle_state_manager_event( .state .update_real_amount_fiat_balance(user_id, estimated_amount, amount) .await?; + if amount > 0 { + state_manager + .state + .update_usage_per_model(user_id, model_name, amount) + .await?; + } } } Ok(()) diff --git a/atoma-state/src/state_manager.rs b/atoma-state/src/state_manager.rs index 7aff7031..2bc3b84f 100644 --- a/atoma-state/src/state_manager.rs +++ b/atoma-state/src/state_manager.rs @@ -4483,6 +4483,55 @@ impl AtomaState { Ok(()) } + + /// Updates the usage per model for a user. + /// + /// This method updates the `usage_per_model` table for the specified user and model. + /// + /// # Arguments + /// + /// * `user_id` - The unique identifier of the user. + /// * `model_name` - The name of the model. + /// * `total_tokens` - The total tokens used by the user for the model. + /// + /// # Returns + /// + /// - `Result<()>`: A result indicating success (Ok(())) or failure (Err(AtomaStateManagerError)). + /// + /// # Errors + /// + /// This function will return an error if: + /// - The database query fails to execute. 
+ /// + /// # Example + /// + /// ```rust,ignore + /// use atoma_node::atoma_state::AtomaStateManager; + /// + /// async fn update_usage(state_manager: &AtomaStateManager, user_id: i64, model_name: String, total_tokens: i64) -> Result<(), AtomaStateManagerError> { + /// state_manager.update_usage_per_model(user_id, model_name, total_tokens).await + /// } + /// ``` + #[instrument(level = "trace", skip(self))] + pub async fn update_usage_per_model( + &self, + user_id: i64, + model_name: String, + total_tokens: i64, + ) -> Result<()> { + sqlx::query( + "INSERT INTO usage_per_model (user_id, model, total_number_processed_tokens) + VALUES ($1, $2, $3) + ON CONFLICT (user_id, model) DO UPDATE SET + total_number_processed_tokens = usage_per_model.total_number_processed_tokens + EXCLUDED.total_number_processed_tokens", + ) + .bind(user_id) + .bind(model_name) + .bind(total_tokens) + .execute(&self.db) + .await?; + Ok(()) + } } pub mod validation { diff --git a/atoma-state/src/types.rs b/atoma-state/src/types.rs index 65dd1e44..1103e368 100644 --- a/atoma-state/src/types.rs +++ b/atoma-state/src/types.rs @@ -910,6 +910,8 @@ pub enum AtomaAtomaStateManagerEvent { UpdateStackNumTokensFiat { /// The user ID user_id: i64, + /// Model name + model_name: String, /// The original estimated amount estimated_amount: i64, /// The actual amount From 308878c964e8ecc1cb1f882b7dd2e228c00377f5 Mon Sep 17 00:00:00 2001 From: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Date: Wed, 14 May 2025 18:30:37 +0200 Subject: [PATCH 05/61] feat: use fiat when available (#474) * feat: use fiat when available * address comments --- atoma-proxy/src/server/middleware.rs | 253 +++++++++++++++++---------- 1 file changed, 160 insertions(+), 93 deletions(-) diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs index 5834effe..5ab09c8c 100644 --- a/atoma-proxy/src/server/middleware.rs +++ b/atoma-proxy/src/server/middleware.rs @@ -320,6 +320,9 @@ pub async fn 
authenticate_middleware( max_total_compute_units, model, user_id, + selected_node_id, + price_per_million, + is_fiat_request, } = auth::handle_authenticate_and_lock_compute_units( &state, &req_parts.headers, @@ -339,15 +342,24 @@ pub async fn authenticate_middleware( selected_node_id, tx_digest, price_per_million, - } = auth::get_selected_node(GetSelectedNodeArgs { - model: &model, - state: &state, - optional_stack, - total_tokens: max_total_compute_units, - user_id, - endpoint: &endpoint, - }) - .await?; + } = if is_fiat_request { + SelectedNodeMetadata { + stack_small_id: None, + selected_node_id, + tx_digest: None, + price_per_million, + } + } else { + auth::get_selected_node(GetSelectedNodeArgs { + model: &model, + state: &state, + optional_stack, + total_tokens: max_total_compute_units, + user_id, + endpoint: &endpoint, + }) + .await? + }; if let Some(stack_small_id) = &stack_small_id { STACK_NUM_REQUESTS_COUNTER.add(1, &[KeyValue::new("stack_small_id", *stack_small_id)]); @@ -881,6 +893,7 @@ pub mod auth { use atoma_auth::StackEntryResponse; use atoma_auth::Sui; + use atoma_state::types::CheapestNode; use atoma_state::types::Stack; use atoma_state::{timestamp_to_datetime_or_now, types::AtomaAtomaStateManagerEvent}; use axum::http::HeaderMap; @@ -934,6 +947,12 @@ pub mod auth { pub model: String, /// The user ID that made the request. pub user_id: i64, + /// Node ID that was selected for fiat request. + pub selected_node_id: i64, + /// Price per million tokens for fiat request. + pub price_per_million: i64, + /// Is fiat request. + pub is_fiat_request: bool, } /// Handles authentication and compute unit locking for incoming API requests. @@ -1159,6 +1178,51 @@ pub mod auth { request_model.get_compute_units_estimate(Some(&tokenizer))? }; + let node = get_cheapest_node(state, &model, endpoint).await?; + // We don't have a stack for the user, lets check if the user is using fiat currency. 
+ let (result_sender, result_receiver) = oneshot::channel(); + let fiat_locked_amount = max_total_compute_units as i64 + * node.price_per_one_million_compute_units + / ONE_MILLION as i64; + state + .state_manager_sender + .send(AtomaAtomaStateManagerEvent::LockUserFiatBalance { + user_id, + amount: fiat_locked_amount, + result_sender, + }) + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to send LockUserFiatBalance event: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + + let locked_fiat = result_receiver + .await + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to receive LockUserFiatBalance result: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })? + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to get LockUserFiatBalance result: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + + if locked_fiat { + return Ok(StackMetadata { + optional_stack: None, + num_input_compute_units, + max_total_compute_units, + model, + user_id, + selected_node_id: node.node_small_id, + price_per_million: node.price_per_one_million_compute_units, + is_fiat_request: true, + }); + } + let (result_sender, result_receiver) = oneshot::channel(); state @@ -1195,6 +1259,9 @@ pub mod auth { max_total_compute_units, model, user_id, + selected_node_id: node.node_small_id, + price_per_million: node.price_per_one_million_compute_units, + is_fiat_request: false, }) } @@ -1947,6 +2014,78 @@ pub mod auth { get_cheapest_node_and_acquire_new_stack(state, user_id, model, endpoint, total_tokens).await } + /// Gets the cheapest node for a model. + /// + /// This function sends a request to the state manager to retrieve the cheapest node for the given model. + /// It returns a `CheapestNode` if successful. 
+ /// + /// # Arguments + /// * `state` - The proxy state containing models, and other shared state + /// * `model` - The name/identifier of the AI model being requested + /// * `endpoint` - The API endpoint being accessed + /// + /// # Returns + /// Returns a `CheapestNode` containing: + /// * `task_small_id` - The small ID of the task + /// * `selected_node_id` - The small ID of the selected node + /// * `price_per_one_million_compute_units` - The price per one million compute units + /// + /// # Errors + /// Returns an `AtomaProxyError` error in the following cases: + /// * `INTERNAL_SERVER_ERROR` - Communication errors with state manager + /// * `NOT_FOUND` - No available node address found + /// * `BAD_REQUEST` - Invalid model name or unsupported model + /// + /// # Example + /// ```no_run + /// let node = get_cheapest_node( + /// &state, + /// "gpt-4", + /// "/v1/chat/completions" + /// ).await?; + /// println!("Cheapest node ID: {}", node.selected_node_id); + /// ``` + #[instrument(level = "info", skip_all, fields(model =%model), err)] + async fn get_cheapest_node( + state: &ProxyState, + model: &str, + endpoint: &str, + ) -> Result { + let (result_sender, result_receiver) = oneshot::channel(); + state + .state_manager_sender + .send(AtomaAtomaStateManagerEvent::GetCheapestNodeForModel { + model: model.to_string(), + is_confidential: false, + result_sender, + }) + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to send GetTasksForModel event: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + let node = result_receiver + .await + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to receive GetTasksForModel result: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })? 
+ .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to get retrieve `CheapestNode` from the state manager with result: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + node.map_or_else( + || { + Err(AtomaProxyError::RequestError { + message: format!("No node found for model {model}"), + endpoint: endpoint.to_string(), + }) + }, + Ok, + ) + } /// Gets the cheapest node for a model and acquires a new stack for the request. /// /// This function follows a two-step process: @@ -1992,40 +2131,7 @@ pub mod auth { endpoint: &str, total_tokens: u64, ) -> Result { - let (result_sender, result_receiver) = oneshot::channel(); - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetCheapestNodeForModel { - model: model.to_string(), - is_confidential: false, - result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send GetTasksForModel event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - let node = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive GetTasksForModel result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get retrieve `CheapestNode` from the state manager with result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - let node: atoma_state::types::CheapestNode = match node { - Some(node) => node, - None => { - return Err(AtomaProxyError::RequestError { - message: format!("No node found for model {model}"), - endpoint: endpoint.to_string(), - }); - } - }; + let node = get_cheapest_node(state, model, endpoint).await?; tracing::info!( "Attempting to acquire lock guard to buy a new stack for user {} with model {} and max compute units {}", user_id, @@ -2042,56 +2148,17 @@ pub mod auth { .await; }; - // We don't have a stack for the user, lets check if the user is using fiat currency. - let (result_sender, result_receiver) = oneshot::channel(); - let fiat_locked_amount = - total_tokens as i64 * node.price_per_one_million_compute_units / ONE_MILLION as i64; - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::LockUserFiatBalance { - user_id, - amount: fiat_locked_amount, - result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send LockUserFiatBalance event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - - let locked_fiat = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive LockUserFiatBalance result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get LockUserFiatBalance result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - - if locked_fiat { - Ok(SelectedNodeMetadata { - stack_small_id: None, - selected_node_id: node.node_small_id, - tx_digest: None, - price_per_million: node.price_per_one_million_compute_units, - }) - } else { - // NOTE: At this point, we have an acquired stack lock, so we can safely acquire a new stack. - acquire_new_stack( - state.state_manager_sender.clone(), - user_id, - lock_guard, - endpoint.to_string(), - total_tokens, - Arc::clone(&state.sui), - node, - ) - .await - } + // NOTE: At this point, we have an acquired stack lock, so we can safely acquire a new stack. + acquire_new_stack( + state.state_manager_sender.clone(), + user_id, + lock_guard, + endpoint.to_string(), + total_tokens, + Arc::clone(&state.sui), + node, + ) + .await // NOTE: The `acquire_new_stack` method will emit a stack creation event, and it will stored it // in the AtomaStateManager's internal state, therefore any new request querying the state manager after this // lock guard release will see the new stack. 
From ff6378c23af33c7c9548b5dc27955d103f68135a Mon Sep 17 00:00:00 2001 From: chad Date: Wed, 14 May 2025 13:46:46 -0500 Subject: [PATCH 06/61] build: kubernetes deployment working for non-arm64 containers --- .gitignore | 1 + helm/atoma-proxy/Chart.lock | 18 ++ helm/atoma-proxy/README.md | 185 ++++++++++++++++-- helm/atoma-proxy/scripts/fix-addons.sh | 53 +++++ helm/atoma-proxy/scripts/fix-rbac.sh | 50 +++++ helm/atoma-proxy/templates/namespace.yaml | 8 - .../templates/otel-collector-configmap.yaml | 40 ++++ .../templates/otel-collector-deployment.yaml | 33 ++++ .../templates/otel-collector-service.yaml | 20 ++ helm/atoma-proxy/templates/service.yaml | 24 +-- helm/atoma-proxy/values-dev.yaml | 36 +++- 11 files changed, 431 insertions(+), 37 deletions(-) create mode 100644 helm/atoma-proxy/Chart.lock create mode 100755 helm/atoma-proxy/scripts/fix-addons.sh create mode 100755 helm/atoma-proxy/scripts/fix-rbac.sh delete mode 100644 helm/atoma-proxy/templates/namespace.yaml create mode 100644 helm/atoma-proxy/templates/otel-collector-configmap.yaml create mode 100644 helm/atoma-proxy/templates/otel-collector-deployment.yaml create mode 100644 helm/atoma-proxy/templates/otel-collector-service.yaml diff --git a/.gitignore b/.gitignore index 7e4a45bb..cf2e6a78 100644 --- a/.gitignore +++ b/.gitignore @@ -9,3 +9,4 @@ config.test.toml /config.toml local_key .rest/ +helm/atoma-proxy/charts/ diff --git a/helm/atoma-proxy/Chart.lock b/helm/atoma-proxy/Chart.lock new file mode 100644 index 00000000..acf72f30 --- /dev/null +++ b/helm/atoma-proxy/Chart.lock @@ -0,0 +1,18 @@ +dependencies: +- name: postgresql + repository: https://charts.bitnami.com/bitnami + version: 12.12.10 +- name: prometheus + repository: https://prometheus-community.github.io/helm-charts + version: 15.18.0 +- name: grafana + repository: https://grafana.github.io/helm-charts + version: 6.61.2 +- name: loki + repository: https://grafana.github.io/helm-charts + version: 5.48.0 +- name: tempo + repository: 
https://grafana.github.io/helm-charts + version: 1.21.1 +digest: sha256:7e74b8e8a4fcc6eeff7275adb824a0f6bb0dc434351595501ecdad2cba9b8f24 +generated: "2025-05-14T11:34:17.060119-05:00" diff --git a/helm/atoma-proxy/README.md b/helm/atoma-proxy/README.md index a63495f9..7ea99dc3 100644 --- a/helm/atoma-proxy/README.md +++ b/helm/atoma-proxy/README.md @@ -10,23 +10,67 @@ This Helm chart deploys the Atoma Proxy application along with its dependencies, - nginx-ingress-controller - kubectl configured to communicate with your cluster -## Quick Start +## Installation -1. Add the required Helm repositories: -```bash -helm repo add bitnami https://charts.bitnami.com/bitnami -helm repo add prometheus-community https://prometheus-community.github.io/helm-charts -helm repo add grafana https://grafana.github.io/helm-charts -helm repo update -``` +### Step-by-Step Installation + +1. **Start Minikube** (if using Minikube): + ```bash + minikube start -p atoma-proxy \ + --driver=docker \ + --cpus=4 \ + --memory=8g \ + --disk-size=50g \ + --force \ + --addons=ingress,metrics-server + ``` + +2. **Create the namespace**: + ```bash + kubectl create namespace atoma-proxy + ``` + +3. **Add required Helm repositories**: + ```bash + helm repo add bitnami https://charts.bitnami.com/bitnami + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + helm repo add grafana https://grafana.github.io/helm-charts + helm repo update + ``` + +4. **Navigate to the Helm chart directory and build dependencies**: + ```bash + cd helm/atoma-proxy + helm dependency build + ``` -2. Install the chart: +5. **Install the chart with development values**: + ```bash + helm install atoma-proxy . -f values-dev.yaml -n atoma-proxy + ``` + +6. **Start Minikube tunnel** (in a separate terminal) for ingress access: + ```bash + minikube tunnel -p atoma-proxy + ``` + +7. 
**Verify the installation**: + ```bash + # Check all pods are running + kubectl get pods -n atoma-proxy + + # Check ingress resources + kubectl get ingress -n atoma-proxy + + # Check services + kubectl get svc -n atoma-proxy + ``` + +### Quick Installation + +Alternatively, you can use the deployment script: ```bash -# For development ./scripts/deploy.sh -e dev -p your-password - -# For production -./scripts/deploy.sh -e prod -p your-secure-password ``` ## Configuration @@ -203,4 +247,117 @@ Access the monitoring tools at: ## License -This chart is licensed under the same license as the Atoma Proxy application. \ No newline at end of file +This chart is licensed under the same license as the Atoma Proxy application. + +## Quickstart Checklist (including Apple Silicon/M1/M2/M3) + +1. **Build and Push a Multi-Arch Image (if on Apple Silicon):** + ```bash + docker buildx build --platform linux/amd64,linux/arm64 -t ghcr.io/atoma-network/atoma-proxy:latest --push . + ``` + > If you are on an M1/M2/M3 Mac, you must use a multi-arch image or an arm64 image. Otherwise, your pod will fail with `rosetta error: failed to open elf at /lib64/ld-linux-x86-64.so.2`. + +2. **Start Minikube:** + ```bash + minikube start -p atoma-proxy \ + --driver=docker \ + --cpus=4 \ + --memory=8g \ + --disk-size=50g \ + --force \ + --addons=ingress,metrics-server + ``` + +3. **Create the namespace:** + ```bash + kubectl create namespace atoma-proxy + ``` + +4. **Add required Helm repositories:** + ```bash + helm repo add bitnami https://charts.bitnami.com/bitnami + helm repo add prometheus-community https://prometheus-community.github.io/helm-charts + helm repo add grafana https://grafana.github.io/helm-charts + helm repo update + ``` + +5. **Build Helm dependencies:** + ```bash + cd helm/atoma-proxy + helm dependency build + ``` + +6. **Install the chart with development values:** + ```bash + helm install atoma-proxy . -f values-dev.yaml -n atoma-proxy + ``` + +7. 
**Start Minikube tunnel (for ingress):** + ```bash + minikube tunnel -p atoma-proxy + ``` + +8. **Verify all pods are running:** + ```bash + kubectl get pods -n atoma-proxy + ``` + +9. **If using OTEL Collector, ensure the deployment and service are present:** + ```bash + kubectl get deployment,svc -n atoma-proxy | grep otel-collector + ``` + +10. **If Atoma Proxy pod is not running, check logs and describe:** + ```bash + kubectl logs -n atoma-proxy -l app.kubernetes.io/name=atoma-proxy --tail=50 + kubectl describe pod -n atoma-proxy -l app.kubernetes.io/name=atoma-proxy + ``` + +11. **If you see `ImagePullBackOff`, ensure your image tag is correct and public, or use an image pull secret for private images.** + +12. **If you see `rosetta error: failed to open elf at /lib64/ld-linux-x86-64.so.2`, you need a multi-arch image. See step 1.** + +13. **Check Prometheus targets:** + ```bash + kubectl port-forward svc/atoma-proxy-prometheus-server 9090:80 -n atoma-proxy + # Then open http://localhost:9090/targets in your browser + ``` + +14. **Check for Atoma metrics in Prometheus/Grafana:** + - In Prometheus or Grafana, search for metrics with the prefix `atoma_`. 
+ +--- + +## Debugging & Verification + +- **Check all pods in the namespace:** + ```bash + kubectl get pods -n atoma-proxy + ``` +- **Check logs for a specific pod:** + ```bash + kubectl logs -n atoma-proxy + ``` +- **Describe a pod for events and status:** + ```bash + kubectl describe pod -n atoma-proxy + ``` +- **Check OTEL Collector deployment and service:** + ```bash + kubectl get deployment,svc -n atoma-proxy | grep otel-collector + ``` +- **Check if Atoma Proxy pod is running:** + ```bash + kubectl get pods -n atoma-proxy -l app.kubernetes.io/name=atoma-proxy + ``` +- **Check for image pull errors:** + ```bash + kubectl describe pod -n atoma-proxy -l app.kubernetes.io/name=atoma-proxy + ``` +- **Check Prometheus targets:** + ```bash + kubectl port-forward svc/atoma-proxy-prometheus-server 9090:80 -n atoma-proxy + # Open http://localhost:9090/targets + ``` +- **Check for Atoma metrics in Prometheus/Grafana:** + - In Prometheus or Grafana, search for metrics with the prefix `atoma_`. \ No newline at end of file diff --git a/helm/atoma-proxy/scripts/fix-addons.sh b/helm/atoma-proxy/scripts/fix-addons.sh new file mode 100755 index 00000000..01306a54 --- /dev/null +++ b/helm/atoma-proxy/scripts/fix-addons.sh @@ -0,0 +1,53 @@ +#!/bin/bash + +set -e + +# Function to wait for API server +wait_for_api() { + echo "Waiting for API server to be ready..." + until kubectl get nodes &>/dev/null; do + echo "Waiting for API server..." + sleep 5 + done +} + +# Function to apply manifest with retries +apply_with_retry() { + local manifest=$1 + local max_attempts=3 + local attempt=1 + + while [ $attempt -le $max_attempts ]; do + echo "Attempt $attempt of $max_attempts to apply manifest..." + if kubectl apply -f "$manifest" --validate=false; then + return 0 + fi + echo "Attempt $attempt failed, waiting before retry..." 
+ sleep 10 + attempt=$((attempt + 1)) + done + echo "Failed to apply manifest after $max_attempts attempts" + return 1 +} + +# Wait for API server +wait_for_api + +# Enable metrics server +echo "Enabling metrics server..." +apply_with_retry "https://github.com/kubernetes-sigs/metrics-server/releases/latest/download/components.yaml" + +# Enable default storage class +echo "Enabling default storage class..." +apply_with_retry "https://raw.githubusercontent.com/kubernetes/minikube/master/deploy/addons/storageclass/storageclass.yaml" + +# Enable storage provisioner +echo "Enabling storage provisioner..." +apply_with_retry "https://raw.githubusercontent.com/kubernetes/minikube/master/deploy/addons/storage-provisioner/storage-provisioner.yaml" + +# Wait for metrics server to be ready +echo "Waiting for metrics server to be ready..." +kubectl wait --for=condition=ready pod -l k8s-app=metrics-server -n kube-system --timeout=300s || true + +echo "Addons have been installed. Please verify their status with:" +echo "kubectl get pods -n kube-system" \ No newline at end of file diff --git a/helm/atoma-proxy/scripts/fix-rbac.sh b/helm/atoma-proxy/scripts/fix-rbac.sh new file mode 100755 index 00000000..4f2a1eb2 --- /dev/null +++ b/helm/atoma-proxy/scripts/fix-rbac.sh @@ -0,0 +1,50 @@ +#!/bin/bash + +set -e + +# Wait for API server to be ready +echo "Waiting for API server to be ready..." +until kubectl get nodes &>/dev/null; do + echo "Waiting for API server..." + sleep 5 +done + +# Create ClusterRole for namespace controller +echo "Creating ClusterRole..." 
+kubectl apply -f - --validate=false < Date: Fri, 16 May 2025 20:25:37 -0500 Subject: [PATCH 07/61] build: update secrets and values config --- helm/atoma-proxy/README.md | 17 +++++++++++++++- helm/atoma-proxy/templates/configmap.yaml | 16 ++++----------- helm/atoma-proxy/templates/secrets.yaml | 24 +++++++++++++++++++++++ helm/atoma-proxy/values.yaml | 7 +++++-- 4 files changed, 49 insertions(+), 15 deletions(-) create mode 100644 helm/atoma-proxy/templates/secrets.yaml diff --git a/helm/atoma-proxy/README.md b/helm/atoma-proxy/README.md index 7ea99dc3..bd920e48 100644 --- a/helm/atoma-proxy/README.md +++ b/helm/atoma-proxy/README.md @@ -360,4 +360,19 @@ This chart is licensed under the same license as the Atoma Proxy application. # Open http://localhost:9090/targets ``` - **Check for Atoma metrics in Prometheus/Grafana:** - - In Prometheus or Grafana, search for metrics with the prefix `atoma_`. \ No newline at end of file + - In Prometheus or Grafana, search for metrics with the prefix `atoma_`. 
+ + **Create Grafana secrets** + ``` + kubectl create secret generic grafana-admin \ + --from-literal=admin-user=admin \ + --from-literal=admin-password=admin \ + -n atoma-proxy-dev + ``` + + **Create OpenRouter++ + ``` + kubectl create secret generic atoma-proxy-dev-open-router \ + --from-literal=open_router.json='{"api_key": "your-api-key"}' \ + -n atoma-proxy-dev + ``` \ No newline at end of file diff --git a/helm/atoma-proxy/templates/configmap.yaml b/helm/atoma-proxy/templates/configmap.yaml index d398f763..9fa8bf78 100644 --- a/helm/atoma-proxy/templates/configmap.yaml +++ b/helm/atoma-proxy/templates/configmap.yaml @@ -7,17 +7,9 @@ metadata: app.kubernetes.io/name: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }} data: - config.toml: | - # This will be populated from your existing config.toml - # You should replace this with your actual configuration - prometheus.yml: | - # This will be populated from your existing prometheus.yml - # You should replace this with your actual configuration - loki.yaml: | - # This will be populated from your existing loki.yaml - # You should replace this with your actual configuration - tempo.yaml: | - # This will be populated from your existing tempo.yaml - # You should replace this with your actual configuration + config.toml: {{ .Files.Get "../../config.toml" | nindent 4 }} + prometheus.yml: {{ .Files.Get "../../prometheus.yml" | nindent 4 }} + loki.yaml: {{ .Files.Get "../../loki.yaml" | nindent 4 }} + tempo.yaml: {{ .Files.Get "../../tempo.yaml" | nindent 4 }} environment: {{ .Values.atomaProxy.config.environment | quote }} log_level: {{ .Values.atomaProxy.config.logLevel | quote }} \ No newline at end of file diff --git a/helm/atoma-proxy/templates/secrets.yaml b/helm/atoma-proxy/templates/secrets.yaml new file mode 100644 index 00000000..83b16214 --- /dev/null +++ b/helm/atoma-proxy/templates/secrets.yaml @@ -0,0 +1,24 @@ +apiVersion: v1 +kind: Secret +metadata: + name: grafana-admin + namespace: {{ 
.Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +type: Opaque +data: + admin-user: {{ .Values.grafana.adminUser | default "admin" | b64enc }} + admin-password: {{ .Values.grafana.adminPassword | default "admin" | b64enc }} +--- +apiVersion: v1 +kind: Secret +metadata: + name: {{ .Release.Name }}-open-router + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +type: Opaque +data: + open_router.json: {{ .Values.openRouter.apiKey | default "your-api-key" | toJson | b64enc }} \ No newline at end of file diff --git a/helm/atoma-proxy/values.yaml b/helm/atoma-proxy/values.yaml index 0f3994b4..68860cee 100644 --- a/helm/atoma-proxy/values.yaml +++ b/helm/atoma-proxy/values.yaml @@ -74,16 +74,19 @@ prometheus: size: 2Gi grafana: + adminUser: admin + adminPassword: admin enabled: true persistence: size: 10Gi - admin: - password: "" ingress: enabled: true hosts: - grafana.atoma.network +openRouter: + apiKey: your-api-key + loki: enabled: true persistence: From 753dbf20f09b76d3e8638f8f75bfd8dcdf564730 Mon Sep 17 00:00:00 2001 From: chad Date: Fri, 16 May 2025 20:35:54 -0500 Subject: [PATCH 08/61] build: update deployment --- helm/atoma-proxy/templates/deployment.yaml | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml index 4ed88338..632aaf39 100644 --- a/helm/atoma-proxy/templates/deployment.yaml +++ b/helm/atoma-proxy/templates/deployment.yaml @@ -45,6 +45,8 @@ spec: mountPath: /app/logs - name: data mountPath: /app/data + - name: sui-config + mountPath: /root/.sui/sui_config resources: {{- toYaml .Values.atomaProxy.resources | nindent 12 }} livenessProbe: @@ -69,4 +71,7 @@ spec: - name: logs emptyDir: {} - name: data - emptyDir: {} \ No newline at end of file + emptyDir: {} + - name: sui-config + 
secret: + secretName: {{ .Release.Name }}-sui-config \ No newline at end of file From b5eb45a58444675c3b0b08de5955e5b39e1e9325 Mon Sep 17 00:00:00 2001 From: chad Date: Fri, 16 May 2025 20:50:04 -0500 Subject: [PATCH 09/61] build: update file paths --- helm/atoma-proxy/README.md | 75 +++++++++++++++-------- helm/atoma-proxy/templates/configmap.yaml | 13 ++-- 2 files changed, 59 insertions(+), 29 deletions(-) diff --git a/helm/atoma-proxy/README.md b/helm/atoma-proxy/README.md index bd920e48..ac75ef43 100644 --- a/helm/atoma-proxy/README.md +++ b/helm/atoma-proxy/README.md @@ -68,8 +68,7 @@ This Helm chart deploys the Atoma Proxy application along with its dependencies, ### Quick Installation -Alternatively, you can use the deployment script: -```bash +Alternatively, you can use the deployment script:```bash ./scripts/deploy.sh -e dev -p your-password ``` @@ -189,12 +188,28 @@ Access the monitoring tools at: ## Security Considerations -1. **Passwords**: Always set secure passwords for production using: +1. **Secret Management**: Always use Helm for managing secrets instead of direct kubectl commands. This ensures consistent deployment and better security practices: + ```bash - --set postgresql.auth.password=your-secure-password \ - --set grafana.admin.password=your-secure-password + # Update secrets using Helm + helm upgrade atoma-proxy-dev . \ + -n atoma-proxy-dev \ + -f values-dev.yaml \ + --set postgresql.auth.password=your-secure-password \ + --set grafana.admin.password=your-secure-password \ + --set openRouter.apiKey=your-api-key + + # For production environments + helm upgrade atoma-proxy-prod . \ + -n atoma-proxy-prod \ + -f values-prod.yaml \ + --set postgresql.auth.password=your-secure-password \ + --set grafana.admin.password=your-secure-password \ + --set openRouter.apiKey=your-api-key ``` + > **Important**: Never store sensitive values in values files. Always use `--set` or `--set-file` flags with Helm commands to manage secrets. + 2. 
**SSL Certificates**: The chart uses cert-manager for SSL certificate management: - Development: `letsencrypt-staging` - Production: `letsencrypt-prod` @@ -224,19 +239,44 @@ Access the monitoring tools at: ## Maintenance -1. **Updating the Chart**: +1. **Updating the Chart and Secrets**: ```bash - helm repo update - helm upgrade atoma-proxy-{env} . -f values-{env}.yaml + # Update chart and secrets for development + helm upgrade atoma-proxy-dev . \ + -n atoma-proxy-dev \ + -f values-dev.yaml \ + --set postgresql.auth.password=your-secure-password \ + --set grafana.admin.password=your-secure-password + + # Update chart and secrets for production + helm upgrade atoma-proxy-prod . \ + -n atoma-proxy-prod \ + -f values-prod.yaml \ + --set postgresql.auth.password=your-secure-password \ + --set grafana.admin.password=your-secure-password ``` 2. **Backup**: - PostgreSQL data is stored in persistent volumes - Regular backups should be configured for production + - Use Helm to manage backup configurations: + ```bash + helm upgrade atoma-proxy-prod . \ + -n atoma-proxy-prod \ + -f values-prod.yaml \ + --set postgresql.backup.enabled=true \ + --set postgresql.backup.schedule="0 0 * * *" + ``` 3. **Scaling**: - - Adjust replicas in values file - - Scale horizontally: `kubectl scale deployment atoma-proxy-{env} --replicas=N` + - Use Helm to adjust replicas and resources: + ```bash + helm upgrade atoma-proxy-prod . \ + -n atoma-proxy-prod \ + -f values-prod.yaml \ + --set atomaProxy.replicas=3 \ + --set atomaProxy.resources.limits.memory=2Gi + ``` ## Contributing @@ -361,18 +401,3 @@ This chart is licensed under the same license as the Atoma Proxy application. ``` - **Check for Atoma metrics in Prometheus/Grafana:** - In Prometheus or Grafana, search for metrics with the prefix `atoma_`. 
- - **Create Grafana secrets** - ``` - kubectl create secret generic grafana-admin \ - --from-literal=admin-user=admin \ - --from-literal=admin-password=admin \ - -n atoma-proxy-dev - ``` - - **Create OpenRouter++ - ``` - kubectl create secret generic atoma-proxy-dev-open-router \ - --from-literal=open_router.json='{"api_key": "your-api-key"}' \ - -n atoma-proxy-dev - ``` \ No newline at end of file diff --git a/helm/atoma-proxy/templates/configmap.yaml b/helm/atoma-proxy/templates/configmap.yaml index 9fa8bf78..ea3ec122 100644 --- a/helm/atoma-proxy/templates/configmap.yaml +++ b/helm/atoma-proxy/templates/configmap.yaml @@ -1,3 +1,4 @@ +# helm/atoma-proxy/templates/configmap.yaml apiVersion: v1 kind: ConfigMap metadata: @@ -7,9 +8,13 @@ metadata: app.kubernetes.io/name: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }} data: - config.toml: {{ .Files.Get "../../config.toml" | nindent 4 }} - prometheus.yml: {{ .Files.Get "../../prometheus.yml" | nindent 4 }} - loki.yaml: {{ .Files.Get "../../loki.yaml" | nindent 4 }} - tempo.yaml: {{ .Files.Get "../../tempo.yaml" | nindent 4 }} + config.toml: |- +{{ .Files.Get "files/config.toml" | indent 4 }} + prometheus.yml: |- +{{ .Files.Get "files/prometheus.yml" | indent 4 }} + loki.yaml: |- +{{ .Files.Get "files/loki.yaml" | indent 4 }} + tempo.yaml: |- +{{ .Files.Get "files/tempo.yaml" | indent 4 }} environment: {{ .Values.atomaProxy.config.environment | quote }} log_level: {{ .Values.atomaProxy.config.logLevel | quote }} \ No newline at end of file From c5194589dce8748dbf50b610eb21d4eb7213e4e3 Mon Sep 17 00:00:00 2001 From: chad Date: Fri, 16 May 2025 20:54:20 -0500 Subject: [PATCH 10/61] build: update deployment --- helm/atoma-proxy/templates/deployment.yaml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml index 632aaf39..d99ef532 100644 --- a/helm/atoma-proxy/templates/deployment.yaml +++ 
b/helm/atoma-proxy/templates/deployment.yaml @@ -18,6 +18,13 @@ spec: app.kubernetes.io/name: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }} spec: + initContainers: + - name: init-sui-config + image: busybox + command: ["sh", "-c", "chmod -R 777 /root/.sui/sui_config"] + volumeMounts: + - name: sui-config + mountPath: /root/.sui/sui_config containers: - name: {{ .Chart.Name }} image: "{{ .Values.atomaProxy.image.repository }}:{{ .Values.atomaProxy.image.tag }}" From 6b6cdb6c40528862324dab7ece89f5a4d57c25eb Mon Sep 17 00:00:00 2001 From: chad Date: Fri, 16 May 2025 20:58:12 -0500 Subject: [PATCH 11/61] build: update build for sui config --- helm/atoma-proxy/templates/deployment.yaml | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml index d99ef532..bff31a85 100644 --- a/helm/atoma-proxy/templates/deployment.yaml +++ b/helm/atoma-proxy/templates/deployment.yaml @@ -18,13 +18,6 @@ spec: app.kubernetes.io/name: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }} spec: - initContainers: - - name: init-sui-config - image: busybox - command: ["sh", "-c", "chmod -R 777 /root/.sui/sui_config"] - volumeMounts: - - name: sui-config - mountPath: /root/.sui/sui_config containers: - name: {{ .Chart.Name }} image: "{{ .Values.atomaProxy.image.repository }}:{{ .Values.atomaProxy.image.tag }}" @@ -80,5 +73,6 @@ spec: - name: data emptyDir: {} - name: sui-config - secret: - secretName: {{ .Release.Name }}-sui-config \ No newline at end of file + hostPath: + path: ../../sui_config + type: Directory \ No newline at end of file From d9b6dbc624d9f6479ffe9468f1065fc62312e115 Mon Sep 17 00:00:00 2001 From: chad Date: Fri, 16 May 2025 20:59:57 -0500 Subject: [PATCH 12/61] build: update sui configmap --- helm/atoma-proxy/templates/deployment.yaml | 5 ++--- .../templates/sui-config-configmap.yaml | 14 ++++++++++++++ 2 files changed, 16 
insertions(+), 3 deletions(-) create mode 100644 helm/atoma-proxy/templates/sui-config-configmap.yaml diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml index bff31a85..73144867 100644 --- a/helm/atoma-proxy/templates/deployment.yaml +++ b/helm/atoma-proxy/templates/deployment.yaml @@ -73,6 +73,5 @@ spec: - name: data emptyDir: {} - name: sui-config - hostPath: - path: ../../sui_config - type: Directory \ No newline at end of file + configMap: + name: {{ .Release.Name }}-sui-config \ No newline at end of file diff --git a/helm/atoma-proxy/templates/sui-config-configmap.yaml b/helm/atoma-proxy/templates/sui-config-configmap.yaml new file mode 100644 index 00000000..568c2357 --- /dev/null +++ b/helm/atoma-proxy/templates/sui-config-configmap.yaml @@ -0,0 +1,14 @@ +# helm/atoma-proxy/templates/sui-config-configmap.yaml +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ .Release.Name }}-sui-config + namespace: {{ .Release.Namespace }} + labels: + app.kubernetes.io/name: {{ .Release.Name }} + app.kubernetes.io/instance: {{ .Release.Name }} +data: + client.yaml: |- +{{ .Files.Get "files/sui_config/client.yaml" | indent 4 }} + sui.keystore: |- +{{ .Files.Get "files/sui_config/sui.keystore" | indent 4 }} \ No newline at end of file From 963ac62b2bb66bf00d57d0e25201256e3fed7a30 Mon Sep 17 00:00:00 2001 From: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Date: Mon, 19 May 2025 15:24:30 +0200 Subject: [PATCH 13/61] feat: split input output tokens (#477) * feat: split tokens into input/output * fix clippy * fix openapi.yml * address comments --------- Co-authored-by: Chad Nehemiah --- .../src/server/handlers/chat_completions.rs | 143 ++++++++++-------- .../src/server/handlers/completions.rs | 64 ++++---- atoma-proxy/src/server/handlers/embeddings.rs | 46 +++--- .../src/server/handlers/image_generations.rs | 42 +++-- atoma-proxy/src/server/handlers/mod.rs | 40 ++++- .../src/server/handlers/request_model.rs | 6 +- 
atoma-proxy/src/server/middleware.rs | 93 ++++++------ atoma-proxy/src/server/streamer.rs | 35 +++-- atoma-state/src/handlers.rs | 23 ++- .../20250515143255_split-computed-units.sql | 22 +++ atoma-state/src/state_manager.rs | 62 +++++--- atoma-state/src/types.rs | 18 ++- 12 files changed, 364 insertions(+), 230 deletions(-) create mode 100644 atoma-state/src/migrations/20250515143255_split-computed-units.sql diff --git a/atoma-proxy/src/server/handlers/chat_completions.rs b/atoma-proxy/src/server/handlers/chat_completions.rs index a0016459..2609cb06 100644 --- a/atoma-proxy/src/server/handlers/chat_completions.rs +++ b/atoma-proxy/src/server/handlers/chat_completions.rs @@ -108,6 +108,8 @@ const MESSAGES: &str = "messages"; )] pub struct ChatCompletionsOpenApi; +const MESSAGE_OVERHEAD_TOKENS: u64 = 3; + /// Create chat completions /// /// This function processes chat completion requests by determining whether to use streaming @@ -183,7 +185,8 @@ pub async fn chat_completions_create( update_state_manager( &state.state_manager_sender, stack_small_id, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + metadata.max_output_tokens) + as i64, 0, &metadata.endpoint, )?; @@ -191,7 +194,9 @@ pub async fn chat_completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + 0, + metadata.max_output_tokens as i64, 0, metadata.price_per_million, metadata.model_name, @@ -278,8 +283,8 @@ async fn handle_chat_completions_request( metadata.user_id, headers, &payload, - metadata.num_input_tokens.map(|v| v as i64), - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + metadata.max_output_tokens as i64, metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), @@ -293,7 +298,8 @@ async fn handle_chat_completions_request( 
metadata.user_id, headers, &payload, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + metadata.max_output_tokens as i64, metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), @@ -453,7 +459,8 @@ pub async fn confidential_chat_completions_create( update_state_manager( &state.state_manager_sender, stack_small_id, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + metadata.max_output_tokens) + as i64, 0, &metadata.endpoint, )?; @@ -461,7 +468,9 @@ pub async fn confidential_chat_completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + 0, + metadata.max_output_tokens as i64, 0, metadata.price_per_million, metadata.model_name, @@ -569,7 +578,8 @@ async fn handle_non_streaming_response( user_id: i64, headers: HeaderMap, payload: &Value, - estimated_total_tokens: i64, + num_input_tokens: i64, + estimated_output_tokens: i64, price_per_million: i64, selected_stack_small_id: Option, endpoint: String, @@ -673,39 +683,36 @@ async fn handle_non_streaming_response( // NOTE: We need to update the stack num tokens, because the inference response might have produced // less tokens than estimated what we initially estimated, from the middleware. 
- match selected_stack_small_id { - Some(stack_small_id) => { - if let Err(e) = update_state_manager( - &state.state_manager_sender, - stack_small_id, - estimated_total_tokens, - total_tokens, - &endpoint, - ) { - return Err(AtomaProxyError::InternalError { - message: format!("Error updating state manager: {e:?}"), - client_message: None, - endpoint: endpoint.to_string(), - }); - } - } - None => { - if let Err(e) = update_state_manager_fiat( - &state.state_manager_sender, - user_id, - estimated_total_tokens, - total_tokens, - price_per_million, - model_name, - &endpoint, - ) { - return Err(AtomaProxyError::InternalError { - message: format!("Error updating fiat state manager: {e:?}"), - client_message: None, - endpoint: endpoint.to_string(), - }); - } + if let Some(stack_small_id) = selected_stack_small_id { + if let Err(e) = update_state_manager( + &state.state_manager_sender, + stack_small_id, + num_input_tokens + estimated_output_tokens, + total_tokens, + &endpoint, + ) { + return Err(AtomaProxyError::InternalError { + message: format!("Error updating state manager: {e:?}"), + client_message: None, + endpoint: endpoint.to_string(), + }); } + } else if let Err(e) = update_state_manager_fiat( + &state.state_manager_sender, + user_id, + num_input_tokens, + input_tokens, + estimated_output_tokens, + output_tokens, + price_per_million, + model_name, + &endpoint, + ) { + return Err(AtomaProxyError::InternalError { + message: format!("Error updating fiat state manager: {e:?}"), + client_message: None, + endpoint: endpoint.to_string(), + }); } CHAT_COMPLETIONS_LATENCY_METRICS.record( @@ -768,8 +775,8 @@ async fn handle_streaming_response( user_id: i64, mut headers: HeaderMap, payload: &Value, - num_input_tokens: Option, - estimated_total_tokens: i64, + num_input_tokens: i64, + estimated_output_tokens: i64, price_per_million: i64, selected_stack_small_id: Option, endpoint: String, @@ -821,8 +828,8 @@ async fn handle_streaming_response( stream, state_manager_sender, 
selected_stack_small_id, - num_input_tokens.unwrap_or(0), - estimated_total_tokens, + num_input_tokens, + estimated_output_tokens, price_per_million, start, user_id, @@ -967,7 +974,6 @@ impl RequestModel for RequestModelChatCompletions { ) -> Result { // In order to account for the possibility of not taking into account possible additional special tokens, // which might not be considered by the tokenizer, we add a small overhead to the total number of tokens, per message. - const MESSAGE_OVERHEAD_TOKENS: u64 = 3; let Some(tokenizer) = tokenizer else { return Err(AtomaProxyError::InternalError { client_message: Some("No available tokenizer found for current model, try again later or open a ticket".to_string()), @@ -1033,8 +1039,8 @@ impl RequestModel for RequestModelChatCompletions { } // add the max completion tokens, to account for the response Ok(ComputeUnitsEstimate { - num_input_compute_units: total_num_tokens, - max_total_compute_units: total_num_tokens + self.max_completion_tokens, + num_input_tokens: total_num_tokens, + max_output_tokens: self.max_completion_tokens, }) } } @@ -2060,9 +2066,11 @@ mod tests { max_completion_tokens: 10, }; let tokenizer = load_tokenizer().await; - let result = request.get_compute_units_estimate(Some(&tokenizer)); - assert!(result.is_ok()); - assert_eq!(result.unwrap().max_total_compute_units, 21); // 8 tokens + 3 overhead + 10 completion + let result = request + .get_compute_units_estimate(Some(&tokenizer)) + .unwrap(); + assert_eq!(result.num_input_tokens, 8 + MESSAGE_OVERHEAD_TOKENS); + assert_eq!(result.max_output_tokens, 10); // 10 completion } #[tokio::test] @@ -2082,9 +2090,11 @@ mod tests { max_completion_tokens: 10, }; let tokenizer = load_tokenizer().await; - let result = request.get_compute_units_estimate(Some(&tokenizer)); - assert!(result.is_ok()); - assert_eq!(result.unwrap().max_total_compute_units, 32); // (8+8) tokens + (3+3) overhead + 10 completion + let result = request + 
.get_compute_units_estimate(Some(&tokenizer)) + .unwrap(); + assert_eq!(result.num_input_tokens, (8 + MESSAGE_OVERHEAD_TOKENS) * 2); + assert_eq!(result.max_output_tokens, 10); } #[tokio::test] @@ -2108,9 +2118,11 @@ mod tests { }; let tokenizer = load_tokenizer().await; - let result = request.get_compute_units_estimate(Some(&tokenizer)); - assert!(result.is_ok()); - assert_eq!(result.unwrap().max_total_compute_units, 32); // (8+8) tokens (3 + 3) overhead + 10 completion + let result = request + .get_compute_units_estimate(Some(&tokenizer)) + .unwrap(); + assert_eq!(result.num_input_tokens, (8 + MESSAGE_OVERHEAD_TOKENS) * 2); + assert_eq!(result.max_output_tokens, 10); } #[tokio::test] @@ -2124,9 +2136,11 @@ mod tests { max_completion_tokens: 10, }; let tokenizer = load_tokenizer().await; - let result = request.get_compute_units_estimate(Some(&tokenizer)); - assert!(result.is_ok()); - assert_eq!(result.unwrap().max_total_compute_units, 14); // 1 tokens (special token) + 3 overhead + 10 completion + let result = request + .get_compute_units_estimate(Some(&tokenizer)) + .unwrap(); + assert_eq!(result.num_input_tokens, 1 + MESSAGE_OVERHEAD_TOKENS); // 1 tokens (special token) + overhead tokens + assert_eq!(result.max_output_tokens, 10); } #[tokio::test] @@ -2161,12 +2175,13 @@ mod tests { max_completion_tokens: 15, }; let tokenizer = load_tokenizer().await; - let result = request.get_compute_units_estimate(Some(&tokenizer)); - assert!(result.is_ok()); + let result = request + .get_compute_units_estimate(Some(&tokenizer)) + .unwrap(); // System message: tokens + 15 completion // User message array: (2 text parts tokens) + (15 * 2 for text completion for parts) - let tokens = result.unwrap(); - assert_eq!(tokens.max_total_compute_units, 48); // 3 * 8 + 3 * 3 overhead + 15 + assert_eq!(result.num_input_tokens, (8 + MESSAGE_OVERHEAD_TOKENS) * 3); + assert_eq!(result.max_output_tokens, 15); } #[tokio::test] @@ -2221,6 +2236,6 @@ mod tests { let result = 
request.get_compute_units_estimate(Some(&tokenizer)); assert!(result.is_ok()); let tokens = result.unwrap(); - assert!(tokens.max_total_compute_units > 13); // Should be more than minimum (3 overhead + 10 completion) + assert_eq!(tokens.max_output_tokens, 10); // Should be more than minimum (3 overhead + 10 completion) } } diff --git a/atoma-proxy/src/server/handlers/completions.rs b/atoma-proxy/src/server/handlers/completions.rs index bfb267ee..9f28644e 100644 --- a/atoma-proxy/src/server/handlers/completions.rs +++ b/atoma-proxy/src/server/handlers/completions.rs @@ -143,7 +143,8 @@ pub async fn completions_create( update_state_manager( &state.state_manager_sender, stack_small_id, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + metadata.max_output_tokens) + as i64, 0, &metadata.endpoint, )?; @@ -151,7 +152,9 @@ pub async fn completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + 0, + metadata.max_output_tokens as i64, 0, metadata.price_per_million, metadata.model_name, @@ -221,8 +224,8 @@ async fn handle_completions_request( metadata.user_id, headers, &payload, - metadata.num_input_tokens.map(|v| v as i64), - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + metadata.max_output_tokens as i64, metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), @@ -236,7 +239,8 @@ async fn handle_completions_request( metadata.user_id, headers, &payload, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + metadata.max_output_tokens as i64, metadata.price_per_million, metadata.selected_stack_small_id, metadata.endpoint.clone(), @@ -394,7 +398,8 @@ pub async fn confidential_completions_create( update_state_manager( &state.state_manager_sender, 
stack_small_id, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + metadata.max_output_tokens) + as i64, 0, &metadata.endpoint, )?; @@ -402,7 +407,9 @@ pub async fn confidential_completions_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + 0, + metadata.max_output_tokens as i64, 0, metadata.price_per_million, metadata.model_name, @@ -463,7 +470,8 @@ pub fn confidential_completions_create_stream( /// * `user_id` - The ID of the user making the request /// * `headers` - HTTP request headers to forward to the inference service /// * `payload` - The JSON payload containing the chat completion request -/// * `estimated_total_tokens` - The estimated total number of tokens for the completion +/// * `num_input_tokens` - The number of input tokens +/// * `estimated_output_tokens` - The estimated total number of tokens for the completion /// * `fiat_estimated_amount` - The estimated amount in fiat currency for the completion /// * `selected_stack_small_id` - The ID of the stack small to update /// * `endpoint` - The endpoint to forward the request to @@ -491,7 +499,7 @@ pub fn confidential_completions_create_stream( path = endpoint, completion_type = "non-streaming", stack_small_id, - estimated_total_tokens, + estimated_total_tokens = num_input_tokens + estimated_output_tokens, payload_hash ) )] @@ -502,7 +510,8 @@ async fn handle_non_streaming_response( user_id: i64, headers: HeaderMap, payload: &Value, - estimated_total_tokens: i64, + num_input_tokens: i64, + estimated_output_tokens: i64, price_per_million: i64, selected_stack_small_id: Option, endpoint: String, @@ -611,7 +620,7 @@ async fn handle_non_streaming_response( if let Err(e) = update_state_manager( &state.state_manager_sender, stack_small_id, - estimated_total_tokens, + num_input_tokens + estimated_output_tokens, total_tokens, 
&endpoint, ) { @@ -626,7 +635,9 @@ async fn handle_non_streaming_response( if let Err(e) = update_state_manager_fiat( &state.state_manager_sender, user_id, - estimated_total_tokens, + num_input_tokens, + 0, + estimated_output_tokens, total_tokens, price_per_million, model_name, @@ -661,8 +672,7 @@ async fn handle_non_streaming_response( /// * `headers` - The headers of the request /// * `payload` - The payload of the request /// * `num_input_tokens` - The number of input tokens -/// * `estimated_total_tokens` - The estimated total tokens -/// * `fiat_estimated_amount` - The fiat estimated amount +/// * `estimated_output_tokens` - The estimated output tokens /// * `price_per_million` - The price per million /// * `selected_stack_small_id` - The selected stack small id /// * `endpoint` - The endpoint of the request @@ -685,7 +695,7 @@ async fn handle_non_streaming_response( path = endpoint, completion_type = "streaming", stack_small_id, - estimated_total_tokens, + estimated_total_tokens = num_input_tokens + estimated_output_tokens, payload_hash ) )] @@ -696,8 +706,8 @@ async fn handle_streaming_response( user_id: i64, mut headers: HeaderMap, payload: &Value, - num_input_tokens: Option, - estimated_total_tokens: i64, + num_input_tokens: i64, + estimated_output_tokens: i64, price_per_million: i64, selected_stack_small_id: Option, endpoint: String, @@ -749,8 +759,8 @@ async fn handle_streaming_response( stream, state_manager_sender, selected_stack_small_id, - num_input_tokens.unwrap_or(0), - estimated_total_tokens, + num_input_tokens, + estimated_output_tokens, price_per_million, start, user_id, @@ -920,8 +930,8 @@ impl RequestModel for RequestModelCompletions { } })?; Ok(ComputeUnitsEstimate { - num_input_compute_units, - max_total_compute_units: self.max_tokens, + num_input_tokens: num_input_compute_units, + max_output_tokens: self.max_tokens, }) } CompletionsPrompt::List(prompts) => { @@ -934,23 +944,23 @@ impl RequestModel for RequestModelCompletions { .map(|prompt| 
count_text_tokens(prompt, tokenizer).unwrap_or(0)) .sum(); Ok(ComputeUnitsEstimate { - num_input_compute_units, - max_total_compute_units: self.max_tokens, + num_input_tokens: num_input_compute_units, + max_output_tokens: self.max_tokens, }) } CompletionsPrompt::Tokens(tokens) => { let num_input_compute_units = tokens.len() as u64; Ok(ComputeUnitsEstimate { - num_input_compute_units, - max_total_compute_units: self.max_tokens, + num_input_tokens: num_input_compute_units, + max_output_tokens: self.max_tokens, }) } CompletionsPrompt::TokenArrays(token_arrays) => { let num_input_compute_units = token_arrays.iter().map(|tokens| tokens.len() as u64).sum(); Ok(ComputeUnitsEstimate { - num_input_compute_units, - max_total_compute_units: self.max_tokens, + num_input_tokens: num_input_compute_units, + max_output_tokens: self.max_tokens, }) } } diff --git a/atoma-proxy/src/server/handlers/embeddings.rs b/atoma-proxy/src/server/handlers/embeddings.rs index 5f3e3937..fbc1db86 100644 --- a/atoma-proxy/src/server/handlers/embeddings.rs +++ b/atoma-proxy/src/server/handlers/embeddings.rs @@ -125,8 +125,8 @@ impl RequestModel for RequestModelEmbeddings { .get_ids() .len() as u64; Ok(ComputeUnitsEstimate { - num_input_compute_units: num_tokens, - max_total_compute_units: num_tokens, + num_input_tokens: num_tokens, + max_output_tokens: 0, }) } } @@ -173,11 +173,12 @@ pub async fn embeddings_create( // TODO: We should allow cancelling the request if the client disconnects let RequestMetadataExtension { node_address, - max_total_num_compute_units: num_input_compute_units, + num_input_tokens, .. 
} = metadata; + let num_input_tokens = num_input_tokens.unwrap_or_default() as i64; EMBEDDING_TOTAL_TOKENS_PER_USER.add( - num_input_compute_units, + num_input_tokens as u64, &[KeyValue::new("user_id", metadata.user_id)], ); match handle_embeddings_response( @@ -185,7 +186,7 @@ pub async fn embeddings_create( node_address, headers, payload, - num_input_compute_units as i64, + num_input_tokens, metadata.endpoint.clone(), metadata.model_name.clone(), ) @@ -201,8 +202,8 @@ pub async fn embeddings_create( update_state_manager( &state.state_manager_sender, stack_small_id, - num_input_compute_units as i64, - num_input_compute_units as i64, + num_input_tokens, + num_input_tokens, &metadata.endpoint, )?; } @@ -210,8 +211,10 @@ pub async fn embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - num_input_compute_units as i64, - num_input_compute_units as i64, + num_input_tokens, + num_input_tokens, + 0, + 0, metadata.price_per_million, metadata.model_name, &metadata.endpoint, @@ -231,7 +234,7 @@ pub async fn embeddings_create( update_state_manager( &state.state_manager_sender, stack_small_id, - num_input_compute_units as i64, + num_input_tokens, 0, &metadata.endpoint, )?; @@ -240,7 +243,9 @@ pub async fn embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - num_input_compute_units as i64, + num_input_tokens, + 0, + 0, 0, metadata.price_per_million, metadata.model_name, @@ -311,11 +316,12 @@ pub async fn confidential_embeddings_create( // TODO: We should allow cancelling the request if the client disconnects let RequestMetadataExtension { node_address, - max_total_num_compute_units: num_input_compute_units, + num_input_tokens, .. 
} = metadata; + let num_input_tokens = num_input_tokens.unwrap_or_default() as i64; EMBEDDING_TOTAL_TOKENS_PER_USER.add( - num_input_compute_units, + num_input_tokens as u64, &[KeyValue::new("user_id", metadata.user_id)], ); match handle_embeddings_response( @@ -323,7 +329,7 @@ pub async fn confidential_embeddings_create( node_address, headers, payload, - num_input_compute_units as i64, + num_input_tokens, metadata.endpoint.clone(), metadata.model_name.clone(), ) @@ -343,7 +349,7 @@ pub async fn confidential_embeddings_create( update_state_manager( &state.state_manager_sender, stack_small_id, - num_input_compute_units as i64, + num_input_tokens, total_tokens, &metadata.endpoint, )?; @@ -352,8 +358,10 @@ pub async fn confidential_embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - num_input_compute_units as i64, + num_input_tokens, total_tokens, + 0, + 0, metadata.price_per_million, metadata.model_name.clone(), &metadata.endpoint, @@ -377,7 +385,7 @@ pub async fn confidential_embeddings_create( update_state_manager( &state.state_manager_sender, stack_small_id, - num_input_compute_units as i64, + num_input_tokens, 0, &metadata.endpoint, )?; @@ -386,7 +394,9 @@ pub async fn confidential_embeddings_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - num_input_compute_units as i64, + num_input_tokens, + 0, + 0, 0, metadata.price_per_million, metadata.model_name, diff --git a/atoma-proxy/src/server/handlers/image_generations.rs b/atoma-proxy/src/server/handlers/image_generations.rs index 76f95ac3..31dbbb80 100644 --- a/atoma-proxy/src/server/handlers/image_generations.rs +++ b/atoma-proxy/src/server/handlers/image_generations.rs @@ -126,8 +126,8 @@ impl RequestModel for RequestModelImageGenerations { // Calculate compute units based on number of images and pixel count Ok(ComputeUnitsEstimate { - num_input_compute_units: self.n * width * height, - max_total_compute_units: self.n * width * height, + 
num_input_tokens: 0, + max_output_tokens: self.n * width * height, }) } } @@ -169,7 +169,7 @@ pub async fn image_generations_create( let endpoint = metadata.endpoint.clone(); tokio::spawn(async move { IMAGE_GENERATION_TOTAL_TOKENS_PER_USER.add( - metadata.max_total_num_compute_units, + metadata.max_output_tokens, &[KeyValue::new("user_id", metadata.user_id)], ); // TODO: We should allow cancelling the request if the client disconnects @@ -178,7 +178,7 @@ pub async fn image_generations_create( metadata.node_address, headers, payload, - metadata.max_total_num_compute_units as i64, + metadata.max_output_tokens as i64, metadata.endpoint.clone(), metadata.model_name.clone(), ) @@ -192,8 +192,10 @@ pub async fn image_generations_create( update_state_manager( &state.state_manager_sender, stack_small_id, - metadata.max_total_num_compute_units as i64, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + + metadata.max_output_tokens) as i64, + (metadata.num_input_tokens.unwrap_or_default() + + metadata.max_output_tokens) as i64, &metadata.endpoint, )?; } @@ -201,8 +203,10 @@ pub async fn image_generations_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.max_total_num_compute_units as i64, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + metadata.max_output_tokens as i64, + metadata.max_output_tokens as i64, metadata.price_per_million, metadata.model_name, &metadata.endpoint, @@ -225,7 +229,8 @@ pub async fn image_generations_create( update_state_manager( &state.state_manager_sender, stack_small_id, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + + metadata.max_output_tokens) as i64, 0, &metadata.endpoint, )?; @@ -234,7 +239,9 @@ pub async fn image_generations_create( update_state_manager_fiat( &state.state_manager_sender, 
metadata.user_id, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + 0, + metadata.max_output_tokens as i64, 0, metadata.price_per_million, metadata.model_name, @@ -303,7 +310,7 @@ pub async fn confidential_image_generations_create( endpoint: metadata.endpoint.clone(), })?; IMAGE_GENERATION_TOTAL_TOKENS_PER_USER.add( - metadata.max_total_num_compute_units, + metadata.num_input_tokens.unwrap_or_default() + metadata.max_output_tokens, &[KeyValue::new("user_id", metadata.user_id)], ); match handle_image_generation_response( @@ -311,7 +318,7 @@ pub async fn confidential_image_generations_create( metadata.node_address, headers, payload, - metadata.max_total_num_compute_units as i64, + metadata.max_output_tokens as i64, metadata.endpoint.clone(), metadata.model_name.clone(), ) @@ -339,7 +346,8 @@ pub async fn confidential_image_generations_create( update_state_manager( &state.state_manager_sender, stack_small_id, - metadata.max_total_num_compute_units as i64, + (metadata.num_input_tokens.unwrap_or_default() + + metadata.max_output_tokens) as i64, 0, &metadata.endpoint, )?; @@ -348,7 +356,9 @@ pub async fn confidential_image_generations_create( update_state_manager_fiat( &state.state_manager_sender, metadata.user_id, - metadata.max_total_num_compute_units as i64, + metadata.num_input_tokens.unwrap_or_default() as i64, + 0, + metadata.max_output_tokens as i64, 0, metadata.price_per_million, metadata.model_name, @@ -412,7 +422,7 @@ async fn handle_image_generation_response( node_address: String, headers: HeaderMap, payload: Value, - total_tokens: i64, + output_tokens: i64, endpoint: String, model_name: String, ) -> Result> { @@ -464,7 +474,7 @@ async fn handle_image_generation_response( timestamp: DateTime::::from(std::time::SystemTime::now()), model_name, input_tokens: 0, - output_tokens: total_tokens, + output_tokens, time: time.elapsed().as_secs_f64(), }, ) diff --git a/atoma-proxy/src/server/handlers/mod.rs 
b/atoma-proxy/src/server/handlers/mod.rs index 58be018c..d7d7aa4f 100644 --- a/atoma-proxy/src/server/handlers/mod.rs +++ b/atoma-proxy/src/server/handlers/mod.rs @@ -132,17 +132,20 @@ pub fn update_state_manager( skip_all, fields(user_id, estimated_amount, amount, endpoint) )] +#[allow(clippy::too_many_arguments)] pub fn update_state_manager_fiat( state_manager_sender: &Sender, user_id: i64, - estimated_amount: i64, - amount: i64, + estimated_input_tokens: i64, + input_tokens: i64, + estimated_output_tokens: i64, + output_tokens: i64, price_per_one_million_compute_units: i64, model_name: String, endpoint: &str, ) -> Result<()> { - let estimated_amount = i64::try_from( - estimated_amount as u128 * price_per_one_million_compute_units as u128 + let estimated_input_amount = i64::try_from( + estimated_input_tokens as u128 * price_per_one_million_compute_units as u128 / u128::from(ONE_MILLION), ) .map_err(|e| AtomaProxyError::InternalError { @@ -150,8 +153,27 @@ pub fn update_state_manager_fiat( client_message: None, endpoint: endpoint.to_string(), })?; - let amount = i64::try_from( - amount as u128 * price_per_one_million_compute_units as u128 / u128::from(ONE_MILLION), + let estimated_output_amount = i64::try_from( + estimated_output_tokens as u128 * price_per_one_million_compute_units as u128 + / u128::from(ONE_MILLION), + ) + .map_err(|e| AtomaProxyError::InternalError { + message: format!("Error converting estimated amount: {e}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + let input_amount = i64::try_from( + input_tokens as u128 * price_per_one_million_compute_units as u128 + / u128::from(ONE_MILLION), + ) + .map_err(|e| AtomaProxyError::InternalError { + message: format!("Error converting amount: {e}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + let output_amount = i64::try_from( + output_tokens as u128 * price_per_one_million_compute_units as u128 + / u128::from(ONE_MILLION), ) .map_err(|e| 
AtomaProxyError::InternalError { message: format!("Error converting amount: {e}"), @@ -162,8 +184,10 @@ pub fn update_state_manager_fiat( state_manager_sender .send(AtomaAtomaStateManagerEvent::UpdateStackNumTokensFiat { user_id, - estimated_amount, - amount, + estimated_input_amount, + input_amount, + estimated_output_amount, + output_amount, model_name, }) .map_err(|e| AtomaProxyError::InternalError { diff --git a/atoma-proxy/src/server/handlers/request_model.rs b/atoma-proxy/src/server/handlers/request_model.rs index 09f40e02..4bbb035d 100644 --- a/atoma-proxy/src/server/handlers/request_model.rs +++ b/atoma-proxy/src/server/handlers/request_model.rs @@ -7,9 +7,9 @@ use crate::server::Result; #[derive(Debug, Clone, PartialEq, Eq)] pub struct ComputeUnitsEstimate { /// The number of compute units needed for the input tokens. - pub num_input_compute_units: u64, - /// The maximum number of compute units that can be used for the request. - pub max_total_compute_units: u64, + pub num_input_tokens: u64, + /// The number of compute units needed for the output tokens. + pub max_output_tokens: u64, } /// A trait for parsing and handling AI model requests across different endpoints (chat, embeddings, images). diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs index 5ab09c8c..3f6007fe 100644 --- a/atoma-proxy/src/server/middleware.rs +++ b/atoma-proxy/src/server/middleware.rs @@ -73,7 +73,7 @@ pub struct RequestMetadataExtension { /// Estimated compute units required for this request. /// This represents the total computational resources needed for both input and output processing. - pub max_total_num_compute_units: u64, + pub max_output_tokens: u64, /// The user id for this request. pub user_id: i64, @@ -124,22 +124,19 @@ impl RequestMetadataExtension { self } - /// Adds a num compute units to the request metadata. + /// Adds a max output tokens to the request metadata. 
/// /// This method is used to set the num compute units that will be used for the request. /// /// # Arguments /// - /// * `num_compute_units` - The num compute units to set + /// * `max_output_tokens` - The max output tokens to set /// /// # Returns /// /// Returns self with the num compute units field populated, enabling method chaining - pub const fn with_max_total_num_compute_units( - mut self, - max_total_num_compute_units: u64, - ) -> Self { - self.max_total_num_compute_units = max_total_num_compute_units; + pub const fn with_max_output_tokens(mut self, max_output_tokens: u64) -> Self { + self.max_output_tokens = max_output_tokens; self } @@ -316,8 +313,8 @@ pub async fn authenticate_middleware( tokio::spawn(async move { let StackMetadata { optional_stack, - num_input_compute_units, - max_total_compute_units, + num_input_tokens, + max_output_tokens, model, user_id, selected_node_id, @@ -354,7 +351,7 @@ pub async fn authenticate_middleware( model: &model, state: &state, optional_stack, - total_tokens: max_total_compute_units, + total_tokens: num_input_tokens + max_output_tokens, user_id, endpoint: &endpoint, }) @@ -377,8 +374,8 @@ pub async fn authenticate_middleware( &mut req_parts, selected_node_id, stack_small_id, - num_input_compute_units, - max_total_compute_units, + num_input_tokens, + max_output_tokens, price_per_million, tx_digest, user_id, @@ -391,7 +388,7 @@ pub async fn authenticate_middleware( update_state_manager( &state.state_manager_sender, stack_small_id, - max_total_compute_units as i64, + (num_input_tokens + max_output_tokens) as i64, 0, &endpoint, )?; @@ -406,8 +403,8 @@ pub async fn authenticate_middleware( &mut req_parts, selected_node_id, price_per_million, - num_input_compute_units, - max_total_compute_units, + num_input_tokens, + max_output_tokens, user_id, &endpoint, ) @@ -418,7 +415,9 @@ pub async fn authenticate_middleware( update_state_manager_fiat( &state.state_manager_sender, user_id, - max_total_compute_units as i64, + 
num_input_tokens as i64, + 0, + max_output_tokens as i64, 0, price_per_million, model, @@ -616,7 +615,7 @@ pub async fn confidential_compute_middleware( .with_node_address(node_address) .with_node_small_id(node_small_id) .with_stack_small_id(confidential_compute_request.stack_small_id as i64) - .with_max_total_num_compute_units(num_compute_units as u64) + .with_max_output_tokens(num_compute_units as u64) .with_user_id(user_id) .with_model_name(confidential_compute_request.model_name.clone()) .with_endpoint(endpoint); @@ -789,13 +788,14 @@ pub async fn handle_locked_stack_middleware( } // We need to acquire a new stack for the request, to be able to retry let user_id = request_metadata.user_id; - let max_total_num_compute_units = request_metadata.max_total_num_compute_units; + let num_input_tokens = request_metadata.num_input_tokens.unwrap_or_default(); + let max_output_tokens = request_metadata.max_output_tokens; // 1. Try to get a Stack from the state manager let maybe_stack = get_node_metadata_from_state_manager( &state, &request_metadata.model_name, user_id, - max_total_num_compute_units as i64, + (num_input_tokens + max_output_tokens) as i64, is_confidential_compute_endpoint(&endpoint), &endpoint, ) @@ -814,7 +814,7 @@ pub async fn handle_locked_stack_middleware( user_id, &request_metadata.model_name, &request_metadata.endpoint, - max_total_num_compute_units, + num_input_tokens + max_output_tokens, ) .await? 
} @@ -855,7 +855,7 @@ pub async fn handle_locked_stack_middleware( } })?, request_metadata.num_input_tokens.unwrap_or_default(), - max_total_num_compute_units, + max_output_tokens, selected_node_metadata.price_per_million, selected_node_metadata.tx_digest, user_id, @@ -874,7 +874,7 @@ pub async fn handle_locked_stack_middleware( endpoint: endpoint.to_string(), } })?, - max_total_num_compute_units as i64, + (num_input_tokens + max_output_tokens) as i64, 0, &endpoint, )?; @@ -939,10 +939,10 @@ pub mod auth { pub struct StackMetadata { /// The stack that was selected for the request. pub optional_stack: Option, - /// The number of input compute units for the request. - pub num_input_compute_units: u64, - /// The maximum total compute units for the request. - pub max_total_compute_units: u64, + /// The number of input tokens for the request. + pub num_input_tokens: u64, + /// The maximum number of output tokens for the request. + pub max_output_tokens: u64, /// The model that was selected for the request. pub model: String, /// The user ID that made the request. @@ -1160,8 +1160,8 @@ pub mod auth { // Retrieve the model and the appropriate tokenizer let model = request_model.get_model(); let ComputeUnitsEstimate { - num_input_compute_units, - max_total_compute_units, + num_input_tokens, + max_output_tokens, } = if [IMAGE_GENERATIONS_PATH, CONFIDENTIAL_IMAGE_GENERATIONS_PATH].contains(&endpoint) { request_model.get_compute_units_estimate(None)? } else { @@ -1181,14 +1181,17 @@ pub mod auth { let node = get_cheapest_node(state, &model, endpoint).await?; // We don't have a stack for the user, lets check if the user is using fiat currency. 
let (result_sender, result_receiver) = oneshot::channel(); - let fiat_locked_amount = max_total_compute_units as i64 + let fiat_locked_input_amount = + num_input_tokens as i64 * node.price_per_one_million_compute_units / ONE_MILLION as i64; + let fiat_locked_output_amount = max_output_tokens as i64 * node.price_per_one_million_compute_units / ONE_MILLION as i64; state .state_manager_sender .send(AtomaAtomaStateManagerEvent::LockUserFiatBalance { user_id, - amount: fiat_locked_amount, + input_amount: fiat_locked_input_amount, + output_amount: fiat_locked_output_amount, result_sender, }) .map_err(|err| AtomaProxyError::InternalError { @@ -1213,8 +1216,8 @@ pub mod auth { if locked_fiat { return Ok(StackMetadata { optional_stack: None, - num_input_compute_units, - max_total_compute_units, + num_input_tokens, + max_output_tokens, model, user_id, selected_node_id: node.node_small_id, @@ -1229,7 +1232,7 @@ pub mod auth { .state_manager_sender .send(AtomaAtomaStateManagerEvent::GetStacksForModel { model: model.to_string(), - free_compute_units: max_total_compute_units as i64, + free_compute_units: (num_input_tokens + max_output_tokens) as i64, user_id, is_confidential: false, // NOTE: This method is only used for non-confidential compute result_sender, @@ -1255,8 +1258,8 @@ pub mod auth { Ok(StackMetadata { optional_stack, - num_input_compute_units, - max_total_compute_units, + num_input_tokens, + max_output_tokens, model, user_id, selected_node_id: node.node_small_id, @@ -2271,7 +2274,8 @@ pub mod utils { /// * `req_parts` - Mutable reference to request parts for header modification /// * `selected_node_id` - ID of the node selected to process this request /// * `selected_stack_small_id` - ID of the stack allocated for this request - /// * `total_compute_units` - Total compute units required for this request + /// * `num_input_tokens` - Number of input tokens in the request + /// * `max_output_tokens` - Max number of output tokens required for this request /// * 
`tx_digest` - Optional transaction digest if a new stack was created /// * `user_id` - ID of the user making the request /// * `endpoint` - API endpoint path being accessed @@ -2323,7 +2327,7 @@ pub mod utils { /// * Model name #[instrument(level = "info", skip_all, fields( %endpoint, - %total_compute_units, + %max_output_tokens, %user_id ), err)] #[allow(clippy::too_many_arguments)] @@ -2334,7 +2338,7 @@ pub mod utils { selected_node_id: i64, selected_stack_small_id: i64, num_input_tokens: u64, - total_compute_units: u64, + max_output_tokens: u64, price_per_million: i64, tx_digest: Option, user_id: i64, @@ -2397,7 +2401,7 @@ pub mod utils { node_address, node_id: selected_node_id, num_input_tokens: Some(num_input_tokens), - max_total_num_compute_units: total_compute_units, + max_output_tokens, user_id, selected_stack_small_id: Some(selected_stack_small_id), price_per_million, @@ -2424,7 +2428,8 @@ pub mod utils { /// * `req_parts` - Mutable reference to request parts for header modification /// * `selected_node_id` - ID of the node selected to process this request /// * `fiat_estimated_amount` - The amount of fiat that was locked - /// * `total_compute_units` - Total compute units required for this request + /// * `num_input_tokens` - Number of input tokens in the request + /// * `max_output_tokens` - Max number of output tokens required for this request /// * `user_id` - ID of the user making the request /// * `endpoint` - API endpoint path being accessed /// @@ -2459,7 +2464,7 @@ pub mod utils { /// compute_units, /// user_id, /// "/v1/chat/completions" - /// ).await?; + /// ).await?;ple /// ``` /// /// # Request Metadata @@ -2482,7 +2487,7 @@ pub mod utils { selected_node_id: i64, price_per_million: i64, num_input_tokens: u64, - total_compute_units: u64, + max_output_tokens: u64, user_id: i64, endpoint: &str, ) -> Result> { @@ -2521,7 +2526,7 @@ pub mod utils { node_address, node_id: selected_node_id, num_input_tokens: Some(num_input_tokens), - 
max_total_num_compute_units: total_compute_units, + max_output_tokens, user_id, selected_stack_small_id: None, price_per_million, diff --git a/atoma-proxy/src/server/streamer.rs b/atoma-proxy/src/server/streamer.rs index d6996d5e..7bc3b848 100644 --- a/atoma-proxy/src/server/streamer.rs +++ b/atoma-proxy/src/server/streamer.rs @@ -57,8 +57,10 @@ pub struct Streamer { stream: Pin> + Send>>, /// Current status of the stream status: StreamStatus, - /// Estimated total tokens for the stream - estimated_total_tokens: i64, + /// Estimated input tokens for the stream + estimated_input_tokens: i64, + /// Estimated output tokens for the stream + estimated_output_tokens: i64, /// Price per million tokens for this request. price_per_million: i64, /// Stack small id @@ -114,8 +116,8 @@ impl Streamer { stream: impl Stream> + Send + 'static, state_manager_sender: Sender, stack_small_id: Option, - num_input_tokens: i64, - estimated_total_tokens: i64, + estimated_input_tokens: i64, + estimated_output_tokens: i64, price_per_million: i64, start: Instant, user_id: i64, @@ -125,7 +127,8 @@ impl Streamer { Self { stream: Box::pin(stream), status: StreamStatus::NotStarted, - estimated_total_tokens, + estimated_input_tokens, + estimated_output_tokens, stack_small_id, state_manager_sender, start, @@ -137,7 +140,7 @@ impl Streamer { first_token_generation_timer: Some(start), inter_stream_token_latency_timer: None, is_final_chunk_handled: false, - num_generated_tokens: num_input_tokens, + num_generated_tokens: 0, price_per_million, } } @@ -177,7 +180,7 @@ impl Streamer { skip(self, usage), fields( endpoint = "handle_final_chunk", - estimated_total_tokens = self.estimated_total_tokens, + estimated_total_tokens = self.estimated_input_tokens + self.estimated_output_tokens, ) )] fn handle_final_chunk( @@ -247,7 +250,7 @@ impl Streamer { if let Err(e) = update_state_manager( &self.state_manager_sender, stack_small_id, - self.estimated_total_tokens, + self.estimated_input_tokens + 
self.estimated_output_tokens, total_tokens, &self.endpoint, ) { @@ -266,8 +269,10 @@ impl Streamer { if let Err(e) = update_state_manager_fiat( &self.state_manager_sender, self.user_id, - self.estimated_total_tokens, - total_tokens, + self.estimated_input_tokens, + input_tokens, + self.estimated_output_tokens, + output_tokens, self.price_per_million, self.model_name.clone(), &self.endpoint, @@ -571,7 +576,7 @@ impl Drop for Streamer { fields( streamer = "drop-streamer", num_generated_tokens = self.num_generated_tokens, - estimated_total_tokens = self.estimated_total_tokens, + estimated_total_tokens = self.estimated_input_tokens + self.estimated_output_tokens, stack_small_id = self.stack_small_id, endpoint = self.endpoint, ) @@ -599,8 +604,8 @@ impl Drop for Streamer { if let Err(e) = update_state_manager( &self.state_manager_sender, stack_small_id, - self.estimated_total_tokens, - self.num_generated_tokens, + self.estimated_input_tokens + self.estimated_output_tokens, + self.estimated_input_tokens + self.num_generated_tokens, &self.endpoint, ) { error!( @@ -615,7 +620,9 @@ impl Drop for Streamer { if let Err(e) = update_state_manager_fiat( &self.state_manager_sender, self.user_id, - self.estimated_total_tokens, + self.estimated_input_tokens, + self.estimated_input_tokens, + self.estimated_output_tokens, self.num_generated_tokens, self.price_per_million, self.model_name.clone(), diff --git a/atoma-state/src/handlers.rs b/atoma-state/src/handlers.rs index e56f20c5..3b9ddee1 100644 --- a/atoma-state/src/handlers.rs +++ b/atoma-state/src/handlers.rs @@ -1431,12 +1431,13 @@ pub async fn handle_state_manager_event( } AtomaAtomaStateManagerEvent::LockUserFiatBalance { user_id, - amount, + input_amount, + output_amount, result_sender, } => { let result = state_manager .state - .lock_user_fiat_balance(user_id, amount) + .lock_user_fiat_balance(user_id, input_amount, output_amount) .await; result_sender .send(result) @@ -1445,17 +1446,25 @@ pub async fn 
handle_state_manager_event( AtomaAtomaStateManagerEvent::UpdateStackNumTokensFiat { user_id, model_name, - estimated_amount, - amount, + estimated_input_amount, + input_amount, + estimated_output_amount, + output_amount, } => { state_manager .state - .update_real_amount_fiat_balance(user_id, estimated_amount, amount) + .update_real_amount_fiat_balance( + user_id, + estimated_input_amount, + input_amount, + estimated_output_amount, + output_amount, + ) .await?; - if amount > 0 { + if output_amount > 0 { state_manager .state - .update_usage_per_model(user_id, model_name, amount) + .update_usage_per_model(user_id, model_name, input_amount, output_amount) .await?; } } diff --git a/atoma-state/src/migrations/20250515143255_split-computed-units.sql b/atoma-state/src/migrations/20250515143255_split-computed-units.sql new file mode 100644 index 00000000..c9685b03 --- /dev/null +++ b/atoma-state/src/migrations/20250515143255_split-computed-units.sql @@ -0,0 +1,22 @@ +BEGIN; + +ALTER TABLE fiat_balance +RENAME COLUMN already_debited_amount TO already_debited_completions_amount; + +ALTER TABLE fiat_balance +RENAME COLUMN overcharged_unsettled_amount TO overcharged_unsettled_completions_amount; + +ALTER TABLE fiat_balance +ADD COLUMN already_debited_input_amount BIGINT NOT NULL DEFAULT 0, +ADD COLUMN overcharged_unsettled_input_amount BIGINT NOT NULL DEFAULT 0; + +ALTER TABLE usage_per_model +RENAME COLUMN total_number_processed_tokens TO total_input_tokens; + +ALTER TABLE usage_per_model +ADD COLUMN total_output_tokens BIGINT NOT NULL DEFAULT 0; + +ALTER TABLE IF EXISTS fiat_balance +RENAME TO fiat_balances; + +COMMIT; diff --git a/atoma-state/src/state_manager.rs b/atoma-state/src/state_manager.rs index 2bc3b84f..ea6961bd 100644 --- a/atoma-state/src/state_manager.rs +++ b/atoma-state/src/state_manager.rs @@ -4420,13 +4420,23 @@ impl AtomaState { /// This function will return an error if: /// /// - The database query fails to execute. 
- #[instrument(level = "trace", skip(self),fields(%user_id, %amount))] - pub async fn lock_user_fiat_balance(&self, user_id: i64, amount: i64) -> Result { + #[instrument(level = "trace", skip(self))] + pub async fn lock_user_fiat_balance( + &self, + user_id: i64, + input_amount: i64, + output_amount: i64, + ) -> Result { let result = sqlx::query( - "UPDATE fiat_balance SET overcharged_unsettled_amount = overcharged_unsettled_amount + $2 WHERE user_id = $1 AND usd_balance >= already_debited_amount + overcharged_unsettled_amount + $1", + "UPDATE fiat_balances + SET overcharged_unsettled_input_amount = overcharged_unsettled_input_amount + $2, + overcharged_unsettled_completions_amount = overcharged_unsettled_completions_amount + $3 + WHERE user_id = $1 AND + usd_balance >= already_debited_completions_amount + already_debited_input_amount + overcharged_unsettled_completions_amount + overcharged_unsettled_input_amount + $2 + $3", ) .bind(user_id) - .bind(amount) + .bind(input_amount) + .bind(output_amount) .execute(&self.db) .await?; @@ -4452,28 +4462,31 @@ impl AtomaState { /// /// This function will return an error if: /// - The database query fails to execute. 
- #[instrument( - level = "trace", - skip_all, - fields(%user_id, %estimated_amount, %amount) - )] + #[instrument(level = "trace", skip(self))] pub async fn update_real_amount_fiat_balance( &self, user_id: i64, - estimated_amount: i64, - amount: i64, + estimated_input_amount: i64, + input_amount: i64, + estimated_output_amount: i64, + output_amount: i64, ) -> Result<()> { let result = sqlx::query( - "UPDATE fiat_balance - SET already_debited_amount = already_debited_amount + $3, - overcharged_unsettled_amount = overcharged_unsettled_amount - $2, - num_requests = num_requests + $4 + "UPDATE fiat_balances + SET + overcharged_unsettled_input_amount = overcharged_unsettled_input_amount - $2, + already_debited_input_amount = already_debited_input_amount + $3, + overcharged_unsettled_completions_amount = overcharged_unsettled_completions_amount - $4, + already_debited_completions_amount = already_debited_completions_amount + $5, + num_requests = num_requests + $6 WHERE user_id = $1", ) .bind(user_id) - .bind(estimated_amount) - .bind(amount) - .bind(i64::from(amount > 0)) // If total amount is greater than 0 then the request was successful + .bind(estimated_input_amount) + .bind(input_amount) + .bind(estimated_output_amount) + .bind(output_amount) + .bind(i64::from(output_amount > 0)) // If total amount is greater than 0 then the request was successful .execute(&self.db) .await?; @@ -4517,17 +4530,20 @@ impl AtomaState { &self, user_id: i64, model_name: String, - total_tokens: i64, + input_tokens: i64, + output_tokens: i64, ) -> Result<()> { sqlx::query( - "INSERT INTO usage_per_model (user_id, model, total_number_processed_tokens) - VALUES ($1, $2, $3) + "INSERT INTO usage_per_model (user_id, model, total_input_tokens, total_output_tokens) + VALUES ($1, $2, $3, $4) ON CONFLICT (user_id, model) DO UPDATE SET - total_number_processed_tokens = usage_per_model.total_number_processed_tokens + EXCLUDED.total_number_processed_tokens", + total_input_tokens = 
usage_per_model.total_input_tokens + EXCLUDED.total_input_tokens, + total_output_tokens = usage_per_model.total_output_tokens + EXCLUDED.total_output_tokens", ) .bind(user_id) .bind(model_name) - .bind(total_tokens) + .bind(input_tokens) + .bind(output_tokens) .execute(&self.db) .await?; Ok(()) diff --git a/atoma-state/src/types.rs b/atoma-state/src/types.rs index 1103e368..1cd9cbfd 100644 --- a/atoma-state/src/types.rs +++ b/atoma-state/src/types.rs @@ -902,8 +902,10 @@ pub enum AtomaAtomaStateManagerEvent { LockUserFiatBalance { /// The user ID user_id: i64, - /// The amount to lock - amount: i64, + /// The amount to lock for input + input_amount: i64, + /// The amount to lock for output + output_amount: i64, /// The result sender to send back the success status result_sender: oneshot::Sender>, }, @@ -912,9 +914,13 @@ pub enum AtomaAtomaStateManagerEvent { user_id: i64, /// Model name model_name: String, - /// The original estimated amount - estimated_amount: i64, - /// The actual amount - amount: i64, + /// The original estimated input amount + estimated_input_amount: i64, + /// The actual input amount + input_amount: i64, + /// The original estimated output amount + estimated_output_amount: i64, + /// The actual output amount + output_amount: i64, }, } From 55848aec6825cf90cf61cbb0b80ffc33c555a549 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 11:35:42 -0500 Subject: [PATCH 14/61] build: update sui config --- helm/atoma-proxy/templates/deployment.yaml | 24 ++++++++++++++++--- .../templates/sui-config-configmap.yaml | 4 +++- 2 files changed, 24 insertions(+), 4 deletions(-) diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml index 73144867..f11095b4 100644 --- a/helm/atoma-proxy/templates/deployment.yaml +++ b/helm/atoma-proxy/templates/deployment.yaml @@ -18,6 +18,21 @@ spec: app.kubernetes.io/name: {{ .Release.Name }} app.kubernetes.io/instance: {{ .Release.Name }} spec: + initContainers: + - name: 
init-sui-config + image: busybox + command: + - sh + - -c + - | + mkdir -p /root/.sui/sui_config + cp /tmp-sui-config/* /root/.sui/sui_config/ + chmod -R 777 /root/.sui/sui_config + volumeMounts: + - name: tmp-sui-config + mountPath: /tmp-sui-config + - name: sui-config-volume + mountPath: /root/.sui/sui_config containers: - name: {{ .Chart.Name }} image: "{{ .Values.atomaProxy.image.repository }}:{{ .Values.atomaProxy.image.tag }}" @@ -45,7 +60,7 @@ spec: mountPath: /app/logs - name: data mountPath: /app/data - - name: sui-config + - name: sui-config-volume mountPath: /root/.sui/sui_config resources: {{- toYaml .Values.atomaProxy.resources | nindent 12 }} @@ -72,6 +87,9 @@ spec: emptyDir: {} - name: data emptyDir: {} - - name: sui-config + - name: tmp-sui-config configMap: - name: {{ .Release.Name }}-sui-config \ No newline at end of file + name: {{ .Release.Name }}-sui-config + defaultMode: 0644 + - name: sui-config-volume + emptyDir: {} \ No newline at end of file diff --git a/helm/atoma-proxy/templates/sui-config-configmap.yaml b/helm/atoma-proxy/templates/sui-config-configmap.yaml index 568c2357..8a24c866 100644 --- a/helm/atoma-proxy/templates/sui-config-configmap.yaml +++ b/helm/atoma-proxy/templates/sui-config-configmap.yaml @@ -11,4 +11,6 @@ data: client.yaml: |- {{ .Files.Get "files/sui_config/client.yaml" | indent 4 }} sui.keystore: |- -{{ .Files.Get "files/sui_config/sui.keystore" | indent 4 }} \ No newline at end of file +{{ .Files.Get "files/sui_config/sui.keystore" | indent 4 }} + sui.aliases: |- +{{ .Files.Get "files/sui_config/sui.aliases" | indent 4 }} \ No newline at end of file From 16219adcb3456f1fee1484111274389b04b5e1c1 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 11:41:28 -0500 Subject: [PATCH 15/61] build: use temp mount for config --- helm/atoma-proxy/templates/deployment.yaml | 1 + 1 file changed, 1 insertion(+) diff --git a/helm/atoma-proxy/templates/deployment.yaml b/helm/atoma-proxy/templates/deployment.yaml index 
f11095b4..ffd43119 100644 --- a/helm/atoma-proxy/templates/deployment.yaml +++ b/helm/atoma-proxy/templates/deployment.yaml @@ -31,6 +31,7 @@ spec: volumeMounts: - name: tmp-sui-config mountPath: /tmp-sui-config + readOnly: true - name: sui-config-volume mountPath: /root/.sui/sui_config containers: From b5da0fefd43d873e7f80dedbfb3aab10fbf2efbe Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 11:57:42 -0500 Subject: [PATCH 16/61] build: increase resource limits --- helm/atoma-proxy/values-dev.yaml | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index a090cbba..7790c083 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -10,12 +10,12 @@ atomaProxy: pullPolicy: Always replicas: 1 resources: - requests: - memory: "256Mi" - cpu: "100m" limits: - memory: "512Mi" - cpu: "200m" + cpu: 500m + memory: 1Gi + requests: + cpu: 200m + memory: 512Mi ingress: enabled: true className: "nginx" From 8418c781b64e8bb73ef24785e1acd455713f9408 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 12:51:15 -0500 Subject: [PATCH 17/61] chore: update prometheus config --- helm/atoma-proxy/values-dev.yaml | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 7790c083..cee43c53 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -64,6 +64,16 @@ prometheus: server: persistentVolume: size: 5Gi + ingress: + enabled: true + className: "nginx" + annotations: + cert-manager.io/cluster-issuer: "letsencrypt-staging" + nginx.ingress.kubernetes.io/ssl-redirect: "true" + hosts: + - prometheus-dev.atoma.network + service: + port: 9090 alertmanager: persistentVolume: size: 1Gi From b742b692a2e2bf21cd0cddbef2315306d052ebec Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 13:07:11 -0500 Subject: [PATCH 18/61] chore: use load balancer 
for prometheus --- helm/atoma-proxy/values-dev.yaml | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index cee43c53..91a1d453 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -72,11 +72,10 @@ prometheus: nginx.ingress.kubernetes.io/ssl-redirect: "true" hosts: - prometheus-dev.atoma.network - service: - port: 9090 - alertmanager: - persistentVolume: - size: 1Gi + service: + type: LoadBalancer + port: 9090 + targetPort: 9090 grafana: enabled: true From a6ed6af01e92192f7cd188ba64d0022cf821384b Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 13:38:00 -0500 Subject: [PATCH 19/61] build: use nodeport for public access --- helm/atoma-proxy/values-dev.yaml | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 91a1d453..798e79af 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -73,8 +73,9 @@ prometheus: hosts: - prometheus-dev.atoma.network service: - type: LoadBalancer - port: 9090 + type: NodePort + nodePort: 30259 # Or omit to let Kubernetes assign + port: 80 targetPort: 9090 grafana: From f481043c1a3f09d6f6c7b7bde6feb6dc87b35ba2 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 14:17:49 -0500 Subject: [PATCH 20/61] chore: update prometheus config --- helm/atoma-proxy/values-dev.yaml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 798e79af..91a1d453 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -73,9 +73,8 @@ prometheus: hosts: - prometheus-dev.atoma.network service: - type: NodePort - nodePort: 30259 # Or omit to let Kubernetes assign - port: 80 + type: LoadBalancer + port: 9090 targetPort: 9090 grafana: From d950c5290847b3df037cb2ab169c7fce1103d729 Mon 
Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 14:28:55 -0500 Subject: [PATCH 21/61] build: add metallb configs --- helm/infrastructure/metallb-config.yaml | 15 ++++++ helm/infrastructure/metallb-install.yaml | 68 ++++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 helm/infrastructure/metallb-config.yaml create mode 100644 helm/infrastructure/metallb-install.yaml diff --git a/helm/infrastructure/metallb-config.yaml b/helm/infrastructure/metallb-config.yaml new file mode 100644 index 00000000..b327a46b --- /dev/null +++ b/helm/infrastructure/metallb-config.yaml @@ -0,0 +1,15 @@ +# metallb-config.yaml +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: public-ips + namespace: metallb-system +spec: + addresses: + - 35.87.207.130/32 # Use your public IP or a range (e.g., 35.87.207.130-35.87.207.140) +--- +apiVersion: metallb.io/v1beta1 +kind: L2Advertisement +metadata: + name: l2 + namespace: metallb-system diff --git a/helm/infrastructure/metallb-install.yaml b/helm/infrastructure/metallb-install.yaml new file mode 100644 index 00000000..8235a01f --- /dev/null +++ b/helm/infrastructure/metallb-install.yaml @@ -0,0 +1,68 @@ +# metallb-install.yaml +# Installs MetalLB in native mode (recommended for k3s and modern clusters) +apiVersion: v1 +kind: Namespace +metadata: + name: metallb-system +--- +# Install MetalLB components (controller and speaker) +# You can always get the latest from: https://metallb.universe.tf/installation/ +# This is for v0.14.5 (as of June 2024) +apiVersion: apps/v1 +kind: Deployment +metadata: + name: controller + namespace: metallb-system +spec: + selector: + matchLabels: + app: metallb + component: controller + replicas: 1 + template: + metadata: + labels: + app: metallb + component: controller + spec: + containers: + - name: controller + image: quay.io/metallb/controller:v0.14.5 + args: + - controller + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 200m + memory: 200Mi 
+--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: speaker + namespace: metallb-system +spec: + selector: + matchLabels: + app: metallb + component: speaker + template: + metadata: + labels: + app: metallb + component: speaker + spec: + containers: + - name: speaker + image: quay.io/metallb/speaker:v0.14.5 + args: + - speaker + resources: + requests: + cpu: 100m + memory: 100Mi + limits: + cpu: 200m + memory: 200Mi From ec9416a5e4596ba7ad77f12e12082c069178b5ce Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 15:10:40 -0500 Subject: [PATCH 22/61] build: use elastic associated private IP --- helm/infrastructure/metallb-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/infrastructure/metallb-config.yaml b/helm/infrastructure/metallb-config.yaml index b327a46b..c648822d 100644 --- a/helm/infrastructure/metallb-config.yaml +++ b/helm/infrastructure/metallb-config.yaml @@ -6,7 +6,7 @@ metadata: namespace: metallb-system spec: addresses: - - 35.87.207.130/32 # Use your public IP or a range (e.g., 35.87.207.130-35.87.207.140) + - 10.0.235.50/32 # private ip --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement From 27d9beb5bccdea657eccb9d4cd53a81bc485310e Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 15:20:08 -0500 Subject: [PATCH 23/61] chore: update private ip --- helm/infrastructure/metallb-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/infrastructure/metallb-config.yaml b/helm/infrastructure/metallb-config.yaml index c648822d..fab739bd 100644 --- a/helm/infrastructure/metallb-config.yaml +++ b/helm/infrastructure/metallb-config.yaml @@ -6,7 +6,7 @@ metadata: namespace: metallb-system spec: addresses: - - 10.0.235.50/32 # private ip + - 10.0.235.51/32 # private ip --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement From 321c4c9ea6d3c3a2cc8f08ff2f5a32cdcc714e32 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 17:22:10 -0500 Subject: [PATCH 24/61] 
chore: update metallb config --- helm/infrastructure/metallb-config.yaml | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/helm/infrastructure/metallb-config.yaml b/helm/infrastructure/metallb-config.yaml index fab739bd..85c514b4 100644 --- a/helm/infrastructure/metallb-config.yaml +++ b/helm/infrastructure/metallb-config.yaml @@ -1,12 +1,20 @@ -# metallb-config.yaml apiVersion: metallb.io/v1beta1 kind: IPAddressPool metadata: - name: public-ips + name: traefik-pool namespace: metallb-system spec: addresses: - - 10.0.235.51/32 # private ip + - 10.0.235.50/32 +--- +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: prometheus-pool + namespace: metallb-system +spec: + addresses: + - 10.0.235.51/32 --- apiVersion: metallb.io/v1beta1 kind: L2Advertisement From aeaa90f47930f742d61b5974d55e411f95cc2f68 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 17:33:31 -0500 Subject: [PATCH 25/61] build: update prometheus service port key --- helm/atoma-proxy/values-dev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 91a1d453..4d9df455 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -74,7 +74,7 @@ prometheus: - prometheus-dev.atoma.network service: type: LoadBalancer - port: 9090 + servicePort: 9090 targetPort: 9090 grafana: From b23dd4a75c2d37368e053fd7e8440ff03be3a671 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 17:36:58 -0500 Subject: [PATCH 26/61] chore: add pool for Grafana --- helm/atoma-proxy/values-dev.yaml | 4 ++++ helm/infrastructure/metallb-config.yaml | 9 +++++++++ 2 files changed, 13 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 4d9df455..5cee3452 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -91,6 +91,10 @@ grafana: enabled: true hosts: - 
grafana-dev.atoma.network + service: + type: LoadBalancer + port: 3000 + targetPort: 3000 loki: enabled: false # Temporarily disable Loki diff --git a/helm/infrastructure/metallb-config.yaml b/helm/infrastructure/metallb-config.yaml index 85c514b4..0f93290f 100644 --- a/helm/infrastructure/metallb-config.yaml +++ b/helm/infrastructure/metallb-config.yaml @@ -21,3 +21,12 @@ kind: L2Advertisement metadata: name: l2 namespace: metallb-system +--- +apiVersion: metallb.io/v1beta1 +kind: IPAddressPool +metadata: + name: grafana-pool + namespace: metallb-system +spec: + addresses: + - 10.0.235.52/32 From 3f26a21b292e8d9fb7d86358c5d64a0d0439a2b8 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 17:41:18 -0500 Subject: [PATCH 27/61] chore: add annotations --- helm/atoma-proxy/values-dev.yaml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 5cee3452..91549076 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -95,6 +95,8 @@ grafana: type: LoadBalancer port: 3000 targetPort: 3000 + annotations: + metallb.universe.tf/address-pool: grafana-pool loki: enabled: false # Temporarily disable Loki From 982dee9b457343fc9bcb4728aea2a9075a0819f8 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 19 May 2025 17:49:57 -0500 Subject: [PATCH 28/61] chore: format grafana correctly --- helm/atoma-proxy/values-dev.yaml | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 91549076..ed36c534 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -91,12 +91,12 @@ grafana: enabled: true hosts: - grafana-dev.atoma.network - service: - type: LoadBalancer - port: 3000 - targetPort: 3000 - annotations: - metallb.universe.tf/address-pool: grafana-pool + service: + type: LoadBalancer + port: 3000 + targetPort: 3000 + annotations: + 
metallb.universe.tf/address-pool: grafana-pool loki: enabled: false # Temporarily disable Loki From 53cfb12f0d4a0973d02f20b0e2566190de23609f Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 15:53:27 -0500 Subject: [PATCH 29/61] build: enable loki --- helm/atoma-proxy/values-dev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index ed36c534..3c5a6718 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -99,7 +99,7 @@ grafana: metallb.universe.tf/address-pool: grafana-pool loki: - enabled: false # Temporarily disable Loki + enabled: true persistence: size: 5Gi ingress: From fe80858a80cf211f07ec2fc37ce8d1a63d4e87e4 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 16:02:16 -0500 Subject: [PATCH 30/61] build: update loki config --- helm/atoma-proxy/values-dev.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 3c5a6718..e1548379 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -108,9 +108,6 @@ loki: - loki-dev.atoma.network storage: type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler auth_enabled: false schemaConfig: configs: @@ -126,9 +123,6 @@ loki: path_prefix: /var/loki limits_config: retention_period: 744h - memberlist: - join_members: - - loki-memberlist tempo: enabled: true From 7e8b6c1252d6fc80a32d92a4366c7264d95f71fe Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 16:03:41 -0500 Subject: [PATCH 31/61] build: add loki buckets --- helm/atoma-proxy/values-dev.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index e1548379..4480da2d 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -108,6 +108,10 @@ loki: - loki-dev.atoma.network storage: type: filesystem + 
bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin auth_enabled: false schemaConfig: configs: From e9834f1232764bfaa5ef08f3429a77298d323404 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:16:29 -0500 Subject: [PATCH 32/61] build: use a simpler loki config --- helm/atoma-proxy/values-dev.yaml | 54 +++++++++++++++----------------- 1 file changed, 25 insertions(+), 29 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 4480da2d..145303ab 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -98,35 +98,31 @@ grafana: annotations: metallb.universe.tf/address-pool: grafana-pool -loki: - enabled: true - persistence: - size: 5Gi - ingress: - enabled: true - hosts: - - loki-dev.atoma.network - storage: - type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin - auth_enabled: false - schemaConfig: - configs: - - from: 2022-01-11 - store: boltdb-shipper - object_store: filesystem - schema: v12 - index: - prefix: loki_index_ - period: 24h - commonConfig: - replication_factor: 1 - path_prefix: /var/loki - limits_config: - retention_period: 744h + loki: + enabled: true + persistence: + size: 5Gi + storage: + type: filesystem + bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin + auth_enabled: false + schemaConfig: + configs: + - from: 2022-01-11 + store: boltdb-shipper + object_store: filesystem + schema: v12 + index: + prefix: loki_index_ + period: 24h + commonConfig: + replication_factor: 1 + path_prefix: /var/loki + limits_config: + retention_period: 744h tempo: enabled: true From 9b0c7803643b01bebfc453e07ec5eb6320d705d1 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:18:37 -0500 Subject: [PATCH 33/61] chore: lint fix --- helm/atoma-proxy/values-dev.yaml | 50 ++++++++++++++++---------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git 
a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 145303ab..88360766 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -98,31 +98,31 @@ grafana: annotations: metallb.universe.tf/address-pool: grafana-pool - loki: - enabled: true - persistence: - size: 5Gi - storage: - type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin - auth_enabled: false - schemaConfig: - configs: - - from: 2022-01-11 - store: boltdb-shipper - object_store: filesystem - schema: v12 - index: - prefix: loki_index_ - period: 24h - commonConfig: - replication_factor: 1 - path_prefix: /var/loki - limits_config: - retention_period: 744h + loki: + enabled: true + persistence: + size: 5Gi + storage: + type: filesystem + bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin + auth_enabled: false + schemaConfig: + configs: + - from: 2022-01-11 + store: boltdb-shipper + object_store: filesystem + schema: v12 + index: + prefix: loki_index_ + period: 24h + commonConfig: + replication_factor: 1 + path_prefix: /var/loki + limits_config: + retention_period: 744h tempo: enabled: true From 14c4af9816a9aeaa10c1b889f09a43ea10dbfc3b Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:23:04 -0500 Subject: [PATCH 34/61] build: only enable loki --- helm/atoma-proxy/values-dev.yaml | 27 ++------------------------- 1 file changed, 2 insertions(+), 25 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 88360766..b383924a 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -98,31 +98,8 @@ grafana: annotations: metallb.universe.tf/address-pool: grafana-pool - loki: - enabled: true - persistence: - size: 5Gi - storage: - type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin - auth_enabled: false - schemaConfig: - configs: - - from: 2022-01-11 - store: boltdb-shipper - 
object_store: filesystem - schema: v12 - index: - prefix: loki_index_ - period: 24h - commonConfig: - replication_factor: 1 - path_prefix: /var/loki - limits_config: - retention_period: 744h + loki: + enabled: true tempo: enabled: true From 57213365c5016afa391fb4ea13ba6727d87b107f Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:24:25 -0500 Subject: [PATCH 35/61] chore: explicitly use bucket storage --- helm/atoma-proxy/values-dev.yaml | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index b383924a..62699a26 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -100,6 +100,12 @@ grafana: loki: enabled: true + storage: + type: filesystem + bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin tempo: enabled: true From daa2ba48cb27e298c6d3f9c430672d27ace5c702 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:26:10 -0500 Subject: [PATCH 36/61] chore: fix indentation bug --- helm/atoma-proxy/values-dev.yaml | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 62699a26..ecfa675a 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -98,14 +98,14 @@ grafana: annotations: metallb.universe.tf/address-pool: grafana-pool - loki: - enabled: true - storage: - type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin +loki: + enabled: true + storage: + type: filesystem + bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin tempo: enabled: true From c2527bdaf87fcccf83d20cf6ad6d418751917b8b Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:37:09 -0500 Subject: [PATCH 37/61] chore: use built in config --- helm/atoma-proxy/values-dev.yaml | 38 ++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) 
diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index ecfa675a..3f07ef4d 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -100,12 +100,42 @@ grafana: loki: enabled: true + auth_enabled: false + server: + http_listen_port: 3100 + commonConfig: + path_prefix: /loki + replication_factor: 1 + ring: + kvstore: + store: inmemory storage: type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin + storage_config: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules + schemaConfig: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + ruler: + alertmanager_url: http://localhost:9093 + limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + volume_enabled: true + retention_period: 96h + compactor: + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 tempo: enabled: true From 57d4c147fd2176ae5595b7d520f5e751b0f4dd50 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:38:39 -0500 Subject: [PATCH 38/61] chore: remove unused configmap --- helm/atoma-proxy/templates/configmap.yaml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/helm/atoma-proxy/templates/configmap.yaml b/helm/atoma-proxy/templates/configmap.yaml index ea3ec122..d62601c2 100644 --- a/helm/atoma-proxy/templates/configmap.yaml +++ b/helm/atoma-proxy/templates/configmap.yaml @@ -10,11 +10,5 @@ metadata: data: config.toml: |- {{ .Files.Get "files/config.toml" | indent 4 }} - prometheus.yml: |- -{{ .Files.Get "files/prometheus.yml" | indent 4 }} - loki.yaml: |- -{{ .Files.Get "files/loki.yaml" | indent 4 }} - tempo.yaml: |- -{{ .Files.Get "files/tempo.yaml" | indent 4 }} environment: {{ .Values.atomaProxy.config.environment | quote }} log_level: {{ .Values.atomaProxy.config.logLevel | quote }} \ No 
newline at end of file From e486c5cbfc00565fba0607ca93fffe87f0a64feb Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:40:05 -0500 Subject: [PATCH 39/61] chore: add back bucketnames --- helm/atoma-proxy/values-dev.yaml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 3f07ef4d..1b64d347 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -111,6 +111,10 @@ loki: store: inmemory storage: type: filesystem + bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin storage_config: filesystem: chunks_directory: /loki/chunks From dd2609b1745459715cc72a9fd71111bd33338964 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:43:16 -0500 Subject: [PATCH 40/61] chore: use simple config --- helm/atoma-proxy/values-dev.yaml | 37 +++----------------------------- 1 file changed, 3 insertions(+), 34 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 1b64d347..fcce7624 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -100,46 +100,15 @@ grafana: loki: enabled: true - auth_enabled: false - server: - http_listen_port: 3100 - commonConfig: - path_prefix: /loki - replication_factor: 1 - ring: - kvstore: - store: inmemory storage: type: filesystem bucketNames: chunks: loki-chunks ruler: loki-ruler admin: loki-admin - storage_config: - filesystem: - chunks_directory: /loki/chunks - rules_directory: /loki/rules - schemaConfig: - configs: - - from: 2020-10-24 - store: boltdb-shipper - object_store: filesystem - schema: v11 - index: - prefix: index_ - period: 24h - ruler: - alertmanager_url: http://localhost:9093 - limits_config: - enforce_metric_name: false - reject_old_samples: true - reject_old_samples_max_age: 168h - volume_enabled: true - retention_period: 96h - compactor: - retention_enabled: true - retention_delete_delay: 2h - 
retention_delete_worker_count: 150 + commonConfig: + path_prefix: /loki + replication_factor: 1 tempo: enabled: true From 21a2fdd644f4b873f7a3c04e54b47614cf91328a Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:47:42 -0500 Subject: [PATCH 41/61] chore: use old config --- helm/atoma-proxy/values-dev.yaml | 30 +++++++++++++++++++++++++++--- 1 file changed, 27 insertions(+), 3 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index fcce7624..46355400 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -100,15 +100,39 @@ grafana: loki: enabled: true + auth_enabled: false + commonConfig: + path_prefix: /loki + replication_factor: 1 storage: type: filesystem bucketNames: chunks: loki-chunks ruler: loki-ruler admin: loki-admin - commonConfig: - path_prefix: /loki - replication_factor: 1 + persistence: + size: 5Gi + limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + volume_enabled: true + retention_period: 96h + schemaConfig: + configs: + - from: "2020-10-24" + index: + period: 24h + prefix: index_ + object_store: filesystem + schema: v11 + store: boltdb-shipper + ruler: + alertmanager_url: http://localhost:9093 + compactor: + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 tempo: enabled: true From 3e64582e9974a1758927bd91ba9720d06e9137c1 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:51:33 -0500 Subject: [PATCH 42/61] chore: added some more required fields --- helm/atoma-proxy/values-dev.yaml | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 46355400..ec42a815 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -133,6 +133,11 @@ loki: retention_enabled: true retention_delete_delay: 2h retention_delete_worker_count: 150 + server: + http_listen_port: 3100 
+ memberlist: + join_members: + - loki-memberlist tempo: enabled: true From 55d951910aa448b0c074f7eee55a8bf8ee8fe5a6 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 17:52:34 -0500 Subject: [PATCH 43/61] chore: use explicit key --- helm/atoma-proxy/values-dev.yaml | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index ec42a815..1a56f82e 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -104,12 +104,10 @@ loki: commonConfig: path_prefix: /loki replication_factor: 1 - storage: - type: filesystem - bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin + storage_config: + filesystem: + chunks_directory: /loki/chunks + rules_directory: /loki/rules persistence: size: 5Gi limits_config: From eec30ab98e0c3eeb984c804ff4727b225058a23c Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 13:02:00 -0500 Subject: [PATCH 44/61] chore: add stronger config validation --- Cargo.lock | 3 + Cargo.toml | 1 + atoma-proxy-service/Cargo.toml | 1 + atoma-proxy-service/src/config.rs | 102 ++++++++++++++++++++++++++--- atoma-proxy/Cargo.toml | 1 + atoma-proxy/src/main.rs | 34 +++++----- atoma-proxy/src/server/config.rs | 103 +++++++++++++++++++++++++++--- atoma-state/Cargo.toml | 1 + atoma-state/src/config.rs | 97 ++++++++++++++++++++++------ atoma-state/src/config_error.rs | 25 ++++++++ atoma-state/src/lib.rs | 1 + 11 files changed, 314 insertions(+), 55 deletions(-) create mode 100644 atoma-state/src/config_error.rs diff --git a/Cargo.lock b/Cargo.lock index 7a01acbb..cbf7a2ff 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -795,6 +795,7 @@ dependencies = [ "utoipa", "utoipa-swagger-ui", "uuid", + "validator", ] [[package]] @@ -820,6 +821,7 @@ dependencies = [ "tower-http 0.6.4", "tracing", "tracing-subscriber", + "url", "utoipa", "utoipa-swagger-ui", ] @@ -857,6 +859,7 @@ dependencies = [ "url", "utoipa", "uuid", + 
"validator", ] [[package]] diff --git a/Cargo.toml b/Cargo.toml index 34c34946..27b1e8c5 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,5 +74,6 @@ url = "2.5.4" utoipa = "5.3.1" utoipa-swagger-ui = "9.0.1" uuid = "1.15.1" +validator = { version = "0.20.0", features = [ "derive" ] } x25519-dalek = "2.0.1" zeroize = "1.8.1" diff --git a/atoma-proxy-service/Cargo.toml b/atoma-proxy-service/Cargo.toml index b935b4c6..47e9b26d 100644 --- a/atoma-proxy-service/Cargo.toml +++ b/atoma-proxy-service/Cargo.toml @@ -24,6 +24,7 @@ tokio = { workspace = true, features = [ "full" ] } tower-http = { workspace = true, features = [ "cors" ] } tracing.workspace = true tracing-subscriber.workspace = true +url = "2.5.0" utoipa = { workspace = true, features = [ "axum_extras" ] } utoipa-swagger-ui = { workspace = true, features = [ "axum" ] } diff --git a/atoma-proxy-service/src/config.rs b/atoma-proxy-service/src/config.rs index 98988800..bf1866d3 100644 --- a/atoma-proxy-service/src/config.rs +++ b/atoma-proxy-service/src/config.rs @@ -1,6 +1,24 @@ use config::{Config, File}; use serde::Deserialize; use std::path::Path; +use thiserror::Error; +use url::Url; + +#[derive(Error, Debug)] +pub enum ProxyServiceConfigError { + #[error("Invalid service bind address: {0}")] + InvalidBindAddress(String), + + #[error("Invalid Grafana URL: {0}")] + InvalidGrafanaUrl(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), +} + /// Configuration for the Atoma proxy service /// /// This struct holds the configuration parameters needed to run the Atoma Proxy Service, @@ -31,6 +49,64 @@ pub struct AtomaProxyServiceConfig { } impl AtomaProxyServiceConfig { + /// Validates the proxy service configuration + /// + /// # Returns + /// + /// Returns `Ok(())` if the configuration is valid, or a `ProxyServiceConfigError` if there are any validation errors. 
+ /// + /// # Errors + /// + /// Returns a `ProxyServiceConfigError` if: + /// * The service bind address is empty + /// * The Grafana URL is empty or invalid + /// * The Grafana API token is empty + /// * The Grafana dashboard tag is empty + /// * The Grafana stats tag is empty + pub fn validate(&self) -> Result<(), ProxyServiceConfigError> { + // Validate service bind address + if self.service_bind_address.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "service_bind_address".to_string(), + )); + } + + // Validate Grafana URL + if self.grafana_url.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_url".to_string(), + )); + } + if Url::parse(&self.grafana_url).is_err() { + return Err(ProxyServiceConfigError::InvalidGrafanaUrl( + self.grafana_url.clone(), + )); + } + + // Validate Grafana API token + if self.grafana_api_token.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_api_token".to_string(), + )); + } + + // Validate Grafana dashboard tag + if self.grafana_dashboard_tag.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_dashboard_tag".to_string(), + )); + } + + // Validate Grafana stats tag + if self.grafana_stats_tag.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_stats_tag".to_string(), + )); + } + + Ok(()) + } + /// Creates a new AtomaProxyServiceConfig instance from a configuration file /// /// # Arguments @@ -44,13 +120,20 @@ impl AtomaProxyServiceConfig { /// /// Returns a new `AtomaProxyServiceConfig` instance populated with values from the config file. 
/// - /// # Panics + /// # Errors /// - /// This method will panic if: + /// Returns a `ProxyServiceConfigError` if: /// * The configuration file cannot be read or parsed /// * The "atoma-proxy-service" section is missing from the configuration /// * The configuration format doesn't match the expected structure - pub fn from_file_path>(config_file_path: P) -> Self { + /// * The configuration fails validation + /// + /// # Panics + /// + /// Panics if the path cannot be converted to a string. + pub fn from_file_path>( + config_file_path: P, + ) -> Result { let builder = Config::builder() .add_source(File::with_name(config_file_path.as_ref().to_str().unwrap())) .add_source( @@ -58,11 +141,12 @@ impl AtomaProxyServiceConfig { .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma-proxy-service configuration file"); - config - .get::("atoma_proxy_service") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_proxy_service")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-proxy/Cargo.toml b/atoma-proxy/Cargo.toml index 679cf731..b09d6f4d 100644 --- a/atoma-proxy/Cargo.toml +++ b/atoma-proxy/Cargo.toml @@ -56,6 +56,7 @@ url = { workspace = true } utoipa = { workspace = true, features = [ "axum_extras", "preserve_path_order" ] } utoipa-swagger-ui = { workspace = true, features = [ "axum" ] } uuid = { workspace = true } +validator = { workspace = true } [features] google-oauth = [ "atoma-auth/google-oauth", "atoma-proxy-service/google-oauth" ] diff --git a/atoma-proxy/src/main.rs b/atoma-proxy/src/main.rs index 7e359324..65fbd4c5 100644 --- a/atoma-proxy/src/main.rs +++ b/atoma-proxy/src/main.rs @@ -24,7 +24,8 @@ mod server; mod telemetry; /// Command line arguments for the Atoma node -#[derive(Parser)] +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] struct Args { /// 
Path to the configuration file #[arg(short, long)] @@ -36,36 +37,39 @@ struct Args { /// This struct holds the configuration settings for various components /// of the Atoma proxy, including the Sui, service, and state manager configurations. #[derive(Debug)] -struct Config { +pub struct Config { /// Configuration for the Sui component. - sui: AtomaSuiConfig, + pub sui: AtomaSuiConfig, /// Configuration for the service component. - service: AtomaServiceConfig, + pub service: AtomaServiceConfig, /// Configuration for the state manager component. - state: AtomaStateManagerConfig, + pub state: AtomaStateManagerConfig, /// Configuration for the proxy service component. - proxy_service: AtomaProxyServiceConfig, + pub proxy_service: AtomaProxyServiceConfig, /// Configuration for the authentication component. - auth: AtomaAuthConfig, + pub auth: AtomaAuthConfig, /// Configuration for the P2P component. - p2p: AtomaP2pNodeConfig, + pub p2p: AtomaP2pNodeConfig, } impl Config { - fn load(path: String) -> Self { - Self { + fn load(path: String) -> Result { + Ok(Self { sui: AtomaSuiConfig::from_file_path(path.clone()), - service: AtomaServiceConfig::from_file_path(path.clone()), - state: AtomaStateManagerConfig::from_file_path(path.clone()), - proxy_service: AtomaProxyServiceConfig::from_file_path(path.clone()), + service: AtomaServiceConfig::from_file_path(path.clone()) + .context("failed to load service configuration")?, + state: AtomaStateManagerConfig::from_file_path(path.clone()) + .context("failed to load state manager configuration")?, + proxy_service: AtomaProxyServiceConfig::from_file_path(path.clone()) + .context("Failed to load proxy configuration")?, auth: AtomaAuthConfig::from_file_path(path.clone()), p2p: AtomaP2pNodeConfig::from_file_path(path), - } + }) } } @@ -81,7 +85,7 @@ async fn main() -> Result<()> { let args = Args::parse(); tracing::info!("Loading configuration from: {}", args.config_path); - let config = Config::load(args.config_path); + let config = 
Config::load(args.config_path).context("Failed to load configuration")?; tracing::info!("Configuration loaded successfully"); // Initialize Sentry only if DSN is provided diff --git a/atoma-proxy/src/server/config.rs b/atoma-proxy/src/server/config.rs index d7e68c7a..b303acea 100644 --- a/atoma-proxy/src/server/config.rs +++ b/atoma-proxy/src/server/config.rs @@ -2,52 +2,129 @@ use std::path::Path; use atoma_proxy_service::ModelModality; use serde::Deserialize; +use thiserror::Error; +use validator::{Validate, ValidationError}; use config::{Config, File}; +#[derive(Error, Debug)] +pub enum ServiceConfigError { + #[error("Invalid service bind address: {0}")] + InvalidBindAddress(String), + + #[error("Invalid heartbeat URL: {0}")] + InvalidHeartbeatUrl(String), + + #[error("Invalid model configuration: {0}")] + InvalidModelConfig(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), + + #[error("Validation error: {0}")] + ValidationError(String), +} + /// Configuration for the Atoma Service. /// /// This struct holds the configuration options for the Atoma Service, /// including URLs for various services and a list of models. -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, Validate)] pub struct AtomaServiceConfig { /// Bind address for the Atoma Proxy Server. /// /// This field specifies the address and port on which the Atoma Proxy Server will bind. + #[validate(custom(function = "validate_bind_address"))] pub service_bind_address: String, /// List of model names. /// /// This field contains a list of model names that are deployed by the Atoma Service, /// on behalf of the node. + #[validate(length(min = 1, message = "at least one model must be specified"))] pub models: Vec, /// List of model revisions. /// /// This field contains a list of the associated model revisions, for each /// model that is currently supported by the Atoma Service. 
+ #[validate(length(min = 1, message = "at least one revision must be specified"))] pub revisions: Vec, /// List of model modalities. /// /// This field contains a list of the associated model modalities, for each /// model that is currently supported by the Atoma Service. + #[validate(length(min = 1, message = "at least one modality must be specified"))] + #[validate(custom(function = "validate_modalities"))] pub modalities: Vec>, /// Hugging face api token. /// /// This field contains the Hugging Face API token that is used to authenticate /// requests to the Hugging Face API. + #[validate(length(min = 1, message = "HF token cannot be empty"))] pub hf_token: String, /// Path to open router json. + #[validate(length(min = 1, message = "open router models file path cannot be empty"))] pub open_router_models_file: String, /// Heartbeat URL. + #[validate(url(message = "heartbeat_url must be a valid URL"))] pub heartbeat_url: String, } +fn validate_bind_address(addr: &str) -> Result<(), ValidationError> { + if addr.is_empty() { + return Err(ValidationError::new("empty_bind_address")); + } + + // Basic format validation for bind address (host:port) + let parts: Vec<&str> = addr.split(':').collect(); + if parts.len() != 2 { + return Err(ValidationError::new("invalid_bind_address_format")); + } + + // Validate port is a number + if parts[1].parse::().is_err() { + return Err(ValidationError::new("invalid_port_number")); + } + + Ok(()) +} + +fn validate_modalities(modalities: &[Vec]) -> Result<(), ValidationError> { + for model_modalities in modalities { + if model_modalities.is_empty() { + return Err(ValidationError::new("empty_modalities")); + } + } + Ok(()) +} + impl AtomaServiceConfig { + /// Validates the service configuration + /// + /// # Returns + /// + /// Returns `Ok(())` if the configuration is valid, or a `ServiceConfigError` if there are any validation errors. 
+ /// + /// # Errors + /// + /// Returns a `ServiceConfigError` if: + /// - The service bind address is invalid + /// - The heartbeat URL is invalid + /// - The models configuration is invalid + /// - The HF token is empty + /// - The open router models file path is empty + pub fn validate(&self) -> Result<(), ServiceConfigError> { + Validate::validate(self).map_err(|e| ServiceConfigError::ValidationError(e.to_string())) + } + /// Creates a new `AtomaServiceConfig` instance from a configuration file. /// /// # Arguments @@ -60,13 +137,18 @@ impl AtomaServiceConfig { /// /// Returns a new `AtomaServiceConfig` instance populated with values from the config file. /// - /// # Panics + /// # Errors /// - /// This method will panic if: + /// Returns a `ServiceConfigError` if: /// * The configuration file cannot be read or parsed /// * The "atoma-service" section is missing from the configuration /// * The configuration format doesn't match the expected structure - pub fn from_file_path>(config_file_path: P) -> Self { + /// * The configuration fails validation + /// + /// # Panics + /// + /// This method will panic if the path cannot be converted to a string. 
+ pub fn from_file_path>(config_file_path: P) -> Result { let builder = Config::builder() .add_source(File::with_name(config_file_path.as_ref().to_str().unwrap())) .add_source( @@ -74,11 +156,12 @@ impl AtomaServiceConfig { .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma-service configuration file"); - config - .get::("atoma_service") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_service")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-state/Cargo.toml b/atoma-state/Cargo.toml index cbf4fa0d..cc143b51 100644 --- a/atoma-state/Cargo.toml +++ b/atoma-state/Cargo.toml @@ -31,6 +31,7 @@ topology = { workspace = true } tracing = { workspace = true } url = { workspace = true } utoipa = { workspace = true } +validator = { workspace = true } [dev-dependencies] futures = { workspace = true } diff --git a/atoma-state/src/config.rs b/atoma-state/src/config.rs index a2dd6d14..8ba29c02 100644 --- a/atoma-state/src/config.rs +++ b/atoma-state/src/config.rs @@ -1,32 +1,73 @@ use config::Config; use serde::{Deserialize, Serialize}; use std::path::Path; +use url::Url; +use validator::{Validate, ValidationError}; +use crate::config_error::ConfigError; use crate::types::Modalities; /// Configuration for the Atoma State Manager instance. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize, Validate)] pub struct AtomaStateManagerConfig { /// The URL of the Postgres database. + #[validate(custom(function = "validate_postgres_url"))] pub database_url: String, /// The configuration for metrics collection. + #[validate(nested)] pub metrics_collection: MetricsCollectionConfig, } /// Configuration for metrics collection. 
-#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize, Validate)] pub struct MetricsCollectionConfig { /// The URL endpoint where metrics can be collected from. + #[validate(url(message = "metrics_url must be a valid URL"))] pub metrics_url: String, /// A vector of tuples containing modality types and their corresponding model identifiers. + #[validate(length(min = 1, message = "at least one model must be specified"))] + #[validate(custom(function = "validate_models"))] pub models: Vec<(Modalities, String)>, /// Optional parameter to limit the number of best nodes returned. + #[validate(range(min = 1, message = "top_k must be greater than 0"))] pub top_k: Option, } +fn validate_postgres_url(url: &str) -> Result<(), ValidationError> { + if url.is_empty() { + return Err(ValidationError::new("empty_database_url")); + } + + // Basic PostgreSQL URL format validation + if !url.starts_with("postgresql://") && !url.starts_with("postgres://") { + return Err(ValidationError::new("invalid_postgres_url_format")); + } + + // Try to parse as URL + if Url::parse(url).is_err() { + return Err(ValidationError::new("invalid_url_format")); + } + + Ok(()) +} + +fn validate_models(models: &[(Modalities, String)]) -> Result<(), ValidationError> { + for (modality, model_id) in models { + if model_id.is_empty() { + return Err(ValidationError::new("empty_model_id")); + } + match modality { + Modalities::ChatCompletions + | Modalities::Embeddings + | Modalities::ImagesGenerations => (), + } + } + Ok(()) +} + impl AtomaStateManagerConfig { /// Constructor #[must_use] @@ -37,6 +78,23 @@ impl AtomaStateManagerConfig { } } + /// Validates the configuration + /// + /// # Returns + /// + /// Returns `Ok(())` if the configuration is valid, or a `ConfigError` if there are any validation errors. 
+ /// + /// # Errors + /// + /// Returns a `ConfigError` if: + /// - The database URL is invalid or empty + /// - The metrics URL is invalid or empty + /// - The models configuration is invalid + /// - The top_k value is invalid + pub fn validate(&self) -> Result<(), ConfigError> { + Validate::validate(self).map_err(|e| ConfigError::ValidationError(e.to_string())) + } + /// Creates a new `AtomaStateManagerConfig` instance from a configuration file. /// /// # Arguments @@ -47,22 +105,18 @@ impl AtomaStateManagerConfig { /// /// Returns a new `AtomaStateManagerConfig` instance populated with values from the configuration file. /// - /// # Panics - /// - /// This method will panic if: - /// - The configuration file cannot be read or parsed. - /// - The "atoma-state" section is missing from the configuration file. - /// - The required fields are missing or have invalid types in the configuration file. + /// # Errors /// - /// # Examples + /// Returns a `ConfigError` if: + /// - The configuration file cannot be read or parsed + /// - The "atoma-state" section is missing from the configuration file + /// - The required fields are missing or have invalid types + /// - The configuration fails validation /// - /// ```rust,ignore - /// use std::path::Path; - /// use atoma_node::atoma_state::AtomaStateManagerConfig; + /// # Panics /// - /// let config = AtomaStateManagerConfig::from_file_path("path/to/config.toml"); - /// ``` - pub fn from_file_path>(config_file_path: P) -> Self { + /// This method will panic if the path cannot be converted to a string. 
+ pub fn from_file_path>(config_file_path: P) -> Result { let builder = Config::builder() .add_source(config::File::with_name( config_file_path.as_ref().to_str().unwrap(), @@ -72,11 +126,12 @@ impl AtomaStateManagerConfig { .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma state configuration file"); - config - .get::("atoma_state") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_state")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-state/src/config_error.rs b/atoma-state/src/config_error.rs new file mode 100644 index 00000000..dc84793b --- /dev/null +++ b/atoma-state/src/config_error.rs @@ -0,0 +1,25 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ConfigError { + #[error("Invalid metrics URL: {0}")] + InvalidMetricsUrl(String), + + #[error("Invalid database URL: {0}")] + InvalidDatabaseUrl(String), + + #[error("Invalid model configuration: {0}")] + InvalidModelConfig(String), + + #[error("Invalid top_k value: {0}")] + InvalidTopK(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), + + #[error("Validation error: {0}")] + ValidationError(String), +} diff --git a/atoma-state/src/lib.rs b/atoma-state/src/lib.rs index a919d749..c97ec12f 100644 --- a/atoma-state/src/lib.rs +++ b/atoma-state/src/lib.rs @@ -4,6 +4,7 @@ #![allow(clippy::doc_markdown)] pub mod config; +pub mod config_error; pub mod errors; pub mod handlers; pub mod metrics; From dc6de22f9e0327213e72a7fb37b52821d7942e0c Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 13:16:16 -0500 Subject: [PATCH 45/61] chore: update auth config --- Cargo.lock | 1 + atoma-auth/Cargo.toml | 1 + atoma-auth/src/config.rs | 97 +++++++++++++++++++++++----------------- atoma-proxy/src/main.rs | 3 +- 4 files 
changed, 61 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index cbf7a2ff..b7009ce6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -714,6 +714,7 @@ dependencies = [ "thiserror 2.0.12", "tokio", "tracing", + "validator", ] [[package]] diff --git a/atoma-auth/Cargo.toml b/atoma-auth/Cargo.toml index 5cf228e7..ce164035 100644 --- a/atoma-auth/Cargo.toml +++ b/atoma-auth/Cargo.toml @@ -34,6 +34,7 @@ sui-sdk-types = { workspace = true, features = [ "serde" ] } thiserror.workspace = true tokio.workspace = true tracing.workspace = true +validator.workspace = true [features] google-oauth = [ ] diff --git a/atoma-auth/src/config.rs b/atoma-auth/src/config.rs index ace0182d..7b12c820 100644 --- a/atoma-auth/src/config.rs +++ b/atoma-auth/src/config.rs @@ -1,23 +1,48 @@ -use config::Config; +use config::{Config, File}; use serde::{Deserialize, Serialize}; use std::path::Path; +use thiserror::Error; +use validator::Validate; -/// Configuration for Postgres database connection. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Error, Debug)] +pub enum AuthConfigError { + #[error("Invalid auth configuration: {0}")] + InvalidConfig(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), + + #[error("Validation error: {0}")] + ValidationError(String), +} + +#[derive(Clone, Debug, Serialize, Deserialize, Validate)] pub struct AtomaAuthConfig { - /// The secret key for JWT authentication. - pub secret_key: String, - /// The access token lifetime in minutes. + /// Access token validity duration in minutes + #[validate(range(min = 1, message = "access token lifetime must be at least 1 minute"))] pub access_token_lifetime: usize, - /// The refresh token lifetime in days. - pub refresh_token_lifetime: usize, - /// Google client id. 
+ + /// Google OAuth client ID (required only when google-oauth feature is enabled) + #[validate(length( + min = 1, + message = "google client ID cannot be empty when google-oauth is enabled" + ))] #[cfg(feature = "google-oauth")] pub google_client_id: String, + + /// Refresh token validity duration in days + #[validate(range(min = 1, message = "refresh token lifetime must be at least 1 day"))] + pub refresh_token_lifetime: usize, + + /// JWT signing key for token generation + #[validate(length(min = 1, message = "secret key cannot be empty"))] + pub secret_key: String, } impl AtomaAuthConfig { - /// Constructor #[must_use] pub const fn new( secret_key: String, @@ -26,54 +51,46 @@ impl AtomaAuthConfig { #[cfg(feature = "google-oauth")] google_client_id: String, ) -> Self { Self { - secret_key, access_token_lifetime, - refresh_token_lifetime, #[cfg(feature = "google-oauth")] google_client_id, + refresh_token_lifetime, + secret_key, } } - /// Creates a new `AtomaAuthConfig` instance from a configuration file. + /// Validates the auth configuration /// - /// # Arguments + /// # Errors /// - /// * `config_file_path` - A path-like object representing the location of the configuration file. + /// Returns `AuthConfigError::ValidationError` if any validation rules fail + pub fn validate(&self) -> Result<(), AuthConfigError> { + Validate::validate(self).map_err(|e| AuthConfigError::ValidationError(e.to_string())) + } + + /// Loads configuration from a file path /// - /// # Returns + /// # Errors /// - /// Returns a new `AtomaAuthConfig` instance populated with values from the configuration file. + /// Returns `AuthConfigError` if the configuration file cannot be read or parsed /// /// # Panics /// - /// This method will panic if: - /// - The configuration file cannot be read or parsed. - /// - The "atoma-auth" section is missing from the configuration file. - /// - The required fields are missing or have invalid types in the configuration file. 
- /// - /// # Examples - /// - /// ```rust,ignore - /// use std::path::Path; - /// use atoma_node::atoma_state::AtomaAuthConfig; - /// - /// let config = AtomaAuthConfig::from_file_path("path/to/config.toml"); - /// ``` - pub fn from_file_path>(config_file_path: P) -> Self { + /// Panics if the path cannot be converted to a string + pub fn from_file_path>(config_file_path: P) -> Result { let builder = Config::builder() - .add_source(config::File::with_name( - config_file_path.as_ref().to_str().unwrap(), - )) + .add_source(File::with_name(config_file_path.as_ref().to_str().unwrap())) .add_source( config::Environment::with_prefix("ATOMA_AUTH") .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma state configuration file"); - config - .get::("atoma_auth") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_auth")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-proxy/src/main.rs b/atoma-proxy/src/main.rs index 65fbd4c5..e1675af7 100644 --- a/atoma-proxy/src/main.rs +++ b/atoma-proxy/src/main.rs @@ -67,7 +67,8 @@ impl Config { .context("failed to load state manager configuration")?, proxy_service: AtomaProxyServiceConfig::from_file_path(path.clone()) .context("Failed to load proxy configuration")?, - auth: AtomaAuthConfig::from_file_path(path.clone()), + auth: AtomaAuthConfig::from_file_path(path.clone()) + .context("Failed to load auth configuration")?, p2p: AtomaP2pNodeConfig::from_file_path(path), }) } From 95d373d2582297036afe63ede829e5e77a206881 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 20 May 2025 15:23:14 -0500 Subject: [PATCH 46/61] chore: move sentry config to service --- atoma-proxy-service/src/config.rs | 7 ++----- atoma-proxy/src/main.rs | 4 ++-- atoma-proxy/src/server/config.rs | 6 ++++++ config.example.toml | 4 ++-- 4 files changed, 12 insertions(+), 9 deletions(-) diff --git 
a/atoma-proxy-service/src/config.rs b/atoma-proxy-service/src/config.rs index bf1866d3..4dfa823f 100644 --- a/atoma-proxy-service/src/config.rs +++ b/atoma-proxy-service/src/config.rs @@ -41,11 +41,8 @@ pub struct AtomaProxyServiceConfig { /// Only dashboards tagged with this tag will be proxied as stats pub grafana_stats_tag: String, - /// Sentry DSN for error reporting - pub sentry_dsn: Option, - - /// Environment - pub environment: Option, + /// Enable Grafana dashboard and stats proxy + pub enable_grafana_proxy: Option, } impl AtomaProxyServiceConfig { diff --git a/atoma-proxy/src/main.rs b/atoma-proxy/src/main.rs index e1675af7..84c226f1 100644 --- a/atoma-proxy/src/main.rs +++ b/atoma-proxy/src/main.rs @@ -90,7 +90,7 @@ async fn main() -> Result<()> { tracing::info!("Configuration loaded successfully"); // Initialize Sentry only if DSN is provided - let _guard = config.proxy_service.sentry_dsn.map_or_else( + let _guard = config.service.sentry_dsn.clone().map_or_else( || { info!("No Sentry DSN provided, skipping Sentry initialization"); None @@ -106,7 +106,7 @@ async fn main() -> Result<()> { send_default_pii: false, environment: Some(std::borrow::Cow::Owned( config - .proxy_service + .service .environment .clone() .unwrap_or_else(|| "development".to_string()), diff --git a/atoma-proxy/src/server/config.rs b/atoma-proxy/src/server/config.rs index b303acea..5806cd45 100644 --- a/atoma-proxy/src/server/config.rs +++ b/atoma-proxy/src/server/config.rs @@ -76,6 +76,12 @@ pub struct AtomaServiceConfig { /// Heartbeat URL. 
#[validate(url(message = "heartbeat_url must be a valid URL"))] pub heartbeat_url: String, + + /// Sentry DSN for error reporting + pub sentry_dsn: Option, + + /// Environment + pub environment: Option, } fn validate_bind_address(addr: &str) -> Result<(), ValidationError> { diff --git a/config.example.toml b/config.example.toml index aff3b32e..942e9145 100644 --- a/config.example.toml +++ b/config.example.toml @@ -42,14 +42,14 @@ open_router_models_file = "/app/open_router.json" # Path to the Open Router JSON password = "password" # Authentication password for the service API revisions = [ "main", "main" ] # Model revision/version tags (must match models array length) service_bind_address = "0.0.0.0:8080" # HTTP service binding address and port (must match docker-compose.yml) +environment = "development" # or "production" based on your deployment used in sentry +sentry_dsn = "sentry_dsn" # Sentry DSN for error reporting [atoma_proxy_service] -environment = "development" # or "production" based on your deployment used in sentry grafana_api_token = "" # Grafana API token (read-only permissions required) grafana_dashboard_tag = "" # Tag to filter which Grafana dashboards to expose as graphs grafana_stats_tag = "" # Tag to filter which Grafana dashboards to expose as stats grafana_url = "" # Grafana instance URL for metrics visualization -sentry_dsn = "sentry_dsn" # Sentry DSN for error reporting service_bind_address = "0.0.0.0:8081" # Proxy service binding address and port (must match docker-compose.yml) From 5deefe82eb61d644f0c26c58b428e41ba6c93ee5 Mon Sep 17 00:00:00 2001 From: Jorge Antonio Date: Wed, 21 May 2025 14:58:59 +0200 Subject: [PATCH 47/61] taplo fmt --- config.example.toml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/config.example.toml b/config.example.toml index 942e9145..7eb4b132 100644 --- a/config.example.toml +++ b/config.example.toml @@ -25,6 +25,7 @@ models = [ top_k = 10 # Number of top performing nodes to return in 
rankings [atoma_service] +environment = "development" # or "production" based on your deployment used in sentry heartbeat_url = "my-heartbeat-url" # Heartbeat URL for the service hf_token = "" # Hugging Face API token (required for gated/private models) modalities = [ @@ -41,9 +42,8 @@ models = [ open_router_models_file = "/app/open_router.json" # Path to the Open Router JSON file for model configuration password = "password" # Authentication password for the service API revisions = [ "main", "main" ] # Model revision/version tags (must match models array length) +sentry_dsn = "sentry_dsn" # Sentry DSN for error reporting service_bind_address = "0.0.0.0:8080" # HTTP service binding address and port (must match docker-compose.yml) -environment = "development" # or "production" based on your deployment used in sentry -sentry_dsn = "sentry_dsn" # Sentry DSN for error reporting [atoma_proxy_service] grafana_api_token = "" # Grafana API token (read-only permissions required) From adca7dfb9ad8a89a60046b6740a49a98a918c731 Mon Sep 17 00:00:00 2001 From: chad Date: Wed, 21 May 2025 09:30:12 -0500 Subject: [PATCH 48/61] chore: add docs for functions --- atoma-proxy/src/server/config.rs | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) diff --git a/atoma-proxy/src/server/config.rs b/atoma-proxy/src/server/config.rs index 5806cd45..73a4d75a 100644 --- a/atoma-proxy/src/server/config.rs +++ b/atoma-proxy/src/server/config.rs @@ -83,7 +83,23 @@ pub struct AtomaServiceConfig { /// Environment pub environment: Option, } - +/// Validates the bind address format. 
+/// +/// This function checks if the provided bind address is valid by ensuring: +/// - The address is not empty +/// - The address follows the format "host:port" +/// - The port is a valid number between 0 and 65535 +/// +/// # Arguments +/// +/// * `addr` - The bind address string to validate +/// +/// # Returns +/// +/// Returns `Ok(())` if the address is valid, or a `ValidationError` if: +/// - The address is empty +/// - The address format is invalid (not host:port) +/// - The port is not a valid number fn validate_bind_address(addr: &str) -> Result<(), ValidationError> { if addr.is_empty() { return Err(ValidationError::new("empty_bind_address")); @@ -103,6 +119,19 @@ fn validate_bind_address(addr: &str) -> Result<(), ValidationError> { Ok(()) } +/// Validates the modalities configuration. +/// +/// This function checks if the provided modalities configuration is valid by ensuring: +/// - Each model has at least one modality specified +/// +/// # Arguments +/// +/// * `modalities` - A slice of vectors containing model modalities to validate +/// +/// # Returns +/// +/// Returns `Ok(())` if the modalities configuration is valid, or a `ValidationError` if: +/// - Any model has an empty modalities list fn validate_modalities(modalities: &[Vec]) -> Result<(), ValidationError> { for model_modalities in modalities { if model_modalities.is_empty() { From 47947883a4df8117a51b15923b0db8f015765f19 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jorge=20Ant=C3=B3nio?= Date: Fri, 23 May 2025 08:33:53 +0100 Subject: [PATCH 49/61] feat: increase max body size in middleware logic (#483) * feat: develop branch merge (#452) * feat: ppcie verifier logic (#441) * chore(deps): bump atoma-utils from `d59ea1f` to `dfe0830` Bumps [atoma-utils](https://github.com/atoma-network/atoma-node) from `d59ea1f` to `dfe0830`. 
- [Release notes](https://github.com/atoma-network/atoma-node/releases) - [Commits](https://github.com/atoma-network/atoma-node/compare/d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5...dfe0830ecffc5429b4c131bc88b18fb8016cd5b9) --- updated-dependencies: - dependency-name: atoma-utils dependency-version: dfe0830ecffc5429b4c131bc88b18fb8016cd5b9 dependency-type: direct:production ... Signed-off-by: dependabot[bot] * chore(deps): bump atoma-p2p from `d59ea1f` to `dfe0830` Bumps [atoma-p2p](https://github.com/atoma-network/atoma-node) from `d59ea1f` to `dfe0830`. - [Release notes](https://github.com/atoma-network/atoma-node/releases) - [Commits](https://github.com/atoma-network/atoma-node/compare/d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5...dfe0830ecffc5429b4c131bc88b18fb8016cd5b9) --- updated-dependencies: - dependency-name: atoma-p2p dependency-version: dfe0830ecffc5429b4c131bc88b18fb8016cd5b9 dependency-type: direct:production ... Signed-off-by: dependabot[bot] * chore(deps): bump atoma-sui from `d59ea1f` to `dfe0830` Bumps [atoma-sui](https://github.com/atoma-network/atoma-node) from `d59ea1f` to `dfe0830`. - [Release notes](https://github.com/atoma-network/atoma-node/releases) - [Commits](https://github.com/atoma-network/atoma-node/compare/d59ea1fd6a26f03d56f10eff25bbba7e7c9694d5...dfe0830ecffc5429b4c131bc88b18fb8016cd5b9) --- updated-dependencies: - dependency-name: atoma-sui dependency-version: dfe0830ecffc5429b4c131bc88b18fb8016cd5b9 dependency-type: direct:production ... 
Signed-off-by: dependabot[bot] * ppcie verifier logic * add tables for fiat (#442) * fix: ensure 400 is returned on cancelled streams * chore: removed unnecessary 400 response * chore: remove unnecessary comment and clone * feat: add metrics to track latency from proxy to node (#444) * feat: add metrics to track latency from proxy to node * fix: move network ticker to seperate thread * chore: added docs + removed unnecessary port * fix: remove unnecessary prepend * fix: add model names to stream cancellations (#447) * handle PR comments --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Co-authored-by: Chad Nehemiah * debug tracing for sending chunk for model * fix: update fiat (#471) * fix: update fiat * million * feat: use fiat when available (#474) * feat: use fiat when available * address comments * feat: split input output tokens (#477) * feat: split tokens into input/output * fix clippy * fix openapi.yml * address comments --------- Co-authored-by: Chad Nehemiah * fix: remove bad check (#480) --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Co-authored-by: Chad Nehemiah * feat: add storage retention configuration for Prometheus (#482) * increase max body size in middleware logic --------- Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> Co-authored-by: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Co-authored-by: Chad Nehemiah --- atoma-proxy/src/server/middleware.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs index 3f6007fe..6f364913 100644 --- a/atoma-proxy/src/server/middleware.rs +++ 
b/atoma-proxy/src/server/middleware.rs @@ -50,7 +50,7 @@ const DEFAULT_IMAGE_RESOLUTION: u64 = 1024 * 1024; /// Maximum size of the body in bytes. /// This is to prevent DoS attacks by limiting the size of the request body. -const MAX_BODY_SIZE: usize = 1024 * 1024; // 1MB +const MAX_BODY_SIZE: usize = 1024 * 1024 * 1024; // 1GB /// Metadata extension for tracking request-specific information about the selected inference node. /// From ba3ac8e413a8b0806835098a799b988e70fe92ca Mon Sep 17 00:00:00 2001 From: Jorge Antonio Date: Tue, 27 May 2025 08:49:03 +0100 Subject: [PATCH 50/61] squash commits and resolve issues with main --- Cargo.lock | 326 +++++++++++++++------------ Cargo.toml | 1 + atoma-auth/Cargo.toml | 1 + atoma-auth/src/config.rs | 97 ++++---- atoma-proxy-service/Cargo.toml | 1 + atoma-proxy-service/src/config.rs | 109 +++++++-- atoma-proxy/Cargo.toml | 1 + atoma-proxy/src/main.rs | 41 ++-- atoma-proxy/src/server/config.rs | 138 +++++++++++- atoma-proxy/src/server/middleware.rs | 2 +- atoma-state/Cargo.toml | 1 + atoma-state/src/config.rs | 97 ++++++-- atoma-state/src/config_error.rs | 25 ++ atoma-state/src/handlers.rs | 25 +- atoma-state/src/lib.rs | 1 + atoma-state/src/metrics.rs | 3 - atoma-state/src/state_manager.rs | 7 +- atoma-state/src/tests.rs | 10 +- config.example.toml | 4 +- 19 files changed, 608 insertions(+), 282 deletions(-) create mode 100644 atoma-state/src/config_error.rs diff --git a/Cargo.lock b/Cargo.lock index de7e4501..381926f6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -220,12 +220,12 @@ dependencies = [ [[package]] name = "anstyle-wincon" -version = "3.0.7" +version = "3.0.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca3534e77181a9cc07539ad51f2141fe32f6c3ffd4df76db8ad92346b003ae4e" +checksum = "6680de5231bd6ee4c6191b8a1325daa282b415391ec9d3a37bd34f2060dc73fa" dependencies = [ "anstyle", - "once_cell", + "once_cell_polyfill", "windows-sys 0.59.0", ] @@ -587,9 +587,9 @@ dependencies = [ 
[[package]] name = "async-io" -version = "2.4.0" +version = "2.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "43a2b323ccce0a1d90b449fd71f2a06ca7faa7c54c2751f06c9bd851fc061059" +checksum = "1237c0ae75a0f3765f58910ff9cdd0a12eeb39ab2f4c7de23262f337f0aacbb3" dependencies = [ "async-lock", "cfg-if", @@ -598,7 +598,7 @@ dependencies = [ "futures-lite", "parking", "polling", - "rustix 0.38.44", + "rustix", "slab", "tracing", "windows-sys 0.59.0", @@ -714,12 +714,13 @@ dependencies = [ "thiserror 2.0.12", "tokio", "tracing", + "validator", ] [[package]] name = "atoma-p2p" version = "0.1.0" -source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#6b95d099f4c767ee9e6802b1d56c3633ad13b13c" +source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#595baa9f82ff92df6cd56d05d24d9b2994a389e6" dependencies = [ "blake3", "bytes", @@ -732,6 +733,7 @@ dependencies = [ "isocountry", "libp2p", "opentelemetry", + "prometheus-http-query", "rand 0.8.5", "reqwest", "serde", @@ -795,6 +797,7 @@ dependencies = [ "utoipa", "utoipa-swagger-ui", "uuid", + "validator", ] [[package]] @@ -820,6 +823,7 @@ dependencies = [ "tower-http 0.6.4", "tracing", "tracing-subscriber", + "url", "utoipa", "utoipa-swagger-ui", ] @@ -857,12 +861,13 @@ dependencies = [ "url", "utoipa", "uuid", + "validator", ] [[package]] name = "atoma-sui" version = "0.1.0" -source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#6b95d099f4c767ee9e6802b1d56c3633ad13b13c" +source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#595baa9f82ff92df6cd56d05d24d9b2994a389e6" dependencies = [ "anyhow", "config", @@ -881,7 +886,7 @@ dependencies = [ [[package]] name = "atoma-utils" version = "0.1.0" -source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#6b95d099f4c767ee9e6802b1d56c3633ad13b13c" +source = "git+https://github.com/atoma-network/atoma-node.git?branch=main#595baa9f82ff92df6cd56d05d24d9b2994a389e6" 
dependencies = [ "aes-gcm", "anyhow", @@ -1250,9 +1255,9 @@ checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" [[package]] name = "bitflags" -version = "2.9.0" +version = "2.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5c8214115b7bf84099f1309324e63141d4c5d7cc26862f97a0a857dbefe165bd" +checksum = "1b8e56985ec62d17e9c1001dc89c88ecd7dc08e47eba5ec7c29c7b5eeecde967" dependencies = [ "serde", ] @@ -1484,9 +1489,9 @@ dependencies = [ [[package]] name = "cc" -version = "1.2.22" +version = "1.2.24" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32db95edf998450acc7881c932f94cd9b05c87b4b2599e8bab064753da4acfd1" +checksum = "16595d3be041c03b09d08d0858631facccee9221e579704070e6e9e4915d3bc7" dependencies = [ "jobserver", "libc", @@ -1797,9 +1802,9 @@ dependencies = [ [[package]] name = "core-foundation" -version = "0.10.0" +version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b55271e5c8c478ad3f38ad24ef34923091e0548492a266d19b3c0b4d82574c63" +checksum = "b2a6cd9ae233e7f62ba4e9353e81a88df7fc8a5987b8d445b4d90c879bd156f6" dependencies = [ "core-foundation-sys", "libc", @@ -2614,9 +2619,9 @@ dependencies = [ [[package]] name = "errno" -version = "0.3.11" +version = "0.3.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "976dd42dc7e85965fe702eb8164f21f450704bdde31faefd6471dba214cb594e" +checksum = "cea14ef9355e3beab063703aa9dab15afd25f0667c341310c1e5274bb1d0da18" dependencies = [ "libc", "windows-sys 0.59.0", @@ -2644,9 +2649,9 @@ dependencies = [ [[package]] name = "ethnum" -version = "1.5.1" +version = "1.5.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0939f82868b77ef93ce3c3c3daf2b3c526b456741da5a1a4559e590965b6026b" +checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" [[package]] name = "event-listener" @@ -3146,15 +3151,16 @@ dependencies = [ 
[[package]] name = "generator" -version = "0.8.4" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc6bd114ceda131d3b1d665eba35788690ad37f5916457286b32ab6fd3c438dd" +checksum = "d18470a76cb7f8ff746cf1f7470914f900252ec36bbc40b569d74b1258446827" dependencies = [ + "cc", "cfg-if", "libc", "log", "rustversion", - "windows 0.58.0", + "windows 0.61.1", ] [[package]] @@ -3384,9 +3390,9 @@ checksum = "d231dfb89cfffdbc30e7fc41579ed6066ad03abda9e567ccafae602b97ec5024" [[package]] name = "hermit-abi" -version = "0.4.0" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fbf6a919d6cf397374f7dfeeea91d974c7c0a7221d0d0f4f20d859d329e53fcc" +checksum = "f154ce46856750ed433c8649605bf7ed2de3bc35fd9d2a9f30cddd873c80cb08" [[package]] name = "hex" @@ -3609,11 +3615,10 @@ dependencies = [ [[package]] name = "hyper-rustls" -version = "0.27.5" +version = "0.27.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d191583f3da1305256f22463b9bb0471acad48a4e534a5218b9963e9c1f59b2" +checksum = "03a01595e11bdcec50946522c32dde3fc6914743000a68b93000965f2f02406d" dependencies = [ - "futures-util", "http 1.3.1", "hyper", "hyper-util", @@ -3624,7 +3629,7 @@ dependencies = [ "tokio", "tokio-rustls", "tower-service", - "webpki-roots 0.26.11", + "webpki-roots 1.0.0", ] [[package]] @@ -3658,9 +3663,9 @@ dependencies = [ [[package]] name = "hyper-util" -version = "0.1.11" +version = "0.1.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "497bbc33a26fdd4af9ed9c70d63f61cf56a938375fbb32df34db9b1cd6d643f2" +checksum = "cf9f1e950e0d9d1d3c47184416723cf29c0d1f93bd8cccf37e4beb6b44f31710" dependencies = [ "bytes", "futures-channel", @@ -3688,7 +3693,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core 0.58.0", + "windows-core 0.61.2", ] [[package]] @@ -3749,9 +3754,9 @@ checksum = "00210d6893afc98edb752b664b8890f0ef174c8adbb8d0be9710fa66fbbf72d3" 
[[package]] name = "icu_properties" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2549ca8c7241c82f59c80ba2a6f415d931c5b58d24fb8412caa1a1f02c49139a" +checksum = "016c619c1eeb94efb86809b015c58f479963de65bdb6253345c1a1276f22e32b" dependencies = [ "displaydoc", "icu_collections", @@ -3765,9 +3770,9 @@ dependencies = [ [[package]] name = "icu_properties_data" -version = "2.0.0" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8197e866e47b68f8f7d95249e172903bec06004b18b2937f1095d40a0c57de04" +checksum = "298459143998310acd25ffe6810ed544932242d3f07083eee1084d83a71bd632" [[package]] name = "icu_provider" @@ -4351,9 +4356,9 @@ checksum = "d750af042f7ef4f724306de029d18836c26c1765a54a6a3f094cbd23a7267ffa" [[package]] name = "libloading" -version = "0.8.7" +version = "0.8.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6a793df0d7afeac54f95b471d3af7f0d4fb975699f972341a4b76988d49cdf0c" +checksum = "07033963ba89ebaf1584d767badaa2e8fcec21aedea6b8c0346d487d49c28667" dependencies = [ "cfg-if", "windows-targets 0.53.0", @@ -4784,7 +4789,7 @@ dependencies = [ "thiserror 2.0.12", "tracing", "yamux 0.12.1", - "yamux 0.13.4", + "yamux 0.13.5", ] [[package]] @@ -4793,7 +4798,7 @@ version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c0ff37bd590ca25063e35af745c343cb7a0271906fb7b37e4813e8f79f00268d" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "libc", ] @@ -4825,30 +4830,24 @@ checksum = "0717cef1bc8b636c6e1c1bbdefc09e6322da8a9321966e8928ef80d20f7f770f" [[package]] name = "linkme" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22d227772b5999ddc0690e733f734f95ca05387e329c4084fe65678c51198ffe" +checksum = "a1b1703c00b2a6a70738920544aa51652532cacddfec2e162d2e29eae01e665c" dependencies = [ "linkme-impl", ] [[package]] name = 
"linkme-impl" -version = "0.3.32" +version = "0.3.33" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "71a98813fa0073a317ed6a8055dcd4722a49d9b862af828ee68449adb799b6be" +checksum = "04d55ca5d5a14363da83bf3c33874b8feaa34653e760d5216d7ef9829c88001a" dependencies = [ "proc-macro2", "quote", "syn 2.0.101", ] -[[package]] -name = "linux-raw-sys" -version = "0.4.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d26c52dbd32dccf2d10cac7725f8eae5296885fb5703b261f7d0a0739ec807ab" - [[package]] name = "linux-raw-sys" version = "0.9.4" @@ -5032,13 +5031,13 @@ dependencies = [ [[package]] name = "mio" -version = "1.0.3" +version = "1.0.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2886843bf800fba2e3377cff24abf6379b4c4d5c6681eaf9ea5b0d15090450bd" +checksum = "78bed444cc8a2160f01cbcf811ef18cac863ad68ae8ca62092e8db51d51c761c" dependencies = [ "libc", "wasi 0.11.0+wasi-snapshot-preview1", - "windows-sys 0.52.0", + "windows-sys 0.59.0", ] [[package]] @@ -5947,7 +5946,7 @@ name = "nvml-wrapper" version = "0.10.0" source = "git+https://github.com/atoma-network/nvml-wrapper.git?branch=main#0d416436404473bc11795dacc1c0c5a995d9aa09" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "libloading", "nvml-wrapper-sys", "static_assertions", @@ -6028,13 +6027,19 @@ version = "1.21.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "42f5e15c9953c5e4ccceeb2e7382a716482c34515315f7b03532b8b4e8393d2d" +[[package]] +name = "once_cell_polyfill" +version = "1.70.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" + [[package]] name = "onig" -version = "6.4.0" +version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c4b31c8722ad9171c6d77d3557db078cab2bd50afcc9d09c8b315c59df8ca4f" +checksum = 
"336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags 1.3.2", + "bitflags 2.9.1", "libc", "once_cell", "onig_sys", @@ -6042,9 +6047,9 @@ dependencies = [ [[package]] name = "onig_sys" -version = "69.8.1" +version = "69.9.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b829e3d7e9cc74c7e315ee8edb185bf4190da5acde74afd7fc59c35b1f086e7" +checksum = "c7f86c6eef3d6df15f23bcfb6af487cbd2fed4e5581d58d5bf1f5f8b7f6727dc" dependencies = [ "cc", "pkg-config", @@ -6062,7 +6067,7 @@ version = "0.10.72" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fedfea7d58a1f73118430a55da6a286e7b044961736ce96a16a17068ea25e5da" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "cfg-if", "foreign-types", "libc", @@ -6350,7 +6355,7 @@ version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "77144664f6aac5f629d7efa815f5098a054beeeca6ccafee5ec453fd2b0c53f9" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "ciborium", "coset", "data-encoding", @@ -6618,15 +6623,15 @@ checksum = "7edddbd0b52d732b21ad9a5fab5c704c14cd949e5e9a1ec5929a24fded1b904c" [[package]] name = "polling" -version = "3.7.4" +version = "3.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a604568c3202727d1507653cb121dbd627a58684eb09a820fd746bee38b4442f" +checksum = "b53a684391ad002dd6a596ceb6c74fd004fdce75f4be2e3f615068abbea5fd50" dependencies = [ "cfg-if", "concurrent-queue", - "hermit-abi 0.4.0", + "hermit-abi 0.5.1", "pin-project-lite", - "rustix 0.38.44", + "rustix", "tracing", "windows-sys 0.59.0", ] @@ -6827,6 +6832,20 @@ dependencies = [ "protobuf", ] +[[package]] +name = "prometheus-http-query" +version = "0.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0fcebfa99f03ae51220778316b37d24981e36322c82c24848f48c5bd0f64cbdb" +dependencies = [ + "enum-as-inner", + "mime", + "reqwest", + "serde", + "time", + 
"url", +] + [[package]] name = "proptest" version = "1.6.0" @@ -6835,7 +6854,7 @@ checksum = "14cae93065090804185d3b75f0bf93b8eeda30c7a9b4a33d3bdb3988d6229e50" dependencies = [ "bit-set", "bit-vec", - "bitflags 2.9.0", + "bitflags 2.9.1", "lazy_static", "num-traits", "rand 0.8.5", @@ -7127,7 +7146,7 @@ version = "11.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c6df7ab838ed27997ba19a4664507e6f82b41fe6e20be42929332156e5e85146" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", ] [[package]] @@ -7191,7 +7210,7 @@ version = "0.5.12" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "928fca9cf2aa042393a8325b9ead81d2f0df4cb12e1e24cef072922ccd99c5af" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", ] [[package]] @@ -7410,7 +7429,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b91f7eff05f748767f183df4320a63d6936e9c6107d97c9e6bdd9784f4289c94" dependencies = [ "base64 0.21.7", - "bitflags 2.9.0", + "bitflags 2.9.1", "serde", "serde_derive", ] @@ -7482,9 +7501,9 @@ dependencies = [ [[package]] name = "rust-embed" -version = "8.7.1" +version = "8.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "60e425e204264b144d4c929d126d0de524b40a961686414bab5040f7465c71be" +checksum = "025908b8682a26ba8d12f6f2d66b987584a4a87bc024abc5bbc12553a8cd178a" dependencies = [ "rust-embed-impl", "rust-embed-utils", @@ -7493,9 +7512,9 @@ dependencies = [ [[package]] name = "rust-embed-impl" -version = "8.7.0" +version = "8.7.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bf418c9a2e3f6663ca38b8a7134cc2c2167c9d69688860e8961e3faa731702e" +checksum = "6065f1a4392b71819ec1ea1df1120673418bf386f50de1d6f54204d836d4349c" dependencies = [ "proc-macro2", "quote", @@ -7506,9 +7525,9 @@ dependencies = [ [[package]] name = "rust-embed-utils" -version = "8.7.0" +version = "8.7.2" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "08d55b95147fe01265d06b3955db798bdaed52e60e2211c41137701b3aba8e21" +checksum = "f6cc0c81648b20b70c491ff8cce00c1c3b223bb8ed2b5d41f0e54c6c4c0a3594" dependencies = [ "sha2 0.10.9", "walkdir", @@ -7576,29 +7595,16 @@ dependencies = [ "nom", ] -[[package]] -name = "rustix" -version = "0.38.44" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" -dependencies = [ - "bitflags 2.9.0", - "errno", - "libc", - "linux-raw-sys 0.4.15", - "windows-sys 0.59.0", -] - [[package]] name = "rustix" version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c71e83d6afe7ff64890ec6b71d6a69bb8a610ab78ce364b3352876bb4c801266" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "errno", "libc", - "linux-raw-sys 0.9.4", + "linux-raw-sys", "windows-sys 0.59.0", ] @@ -7654,7 +7660,7 @@ version = "0.5.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "19787cda76408ec5404443dc8b31795c87cd8fec49762dc75fa727740d34acc1" dependencies = [ - "core-foundation 0.10.0", + "core-foundation 0.10.1", "core-foundation-sys", "jni", "log", @@ -7698,9 +7704,9 @@ dependencies = [ [[package]] name = "rustversion" -version = "1.0.20" +version = "1.0.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "eded382c5f5f786b989652c49544c4877d9f015cc22e145a5ea8ea66c2921cd2" +checksum = "8a0d197bd2c9dc6e53b84da9556a69ba4cdfab8619eb41a8bd1cc2027a0f6b1d" [[package]] name = "rusty-fork" @@ -7854,7 +7860,7 @@ version = "2.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "897b2245f0b511c87893af39b033e5ca9cce68824c4d7e7630b5a1d339658d02" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "core-foundation-sys", "libc", @@ -7867,8 +7873,8 @@ version = "3.2.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "271720403f46ca04f7ba6f55d438f8bd878d6b8ca0a1046e8228c4145bcbb316" dependencies = [ - "bitflags 2.9.0", - "core-foundation 0.10.0", + "bitflags 2.9.1", + "core-foundation 0.10.1", "core-foundation-sys", "libc", "security-framework-sys", @@ -8438,9 +8444,9 @@ dependencies = [ [[package]] name = "socket2" -version = "0.5.9" +version = "0.5.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f5fd57c80058a56cf5c777ab8a126398ece8e442983605d280a44ce79d0edef" +checksum = "e22376abed350d73dd1cd119b57ffccad95b4e585a7cda43e286245ce23c0678" dependencies = [ "libc", "windows-sys 0.52.0", @@ -8607,7 +8613,7 @@ checksum = "aa003f0038df784eb8fecbbac13affe3da23b45194bd57dba231c8f48199c526" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.0", + "bitflags 2.9.1", "byteorder", "bytes", "chrono", @@ -8650,7 +8656,7 @@ checksum = "db58fcd5a53cf07c184b154801ff91347e4c30d17a3562a635ff028ad5deda46" dependencies = [ "atoi", "base64 0.22.1", - "bitflags 2.9.0", + "bitflags 2.9.1", "byteorder", "chrono", "crc", @@ -9360,7 +9366,7 @@ version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c879d448e9d986b661742763247d3693ed13609438cf3d006f51f5368a5ba6b" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "core-foundation 0.9.4", "system-configuration-sys", ] @@ -9420,7 +9426,7 @@ dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.0.7", + "rustix", "windows-sys 0.59.0", ] @@ -9439,7 +9445,7 @@ version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "45c6481c4829e4cc63825e62c49186a34538b7b2750b73b266581ffb612fb5ed" dependencies = [ - "rustix 1.0.7", + "rustix", "windows-sys 0.59.0", ] @@ -9940,7 +9946,7 @@ checksum = "1e9cd434a998747dd2c4276bc96ee2e0c7a2eadf3cae88e52be55a05fa9053f5" dependencies = [ "async-compression", "base64 0.21.7", - "bitflags 2.9.0", + "bitflags 2.9.1", "bytes", 
"futures-core", "futures-util", @@ -9969,7 +9975,7 @@ version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fdb0c213ca27a9f57ab69ddb290fd80d970922355b83ae380b395d3986b8a2e" dependencies = [ - "bitflags 2.9.0", + "bitflags 2.9.1", "bytes", "http 1.3.1", "pin-project-lite", @@ -10443,13 +10449,15 @@ dependencies = [ [[package]] name = "uuid" -version = "1.16.0" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "458f7a779bf54acc9f347480ac654f68407d3aab21269a6e3c9f922acd9e2da9" +checksum = "3cf4199d1e5d15ddd86a694e4d0dffa9c323ce759fea589f00fef9d81cc1931d" dependencies = [ "getrandom 0.3.3", + "js-sys", "rand 0.9.1", "serde", + "wasm-bindgen", ] [[package]] @@ -10784,12 +10792,24 @@ dependencies = [ [[package]] name = "windows" -version = "0.58.0" +version = "0.61.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dd04d41d93c4992d421894c18c8b43496aa748dd4c081bac0dc93eb0489272b6" +checksum = "c5ee8f3d025738cb02bad7868bbb5f8a6327501e870bf51f1b455b0a2454a419" dependencies = [ - "windows-core 0.58.0", - "windows-targets 0.52.6", + "windows-collections", + "windows-core 0.61.2", + "windows-future", + "windows-link", + "windows-numerics", +] + +[[package]] +name = "windows-collections" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" +dependencies = [ + "windows-core 0.61.2", ] [[package]] @@ -10816,15 +10836,26 @@ dependencies = [ [[package]] name = "windows-core" -version = "0.58.0" +version = "0.61.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6ba6d44ec8c2591c134257ce647b7ea6b20335bf6379a27dac5f1641fcf59f99" +checksum = "c0fdd3ddb90610c7638aa2b3a3ab2904fb9e5cdbecc643ddb3647212781c4ae3" dependencies = [ - "windows-implement 0.58.0", - "windows-interface 0.58.0", - "windows-result 0.2.0", - "windows-strings 
0.1.0", - "windows-targets 0.52.6", + "windows-implement 0.60.0", + "windows-interface 0.59.1", + "windows-link", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-future" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" +dependencies = [ + "windows-core 0.61.2", + "windows-link", + "windows-threading", ] [[package]] @@ -10840,9 +10871,9 @@ dependencies = [ [[package]] name = "windows-implement" -version = "0.58.0" +version = "0.60.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bbd5b46c938e506ecbce286b6628a02171d56153ba733b6c741fc627ec9579b" +checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" dependencies = [ "proc-macro2", "quote", @@ -10862,9 +10893,9 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.58.0" +version = "0.59.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "053c4c462dc91d3b1504c6fe5a726dd15e216ba718e84a0e46a88fbe5ded3515" +checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" dependencies = [ "proc-macro2", "quote", @@ -10877,13 +10908,23 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "76840935b766e1b0a05c0066835fb9ec80071d4c09a16f6bd5f7e655e3c14c38" +[[package]] +name = "windows-numerics" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" +dependencies = [ + "windows-core 0.61.2", + "windows-link", +] + [[package]] name = "windows-registry" version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4286ad90ddb45071efd1a66dfa43eb02dd0dfbae1545ad6cc3c51cf34d7e8ba3" dependencies = [ - "windows-result 0.3.2", + "windows-result 0.3.4", "windows-strings 0.3.1", 
"windows-targets 0.53.0", ] @@ -10899,37 +10940,27 @@ dependencies = [ [[package]] name = "windows-result" -version = "0.2.0" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d1043d8214f791817bab27572aaa8af63732e11bf84aa21a45a78d6c317ae0e" -dependencies = [ - "windows-targets 0.52.6", -] - -[[package]] -name = "windows-result" -version = "0.3.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c64fd11a4fd95df68efcfee5f44a294fe71b8bc6a91993e2791938abcc712252" +checksum = "56f42bd332cc6c8eac5af113fc0c1fd6a8fd2aa08a0119358686e5160d0586c6" dependencies = [ "windows-link", ] [[package]] name = "windows-strings" -version = "0.1.0" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd9b125c486025df0eabcb585e62173c6c9eddcec5d117d3b6e8c30e2ee4d10" +checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" dependencies = [ - "windows-result 0.2.0", - "windows-targets 0.52.6", + "windows-link", ] [[package]] name = "windows-strings" -version = "0.3.1" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87fa48cc5d406560701792be122a10132491cff9d0aeb23583cc2dcafc847319" +checksum = "56e6c93f3a0c3b36176cb1327a4958a0353d5d166c2a35cb268ace15e91d3b57" dependencies = [ "windows-link", ] @@ -11032,6 +11063,15 @@ dependencies = [ "windows_x86_64_msvc 0.53.0", ] +[[package]] +name = "windows-threading" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b66463ad2e0ea3bbf808b7f1d371311c80e115c0b71d60efc142cafbcfb057a6" +dependencies = [ + "windows-link", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.42.2" @@ -11246,7 +11286,7 @@ version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags 2.9.0", + "bitflags 
2.9.1", ] [[package]] @@ -11381,16 +11421,16 @@ dependencies = [ [[package]] name = "yamux" -version = "0.13.4" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "17610762a1207ee816c6fadc29220904753648aba0a9ed61c7b8336e80a559c4" +checksum = "3da1acad1c2dc53f0dde419115a38bd8221d8c3e47ae9aeceaf453266d29307e" dependencies = [ "futures", "log", "nohash-hasher", "parking_lot", "pin-project", - "rand 0.8.5", + "rand 0.9.1", "static_assertions", "web-time", ] diff --git a/Cargo.toml b/Cargo.toml index 01d6cb12..8dfcf6b9 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -74,5 +74,6 @@ url = "2.5.4" utoipa = "5.3.1" utoipa-swagger-ui = "9.0.2" uuid = "1.15.1" +validator = { version = "0.20.0", features = [ "derive" ] } x25519-dalek = "2.0.1" zeroize = "1.8.1" diff --git a/atoma-auth/Cargo.toml b/atoma-auth/Cargo.toml index 5cf228e7..ce164035 100644 --- a/atoma-auth/Cargo.toml +++ b/atoma-auth/Cargo.toml @@ -34,6 +34,7 @@ sui-sdk-types = { workspace = true, features = [ "serde" ] } thiserror.workspace = true tokio.workspace = true tracing.workspace = true +validator.workspace = true [features] google-oauth = [ ] diff --git a/atoma-auth/src/config.rs b/atoma-auth/src/config.rs index ace0182d..7b12c820 100644 --- a/atoma-auth/src/config.rs +++ b/atoma-auth/src/config.rs @@ -1,23 +1,48 @@ -use config::Config; +use config::{Config, File}; use serde::{Deserialize, Serialize}; use std::path::Path; +use thiserror::Error; +use validator::Validate; -/// Configuration for Postgres database connection. 
-#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Error, Debug)] +pub enum AuthConfigError { + #[error("Invalid auth configuration: {0}")] + InvalidConfig(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), + + #[error("Validation error: {0}")] + ValidationError(String), +} + +#[derive(Clone, Debug, Serialize, Deserialize, Validate)] pub struct AtomaAuthConfig { - /// The secret key for JWT authentication. - pub secret_key: String, - /// The access token lifetime in minutes. + /// Access token validity duration in minutes + #[validate(range(min = 1, message = "access token lifetime must be at least 1 minute"))] pub access_token_lifetime: usize, - /// The refresh token lifetime in days. - pub refresh_token_lifetime: usize, - /// Google client id. + + /// Google OAuth client ID (required only when google-oauth feature is enabled) + #[validate(length( + min = 1, + message = "google client ID cannot be empty when google-oauth is enabled" + ))] #[cfg(feature = "google-oauth")] pub google_client_id: String, + + /// Refresh token validity duration in days + #[validate(range(min = 1, message = "refresh token lifetime must be at least 1 day"))] + pub refresh_token_lifetime: usize, + + /// JWT signing key for token generation + #[validate(length(min = 1, message = "secret key cannot be empty"))] + pub secret_key: String, } impl AtomaAuthConfig { - /// Constructor #[must_use] pub const fn new( secret_key: String, @@ -26,54 +51,46 @@ impl AtomaAuthConfig { #[cfg(feature = "google-oauth")] google_client_id: String, ) -> Self { Self { - secret_key, access_token_lifetime, - refresh_token_lifetime, #[cfg(feature = "google-oauth")] google_client_id, + refresh_token_lifetime, + secret_key, } } - /// Creates a new `AtomaAuthConfig` instance from a configuration file. 
+ /// Validates the auth configuration /// - /// # Arguments + /// # Errors /// - /// * `config_file_path` - A path-like object representing the location of the configuration file. + /// Returns `AuthConfigError::ValidationError` if any validation rules fail + pub fn validate(&self) -> Result<(), AuthConfigError> { + Validate::validate(self).map_err(|e| AuthConfigError::ValidationError(e.to_string())) + } + + /// Loads configuration from a file path /// - /// # Returns + /// # Errors /// - /// Returns a new `AtomaAuthConfig` instance populated with values from the configuration file. + /// Returns `AuthConfigError` if the configuration file cannot be read or parsed /// /// # Panics /// - /// This method will panic if: - /// - The configuration file cannot be read or parsed. - /// - The "atoma-auth" section is missing from the configuration file. - /// - The required fields are missing or have invalid types in the configuration file. - /// - /// # Examples - /// - /// ```rust,ignore - /// use std::path::Path; - /// use atoma_node::atoma_state::AtomaAuthConfig; - /// - /// let config = AtomaAuthConfig::from_file_path("path/to/config.toml"); - /// ``` - pub fn from_file_path>(config_file_path: P) -> Self { + /// Panics if the path cannot be converted to a string + pub fn from_file_path>(config_file_path: P) -> Result { let builder = Config::builder() - .add_source(config::File::with_name( - config_file_path.as_ref().to_str().unwrap(), - )) + .add_source(File::with_name(config_file_path.as_ref().to_str().unwrap())) .add_source( config::Environment::with_prefix("ATOMA_AUTH") .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma state configuration file"); - config - .get::("atoma_auth") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_auth")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git 
a/atoma-proxy-service/Cargo.toml b/atoma-proxy-service/Cargo.toml index b935b4c6..47e9b26d 100644 --- a/atoma-proxy-service/Cargo.toml +++ b/atoma-proxy-service/Cargo.toml @@ -24,6 +24,7 @@ tokio = { workspace = true, features = [ "full" ] } tower-http = { workspace = true, features = [ "cors" ] } tracing.workspace = true tracing-subscriber.workspace = true +url = "2.5.0" utoipa = { workspace = true, features = [ "axum_extras" ] } utoipa-swagger-ui = { workspace = true, features = [ "axum" ] } diff --git a/atoma-proxy-service/src/config.rs b/atoma-proxy-service/src/config.rs index 98988800..4dfa823f 100644 --- a/atoma-proxy-service/src/config.rs +++ b/atoma-proxy-service/src/config.rs @@ -1,6 +1,24 @@ use config::{Config, File}; use serde::Deserialize; use std::path::Path; +use thiserror::Error; +use url::Url; + +#[derive(Error, Debug)] +pub enum ProxyServiceConfigError { + #[error("Invalid service bind address: {0}")] + InvalidBindAddress(String), + + #[error("Invalid Grafana URL: {0}")] + InvalidGrafanaUrl(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), +} + /// Configuration for the Atoma proxy service /// /// This struct holds the configuration parameters needed to run the Atoma Proxy Service, @@ -23,14 +41,69 @@ pub struct AtomaProxyServiceConfig { /// Only dashboards tagged with this tag will be proxied as stats pub grafana_stats_tag: String, - /// Sentry DSN for error reporting - pub sentry_dsn: Option, - - /// Environment - pub environment: Option, + /// Enable Grafana dashboard and stats proxy + pub enable_grafana_proxy: Option, } impl AtomaProxyServiceConfig { + /// Validates the proxy service configuration + /// + /// # Returns + /// + /// Returns `Ok(())` if the configuration is valid, or a `ProxyServiceConfigError` if there are any validation errors. 
+ /// + /// # Errors + /// + /// Returns a `ProxyServiceConfigError` if: + /// * The service bind address is empty + /// * The Grafana URL is empty or invalid + /// * The Grafana API token is empty + /// * The Grafana dashboard tag is empty + /// * The Grafana stats tag is empty + pub fn validate(&self) -> Result<(), ProxyServiceConfigError> { + // Validate service bind address + if self.service_bind_address.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "service_bind_address".to_string(), + )); + } + + // Validate Grafana URL + if self.grafana_url.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_url".to_string(), + )); + } + if Url::parse(&self.grafana_url).is_err() { + return Err(ProxyServiceConfigError::InvalidGrafanaUrl( + self.grafana_url.clone(), + )); + } + + // Validate Grafana API token + if self.grafana_api_token.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_api_token".to_string(), + )); + } + + // Validate Grafana dashboard tag + if self.grafana_dashboard_tag.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_dashboard_tag".to_string(), + )); + } + + // Validate Grafana stats tag + if self.grafana_stats_tag.is_empty() { + return Err(ProxyServiceConfigError::MissingField( + "grafana_stats_tag".to_string(), + )); + } + + Ok(()) + } + /// Creates a new AtomaProxyServiceConfig instance from a configuration file /// /// # Arguments @@ -44,13 +117,20 @@ impl AtomaProxyServiceConfig { /// /// Returns a new `AtomaProxyServiceConfig` instance populated with values from the config file. 
/// - /// # Panics + /// # Errors /// - /// This method will panic if: + /// Returns a `ProxyServiceConfigError` if: /// * The configuration file cannot be read or parsed /// * The "atoma-proxy-service" section is missing from the configuration /// * The configuration format doesn't match the expected structure - pub fn from_file_path>(config_file_path: P) -> Self { + /// * The configuration fails validation + /// + /// # Panics + /// + /// Panics if the path cannot be converted to a string. + pub fn from_file_path>( + config_file_path: P, + ) -> Result { let builder = Config::builder() .add_source(File::with_name(config_file_path.as_ref().to_str().unwrap())) .add_source( @@ -58,11 +138,12 @@ impl AtomaProxyServiceConfig { .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma-proxy-service configuration file"); - config - .get::("atoma_proxy_service") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_proxy_service")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-proxy/Cargo.toml b/atoma-proxy/Cargo.toml index 679cf731..b09d6f4d 100644 --- a/atoma-proxy/Cargo.toml +++ b/atoma-proxy/Cargo.toml @@ -56,6 +56,7 @@ url = { workspace = true } utoipa = { workspace = true, features = [ "axum_extras", "preserve_path_order" ] } utoipa-swagger-ui = { workspace = true, features = [ "axum" ] } uuid = { workspace = true } +validator = { workspace = true } [features] google-oauth = [ "atoma-auth/google-oauth", "atoma-proxy-service/google-oauth" ] diff --git a/atoma-proxy/src/main.rs b/atoma-proxy/src/main.rs index 7e359324..84c226f1 100644 --- a/atoma-proxy/src/main.rs +++ b/atoma-proxy/src/main.rs @@ -24,7 +24,8 @@ mod server; mod telemetry; /// Command line arguments for the Atoma node -#[derive(Parser)] +#[derive(Parser, Debug)] +#[command(author, version, about, long_about = None)] struct Args { /// 
Path to the configuration file #[arg(short, long)] @@ -36,36 +37,40 @@ struct Args { /// This struct holds the configuration settings for various components /// of the Atoma proxy, including the Sui, service, and state manager configurations. #[derive(Debug)] -struct Config { +pub struct Config { /// Configuration for the Sui component. - sui: AtomaSuiConfig, + pub sui: AtomaSuiConfig, /// Configuration for the service component. - service: AtomaServiceConfig, + pub service: AtomaServiceConfig, /// Configuration for the state manager component. - state: AtomaStateManagerConfig, + pub state: AtomaStateManagerConfig, /// Configuration for the proxy service component. - proxy_service: AtomaProxyServiceConfig, + pub proxy_service: AtomaProxyServiceConfig, /// Configuration for the authentication component. - auth: AtomaAuthConfig, + pub auth: AtomaAuthConfig, /// Configuration for the P2P component. - p2p: AtomaP2pNodeConfig, + pub p2p: AtomaP2pNodeConfig, } impl Config { - fn load(path: String) -> Self { - Self { + fn load(path: String) -> Result { + Ok(Self { sui: AtomaSuiConfig::from_file_path(path.clone()), - service: AtomaServiceConfig::from_file_path(path.clone()), - state: AtomaStateManagerConfig::from_file_path(path.clone()), - proxy_service: AtomaProxyServiceConfig::from_file_path(path.clone()), - auth: AtomaAuthConfig::from_file_path(path.clone()), + service: AtomaServiceConfig::from_file_path(path.clone()) + .context("failed to load service configuration")?, + state: AtomaStateManagerConfig::from_file_path(path.clone()) + .context("failed to load state manager configuration")?, + proxy_service: AtomaProxyServiceConfig::from_file_path(path.clone()) + .context("Failed to load proxy configuration")?, + auth: AtomaAuthConfig::from_file_path(path.clone()) + .context("Failed to load auth configuration")?, p2p: AtomaP2pNodeConfig::from_file_path(path), - } + }) } } @@ -81,11 +86,11 @@ async fn main() -> Result<()> { let args = Args::parse(); tracing::info!("Loading 
configuration from: {}", args.config_path); - let config = Config::load(args.config_path); + let config = Config::load(args.config_path).context("Failed to load configuration")?; tracing::info!("Configuration loaded successfully"); // Initialize Sentry only if DSN is provided - let _guard = config.proxy_service.sentry_dsn.map_or_else( + let _guard = config.service.sentry_dsn.clone().map_or_else( || { info!("No Sentry DSN provided, skipping Sentry initialization"); None @@ -101,7 +106,7 @@ async fn main() -> Result<()> { send_default_pii: false, environment: Some(std::borrow::Cow::Owned( config - .proxy_service + .service .environment .clone() .unwrap_or_else(|| "development".to_string()), diff --git a/atoma-proxy/src/server/config.rs b/atoma-proxy/src/server/config.rs index d7e68c7a..73a4d75a 100644 --- a/atoma-proxy/src/server/config.rs +++ b/atoma-proxy/src/server/config.rs @@ -2,52 +2,164 @@ use std::path::Path; use atoma_proxy_service::ModelModality; use serde::Deserialize; +use thiserror::Error; +use validator::{Validate, ValidationError}; use config::{Config, File}; +#[derive(Error, Debug)] +pub enum ServiceConfigError { + #[error("Invalid service bind address: {0}")] + InvalidBindAddress(String), + + #[error("Invalid heartbeat URL: {0}")] + InvalidHeartbeatUrl(String), + + #[error("Invalid model configuration: {0}")] + InvalidModelConfig(String), + + #[error("Missing required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), + + #[error("Validation error: {0}")] + ValidationError(String), +} + /// Configuration for the Atoma Service. /// /// This struct holds the configuration options for the Atoma Service, /// including URLs for various services and a list of models. -#[derive(Debug, Deserialize)] +#[derive(Debug, Deserialize, Validate)] pub struct AtomaServiceConfig { /// Bind address for the Atoma Proxy Server. 
/// /// This field specifies the address and port on which the Atoma Proxy Server will bind. + #[validate(custom(function = "validate_bind_address"))] pub service_bind_address: String, /// List of model names. /// /// This field contains a list of model names that are deployed by the Atoma Service, /// on behalf of the node. + #[validate(length(min = 1, message = "at least one model must be specified"))] pub models: Vec, /// List of model revisions. /// /// This field contains a list of the associated model revisions, for each /// model that is currently supported by the Atoma Service. + #[validate(length(min = 1, message = "at least one revision must be specified"))] pub revisions: Vec, /// List of model modalities. /// /// This field contains a list of the associated model modalities, for each /// model that is currently supported by the Atoma Service. + #[validate(length(min = 1, message = "at least one modality must be specified"))] + #[validate(custom(function = "validate_modalities"))] pub modalities: Vec>, /// Hugging face api token. /// /// This field contains the Hugging Face API token that is used to authenticate /// requests to the Hugging Face API. + #[validate(length(min = 1, message = "HF token cannot be empty"))] pub hf_token: String, /// Path to open router json. + #[validate(length(min = 1, message = "open router models file path cannot be empty"))] pub open_router_models_file: String, /// Heartbeat URL. + #[validate(url(message = "heartbeat_url must be a valid URL"))] pub heartbeat_url: String, + + /// Sentry DSN for error reporting + pub sentry_dsn: Option, + + /// Environment + pub environment: Option, +} +/// Validates the bind address format. 
+/// +/// This function checks if the provided bind address is valid by ensuring: +/// - The address is not empty +/// - The address follows the format "host:port" +/// - The port is a valid number between 0 and 65535 +/// +/// # Arguments +/// +/// * `addr` - The bind address string to validate +/// +/// # Returns +/// +/// Returns `Ok(())` if the address is valid, or a `ValidationError` if: +/// - The address is empty +/// - The address format is invalid (not host:port) +/// - The port is not a valid number +fn validate_bind_address(addr: &str) -> Result<(), ValidationError> { + if addr.is_empty() { + return Err(ValidationError::new("empty_bind_address")); + } + + // Basic format validation for bind address (host:port) + let parts: Vec<&str> = addr.split(':').collect(); + if parts.len() != 2 { + return Err(ValidationError::new("invalid_bind_address_format")); + } + + // Validate port is a number + if parts[1].parse::().is_err() { + return Err(ValidationError::new("invalid_port_number")); + } + + Ok(()) +} + +/// Validates the modalities configuration. +/// +/// This function checks if the provided modalities configuration is valid by ensuring: +/// - Each model has at least one modality specified +/// +/// # Arguments +/// +/// * `modalities` - A slice of vectors containing model modalities to validate +/// +/// # Returns +/// +/// Returns `Ok(())` if the modalities configuration is valid, or a `ValidationError` if: +/// - Any model has an empty modalities list +fn validate_modalities(modalities: &[Vec]) -> Result<(), ValidationError> { + for model_modalities in modalities { + if model_modalities.is_empty() { + return Err(ValidationError::new("empty_modalities")); + } + } + Ok(()) } impl AtomaServiceConfig { + /// Validates the service configuration + /// + /// # Returns + /// + /// Returns `Ok(())` if the configuration is valid, or a `ServiceConfigError` if there are any validation errors. 
+ /// + /// # Errors + /// + /// Returns a `ServiceConfigError` if: + /// - The service bind address is invalid + /// - The heartbeat URL is invalid + /// - The models configuration is invalid + /// - The HF token is empty + /// - The open router models file path is empty + pub fn validate(&self) -> Result<(), ServiceConfigError> { + Validate::validate(self).map_err(|e| ServiceConfigError::ValidationError(e.to_string())) + } + /// Creates a new `AtomaServiceConfig` instance from a configuration file. /// /// # Arguments @@ -60,13 +172,18 @@ impl AtomaServiceConfig { /// /// Returns a new `AtomaServiceConfig` instance populated with values from the config file. /// - /// # Panics + /// # Errors /// - /// This method will panic if: + /// Returns a `ServiceConfigError` if: /// * The configuration file cannot be read or parsed /// * The "atoma-service" section is missing from the configuration /// * The configuration format doesn't match the expected structure - pub fn from_file_path>(config_file_path: P) -> Self { + /// * The configuration fails validation + /// + /// # Panics + /// + /// This method will panic if the path cannot be converted to a string. 
+ pub fn from_file_path>(config_file_path: P) -> Result { let builder = Config::builder() .add_source(File::with_name(config_file_path.as_ref().to_str().unwrap())) .add_source( @@ -74,11 +191,12 @@ impl AtomaServiceConfig { .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma-service configuration file"); - config - .get::("atoma_service") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_service")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs index 3f6007fe..6f364913 100644 --- a/atoma-proxy/src/server/middleware.rs +++ b/atoma-proxy/src/server/middleware.rs @@ -50,7 +50,7 @@ const DEFAULT_IMAGE_RESOLUTION: u64 = 1024 * 1024; /// Maximum size of the body in bytes. /// This is to prevent DoS attacks by limiting the size of the request body. -const MAX_BODY_SIZE: usize = 1024 * 1024; // 1MB +const MAX_BODY_SIZE: usize = 1024 * 1024 * 1024; // 1GB /// Metadata extension for tracking request-specific information about the selected inference node. 
/// diff --git a/atoma-state/Cargo.toml b/atoma-state/Cargo.toml index cbf4fa0d..cc143b51 100644 --- a/atoma-state/Cargo.toml +++ b/atoma-state/Cargo.toml @@ -31,6 +31,7 @@ topology = { workspace = true } tracing = { workspace = true } url = { workspace = true } utoipa = { workspace = true } +validator = { workspace = true } [dev-dependencies] futures = { workspace = true } diff --git a/atoma-state/src/config.rs b/atoma-state/src/config.rs index a2dd6d14..8ba29c02 100644 --- a/atoma-state/src/config.rs +++ b/atoma-state/src/config.rs @@ -1,32 +1,73 @@ use config::Config; use serde::{Deserialize, Serialize}; use std::path::Path; +use url::Url; +use validator::{Validate, ValidationError}; +use crate::config_error::ConfigError; use crate::types::Modalities; /// Configuration for the Atoma State Manager instance. -#[derive(Clone, Debug, Deserialize, Serialize)] +#[derive(Clone, Debug, Deserialize, Serialize, Validate)] pub struct AtomaStateManagerConfig { /// The URL of the Postgres database. + #[validate(custom(function = "validate_postgres_url"))] pub database_url: String, /// The configuration for metrics collection. + #[validate(nested)] pub metrics_collection: MetricsCollectionConfig, } /// Configuration for metrics collection. -#[derive(Debug, Clone, Deserialize, Serialize)] +#[derive(Debug, Clone, Deserialize, Serialize, Validate)] pub struct MetricsCollectionConfig { /// The URL endpoint where metrics can be collected from. + #[validate(url(message = "metrics_url must be a valid URL"))] pub metrics_url: String, /// A vector of tuples containing modality types and their corresponding model identifiers. + #[validate(length(min = 1, message = "at least one model must be specified"))] + #[validate(custom(function = "validate_models"))] pub models: Vec<(Modalities, String)>, /// Optional parameter to limit the number of best nodes returned. 
+ #[validate(range(min = 1, message = "top_k must be greater than 0"))] pub top_k: Option, } +fn validate_postgres_url(url: &str) -> Result<(), ValidationError> { + if url.is_empty() { + return Err(ValidationError::new("empty_database_url")); + } + + // Basic PostgreSQL URL format validation + if !url.starts_with("postgresql://") && !url.starts_with("postgres://") { + return Err(ValidationError::new("invalid_postgres_url_format")); + } + + // Try to parse as URL + if Url::parse(url).is_err() { + return Err(ValidationError::new("invalid_url_format")); + } + + Ok(()) +} + +fn validate_models(models: &[(Modalities, String)]) -> Result<(), ValidationError> { + for (modality, model_id) in models { + if model_id.is_empty() { + return Err(ValidationError::new("empty_model_id")); + } + match modality { + Modalities::ChatCompletions + | Modalities::Embeddings + | Modalities::ImagesGenerations => (), + } + } + Ok(()) +} + impl AtomaStateManagerConfig { /// Constructor #[must_use] @@ -37,6 +78,23 @@ impl AtomaStateManagerConfig { } } + /// Validates the configuration + /// + /// # Returns + /// + /// Returns `Ok(())` if the configuration is valid, or a `ConfigError` if there are any validation errors. + /// + /// # Errors + /// + /// Returns a `ConfigError` if: + /// - The database URL is invalid or empty + /// - The metrics URL is invalid or empty + /// - The models configuration is invalid + /// - The top_k value is invalid + pub fn validate(&self) -> Result<(), ConfigError> { + Validate::validate(self).map_err(|e| ConfigError::ValidationError(e.to_string())) + } + /// Creates a new `AtomaStateManagerConfig` instance from a configuration file. /// /// # Arguments @@ -47,22 +105,18 @@ impl AtomaStateManagerConfig { /// /// Returns a new `AtomaStateManagerConfig` instance populated with values from the configuration file. /// - /// # Panics - /// - /// This method will panic if: - /// - The configuration file cannot be read or parsed. 
- /// - The "atoma-state" section is missing from the configuration file. - /// - The required fields are missing or have invalid types in the configuration file. + /// # Errors /// - /// # Examples + /// Returns a `ConfigError` if: + /// - The configuration file cannot be read or parsed + /// - The "atoma-state" section is missing from the configuration file + /// - The required fields are missing or have invalid types + /// - The configuration fails validation /// - /// ```rust,ignore - /// use std::path::Path; - /// use atoma_node::atoma_state::AtomaStateManagerConfig; + /// # Panics /// - /// let config = AtomaStateManagerConfig::from_file_path("path/to/config.toml"); - /// ``` - pub fn from_file_path>(config_file_path: P) -> Self { + /// This method will panic if the path cannot be converted to a string. + pub fn from_file_path>(config_file_path: P) -> Result { let builder = Config::builder() .add_source(config::File::with_name( config_file_path.as_ref().to_str().unwrap(), @@ -72,11 +126,12 @@ impl AtomaStateManagerConfig { .keep_prefix(true) .separator("__"), ); - let config = builder - .build() - .expect("Failed to generate atoma state configuration file"); - config - .get::("atoma_state") - .expect("Failed to generate configuration instance") + let config = builder.build()?; + let config = config.get::("atoma_state")?; + + // Validate the configuration + config.validate()?; + + Ok(config) } } diff --git a/atoma-state/src/config_error.rs b/atoma-state/src/config_error.rs new file mode 100644 index 00000000..dc84793b --- /dev/null +++ b/atoma-state/src/config_error.rs @@ -0,0 +1,25 @@ +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ConfigError { + #[error("Invalid metrics URL: {0}")] + InvalidMetricsUrl(String), + + #[error("Invalid database URL: {0}")] + InvalidDatabaseUrl(String), + + #[error("Invalid model configuration: {0}")] + InvalidModelConfig(String), + + #[error("Invalid top_k value: {0}")] + InvalidTopK(String), + + #[error("Missing 
required field: {0}")] + MissingField(String), + + #[error("Configuration file error: {0}")] + FileError(#[from] config::ConfigError), + + #[error("Validation error: {0}")] + ValidationError(String), +} diff --git a/atoma-state/src/handlers.rs b/atoma-state/src/handlers.rs index 3b9ddee1..ba488ff6 100644 --- a/atoma-state/src/handlers.rs +++ b/atoma-state/src/handlers.rs @@ -14,7 +14,6 @@ use tracing::{error, info, instrument, trace}; use crate::{ state_manager::Result, - timestamp_to_datetime_or_now, types::{AtomaAtomaStateManagerEvent, Stack, StackSettlementTicket}, AtomaStateManager, AtomaStateManagerError, }; @@ -101,13 +100,8 @@ pub async fn handle_atoma_event( AtomaEvent::NodeUnsubscribedFromTaskEvent(event) => { handle_node_task_unsubscription_event(state_manager, event).await } - AtomaEvent::StackCreatedEvent((event, timestamp)) => { - handle_create_stack_stats( - state_manager, - event, - timestamp_to_datetime_or_now(timestamp), - ) - .await?; + AtomaEvent::StackCreatedEvent(event) => { + handle_create_stack_stats(state_manager, event).await?; Ok(()) } AtomaEvent::StackCreateAndUpdateEvent(event) => { @@ -115,13 +109,8 @@ pub async fn handle_atoma_event( info!("Stack creates and update event: {:?}", event); Ok(()) } - AtomaEvent::StackTrySettleEvent((event, timestamp)) => { - handle_stack_try_settle_event( - state_manager, - event, - timestamp_to_datetime_or_now(timestamp), - ) - .await + AtomaEvent::StackTrySettleEvent(event) => { + handle_stack_try_settle_event(state_manager, event).await } AtomaEvent::ClaimedStackEvent(event) => { handle_claimed_stack_event(state_manager, event).await @@ -613,12 +602,11 @@ pub async fn handle_stack_created_event( pub async fn handle_create_stack_stats( state_manager: &AtomaStateManager, event: StackCreatedEvent, - timestamp: DateTime, ) -> Result<()> { let stack = event.into(); state_manager .state - .new_stats_stack(stack, timestamp) + .new_stats_stack(stack, Utc::now()) .await?; Ok(()) } @@ -652,7 +640,6 @@ pub async 
fn handle_create_stack_stats( pub async fn handle_stack_try_settle_event( state_manager: &AtomaStateManager, event: StackTrySettleEvent, - timestamp: DateTime, ) -> Result<()> { trace!( target = "atoma-state-handlers", @@ -662,7 +649,7 @@ pub async fn handle_stack_try_settle_event( let stack_settlement_ticket = StackSettlementTicket::try_from(event)?; state_manager .state - .insert_new_stack_settlement_ticket(stack_settlement_ticket, timestamp) + .insert_new_stack_settlement_ticket(stack_settlement_ticket, Utc::now()) .await?; Ok(()) } diff --git a/atoma-state/src/lib.rs b/atoma-state/src/lib.rs index a919d749..c97ec12f 100644 --- a/atoma-state/src/lib.rs +++ b/atoma-state/src/lib.rs @@ -4,6 +4,7 @@ #![allow(clippy::doc_markdown)] pub mod config; +pub mod config_error; pub mod errors; pub mod handlers; pub mod metrics; diff --git a/atoma-state/src/metrics.rs b/atoma-state/src/metrics.rs index 997dbe40..ac35d520 100644 --- a/atoma-state/src/metrics.rs +++ b/atoma-state/src/metrics.rs @@ -691,9 +691,6 @@ impl NodeMetricsCollector { self.chat_completions_gpu_kv_cache_usage .with_label_values(&[model, node_small_id.to_string().as_str()]) .set(chat_completions.gpu_kv_cache_usage_perc); - self.chat_completions_cpu_kv_cache_usage - .with_label_values(&[model, node_small_id.to_string().as_str()]) - .set(chat_completions.cpu_kv_cache_usage_perc); self.chat_completions_ttft .with_label_values(&[model, node_small_id.to_string().as_str()]) .set(chat_completions.time_to_first_token); diff --git a/atoma-state/src/state_manager.rs b/atoma-state/src/state_manager.rs index ea6961bd..50e490f9 100644 --- a/atoma-state/src/state_manager.rs +++ b/atoma-state/src/state_manager.rs @@ -4073,18 +4073,13 @@ impl AtomaState { /// ``` #[instrument(level = "trace", skip(self))] pub async fn new_stats_stack(&self, stack: Stack, timestamp: DateTime) -> Result<()> { - let timestamp = timestamp - .with_second(0) - .and_then(|t| t.with_minute(0)) - .and_then(|t| t.with_nanosecond(0)) - 
.ok_or(AtomaStateManagerError::InvalidTimestamp)?; sqlx::query( "INSERT into stats_stacks (timestamp,num_compute_units) VALUES ($1,$2) ON CONFLICT (timestamp) DO UPDATE SET num_compute_units = stats_stacks.num_compute_units + EXCLUDED.num_compute_units", ) - .bind(timestamp) + .bind(Utc::now()) .bind(stack.num_compute_units) .execute(&self.db) .await?; diff --git a/atoma-state/src/tests.rs b/atoma-state/src/tests.rs index 3c0040b0..1df93763 100644 --- a/atoma-state/src/tests.rs +++ b/atoma-state/src/tests.rs @@ -1242,8 +1242,8 @@ fn test_store_chat_completions_metrics() { // Create test data let chat_completions = ChatCompletionsMetrics { + avg_request_queue_latency: Some(0.0), gpu_kv_cache_usage_perc: 75.5, - cpu_kv_cache_usage_perc: 45.2, time_to_first_token: 0.15, time_per_output_token: 0.05, num_running_requests: 3, @@ -1310,8 +1310,8 @@ fn test_store_chat_completions_metrics_multiple_models() { // Test data for first model let chat_completions_1 = ChatCompletionsMetrics { + avg_request_queue_latency: Some(0.0), gpu_kv_cache_usage_perc: 75.5, - cpu_kv_cache_usage_perc: 45.2, time_to_first_token: 0.15, time_per_output_token: 0.05, num_running_requests: 3, @@ -1322,8 +1322,8 @@ fn test_store_chat_completions_metrics_multiple_models() { // Test data for second model let chat_completions_2 = ChatCompletionsMetrics { + avg_request_queue_latency: Some(0.0), gpu_kv_cache_usage_perc: 60.0, - cpu_kv_cache_usage_perc: 30.0, time_to_first_token: 0.1, time_per_output_token: 0.03, num_running_requests: 1, @@ -1827,8 +1827,8 @@ fn test_reset_metrics() { // Store some test metrics let chat_completions = ChatCompletionsMetrics { + avg_request_queue_latency: Some(0.0), gpu_kv_cache_usage_perc: 75.5, - cpu_kv_cache_usage_perc: 45.2, time_to_first_token: 0.15, time_per_output_token: 0.05, num_running_requests: 3, @@ -2009,8 +2009,8 @@ fn test_store_metrics() { model_metrics.insert( "gpt-4".to_string(), ModelMetrics::ChatCompletions(ChatCompletionsMetrics { + 
avg_request_queue_latency: Some(0.0), gpu_kv_cache_usage_perc: 75.5, - cpu_kv_cache_usage_perc: 45.2, time_to_first_token: 0.15, time_per_output_token: 0.05, num_running_requests: 3, diff --git a/config.example.toml b/config.example.toml index aff3b32e..7eb4b132 100644 --- a/config.example.toml +++ b/config.example.toml @@ -25,6 +25,7 @@ models = [ top_k = 10 # Number of top performing nodes to return in rankings [atoma_service] +environment = "development" # or "production" based on your deployment used in sentry heartbeat_url = "my-heartbeat-url" # Heartbeat URL for the service hf_token = "" # Hugging Face API token (required for gated/private models) modalities = [ @@ -41,15 +42,14 @@ models = [ open_router_models_file = "/app/open_router.json" # Path to the Open Router JSON file for model configuration password = "password" # Authentication password for the service API revisions = [ "main", "main" ] # Model revision/version tags (must match models array length) +sentry_dsn = "sentry_dsn" # Sentry DSN for error reporting service_bind_address = "0.0.0.0:8080" # HTTP service binding address and port (must match docker-compose.yml) [atoma_proxy_service] -environment = "development" # or "production" based on your deployment used in sentry grafana_api_token = "" # Grafana API token (read-only permissions required) grafana_dashboard_tag = "" # Tag to filter which Grafana dashboards to expose as graphs grafana_stats_tag = "" # Tag to filter which Grafana dashboards to expose as stats grafana_url = "" # Grafana instance URL for metrics visualization -sentry_dsn = "sentry_dsn" # Sentry DSN for error reporting service_bind_address = "0.0.0.0:8081" # Proxy service binding address and port (must match docker-compose.yml) From 4fbd7d7026c3f0fdd34d04600a884697728c82bb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jorge=20Ant=C3=B3nio?= Date: Tue, 27 May 2025 15:33:59 +0100 Subject: [PATCH 51/61] feat: increase max pgpool connections (#486) * squash commits and sync with develop * 
update cargo lock * resolve failing tests * resolve failing tests * resolve failing tests --- Cargo.lock | 1 - atoma-state/Cargo.toml | 2 +- atoma-state/src/metrics.rs | 37 ++++++++++++++++++-------------- atoma-state/src/state_manager.rs | 9 +++++++- atoma-state/src/tests.rs | 18 ++++++++-------- 5 files changed, 39 insertions(+), 28 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 381926f6..d128285d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4808,7 +4808,6 @@ version = "0.30.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2e99fb7a497b1e3339bc746195567ed8d3e24945ecd636e3619d20b9de9e9149" dependencies = [ - "cc", "pkg-config", "vcpkg", ] diff --git a/atoma-state/Cargo.toml b/atoma-state/Cargo.toml index cc143b51..9b96ca0d 100644 --- a/atoma-state/Cargo.toml +++ b/atoma-state/Cargo.toml @@ -24,7 +24,7 @@ remote-attestation-verifier = { workspace = true } reqwest = { workspace = true } serde = { workspace = true, features = [ "derive" ] } serde_json = { workspace = true } -sqlx = { workspace = true, features = [ "chrono", "runtime-tokio-native-tls", "sqlite" ] } +sqlx = { workspace = true, features = [ "chrono", "postgres", "runtime-tokio-native-tls" ] } thiserror = { workspace = true } tokio = { workspace = true, features = [ "full" ] } topology = { workspace = true } diff --git a/atoma-state/src/metrics.rs b/atoma-state/src/metrics.rs index ac35d520..c610f3c9 100644 --- a/atoma-state/src/metrics.rs +++ b/atoma-state/src/metrics.rs @@ -318,12 +318,12 @@ pub struct NodeMetricsCollector { #[allow(dead_code)] registry: Registry, + /// Average waiting queue duration for chat completions + avg_waiting_queue_duration: GaugeVec, + /// GPU KV cache usage percentage for chat completions chat_completions_gpu_kv_cache_usage: GaugeVec, - /// CPU KV cache usage percentage for chat completions - chat_completions_cpu_kv_cache_usage: GaugeVec, - /// Time to first token for chat completions chat_completions_ttft: GaugeVec, @@ -369,8 +369,8 @@ 
impl NodeMetricsCollector { let registry = Registry::new(); let ( + avg_waiting_queue_duration, chat_completions_gpu_kv_cache_usage, - chat_completions_cpu_kv_cache_usage, chat_completions_ttft, chat_completions_tpot, chat_completions_num_running_requests, @@ -388,8 +388,8 @@ impl NodeMetricsCollector { Ok(Self { registry, + avg_waiting_queue_duration, chat_completions_gpu_kv_cache_usage, - chat_completions_cpu_kv_cache_usage, chat_completions_ttft, chat_completions_tpot, chat_completions_num_running_requests, @@ -691,6 +691,9 @@ impl NodeMetricsCollector { self.chat_completions_gpu_kv_cache_usage .with_label_values(&[model, node_small_id.to_string().as_str()]) .set(chat_completions.gpu_kv_cache_usage_perc); + self.avg_waiting_queue_duration + .with_label_values(&[model, node_small_id.to_string().as_str()]) + .set(chat_completions.avg_request_queue_latency.unwrap_or(0.0)); self.chat_completions_ttft .with_label_values(&[model, node_small_id.to_string().as_str()]) .set(chat_completions.time_to_first_token); @@ -825,13 +828,15 @@ impl NodeMetricsCollector { let gpu_kv_cache_usage = GaugeVec::new(gpu_kv_cache_usage_opts, &[MODEL_LABEL, NODE_SMALL_ID_LABEL]) .expect("Failed to create gauge"); - let cpu_kv_cache_usage_opts = Opts::new( - "chat_cpu_kv_cache_usage_perc", - "CPU KV cache usage percentage for chat completions", + let avg_waiting_queue_duration_opts = Opts::new( + "avg_waiting_queue_duration", + "Average waiting queue duration for chat completions", ); - let cpu_kv_cache_usage = - GaugeVec::new(cpu_kv_cache_usage_opts, &[MODEL_LABEL, NODE_SMALL_ID_LABEL]) - .expect("Failed to create gauge"); + let avg_waiting_queue_duration = GaugeVec::new( + avg_waiting_queue_duration_opts, + &[MODEL_LABEL, NODE_SMALL_ID_LABEL], + ) + .expect("Failed to create gauge"); let ttft_opts = Opts::new( "chat_time_to_first_token", "Time to first token for chat completions", @@ -864,7 +869,7 @@ impl NodeMetricsCollector { .expect("Failed to create gauge"); 
registry.register(Box::new(gpu_kv_cache_usage.clone()))?; - registry.register(Box::new(cpu_kv_cache_usage.clone()))?; + registry.register(Box::new(avg_waiting_queue_duration.clone()))?; registry.register(Box::new(ttft.clone()))?; registry.register(Box::new(tpot.clone()))?; registry.register(Box::new(num_running_requests.clone()))?; @@ -872,7 +877,7 @@ impl NodeMetricsCollector { Ok(( gpu_kv_cache_usage, - cpu_kv_cache_usage, + avg_waiting_queue_duration, ttft, tpot, num_running_requests, @@ -1079,7 +1084,7 @@ impl NodeMetricsCollector { /// Resets all the metrics in the Prometheus registry. pub fn reset_metrics(&self) { self.chat_completions_gpu_kv_cache_usage.reset(); - self.chat_completions_cpu_kv_cache_usage.reset(); + self.avg_waiting_queue_duration.reset(); self.chat_completions_ttft.reset(); self.chat_completions_tpot.reset(); self.chat_completions_num_running_requests.reset(); @@ -1101,8 +1106,8 @@ impl NodeMetricsCollector { #[cfg(test)] #[must_use] - pub const fn get_chat_completions_cpu_kv_cache_usage(&self) -> &GaugeVec { - &self.chat_completions_cpu_kv_cache_usage + pub const fn get_avg_waiting_queue_duration(&self) -> &GaugeVec { + &self.avg_waiting_queue_duration } #[cfg(test)] diff --git a/atoma-state/src/state_manager.rs b/atoma-state/src/state_manager.rs index 50e490f9..39d013f6 100644 --- a/atoma-state/src/state_manager.rs +++ b/atoma-state/src/state_manager.rs @@ -14,12 +14,16 @@ use atoma_p2p::AtomaP2pEvent; use atoma_sui::events::AtomaEvent; use chrono::{DateTime, Timelike, Utc}; use flume::{Receiver as FlumeReceiver, Sender as FlumeSender}; +use sqlx::postgres::PgPoolOptions; use sqlx::PgPool; use sqlx::{FromRow, Row}; use tokio::sync::oneshot; use tokio::sync::watch::Receiver; use tracing::instrument; +/// The maximum number of connections to the Postgres database. 
+const MAX_NUMBER_POOL_CONNECTIONS: u32 = 256; + pub type Result = std::result::Result; type AtomaP2pData = (AtomaP2pEvent, Option>); @@ -93,7 +97,10 @@ impl AtomaStateManager { p2p_event_receiver: FlumeReceiver, sui_address: String, ) -> Result { - let db = PgPool::connect(database_url).await?; + let db = PgPoolOptions::new() + .max_connections(MAX_NUMBER_POOL_CONNECTIONS) + .connect(database_url) + .await?; // run migrations sqlx::migrate!("./src/migrations").run(&db).await?; Ok(Self { diff --git a/atoma-state/src/tests.rs b/atoma-state/src/tests.rs index 1df93763..3580e3ea 100644 --- a/atoma-state/src/tests.rs +++ b/atoma-state/src/tests.rs @@ -1242,7 +1242,7 @@ fn test_store_chat_completions_metrics() { // Create test data let chat_completions = ChatCompletionsMetrics { - avg_request_queue_latency: Some(0.0), + avg_request_queue_latency: Some(45.2), gpu_kv_cache_usage_perc: 75.5, time_to_first_token: 0.15, time_per_output_token: 0.05, @@ -1267,7 +1267,7 @@ fn test_store_chat_completions_metrics() { ); assert_eq!( collector - .get_chat_completions_cpu_kv_cache_usage() + .get_avg_waiting_queue_duration() .with_label_values(&labels) .get(), 45.2_f64 @@ -1310,7 +1310,7 @@ fn test_store_chat_completions_metrics_multiple_models() { // Test data for first model let chat_completions_1 = ChatCompletionsMetrics { - avg_request_queue_latency: Some(0.0), + avg_request_queue_latency: Some(45.2), gpu_kv_cache_usage_perc: 75.5, time_to_first_token: 0.15, time_per_output_token: 0.05, @@ -1322,7 +1322,7 @@ fn test_store_chat_completions_metrics_multiple_models() { // Test data for second model let chat_completions_2 = ChatCompletionsMetrics { - avg_request_queue_latency: Some(0.0), + avg_request_queue_latency: Some(30.0), gpu_kv_cache_usage_perc: 60.0, time_to_first_token: 0.1, time_per_output_token: 0.03, @@ -1347,7 +1347,7 @@ fn test_store_chat_completions_metrics_multiple_models() { ); assert_eq!( collector - .get_chat_completions_cpu_kv_cache_usage() + 
.get_avg_waiting_queue_duration() .with_label_values(&labels_1) .get(), 45.2_f64 @@ -1392,7 +1392,7 @@ fn test_store_chat_completions_metrics_multiple_models() { ); assert_eq!( collector - .get_chat_completions_cpu_kv_cache_usage() + .get_avg_waiting_queue_duration() .with_label_values(&labels_2) .get(), 30.0_f64 @@ -1911,7 +1911,7 @@ fn test_reset_metrics() { ); assert_eq!( collector - .get_chat_completions_cpu_kv_cache_usage() + .get_avg_waiting_queue_duration() .with_label_values(&chat_labels) .get(), 0.0 @@ -2061,10 +2061,10 @@ fn test_store_metrics() { ); assert_eq!( collector - .get_chat_completions_cpu_kv_cache_usage() + .get_avg_waiting_queue_duration() .with_label_values(&chat_labels) .get(), - 45.2 + 0.0 ); assert_eq!( collector From 34a75e5fc3e2e0154c305f79f570c13c8477c360 Mon Sep 17 00:00:00 2001 From: Chad Nehemiah Date: Thu, 29 May 2025 04:08:15 -0500 Subject: [PATCH 52/61] fix: correctly track client errors (#493) * fix: correctly track 400 errors * fix: update to correctly use status code * fix: track errors more granularly * chore: remove unnecessary clones * chore: some more clippy clean up --- .../src/server/handlers/chat_completions.rs | 46 ++++-- .../src/server/handlers/completions.rs | 90 +++++++++--- atoma-proxy/src/server/handlers/embeddings.rs | 75 ++++++++-- .../src/server/handlers/image_generations.rs | 81 +++++++--- atoma-proxy/src/server/handlers/metrics.rs | 138 ++++++++++++++++++ 5 files changed, 372 insertions(+), 58 deletions(-) diff --git a/atoma-proxy/src/server/handlers/chat_completions.rs b/atoma-proxy/src/server/handlers/chat_completions.rs index 2609cb06..376c684f 100644 --- a/atoma-proxy/src/server/handlers/chat_completions.rs +++ b/atoma-proxy/src/server/handlers/chat_completions.rs @@ -34,6 +34,7 @@ use openai_api::{ }; use openai_api::{CreateChatCompletionRequest, CreateChatCompletionStreamRequest}; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::Deserialize; use serde_json::Value; use 
sqlx::types::chrono::{DateTime, Utc}; @@ -46,9 +47,10 @@ use super::metrics::{ CHAT_COMPLETIONS_INPUT_TOKENS, CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, CHAT_COMPLETIONS_LATENCY_METRICS, CHAT_COMPLETIONS_NUM_REQUESTS, CHAT_COMPLETIONS_TOTAL_TOKENS, CHAT_COMPLETIONS_TOTAL_TOKENS_PER_USER, CHAT_COMPLETION_REQUESTS_PER_USER, - INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, + INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_BAD_REQUESTS, + TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, + TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, TOTAL_TOO_MANY_REQUESTS, + TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, }; use super::request_model::{ComputeUnitsEstimate, RequestModel}; use super::{ @@ -78,6 +80,12 @@ pub const CHAT_COMPLETIONS_PATH: &str = "/v1/chat/completions"; /// The messages field in the request payload. 
const MESSAGES: &str = "messages"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + #[derive(OpenApi)] #[openapi( paths(chat_completions_create, chat_completions_create_stream), @@ -175,12 +183,32 @@ pub async fn chat_completions_create( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - TOTAL_FAILED_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CHAT_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } if let Some(stack_small_id) = metadata.selected_stack_small_id { update_state_manager( &state.state_manager_sender, diff --git a/atoma-proxy/src/server/handlers/completions.rs b/atoma-proxy/src/server/handlers/completions.rs index 9f28644e..9f2d4078 100644 --- a/atoma-proxy/src/server/handlers/completions.rs +++ b/atoma-proxy/src/server/handlers/completions.rs @@ -23,6 +23,7 @@ use openai_api_completions::{ Usage, }; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::Deserialize; 
use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -31,11 +32,14 @@ use utoipa::OpenApi; use super::metrics::{ CHAT_COMPLETIONS_COMPLETIONS_TOKENS, CHAT_COMPLETIONS_COMPLETIONS_TOKENS_PER_USER, - CHAT_COMPLETIONS_INPUT_TOKENS, CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, - CHAT_COMPLETIONS_LATENCY_METRICS, CHAT_COMPLETIONS_NUM_REQUESTS, CHAT_COMPLETIONS_TOTAL_TOKENS, + CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS, CHAT_COMPLETIONS_INPUT_TOKENS, + CHAT_COMPLETIONS_INPUT_TOKENS_PER_USER, CHAT_COMPLETIONS_LATENCY_METRICS, + CHAT_COMPLETIONS_NUM_REQUESTS, CHAT_COMPLETIONS_TOTAL_TOKENS, CHAT_COMPLETIONS_TOTAL_TOKENS_PER_USER, CHAT_COMPLETION_REQUESTS_PER_USER, - INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_CHAT_REQUESTS, TOTAL_FAILED_REQUESTS, + INTENTIONALLY_CANCELLED_CHAT_COMPLETION_STREAMING_REQUESTS, TOTAL_BAD_REQUESTS, + TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS, TOTAL_FAILED_CHAT_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, + TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER, }; use super::request_model::{ComputeUnitsEstimate, RequestModel}; @@ -57,6 +61,12 @@ pub const CONFIDENTIAL_COMPLETIONS_PATH: &str = "/v1/confidential/completions"; /// The key for the prompt in the request. const PROMPT: &str = "prompt"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + /// The OpenAPI schema for the completions endpoint. 
#[derive(OpenApi)] #[openapi( @@ -133,12 +143,33 @@ pub async fn completions_create( Ok(response) } Err(e) => { - TOTAL_FAILED_CHAT_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - TOTAL_FAILED_REQUESTS - .add(1, &[KeyValue::new("model", metadata.model_name.clone())]); - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CHAT_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } + if let Some(stack_small_id) = metadata.selected_stack_small_id { update_state_manager( &state.state_manager_sender, @@ -381,19 +412,38 @@ pub async fn confidential_completions_create( Ok(response) => { if !is_streaming { // The streaming metric is recorded in the streamer (final chunk) - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", metadata.model_name)]); + CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, metadata.model_name)]); } Ok(response) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_CHAT_REQUESTS.add(1, &[KeyValue::new("model", model_label.clone())]); - - // Record the failed request in the total failed requests metric - 
TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + UNSUCCESSFUL_CHAT_COMPLETION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } if let Some(stack_small_id) = metadata.selected_stack_small_id { update_state_manager( &state.state_manager_sender, @@ -670,7 +720,7 @@ async fn handle_non_streaming_response( /// * `node_address` - The address of the node /// * `user_id` - The user id /// * `headers` - The headers of the request -/// * `payload` - The payload of the request +/// * `payload` - The payload of the request /// * `num_input_tokens` - The number of input tokens /// * `estimated_output_tokens` - The estimated output tokens /// * `price_per_million` - The price per million @@ -687,7 +737,7 @@ async fn handle_non_streaming_response( /// * `serde_json::Error` - If the request fails /// * `flume::Error` - If the request fails /// * `tokio::Error` - If the request fails -/// +/// #[instrument( level = "info", skip_all, diff --git a/atoma-proxy/src/server/handlers/embeddings.rs b/atoma-proxy/src/server/handlers/embeddings.rs index fbc1db86..2e2eba48 
100644 --- a/atoma-proxy/src/server/handlers/embeddings.rs +++ b/atoma-proxy/src/server/handlers/embeddings.rs @@ -9,6 +9,7 @@ use axum::{ Extension, Json, }; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -28,8 +29,10 @@ use super::{ handle_status_code_error, metrics::{ EMBEDDING_TOTAL_TOKENS_PER_USER, SUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER, - TEXT_EMBEDDINGS_LATENCY_METRICS, TEXT_EMBEDDINGS_NUM_REQUESTS, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_REQUESTS, TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS, + TEXT_EMBEDDINGS_LATENCY_METRICS, TEXT_EMBEDDINGS_NUM_REQUESTS, TOTAL_BAD_REQUESTS, + TOTAL_COMPLETED_REQUESTS, TOTAL_FAILED_CONFIDENTIAL_EMBEDDING_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS, TOTAL_LOCKED_REQUESTS, + TOTAL_TOO_EARLY_REQUESTS, TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER, }, request_model::{ComputeUnitsEstimate, RequestModel}, @@ -52,6 +55,12 @@ pub const EMBEDDINGS_PATH: &str = "/v1/embeddings"; /// The input field in the request payload. const INPUT: &str = "input"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + // A model representing an embeddings request payload. 
/// /// This struct encapsulates the necessary fields for processing an embeddings request @@ -224,11 +233,32 @@ pub async fn embeddings_create( Ok(Json(response).into_response()) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label.clone())]); - TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } match metadata.selected_stack_small_id { Some(stack_small_id) => { update_state_manager( @@ -374,11 +404,32 @@ pub async fn confidential_embeddings_create( Ok(Json(response).into_response()) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label.clone())]); - TOTAL_FAILED_TEXT_EMBEDDING_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model = metadata.model_name.clone(); + match 
e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CONFIDENTIAL_EMBEDDING_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } match metadata.selected_stack_small_id { Some(stack_small_id) => { diff --git a/atoma-proxy/src/server/handlers/image_generations.rs b/atoma-proxy/src/server/handlers/image_generations.rs index 31dbbb80..78061d74 100644 --- a/atoma-proxy/src/server/handlers/image_generations.rs +++ b/atoma-proxy/src/server/handlers/image_generations.rs @@ -6,6 +6,7 @@ use axum::response::{IntoResponse, Response}; use axum::Extension; use axum::{extract::State, http::HeaderMap, Json}; use opentelemetry::KeyValue; +use reqwest::StatusCode; use serde::{Deserialize, Serialize}; use serde_json::Value; use sqlx::types::chrono::{DateTime, Utc}; @@ -19,8 +20,10 @@ use crate::server::{http_server::ProxyState, middleware::RequestMetadataExtensio use super::metrics::{ IMAGE_GENERATION_TOTAL_TOKENS_PER_USER, IMAGE_GEN_LATENCY_METRICS, IMAGE_GEN_NUM_REQUESTS, - SUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER, TOTAL_COMPLETED_REQUESTS, - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS, TOTAL_FAILED_REQUESTS, + SUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER, TOTAL_BAD_REQUESTS, TOTAL_COMPLETED_REQUESTS, + TOTAL_FAILED_CONFIDENTIAL_IMAGE_GENERATION_REQUESTS, 
TOTAL_FAILED_IMAGE_GENERATION_REQUESTS, + TOTAL_FAILED_REQUESTS, TOTAL_LOCKED_REQUESTS, TOTAL_TOO_EARLY_REQUESTS, + TOTAL_TOO_MANY_REQUESTS, TOTAL_UNAUTHORIZED_REQUESTS, UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER, }; use super::request_model::ComputeUnitsEstimate; @@ -47,6 +50,12 @@ const N: &str = "n"; /// The size field in the request payload. const SIZE: &str = "size"; +/// The model key +const MODEL_KEY: &str = "model"; + +/// The user id key +const USER_ID_KEY: &str = "user_id"; + /// A model representing the parameters for an image generation request. /// /// This struct encapsulates the required parameters for generating images through @@ -217,12 +226,32 @@ pub async fn image_generations_create( } Err(e) => { // Record the failed request in the image generations num requests metric - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS - .add(1, &[KeyValue::new("model", model_label.clone())]); + let model: String = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_IMAGE_GENERATION_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); - // Record the failed request in the total failed requests metric - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); + UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } match 
metadata.selected_stack_small_id { Some(stack_small_id) => { @@ -327,20 +356,38 @@ pub async fn confidential_image_generations_create( Ok(response) => { // NOTE: At this point, we do not need to update the stack num compute units, // because the image generation response was correctly generated by a TEE node. - TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new("model", metadata.model_name)]); + TOTAL_COMPLETED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, metadata.model_name)]); SUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); Ok(response.into_response()) } Err(e) => { - let model_label: String = metadata.model_name.clone(); - TOTAL_FAILED_IMAGE_GENERATION_REQUESTS - .add(1, &[KeyValue::new("model", model_label.clone())]); - - // Record the failed request in the total failed requests metric - TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new("model", model_label)]); - UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER - .add(1, &[KeyValue::new("user_id", metadata.user_id)]); + let model: String = metadata.model_name.clone(); + match e.status_code() { + StatusCode::TOO_MANY_REQUESTS => { + TOTAL_TOO_MANY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::BAD_REQUEST => { + TOTAL_BAD_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::LOCKED => { + TOTAL_LOCKED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::TOO_EARLY => { + TOTAL_TOO_EARLY_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + StatusCode::UNAUTHORIZED => { + TOTAL_UNAUTHORIZED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + } + _ => { + TOTAL_FAILED_CONFIDENTIAL_IMAGE_GENERATION_REQUESTS + .add(1, &[KeyValue::new(MODEL_KEY, model.clone())]); + TOTAL_FAILED_REQUESTS.add(1, &[KeyValue::new(MODEL_KEY, model)]); + + UNSUCCESSFUL_IMAGE_GENERATION_REQUESTS_PER_USER + .add(1, &[KeyValue::new(USER_ID_KEY, metadata.user_id)]); + } + } 
match metadata.selected_stack_small_id { Some(stack_small_id) => { update_state_manager( diff --git a/atoma-proxy/src/server/handlers/metrics.rs b/atoma-proxy/src/server/handlers/metrics.rs index 09bfb38a..66014dfc 100644 --- a/atoma-proxy/src/server/handlers/metrics.rs +++ b/atoma-proxy/src/server/handlers/metrics.rs @@ -316,6 +316,112 @@ pub static TOTAL_FAILED_CHAT_REQUESTS: LazyLock> = LazyLock::new(|| .build() }); +/// Counter metric that tracks the total number of too many requests. +/// +/// # Metric Details +/// - Name: `atoma_total_too_many_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_TOO_MANY_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_too_many_requests") + .with_description("Total number of too many requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of bad requests. +/// +/// # Metric Details +/// - Name: `atoma_total_bad_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_BAD_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_bad_requests") + .with_description("Total number of bad requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of locked requests. +/// +/// # Metric Details +/// - Name: `atoma_total_locked_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_LOCKED_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_locked_requests") + .with_description("Total number of locked requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of too early requests. 
+/// +/// # Metric Details +/// - Name: `atoma_total_too_early_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_TOO_EARLY_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_too_early_requests") + .with_description("Total number of too early requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of unauthorized requests. +/// +/// # Metric Details +/// - Name: `atoma_total_unauthorized_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_UNAUTHORIZED_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_unauthorized_requests") + .with_description("Total number of unauthorized requests") + .with_unit("requests") + .build() +}); + +/// Counter metric that tracks the total number of confidential chat requests. +/// +/// # Metric Details +/// - Name: `atoma_total_confidential_chat_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static CHAT_COMPLETIONS_CONFIDENTIAL_NUM_REQUESTS: LazyLock> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_confidential_chat_requests") + .with_description("Total number of confidential chat requests") + .with_unit("requests") + .build() + }); + +/// Counter metric that tracks the total number of failed confidential chat requests. +/// +/// # Metric Details +/// - Name: `atoma_total_failed_confidential_chat_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_FAILED_CHAT_CONFIDENTIAL_REQUESTS: LazyLock> = LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_failed_confidential_chat_requests") + .with_description("Total number of failed confidential chat requests") + .with_unit("requests") + .build() +}); + /// Counter metric that tracks the total number of failed image generation requests. 
/// /// # Metric Details @@ -331,6 +437,22 @@ pub static TOTAL_FAILED_IMAGE_GENERATION_REQUESTS: LazyLock> = Lazy .build() }); +/// Counter metric that tracks the total number of failed confidential image generation requests. +/// +/// # Metric Details +/// - Name: `atoma_total_failed_confidential_image_generation_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_FAILED_CONFIDENTIAL_IMAGE_GENERATION_REQUESTS: LazyLock> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_failed_confidential_image_generation_requests") + .with_description("Total number of failed confidential image generation requests") + .with_unit("requests") + .build() + }); + /// Counter metric that tracks the total number of failed text embedding requests. /// /// # Metric Details @@ -486,6 +608,22 @@ pub static UNSUCCESSFUL_TEXT_EMBEDDING_REQUESTS_PER_USER: LazyLock> .build() }); +/// Counter metric that tracks the total number of failed text embedding confidential requests. +/// +/// # Metric Details +/// - Name: `atoma_total_failed_text_embedding_confidential_requests` +/// - Type: Counter +/// - Labels: `model` +/// - Unit: requests (count) +pub static TOTAL_FAILED_CONFIDENTIAL_EMBEDDING_REQUESTS: LazyLock> = + LazyLock::new(|| { + GLOBAL_METER + .u64_counter("atoma_total_failed_text_embedding_confidential_requests") + .with_description("Total number of failed text embedding confidential requests") + .with_unit("requests") + .build() + }); + /// Counter metric that tracks the total number of chat completion tokens per user. 
/// /// # Metric Details From c493085e4705740e497d7b73c84cc3c929932e06 Mon Sep 17 00:00:00 2001 From: Chad Nehemiah Date: Thu, 29 May 2025 04:09:28 -0500 Subject: [PATCH 53/61] ci: use stable toolchain (#497) * ci: use stable toolchain * chore: address clippy issues * chore: fix type --- .github/workflows/ci.yml | 2 +- .github/workflows/coverage.yml | 2 +- Dockerfile | 4 +- atoma-auth/src/auth.rs | 108 +++++++++++++++--- atoma-proxy-service/src/components/openapi.rs | 2 +- atoma-proxy-service/src/handlers/auth.rs | 4 +- atoma-proxy/src/server/components/openapi.rs | 2 +- 7 files changed, 97 insertions(+), 27 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index f33cd6fb..6a3a0ac0 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -10,7 +10,7 @@ name: CI merge_group: env: - toolchain: 1.84.0 + toolchain: stable CARGO_HTTP_MULTIPLEXING: false CARGO_TERM_COLOR: always CARGO_UNSTABLE_SPARSE_REGISTRY: true diff --git a/.github/workflows/coverage.yml b/.github/workflows/coverage.yml index d7ab78d1..089ba8f3 100644 --- a/.github/workflows/coverage.yml +++ b/.github/workflows/coverage.yml @@ -3,7 +3,7 @@ name: Coverage on: [push, pull_request] env: - toolchain: 1.84.0 + toolchain: stable CARGO_HTTP_MULTIPLEXING: false CARGO_TERM_COLOR: always CARGO_UNSTABLE_SPARSE_REGISTRY: true diff --git a/Dockerfile b/Dockerfile index fbbbbc43..f1e5c1ec 100644 --- a/Dockerfile +++ b/Dockerfile @@ -17,8 +17,8 @@ RUN apt-get update && apt-get install -y \ curl \ && rm -rf /var/lib/apt/lists/* -# Install Rust 1.84.0 -RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.84.0 \ +# Install Rust 1.87.0 +RUN curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y --default-toolchain 1.87.0 \ && . 
"$HOME/.cargo/env" # Add cargo to PATH diff --git a/atoma-auth/src/auth.rs b/atoma-auth/src/auth.rs index f58e6ff4..267d5fad 100644 --- a/atoma-auth/src/auth.rs +++ b/atoma-auth/src/auth.rs @@ -118,7 +118,7 @@ pub enum AuthError { TimestampConversionError, } -type Result = std::result::Result; +type Result = std::result::Result>; /// The Auth struct #[derive(Clone)] pub struct Auth { @@ -233,7 +233,7 @@ impl Auth { let claims = token_data.claims; if claims.refresh_token_hash.is_none() != is_refresh { - return Err(AuthError::NotRefreshToken); + return Err(Box::new(AuthError::NotRefreshToken)); } Ok(claims) } @@ -283,7 +283,7 @@ impl Auth { .check_refresh_token_validity(claims.user_id, &refresh_token_hash) .await? { - return Err(AuthError::InvalidRefreshToken); + return Err(Box::new(AuthError::InvalidRefreshToken)); } let expiration = Utc::now() + Duration::days(self.access_token_lifetime as i64); @@ -527,7 +527,7 @@ impl Auth { ) .await? { - return Err(AuthError::RevokedToken); + return Err(Box::new(AuthError::RevokedToken)); } Ok(claims) } @@ -571,21 +571,24 @@ impl Auth { /// /// Map a base64 string to a bit array by taking each char's index and convert it to binary form with one bit per u8 /// element in the output. Returns SignatureError if one of the characters is not in the base64 charset. 
+ #[allow(clippy::cast_possible_truncation)] fn base64_to_bitarray(input: &str) -> Result> { - use itertools::Itertools; const BASE64_URL_CHARSET: &str = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_"; - input .chars() - .map(|c| { + .flat_map(|c| { BASE64_URL_CHARSET .find(c) - .map(|index| u8::try_from(index).map_err(AuthError::IntConversionError)) - .unwrap() - .map(|index| (0..6).rev().map(move |i| index >> i & 1)) + .ok_or_else(|| { + Box::new(AuthError::FailedToParseSignature(format!( + "Invalid character: {c}" + ))) + }) + .map(|index| (0..6).rev().map(move |i| Ok((index >> i & 1) as u8))) + .into_iter() }) - .flatten_ok() + .flatten() .collect() } @@ -701,9 +704,9 @@ impl Auth { } UserSignature::Simple(simple_signature) => (simple_signature, None), _ => { - return Err(AuthError::FailedToParseSignature( + return Err(Box::new(AuthError::FailedToParseSignature( "Unsupported signature".to_string(), - )) + ))) } }; match signature { @@ -845,14 +848,14 @@ impl Auth { if tag.address.to_hex() == self.sui.read().await.usdc_package_id.to_hex() { if balance_change.amount < 0 { if sender.is_some() { - return Err(AuthError::MultipleSenders); + return Err(Box::new(AuthError::MultipleSenders)); } if let Owner::AddressOwner(owner) = &balance_change.owner { sender = Some(*owner); } } else { if receiver.is_some() { - return Err(AuthError::MultipleReceivers); + return Err(Box::new(AuthError::MultipleReceivers)); } money_in = Some(balance_change.amount); if let Owner::AddressOwner(owner) = &balance_change.owner { @@ -863,7 +866,7 @@ impl Auth { } } if sender.is_none() || receiver.is_none() { - return Err(AuthError::SenderOrReceiverNotFound); + return Err(Box::new(AuthError::SenderOrReceiverNotFound)); } let sender = sender.unwrap(); let receiver = receiver.unwrap(); @@ -878,12 +881,12 @@ impl Auth { != Self::get_sui_address_from_signature(&signature, transaction_digest)? 
.to_string() { - return Err(AuthError::PaymentNotForThisUser); + return Err(Box::new(AuthError::PaymentNotForThisUser)); } } #[cfg(not(feature = "google-oauth"))] Some(_) => { - return Err(AuthError::ZkLoginNotEnabled); + return Err(Box::new(AuthError::ZkLoginNotEnabled)); } None => { let (result_sender, result_receiver) = oneshot::channel(); @@ -896,7 +899,7 @@ impl Auth { let is_their_wallet = result_receiver.await??; if !is_their_wallet { - return Err(AuthError::PaymentNotForThisUser); + return Err(Box::new(AuthError::PaymentNotForThisUser)); } } } @@ -939,6 +942,73 @@ impl Auth { } } +impl From for Box { + fn from(err: anyhow::Error) -> Self { + Self::new(AuthError::AnyhowError(err)) + } +} + +impl From for Box { + fn from(err: jsonwebtoken::errors::Error) -> Self { + Self::new(AuthError::JsonWebTokenError(err)) + } +} + +impl From> for Box { + fn from(err: flume::SendError) -> Self { + Self::new(AuthError::FlumeError(err)) + } +} + +impl From for Box { + fn from(err: tokio::sync::oneshot::error::RecvError) -> Self { + Self::new(AuthError::OneShotReceiveError(err)) + } +} + +impl From for Box { + fn from(err: AtomaStateManagerError) -> Self { + Self::new(AuthError::AtomaStateManagerError(err)) + } +} + +impl From for Box { + fn from(err: reqwest::Error) -> Self { + Self::new(AuthError::ReqwestError(err)) + } +} + +impl From for Box { + fn from(err: bcs::Error) -> Self { + Self::new(AuthError::BcsError(err)) + } +} + +impl From for Box { + fn from(err: FastCryptoError) -> Self { + Self::new(AuthError::FastCryptoError(err)) + } +} + +impl From for Box { + fn from(err: std::num::TryFromIntError) -> Self { + Self::new(AuthError::IntConversionError(err)) + } +} + +#[cfg(feature = "google-oauth")] +impl From for Box { + fn from(err: crate::google::GoogleError) -> Self { + Self::new(AuthError::GoogleError(err)) + } +} + +impl From for Box { + fn from(err: SuiError) -> Self { + Self::new(AuthError::SuiError(err)) + } +} + // TODO: Add more comprehensive tests, for now 
test the happy path only #[cfg(test)] mod test { diff --git a/atoma-proxy-service/src/components/openapi.rs b/atoma-proxy-service/src/components/openapi.rs index 0f9e7cad..07130368 100644 --- a/atoma-proxy-service/src/components/openapi.rs +++ b/atoma-proxy-service/src/components/openapi.rs @@ -147,7 +147,7 @@ pub fn openapi_router() -> Router { let spec_path = docs_dir.join("openapi.yml"); fs::write(&spec_path, spec).expect("Failed to write OpenAPI spec to file"); - println!("OpenAPI spec written to: {spec_path:?}"); + println!("OpenAPI spec written to: {}", spec_path.display()); } Router::new().merge(SwaggerUi::new("/swagger-ui").url("/api-docs/openapi.json", openapi)) diff --git a/atoma-proxy-service/src/handlers/auth.rs b/atoma-proxy-service/src/handlers/auth.rs index 73d44b08..77f74704 100644 --- a/atoma-proxy-service/src/handlers/auth.rs +++ b/atoma-proxy-service/src/handlers/auth.rs @@ -276,7 +276,7 @@ pub async fn register( .await .map_err(|e| { error!("Failed to register user: {:?}", e); - match e { + match *e { AuthError::UserAlreadyRegistered => StatusCode::CONFLICT, _ => StatusCode::INTERNAL_SERVER_ERROR, } @@ -325,7 +325,7 @@ pub async fn login( .await .map_err(|e| { error!("Failed to login user: {:?}", e); - match e { + match *e { AuthError::PasswordNotValidOrUserNotFound => StatusCode::UNAUTHORIZED, _ => StatusCode::INTERNAL_SERVER_ERROR, } diff --git a/atoma-proxy/src/server/components/openapi.rs b/atoma-proxy/src/server/components/openapi.rs index c2c65c70..e53cf2e2 100644 --- a/atoma-proxy/src/server/components/openapi.rs +++ b/atoma-proxy/src/server/components/openapi.rs @@ -240,7 +240,7 @@ pub fn openapi_routes() -> Router { let spec_path = docs_dir.join("openapi.yml"); fs::write(&spec_path, spec).expect("Failed to write OpenAPI spec to file"); - println!("OpenAPI spec written to: {spec_path:?}"); + println!("OpenAPI spec written to: {}", spec_path.display()); } Router::new() From cc94faacba6ee5a335133137952f223a317462b7 Mon Sep 17 00:00:00 2001 
From: chad Date: Thu, 29 May 2025 13:08:50 -0500 Subject: [PATCH 54/61] chore: update loki helper template --- helm/atoma-proxy/values-dev.yaml | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 1a56f82e..9eb71a0b 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -104,6 +104,14 @@ loki: commonConfig: path_prefix: /loki replication_factor: 1 + # This is required by the helper template + storage: + type: filesystem + bucketNames: + chunks: loki-chunks + ruler: loki-ruler + admin: loki-admin + # This is the actual storage configuration storage_config: filesystem: chunks_directory: /loki/chunks From c27edef018f24ec1701f7acc8ce388fbb7700513 Mon Sep 17 00:00:00 2001 From: chad Date: Thu, 29 May 2025 14:47:42 -0500 Subject: [PATCH 55/61] build: update loki proxy config --- helm/atoma-proxy/values-dev.yaml | 110 ++++++++++++++++++++----------- 1 file changed, 70 insertions(+), 40 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 9eb71a0b..7a33fa67 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -101,49 +101,79 @@ grafana: loki: enabled: true auth_enabled: false - commonConfig: - path_prefix: /loki - replication_factor: 1 - # This is required by the helper template + deploymentMode: SingleBinary storage: type: filesystem bucketNames: - chunks: loki-chunks - ruler: loki-ruler - admin: loki-admin - # This is the actual storage configuration - storage_config: - filesystem: - chunks_directory: /loki/chunks - rules_directory: /loki/rules - persistence: - size: 5Gi - limits_config: - enforce_metric_name: false - reject_old_samples: true - reject_old_samples_max_age: 168h - volume_enabled: true - retention_period: 96h - schemaConfig: - configs: - - from: "2020-10-24" - index: - period: 24h - prefix: index_ - object_store: filesystem - schema: v11 - store: boltdb-shipper - 
ruler: - alertmanager_url: http://localhost:9093 - compactor: - retention_enabled: true - retention_delete_delay: 2h - retention_delete_worker_count: 150 - server: - http_listen_port: 3100 - memberlist: - join_members: - - loki-memberlist + chunks: chunks + ruler: ruler + admin: admin + commonConfig: + path_prefix: /var/loki + replication_factor: 1 + structuredConfig: + auth_enabled: false + server: + http_listen_port: 3100 + grpc_listen_port: 9095 + schema_config: + configs: + - from: "2020-10-24" + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + storage_config: + boltdb_shipper: + active_index_directory: /var/loki/index + cache_location: /var/loki/cache + shared_store: filesystem + filesystem: + directory: /var/loki/chunks + + singleBinary: + replicas: 1 + persistence: + enabled: true + size: 10Gi + storageClass: local-path + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + memory: 256Mi + + gateway: + enabled: true + replicas: 1 + affinity: {} + + write: + replicas: 0 + read: + replicas: 0 + backend: + replicas: 0 + + memcached: + enabled: false + memcachedExporter: + enabled: false + chunksCache: + enabled: false + resultsCache: + enabled: false + + monitoring: + serviceMonitor: + enabled: false + selfMonitoring: + enabled: false + grafanaAgent: + installOperator: false tempo: enabled: true From e850a9bb36e85a97957b2b03aac3f4297d57e80d Mon Sep 17 00:00:00 2001 From: chad Date: Thu, 29 May 2025 14:52:21 -0500 Subject: [PATCH 56/61] build: update loki config --- helm/atoma-proxy/values-dev.yaml | 127 +++++++++++++++++++++---------- 1 file changed, 86 insertions(+), 41 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 7a33fa67..6b8bd99a 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -100,22 +100,96 @@ grafana: loki: enabled: true - auth_enabled: false deploymentMode: SingleBinary + + # Single binary 
configuration - this should be at root level, not under structuredConfig + singleBinary: + replicas: 1 + persistence: + enabled: true + size: 10Gi + storageClass: local-path + resources: + requests: + cpu: 100m + memory: 128Mi + limits: + cpu: 200m + memory: 512Mi + extraVolumes: + - name: loki-config + emptyDir: {} + extraVolumeMounts: + - name: loki-config + mountPath: /var/loki + + # Gateway configuration + gateway: + enabled: true + replicas: 1 + + # Disable other components for single binary mode + write: + replicas: 0 + read: + replicas: 0 + backend: + replicas: 0 + + # Disable caching for development + memcached: + enabled: false + memcachedExporter: + enabled: false + chunksCache: + enabled: false + resultsCache: + enabled: false + + # Disable monitoring for development + monitoring: + serviceMonitor: + enabled: false + selfMonitoring: + enabled: false + grafanaAgent: + installOperator: false + + # Loki configuration + auth_enabled: false + + # Storage configuration storage: type: filesystem bucketNames: chunks: chunks ruler: ruler admin: admin + + # Common configuration commonConfig: path_prefix: /var/loki replication_factor: 1 + + # Structured configuration for Loki itself structuredConfig: auth_enabled: false + server: http_listen_port: 3100 grpc_listen_port: 9095 + + common: + path_prefix: /var/loki + storage: + filesystem: + chunks_directory: /var/loki/chunks + rules_directory: /var/loki/rules + replication_factor: 1 + ring: + kvstore: + store: inmemory + schema_config: configs: - from: "2020-10-24" @@ -125,6 +199,7 @@ loki: index: prefix: index_ period: 24h + storage_config: boltdb_shipper: active_index_directory: /var/loki/index @@ -133,47 +208,17 @@ loki: filesystem: directory: /var/loki/chunks - singleBinary: - replicas: 1 - persistence: - enabled: true - size: 10Gi - storageClass: local-path - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - memory: 256Mi - - gateway: - enabled: true - replicas: 1 - affinity: {} - - write: - 
replicas: 0 - read: - replicas: 0 - backend: - replicas: 0 - - memcached: - enabled: false - memcachedExporter: - enabled: false - chunksCache: - enabled: false - resultsCache: - enabled: false + limits_config: + enforce_metric_name: false + reject_old_samples: true + reject_old_samples_max_age: 168h + volume_enabled: true + retention_period: 96h - monitoring: - serviceMonitor: - enabled: false - selfMonitoring: - enabled: false - grafanaAgent: - installOperator: false + compactor: + retention_enabled: true + retention_delete_delay: 2h + retention_delete_worker_count: 150 tempo: enabled: true From 8a72d78c4911c73de0874ebfbf7ca0958834a3cc Mon Sep 17 00:00:00 2001 From: chad Date: Thu, 29 May 2025 14:55:40 -0500 Subject: [PATCH 57/61] build: try with a minimal config --- helm/atoma-proxy/values-dev.yaml | 112 +++---------------------------- 1 file changed, 8 insertions(+), 104 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 6b8bd99a..62312fc8 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -101,34 +101,22 @@ grafana: loki: enabled: true deploymentMode: SingleBinary + auth_enabled: false + + storage: + type: filesystem + bucketNames: + chunks: chunks + ruler: ruler + admin: admin - # Single binary configuration - this should be at root level, not under structuredConfig singleBinary: replicas: 1 persistence: enabled: true size: 10Gi storageClass: local-path - resources: - requests: - cpu: 100m - memory: 128Mi - limits: - cpu: 200m - memory: 512Mi - extraVolumes: - - name: loki-config - emptyDir: {} - extraVolumeMounts: - - name: loki-config - mountPath: /var/loki - # Gateway configuration - gateway: - enabled: true - replicas: 1 - - # Disable other components for single binary mode write: replicas: 0 read: @@ -136,90 +124,6 @@ loki: backend: replicas: 0 - # Disable caching for development - memcached: - enabled: false - memcachedExporter: - enabled: false - chunksCache: - 
enabled: false - resultsCache: - enabled: false - - # Disable monitoring for development - monitoring: - serviceMonitor: - enabled: false - selfMonitoring: - enabled: false - grafanaAgent: - installOperator: false - - # Loki configuration - auth_enabled: false - - # Storage configuration - storage: - type: filesystem - bucketNames: - chunks: chunks - ruler: ruler - admin: admin - - # Common configuration - commonConfig: - path_prefix: /var/loki - replication_factor: 1 - - # Structured configuration for Loki itself - structuredConfig: - auth_enabled: false - - server: - http_listen_port: 3100 - grpc_listen_port: 9095 - - common: - path_prefix: /var/loki - storage: - filesystem: - chunks_directory: /var/loki/chunks - rules_directory: /var/loki/rules - replication_factor: 1 - ring: - kvstore: - store: inmemory - - schema_config: - configs: - - from: "2020-10-24" - store: boltdb-shipper - object_store: filesystem - schema: v11 - index: - prefix: index_ - period: 24h - - storage_config: - boltdb_shipper: - active_index_directory: /var/loki/index - cache_location: /var/loki/cache - shared_store: filesystem - filesystem: - directory: /var/loki/chunks - - limits_config: - enforce_metric_name: false - reject_old_samples: true - reject_old_samples_max_age: 168h - volume_enabled: true - retention_period: 96h - - compactor: - retention_enabled: true - retention_delete_delay: 2h - retention_delete_worker_count: 150 - tempo: enabled: true persistence: From f1f0f017651ccc22c09365010504c2638f31b591 Mon Sep 17 00:00:00 2001 From: chad Date: Thu, 29 May 2025 15:00:12 -0500 Subject: [PATCH 58/61] build: remove bucketNames and use raw config --- helm/atoma-proxy/values-dev.yaml | 35 +++++++++++++++++++++++++------- 1 file changed, 28 insertions(+), 7 deletions(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index 62312fc8..d85ee507 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -98,17 +98,38 @@ grafana: 
annotations: metallb.universe.tf/address-pool: grafana-pool -loki: +loki: # Fixed typo: was "oki" enabled: true deploymentMode: SingleBinary auth_enabled: false - storage: - type: filesystem - bucketNames: - chunks: chunks - ruler: ruler - admin: admin + # Remove the storage.bucketNames section entirely + # Use raw config instead + config: | + auth_enabled: false + server: + http_listen_port: 3100 + common: + path_prefix: /var/loki + replication_factor: 1 + ring: + kvstore: + store: inmemory + schema_config: + configs: + - from: 2020-10-24 + store: boltdb-shipper + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + storage_config: + boltdb_shipper: + active_index_directory: /var/loki/index + shared_store: filesystem + filesystem: + directory: /var/loki/chunks singleBinary: replicas: 1 From ae05331e8b31525a8cb87796b3d7219172ea917f Mon Sep 17 00:00:00 2001 From: Martin Stefcek <35243812+Cifko@users.noreply.github.com> Date: Mon, 2 Jun 2025 16:27:11 +0200 Subject: [PATCH 59/61] chore: refactor sending atoma state manager event (#499) --- atoma-auth/src/auth.rs | 214 ++++++++----- atoma-proxy/src/server/middleware.rs | 451 ++++++++++++--------------- 2 files changed, 340 insertions(+), 325 deletions(-) diff --git a/atoma-auth/src/auth.rs b/atoma-auth/src/auth.rs index 267d5fad..ab302f9d 100644 --- a/atoma-auth/src/auth.rs +++ b/atoma-auth/src/auth.rs @@ -119,6 +119,55 @@ pub enum AuthError { } type Result = std::result::Result>; + +/// Sends an event to the state manager +/// +/// # Arguments +/// * `state_manager_sender` - The sender for the state manager +/// * `event` - The event to be sent +/// +/// # Returns +/// * `Result<()>` - If the event was sent successfully +#[instrument(level = "trace", skip_all)] +fn send_event( + state_manager_sender: &Sender, + event: AtomaAtomaStateManagerEvent, +) -> Result<()> { + Ok(state_manager_sender.send(event)?) 
+} + +/// Sends an event to the state manager and waits for a response +/// This function is used to send an event to the state manager and wait for a response. +/// It uses a oneshot channel to send the event and receive the response. +/// +/// # Arguments +/// * `state_manager_sender` - The sender for the state manager +/// * `event_creator` - A closure that creates the event to be sent +/// +/// # Returns +/// * `Result` - The result of the event, which can be either a success or an error +/// +/// # Errors +/// Returns an error if: +/// - The event could not be sent to the state manager +/// - The response could not be received from the oneshot channel +/// - The response from the state manager is an error +#[instrument(level = "trace", skip_all)] +async fn send_event_with_response( + state_manager_sender: &Sender, + event_creator: impl FnOnce( + oneshot::Sender>, + ) -> AtomaAtomaStateManagerEvent, +) -> Result { + let (result_sender, result_receiver) = + oneshot::channel::>(); + + state_manager_sender.send(event_creator(result_sender))?; + + let result = result_receiver.await??; + Ok(result) +} + /// The Auth struct #[derive(Clone)] pub struct Auth { @@ -201,11 +250,13 @@ impl Auth { &claims, &EncodingKey::from_secret(self.secret_key.as_ref()), )?; - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::StoreRefreshToken { + send_event( + &self.state_manager_sender, + AtomaAtomaStateManagerEvent::StoreRefreshToken { user_id, refresh_token_hash: self.hash_string(&token), - })?; + }, + )?; Ok(token) } @@ -254,14 +305,14 @@ impl Auth { user_id: i64, refresh_token_hash: &str, ) -> Result { - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::IsRefreshTokenValid { + send_event_with_response(&self.state_manager_sender, |result_receiver| { + AtomaAtomaStateManagerEvent::IsRefreshTokenValid { user_id, refresh_token_hash: refresh_token_hash.to_string(), - result_sender, - })?; - 
Ok(result_receiver.await??) + result_sender: result_receiver, + } + }) + .await } /// Generate a new access token from a refresh token @@ -330,24 +381,23 @@ impl Auth { user_profile: &UserProfile, password: &str, ) -> Result<(String, String)> { - let (result_sender, result_receiver) = oneshot::channel(); let password_salt = rand::thread_rng() .sample_iter(&rand::distributions::Alphanumeric) .take(30) .map(char::from) .collect::(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::RegisterUserWithPassword { + let user_id = send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::RegisterUserWithPassword { user_profile: user_profile.clone(), password: self.hash_string(&format!("{password_salt}:{password}")), password_salt, result_sender, - })?; - let user_id = result_receiver - .await?? - .map(|user_id| user_id as u64) - .ok_or_else(|| AuthError::UserAlreadyRegistered)?; + } + }) + .await? + .map(|user_id| user_id as u64) + .ok_or_else(|| AuthError::UserAlreadyRegistered)?; let refresh_token = self.generate_refresh_token(user_id as i64).await?; let access_token = self.generate_access_token(&refresh_token).await?; Ok((refresh_token, access_token)) @@ -363,28 +413,25 @@ impl Auth { email: &str, password: &str, ) -> Result<(String, String)> { - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetPasswordSalt { + let password_salt = send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::GetPasswordSalt { email: email.to_string(), result_sender, - })?; - let password_salt = result_receiver.await??; - - let password_salt = - password_salt.ok_or_else(|| AuthError::PasswordNotValidOrUserNotFound)?; + } + }) + .await? 
+ .ok_or_else(|| AuthError::PasswordNotValidOrUserNotFound)?; - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetUserIdByEmailPassword { + let user_id = send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::GetUserIdByEmailPassword { email: email.to_string(), password: self.hash_string(&format!("{password_salt}:{password}")), result_sender, - })?; - let user_id = result_receiver - .await?? - .map(|user_id| user_id as u64) - .ok_or_else(|| AuthError::PasswordNotValidOrUserNotFound)?; + } + }) + .await? + .map(|user_id| user_id as u64) + .ok_or_else(|| AuthError::PasswordNotValidOrUserNotFound)?; let refresh_token = self.generate_refresh_token(user_id as i64).await?; let access_token = self.generate_access_token(&refresh_token).await?; Ok((refresh_token, access_token)) @@ -403,26 +450,25 @@ impl Auth { &self.google_public_keys, )?; - let (result_sender, result_receiver) = oneshot::channel(); - let email = match claims.email { - Some(email) => email, - None => { - return Err(google::GoogleError::EmailNotFound)?; - } + let Some(email) = claims.email else { + return Err(google::GoogleError::EmailNotFound)?; }; + // In case this user doesn't have an account yet, we will add the password salt let password_salt = rand::thread_rng() .sample_iter(&rand::distributions::Alphanumeric) .take(30) .map(char::from) .collect::(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::OAuth { + + let user_id = send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::OAuth { email, password_salt, result_sender, - })?; - let user_id = result_receiver.await??; + } + }) + .await?; let refresh_token = self.generate_refresh_token(user_id).await?; let access_token = self.generate_access_token(&refresh_token).await?; Ok((refresh_token, access_token)) @@ -448,12 +494,14 @@ impl Auth { .take(API_TOKEN_LENGTH) 
.map(char::from) .collect(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::StoreNewApiToken { + send_event( + &self.state_manager_sender, + AtomaAtomaStateManagerEvent::StoreNewApiToken { user_id: claims.user_id, api_token: api_token.clone(), name, - })?; + }, + )?; Ok(api_token) } @@ -472,12 +520,14 @@ impl Auth { #[instrument(level = "info", skip(self))] pub async fn revoke_api_token(&self, jwt: &str, api_token_id: i64) -> Result<()> { let claims = self.get_claims_from_token(jwt).await?; - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::RevokeApiToken { + + send_event( + &self.state_manager_sender, + AtomaAtomaStateManagerEvent::RevokeApiToken { user_id: claims.user_id, api_token_id, - })?; - Ok(()) + }, + ) } /// Get all API tokens for a user @@ -495,13 +545,13 @@ impl Auth { pub async fn get_all_api_tokens(&self, jwt: &str) -> Result> { let claims = self.get_claims_from_token(jwt).await?; - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetApiTokensForUser { + send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::GetApiTokensForUser { user_id: claims.user_id, result_sender, - })?; - Ok(result_receiver.await??) 
+ } + }) + .await } /// Get the claims from the token @@ -780,12 +830,13 @@ impl Auth { ), )?; - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::UpdateSuiAddress { + send_event( + &self.state_manager_sender, + AtomaAtomaStateManagerEvent::UpdateSuiAddress { user_id: claims.user_id, sui_address: sui_address.to_string(), - })?; - Ok(()) + }, + ) } /// Updates the balance of the user @@ -818,14 +869,13 @@ impl Auth { ) -> Result<()> { let claims = self.validate_token(jwt, false)?; - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender.send( + send_event_with_response(&self.state_manager_sender, |result_sender| { AtomaAtomaStateManagerEvent::InsertNewUsdcPaymentDigest { digest: transaction_digest.to_string(), result_sender, - }, - )?; - result_receiver.await??; + } + }) + .await?; let mut balance_changes = Err(anyhow!("No balance changes found")); for _ in 0..SUI_BALANCE_RETRY_COUNT { balance_changes = self @@ -889,14 +939,15 @@ impl Auth { return Err(Box::new(AuthError::ZkLoginNotEnabled)); } None => { - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::ConfirmUser { - sui_address: sender.to_string(), - user_id: claims.user_id, - result_sender, - })?; - let is_their_wallet = result_receiver.await??; + let is_their_wallet = + send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::ConfirmUser { + sui_address: sender.to_string(), + user_id: claims.user_id, + result_sender, + } + }) + .await?; if !is_their_wallet { return Err(Box::new(AuthError::PaymentNotForThisUser)); @@ -905,13 +956,15 @@ impl Auth { } // We are the receiver and we know the sender - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::TopUpBalance { + send_event( + &self.state_manager_sender, + AtomaAtomaStateManagerEvent::TopUpBalance { user_id: claims.user_id, amount: i64::try_from(money_in.unwrap()).map_err(|e| { 
AuthError::AnyhowError(anyhow!("Failed to convert amount: {e}")) })?, - })?; + }, + )?; } Ok(()) } @@ -931,14 +984,13 @@ impl Auth { /// * If the verification fails pub async fn get_sui_address(&self, jwt: &str) -> Result> { let claims = self.validate_token(jwt, false)?; - let (result_sender, result_receiver) = oneshot::channel(); - self.state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetSuiAddress { + send_event_with_response(&self.state_manager_sender, |result_sender| { + AtomaAtomaStateManagerEvent::GetSuiAddress { user_id: claims.user_id, result_sender, - })?; - let sui_address = result_receiver.await; - Ok(sui_address??) + } + }) + .await } } diff --git a/atoma-proxy/src/server/middleware.rs b/atoma-proxy/src/server/middleware.rs index 50816c89..a1107d48 100644 --- a/atoma-proxy/src/server/middleware.rs +++ b/atoma-proxy/src/server/middleware.rs @@ -899,6 +899,7 @@ pub mod auth { use atoma_auth::Sui; use atoma_state::types::CheapestNode; use atoma_state::types::Stack; + use atoma_state::AtomaStateManagerError; use atoma_state::{timestamp_to_datetime_or_now, types::AtomaAtomaStateManagerEvent}; use axum::http::HeaderMap; use flume::Sender; @@ -1092,6 +1093,97 @@ pub mod auth { } } + /// Sends an event to the Atoma state manager. + /// + /// This function is used to send events to the state manager for processing. + /// It handles the sending of events and returns a `Result` indicating success or failure. + /// + /// # Arguments + /// * `state_manager_sender` - The sender channel for the Atoma state manager events. + /// * `event` - The event to be sent to the state manager. + /// * `event_name` - A static string representing the name of the event, used for logging. + /// * `endpoint` - The endpoint from which the event is being sent, used for error reporting. + /// + /// # Returns + /// * `Result<()>` - Returns `Ok(())` if the event was sent successfully, or an `AtomaProxyError` if there was an error. 
+ /// + /// # Errors + /// Returns `AtomaProxyError::InternalError` if the event could not be sent, with a message detailing the error. + #[instrument(level = "trace", skip_all)] + pub fn send_event( + state_manager_sender: &Sender, + event: AtomaAtomaStateManagerEvent, + event_name: &'static str, + endpoint: &str, + ) -> Result<()> { + state_manager_sender + .send(event) + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to send {event_name} event: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + }) + } + + /// Sends an event to the Atoma state manager and waits for a response. + /// + /// This function is used to send events that require a response from the state manager. + /// It creates a oneshot channel to receive the response and handles any errors that may occur during sending or receiving. + /// + /// # Arguments + /// * `state_manager_sender` - The sender channel for the Atoma state manager events. + /// * `event_creator` - A closure that creates the event to be sent, taking a oneshot sender for the response. + /// * `event_name` - A static string representing the name of the event, used for logging. + /// * `endpoint` - The endpoint from which the event is being sent, used for error reporting. + /// + /// # Returns + /// * `Result` - Returns `Ok(T)` if the event was sent and a response was received successfully, or an `AtomaProxyError` if there was an error. + /// + /// # Errors + /// Returns `AtomaProxyError::InternalError` if: + /// * The event could not be sent to the state manager. + /// * The response could not be received from the oneshot channel. + /// * The response contained an error. 
+ #[instrument(level = "trace", skip_all)] + pub async fn send_event_with_response( + state_manager_sender: &Sender, + event_creator: impl FnOnce( + oneshot::Sender>, + ) -> AtomaAtomaStateManagerEvent, + event_name: &'static str, + endpoint: &str, + ) -> Result { + let (result_sender, result_receiver) = + oneshot::channel::>(); + + state_manager_sender + .send(event_creator(result_sender)) + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to send {event_name} event: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })?; + + result_receiver + .await + .map_err(|err| AtomaProxyError::InternalError { + message: format!("Failed to receive {event_name} result: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + })? + .map_err(|err| match err { + AtomaStateManagerError::InsufficientBalance => AtomaProxyError::BalanceError { + message: "Insufficient balance to lock compute units".to_string(), + endpoint: endpoint.to_string(), + }, + _ => AtomaProxyError::InternalError { + message: format!("Failed to get {event_name} result: {err:?}"), + client_message: None, + endpoint: endpoint.to_string(), + }, + }) + } + /// Authenticates a request and attempts to lock compute units for model execution. /// /// This function performs several key operations in sequence: @@ -1184,38 +1276,23 @@ pub mod auth { let node = get_cheapest_node(state, &model, endpoint).await?; // We don't have a stack for the user, lets check if the user is using fiat currency. 
- let (result_sender, result_receiver) = oneshot::channel(); let fiat_locked_input_amount = num_input_tokens as i64 * node.price_per_one_million_compute_units / ONE_MILLION as i64; let fiat_locked_output_amount = max_output_tokens as i64 * node.price_per_one_million_compute_units / ONE_MILLION as i64; - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::LockUserFiatBalance { + let locked_fiat = send_event_with_response( + &state.state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::LockUserFiatBalance { user_id, input_amount: fiat_locked_input_amount, output_amount: fiat_locked_output_amount, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send LockUserFiatBalance event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - - let locked_fiat = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive LockUserFiatBalance result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get LockUserFiatBalance result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + }, + "LockUserFiatBalance", + endpoint, + ) + .await?; if locked_fiat { return Ok(StackMetadata { @@ -1230,35 +1307,21 @@ pub mod auth { }); } - let (result_sender, result_receiver) = oneshot::channel(); - - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetStacksForModel { - model: model.to_string(), - free_compute_units: (num_input_tokens + max_output_tokens) as i64, - user_id, - is_confidential: false, // NOTE: This method is only used for non-confidential compute - result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send GetStacksForModel event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - - let optional_stack = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive GetStacksForModel result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get GetStacksForModel result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + let optional_stack = send_event_with_response( + &state.state_manager_sender, + |result_sender| { + AtomaAtomaStateManagerEvent::GetStacksForModel { + model: model.to_string(), + free_compute_units: (num_input_tokens + max_output_tokens) as i64, + user_id, + is_confidential: false, // NOTE: This method is only used for non-confidential compute + result_sender, + } + }, + "GetStacksForModel", + endpoint, + ) + .await?; Ok(StackMetadata { optional_stack, @@ -1301,34 +1364,19 @@ pub mod auth { endpoint: &str, total_tokens: u64, ) -> Result> { - let (result_sender, result_receiver) = oneshot::channel(); - - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetStacksForTask { + let optional_stack = send_event_with_response( + &state.state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::GetStacksForTask { task_small_id, free_compute_units: total_tokens as i64, user_id, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send GetStacksForTask event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + }, + "GetStacksForTask", + endpoint, + ) + .await?; - let optional_stack = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive GetStacksForTask result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get GetStacksForTask result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; if let Some(stack) = optional_stack { Ok(Some(SelectedNodeMetadata { stack_small_id: Some(stack.stack_small_id), @@ -1387,35 +1435,20 @@ pub mod auth { endpoint: &str, ) -> Result { // Get node address - let (result_sender, result_receiver) = oneshot::channel(); - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetNodePublicAddress { + let node_address = send_event_with_response( + &state.state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::GetNodePublicAddress { node_small_id: selected_node_id, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send GetNodePublicAddress event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - - let node_address = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive GetNodePublicAddress result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get GetNodePublicAddress result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? - .ok_or_else(|| AtomaProxyError::NotFound { - message: format!("No node address found for node {selected_node_id}"), - endpoint: endpoint.to_string(), - })?; + }, + "GetNodePublicAddress", + endpoint, + ) + .await? + .ok_or_else(|| AtomaProxyError::NotFound { + message: format!("No node address found for node {selected_node_id}"), + endpoint: endpoint.to_string(), + })?; // Get signature let signature = state @@ -1659,32 +1692,19 @@ pub mod auth { let selected_node_id = event.selected_node_id.inner as i64; // Send the NewStackAcquired event to the state manager, so we have it in the DB. 
- let (result_sender, result_receiver) = oneshot::channel(); - state_manager_sender - .send(AtomaAtomaStateManagerEvent::NewStackAcquired { + send_event_with_response( + &state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::NewStackAcquired { event, locked_compute_units: total_tokens as i64, transaction_timestamp: timestamp_to_datetime_or_now(timestamp_ms), user_id, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send NewStackAcquired event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive NewStackAcquired result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive NewStackAcquired result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + }, + "NewStackAcquired", + &endpoint, + ) + .await?; Ok(SelectedNodeMetadata { stack_small_id: Some(stack_small_id), selected_node_id, @@ -1725,30 +1745,18 @@ pub mod auth { stack_size_to_buy: u64, endpoint: String, ) -> Result<()> { - let (result_sender, result_receiver) = oneshot::channel(); - state_manager_sender - .send(AtomaAtomaStateManagerEvent::DeductFromUsdc { + send_event_with_response( + &state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::DeductFromUsdc { user_id, amount: (price_per_one_million_compute_units * stack_size_to_buy / ONE_MILLION) as i64, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send DeductFromUsdc event: {err:?}"), - client_message: None, - endpoint: endpoint.clone(), - })?; - result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive DeductFromUsdc result: {err:?}"), - client_message: None, - endpoint: endpoint.clone(), - })? 
- .map_err(|err| AtomaProxyError::BalanceError { - message: format!("Balance error : {err:?}"), - endpoint, - })?; + }, + "DeductFromUsdc", + &endpoint, + ) + .await?; Ok(()) } @@ -1782,31 +1790,18 @@ pub mod auth { stack_size_to_buy: u64, endpoint: String, ) -> Result<()> { - let (result_sender, result_receiver) = oneshot::channel(); - state_manager_sender - .send(AtomaAtomaStateManagerEvent::RefundUsdc { + send_event_with_response( + &state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::RefundUsdc { user_id, amount: (price_per_one_million_compute_units * stack_size_to_buy / ONE_MILLION) as i64, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send RefundUsdc event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive RefundUsdc result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to refund USDC: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - }) + }, + "RefundUsdc", + &endpoint, + ) + .await } /// Retrieves stack metadata for a locked user stack based on the endpoint type @@ -2058,31 +2053,21 @@ pub mod auth { model: &str, endpoint: &str, ) -> Result { - let (result_sender, result_receiver) = oneshot::channel(); - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetCheapestNodeForModel { - model: model.to_string(), - is_confidential: false, - result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send GetTasksForModel event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - let node = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive GetTasksForModel result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get retrieve `CheapestNode` from the state manager with result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + let node = send_event_with_response( + &state.state_manager_sender, + |result_sender| { + // Send the GetCheapestNodeForModel event to the state manager + AtomaAtomaStateManagerEvent::GetCheapestNodeForModel { + model: model.to_string(), + is_confidential: false, + result_sender, + } + }, + "GetCheapestNodeForModel", + endpoint, + ) + .await?; + node.map_or_else( || { Err(AtomaProxyError::RequestError { @@ -2205,33 +2190,19 @@ pub mod auth { is_confidential: bool, endpoint: &str, ) -> Result> { - let (result_sender, result_receiver) = oneshot::channel(); - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetStacksForModel { + let maybe_stack = send_event_with_response( + &state.state_manager_sender, + |result_sender| AtomaAtomaStateManagerEvent::GetStacksForModel { model: model.to_string(), user_id, free_compute_units, is_confidential, result_sender, - }) - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to send GetStacksForModel event: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - let maybe_stack = result_receiver - .await - .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to receive GetStacksForModel result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|err| AtomaProxyError::InternalError { - message: format!("Failed to get GetStacksForModel result: {err:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + }, + "GetStacksForModel", + endpoint, + ) + .await?; Ok(maybe_stack.map(|stack| SelectedNodeMetadata { selected_node_id: stack.selected_node_id, stack_small_id: Some(stack.stack_small_id), @@ -2255,13 +2226,15 @@ pub mod utils { image_generations::CONFIDENTIAL_IMAGE_GENERATIONS_PATH, update_state_manager, }, http_server::{LockedComputeUnits, StackSmallId}, + middleware::auth::send_event, MODEL, }; use super::{ - auth, constants, instrument, AtomaAtomaStateManagerEvent, AtomaProxyError, Body, - HeaderValue, Parts, ProcessedRequest, ProxyState, Request, RequestMetadataExtension, - Result, State, Value, CONTENT_LENGTH, + auth::{self, send_event_with_response}, + constants, instrument, AtomaAtomaStateManagerEvent, AtomaProxyError, Body, HeaderValue, + Parts, ProcessedRequest, ProxyState, Request, RequestMetadataExtension, Result, State, + Value, CONTENT_LENGTH, }; /// Validates and prepares a request for processing by a specific stack and node. @@ -2742,29 +2715,19 @@ pub mod utils { stack_small_id: i64, endpoint: &str, ) -> Result<(String, i64)> { - let (result_sender, result_receiver) = tokio::sync::oneshot::channel(); - state - .state_manager_sender - .send(AtomaAtomaStateManagerEvent::GetNodePublicUrlAndSmallId { - stack_small_id, - result_sender, - }) - .map_err(|e| AtomaProxyError::InternalError { - message: format!("Failed to send GetNodePublicAddress event: {e:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; - let (node_address, node_small_id) = result_receiver - .await - .map_err(|e| AtomaProxyError::InternalError { - message: format!("Failed to receive GetNodePublicAddress result: {e:?}"), - client_message: None, - endpoint: endpoint.to_string(), - })? 
- .map_err(|e| AtomaProxyError::NotFound { - message: format!("Failed to get node public address: {e:?}"), - endpoint: endpoint.to_string(), - })?; + let (node_address, node_small_id) = send_event_with_response( + &state.state_manager_sender, + |result_sender| { + // Send the GetNodePublicUrlAndSmallId event to the state manager + AtomaAtomaStateManagerEvent::GetNodePublicUrlAndSmallId { + stack_small_id, + result_sender, + } + }, + "GetNodePublicUrlAndSmallId", + endpoint, + ) + .await?; if let Some(node_address) = node_address { return Ok((node_address, node_small_id)); } @@ -2843,13 +2806,13 @@ pub mod utils { stack_small_id = %stack_small_id, "Stack is in locked state for the requested node, trying to acquire a new stack" ); - state_manager_sender - .send(AtomaAtomaStateManagerEvent::LockStack { stack_small_id }) - .map_err(|e| AtomaProxyError::InternalError { - message: format!("Failed to send LockStack event: {e}"), - client_message: None, - endpoint: endpoint.to_string(), - })?; + send_event( + state_manager_sender, + AtomaAtomaStateManagerEvent::LockStack { stack_small_id }, + "LockStack", + endpoint, + )?; + Ok(()) } From a9125647d5777731b5b077a710446a9161368713 Mon Sep 17 00:00:00 2001 From: chad Date: Mon, 2 Jun 2025 15:57:18 -0500 Subject: [PATCH 60/61] chore: disable loki for now --- helm/atoma-proxy/values-dev.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helm/atoma-proxy/values-dev.yaml b/helm/atoma-proxy/values-dev.yaml index d85ee507..98d30d0f 100644 --- a/helm/atoma-proxy/values-dev.yaml +++ b/helm/atoma-proxy/values-dev.yaml @@ -99,7 +99,7 @@ grafana: metallb.universe.tf/address-pool: grafana-pool loki: # Fixed typo: was "oki" - enabled: true + enabled: false deploymentMode: SingleBinary auth_enabled: false From fa406261cc72915fb269f0d2c3014c246ba66ab8 Mon Sep 17 00:00:00 2001 From: chad Date: Tue, 3 Jun 2025 10:15:15 -0500 Subject: [PATCH 61/61] docs: update README --- helm/atoma-proxy/README.md | 14 ++++++++++++++ 1 
file changed, 14 insertions(+) diff --git a/helm/atoma-proxy/README.md b/helm/atoma-proxy/README.md index ac75ef43..1d435dc6 100644 --- a/helm/atoma-proxy/README.md +++ b/helm/atoma-proxy/README.md @@ -152,6 +152,20 @@ Options: -h, --help Show this help message ``` +### Deployment configuration + +You need to ensure there is a `files` folder on the server you wish to deploy to, located at `atoma-proxy/helm/atoma-proxy/files`, which contains the `config.toml` file and the `sui_config` folder, as this is what the [sui-config-configmap.yaml](./templates/sui-config-configmap.yaml) template expects to mount: + +``` +helm/atoma-proxy/ +├── files/ +│ ├── config.toml # Your main config file +│ └── sui_config/ # Your Sui config files +│ ├── client.yaml +│ ├── sui.keystore +│ └── sui.aliases +``` + ## Environment-Specific Configurations ### Development