From 2ecf73f3612d2c4e069f347c3f3a649cb9b09fc7 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Fri, 23 Jan 2026 09:48:09 -0400 Subject: [PATCH 1/5] fix: address security issues in WebSocket coordinator --- main/frost_coordinator.c | 595 ++++++++++++++++++++++++++++++++------- main/frost_coordinator.h | 47 ++++ 2 files changed, 543 insertions(+), 99 deletions(-) diff --git a/main/frost_coordinator.c b/main/frost_coordinator.c index c40c13b..da27e17 100644 --- a/main/frost_coordinator.c +++ b/main/frost_coordinator.c @@ -25,9 +25,26 @@ #define TAG "frost_coord" +#ifdef ESP_PLATFORM +#define COORDINATOR_LOCK() xSemaphoreTake(g_ctx.mutex, portMAX_DELAY) +#define COORDINATOR_UNLOCK() xSemaphoreGive(g_ctx.mutex) +#else +#define COORDINATOR_LOCK() +#define COORDINATOR_UNLOCK() +#endif + +typedef struct { + char *json; + size_t len; +} buffered_event_t; + typedef struct { char url[RELAY_URL_LEN]; coordinator_state_t state; + ws_health_t health; + ws_reconnect_t reconnect; + uint32_t success_count; + uint32_t fail_count; #ifdef ESP_PLATFORM esp_websocket_client_handle_t ws_handle; #else @@ -45,6 +62,12 @@ typedef struct { coordinator_state_t state; frost_coordinator_callbacks_t callbacks; NCContext *nc_ctx; + char current_subscription[64]; + bool has_subscription; + buffered_event_t event_buffer[WS_EVENT_BUFFER_SIZE]; + uint8_t buffer_head; + uint8_t buffer_count; + uint32_t disconnect_time; #ifdef ESP_PLATFORM SemaphoreHandle_t mutex; #endif @@ -54,113 +77,291 @@ static coordinator_ctx_t g_ctx; static bool g_initialized = false; #ifdef ESP_PLATFORM +#include "esp_timer.h" +#endif + +static uint32_t coordinator_now_ms(void) { +#ifdef ESP_PLATFORM + return (uint32_t)(esp_timer_get_time() / 1000); +#else + return 0; +#endif +} + +static uint32_t calculate_backoff(uint8_t attempt) { + uint32_t delay = WS_RECONNECT_BASE_MS; + for (uint8_t i = 0; i < attempt && delay < WS_RECONNECT_MAX_MS; i++) { + delay *= 2; + } + return (delay > WS_RECONNECT_MAX_MS) ? WS_RECONNECT_MAX_MS : delay; +} + +static void buffer_event(const char *event_json) { + if (g_ctx.buffer_count >= WS_EVENT_BUFFER_SIZE) { + uint8_t oldest = + (g_ctx.buffer_head + WS_EVENT_BUFFER_SIZE - g_ctx.buffer_count) % WS_EVENT_BUFFER_SIZE; + free(g_ctx.event_buffer[oldest].json); + g_ctx.event_buffer[oldest].json = NULL; + g_ctx.buffer_count--; + } + + size_t len = strlen(event_json); + char *copy = malloc(len + 1); + if (copy) { + memcpy(copy, event_json, len + 1); + g_ctx.event_buffer[g_ctx.buffer_head].json = copy; + g_ctx.event_buffer[g_ctx.buffer_head].len = len; + g_ctx.buffer_head = (g_ctx.buffer_head + 1) % WS_EVENT_BUFFER_SIZE; + g_ctx.buffer_count++; + } +} + +static void clear_event_buffer(void) { + for (int i = 0; i < WS_EVENT_BUFFER_SIZE; i++) { + if (g_ctx.event_buffer[i].json) { + free(g_ctx.event_buffer[i].json); + g_ctx.event_buffer[i].json = NULL; + } + } + g_ctx.buffer_head = 0; + g_ctx.buffer_count = 0; +} + +static int reconnect_relay(relay_connection_t *relay); +static void send_ping(relay_connection_t *relay); + +#ifdef ESP_PLATFORM +static void dispatch_frost_event(int kind, const char *event_str) { + if (kind == FROST_KIND_SIGN_REQUEST && g_ctx.callbacks.on_sign_request) { + frost_sign_request_t req; + if (frost_parse_sign_request(event_str, &g_ctx.current_group, g_ctx.privkey, &req) == 0) { + g_ctx.callbacks.on_sign_request(&req, g_ctx.callbacks.user_ctx); + frost_sign_request_free(&req); + } + } else if (kind == FROST_KIND_SIGN_RESPONSE && g_ctx.callbacks.on_sign_response) { + frost_sign_response_t resp; + if (frost_parse_sign_response(event_str, &g_ctx.current_group, g_ctx.privkey, &resp) == 0) { + g_ctx.callbacks.on_sign_response(&resp, g_ctx.callbacks.user_ctx); + } + } else if (kind == FROST_KIND_DKG_ROUND1 && g_ctx.callbacks.on_dkg_round1) { + frost_dkg_round1_t r1; + if (frost_parse_dkg_round1_event(event_str, &g_ctx.current_group, g_ctx.privkey, &r1) == + 0) { + g_ctx.callbacks.on_dkg_round1(&r1, g_ctx.callbacks.user_ctx); + } + } else if (kind == FROST_KIND_DKG_ROUND2 && g_ctx.callbacks.on_dkg_round2) { + frost_dkg_round2_t r2; + if (frost_parse_dkg_round2_event(event_str, &g_ctx.current_group, g_ctx.privkey, &r2) == + 0) { + g_ctx.callbacks.on_dkg_round2(&r2, g_ctx.callbacks.user_ctx); + } + } else if (kind == NIP46_KIND_NOSTR_CONNECT && g_ctx.callbacks.on_nip46_request) { + nip46_request_t nip46_req; + if (frost_parse_nip46_event(event_str, g_ctx.privkey, &nip46_req) == 0) { + g_ctx.callbacks.on_nip46_request(&nip46_req, g_ctx.callbacks.user_ctx); + frost_nip46_request_free(&nip46_req); + } + } +} + +static void handle_nostr_message(const char *msg) { + cJSON *arr = cJSON_Parse(msg); + if (!arr || !cJSON_IsArray(arr) || cJSON_GetArraySize(arr) < 1) { + cJSON_Delete(arr); + return; + } + + cJSON *type = cJSON_GetArrayItem(arr, 0); + if (!type || !cJSON_IsString(type) || strcmp(type->valuestring, "EVENT") != 0 || + cJSON_GetArraySize(arr) < 3) { + cJSON_Delete(arr); + return; + } + + cJSON *event = cJSON_GetArrayItem(arr, 2); + cJSON *kind = event ? cJSON_GetObjectItem(event, "kind") : NULL; + if (!event || !cJSON_IsObject(event) || !kind || !cJSON_IsNumber(kind)) { + cJSON_Delete(arr); + return; + } + + char *event_str = cJSON_PrintUnformatted(event); + if (event_str) { + dispatch_frost_event(kind->valueint, event_str); + free(event_str); + } + cJSON_Delete(arr); +} + +static void handle_ws_connected(relay_connection_t *relay) { + COORDINATOR_LOCK(); + ESP_LOGI(TAG, "Relay connected: %s", relay->url); + relay->state = COORDINATOR_STATE_CONNECTED; + relay->health.healthy = true; + relay->health.missed_pongs = 0; + relay->health.last_pong_received = coordinator_now_ms(); + relay->success_count++; + relay->reconnect.attempt_count = 0; + bool needs_resubscribe = relay->reconnect.had_subscription && g_ctx.has_subscription; + char sub_id[64]; + if (needs_resubscribe) { + strncpy(sub_id, g_ctx.current_subscription, sizeof(sub_id) - 1); + sub_id[sizeof(sub_id) - 1] = '\0'; + relay->reconnect.had_subscription = false; + } + COORDINATOR_UNLOCK(); + + if (needs_resubscribe) { + frost_coordinator_subscribe(sub_id); + } +} + +static void handle_ws_disconnected(relay_connection_t *relay) { + COORDINATOR_LOCK(); + ESP_LOGW(TAG, "Relay disconnected: %s", relay->url); + relay->reconnect.state_before_disconnect = relay->state; + relay->reconnect.had_subscription = g_ctx.has_subscription; + if (g_ctx.has_subscription) { + strncpy(relay->reconnect.subscription_id, g_ctx.current_subscription, 63); + relay->reconnect.subscription_id[63] = '\0'; + } + relay->state = COORDINATOR_STATE_RECONNECTING; + relay->health.healthy = false; + relay->fail_count++; + if (g_ctx.disconnect_time == 0) { + g_ctx.disconnect_time = coordinator_now_ms(); + } + COORDINATOR_UNLOCK(); +} + +static void handle_ws_data(relay_connection_t *relay, esp_websocket_event_data_t *data) { + COORDINATOR_LOCK(); + + if (data->op_code == 0x0A) { + relay->health.last_pong_received = coordinator_now_ms(); + relay->health.missed_pongs = 0; + relay->health.healthy = true; + COORDINATOR_UNLOCK(); + return; + } + + if (data->op_code != 0x01 || data->data_len == 0) { + COORDINATOR_UNLOCK(); + return; + } + + if (data->data_len > WS_MAX_EVENT_JSON_LEN) { + ESP_LOGW(TAG, "Message too large: %d bytes", data->data_len); + COORDINATOR_UNLOCK(); + return; + } + + char *msg = malloc(data->data_len + 1); + if (!msg) { + COORDINATOR_UNLOCK(); + return; + } + memcpy(msg, data->data_ptr, data->data_len); + msg[data->data_len] = '\0'; + + COORDINATOR_UNLOCK(); + handle_nostr_message(msg); + free(msg); +} + +static void handle_ws_error(relay_connection_t *relay) { + COORDINATOR_LOCK(); + ESP_LOGE(TAG, "Relay error: %s", relay->url); + relay->fail_count++; + relay->health.healthy = false; + if (relay->reconnect.attempt_count >= WS_RECONNECT_MAX_ATTEMPTS) { + relay->state = COORDINATOR_STATE_ERROR; + COORDINATOR_UNLOCK(); + return; + } + relay->reconnect.state_before_disconnect = relay->state; + relay->reconnect.had_subscription = g_ctx.has_subscription; + relay->state = COORDINATOR_STATE_RECONNECTING; + if (g_ctx.disconnect_time == 0) { + g_ctx.disconnect_time = coordinator_now_ms(); + } + COORDINATOR_UNLOCK(); +} + static void websocket_event_handler(void *handler_args, esp_event_base_t base, int32_t event_id, void *event_data) { - esp_websocket_event_data_t *data = (esp_websocket_event_data_t *)event_data; relay_connection_t *relay = (relay_connection_t *)handler_args; switch (event_id) { case WEBSOCKET_EVENT_CONNECTED: - ESP_LOGI(TAG, "Relay connected: %s", relay->url); - relay->state = COORDINATOR_STATE_CONNECTED; + handle_ws_connected(relay); break; - case WEBSOCKET_EVENT_DISCONNECTED: - ESP_LOGW(TAG, "Relay disconnected: %s", relay->url); - relay->state = COORDINATOR_STATE_IDLE; + handle_ws_disconnected(relay); break; - case WEBSOCKET_EVENT_DATA: - if (data->op_code == 0x01 && data->data_len > 0) { - char *msg = malloc(data->data_len + 1); - if (msg) { - memcpy(msg, data->data_ptr, data->data_len); - msg[data->data_len] = '\0'; - - cJSON *arr = cJSON_Parse(msg); - if (arr && cJSON_IsArray(arr) && cJSON_GetArraySize(arr) >= 1) { - cJSON *type = cJSON_GetArrayItem(arr, 0); - if (type && cJSON_IsString(type)) { - if (strcmp(type->valuestring, "EVENT") == 0 && - cJSON_GetArraySize(arr) >= 3) { - cJSON *event = cJSON_GetArrayItem(arr, 2); - if (event && cJSON_IsObject(event)) { - cJSON *kind = cJSON_GetObjectItem(event, "kind"); - if (kind && cJSON_IsNumber(kind)) { - char *event_str = cJSON_PrintUnformatted(event); - if (event_str) { - int k = kind->valueint; - if (k == FROST_KIND_SIGN_REQUEST && - g_ctx.callbacks.on_sign_request) { - frost_sign_request_t req; - if (frost_parse_sign_request( - event_str, &g_ctx.current_group, g_ctx.privkey, - &req) == 0) { - g_ctx.callbacks.on_sign_request( - &req, g_ctx.callbacks.user_ctx); - frost_sign_request_free(&req); - } - } else if (k == FROST_KIND_SIGN_RESPONSE && - g_ctx.callbacks.on_sign_response) { - frost_sign_response_t resp; - if (frost_parse_sign_response( - event_str, &g_ctx.current_group, g_ctx.privkey, - &resp) == 0) { - g_ctx.callbacks.on_sign_response( - &resp, g_ctx.callbacks.user_ctx); - } - } else if (k == FROST_KIND_DKG_ROUND1 && - g_ctx.callbacks.on_dkg_round1) { - frost_dkg_round1_t r1; - if (frost_parse_dkg_round1_event( - event_str, &g_ctx.current_group, g_ctx.privkey, - &r1) == 0) { - g_ctx.callbacks.on_dkg_round1( - &r1, g_ctx.callbacks.user_ctx); - } - } else if (k == FROST_KIND_DKG_ROUND2 && - g_ctx.callbacks.on_dkg_round2) { - frost_dkg_round2_t r2; - if (frost_parse_dkg_round2_event( - event_str, &g_ctx.current_group, g_ctx.privkey, - &r2) == 0) { - g_ctx.callbacks.on_dkg_round2( - &r2, g_ctx.callbacks.user_ctx); - } - } else if (k == NIP46_KIND_NOSTR_CONNECT && - g_ctx.callbacks.on_nip46_request) { - nip46_request_t nip46_req; - if (frost_parse_nip46_event(event_str, g_ctx.privkey, - &nip46_req) == 0) { - g_ctx.callbacks.on_nip46_request( - &nip46_req, g_ctx.callbacks.user_ctx); - frost_nip46_request_free(&nip46_req); - } - } - free(event_str); - } - } - } - } - } - cJSON_Delete(arr); - } else if (arr) { - cJSON_Delete(arr); - } - free(msg); - } - } + handle_ws_data(relay, (esp_websocket_event_data_t *)event_data); break; - case WEBSOCKET_EVENT_ERROR: - ESP_LOGE(TAG, "Relay error: %s", relay->url); - relay->state = COORDINATOR_STATE_ERROR; + handle_ws_error(relay); break; - default: break; } } + +static void send_ping(relay_connection_t *relay) { + if (relay->state != COORDINATOR_STATE_CONNECTED || !relay->ws_handle) + return; + + int ret = esp_websocket_client_send_with_opcode(relay->ws_handle, WS_TRANSPORT_OPCODES_PING, + NULL, 0, pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); + if (ret < 0) { + ESP_LOGW(TAG, "Ping send timeout: %s", relay->url); + } + relay->health.last_ping_sent = coordinator_now_ms(); +} + +static int reconnect_relay(relay_connection_t *relay) { + if (relay->ws_handle) { + esp_websocket_client_stop(relay->ws_handle); + esp_websocket_client_destroy(relay->ws_handle); + relay->ws_handle = NULL; + } + + relay->reconnect.attempt_count++; + relay->reconnect.next_retry_ms = calculate_backoff(relay->reconnect.attempt_count); + relay->reconnect.last_attempt_time = coordinator_now_ms(); + + ESP_LOGI(TAG, "Reconnecting to %s (attempt %d/%d, backoff %lums)", relay->url, + relay->reconnect.attempt_count, WS_RECONNECT_MAX_ATTEMPTS, + (unsigned long)relay->reconnect.next_retry_ms); + + esp_websocket_client_config_t ws_cfg = { + .uri = relay->url, + .buffer_size = 4096, + }; + + relay->ws_handle = esp_websocket_client_init(&ws_cfg); + if (!relay->ws_handle) { + ESP_LOGE(TAG, "Failed to init websocket for reconnect: %s", relay->url); + return -1; + } + + esp_websocket_register_events(relay->ws_handle, WEBSOCKET_EVENT_ANY, websocket_event_handler, + relay); + + esp_err_t err = esp_websocket_client_start(relay->ws_handle); + if (err != ESP_OK) { + ESP_LOGE(TAG, "Failed to start websocket for reconnect: %s", relay->url); + esp_websocket_client_destroy(relay->ws_handle); + relay->ws_handle = NULL; + return -1; + } + + relay->state = COORDINATOR_STATE_CONNECTING; + return 0; +} #endif int frost_coordinator_init(const uint8_t privkey[32]) { @@ -258,17 +459,29 @@ coordinator_state_t frost_coordinator_get_state(void) { return g_ctx.state; } +static bool validate_websocket_url(const char *url) { + if (!url || strlen(url) < 6) + return false; + if (strncmp(url, "wss://", 6) == 0 || strncmp(url, "ws://", 5) == 0) + return true; + return false; +} + int frost_coordinator_add_relay(const char *url) { if (!g_initialized || !url) return -1; if (g_ctx.relay_count >= COORDINATOR_MAX_RELAYS) return -2; + if (!validate_websocket_url(url)) { + ESP_LOGE(TAG, "Invalid WebSocket URL: %s", url); + return -3; + } relay_connection_t *relay = &g_ctx.relays[g_ctx.relay_count]; + memset(relay, 0, sizeof(*relay)); strncpy(relay->url, url, RELAY_URL_LEN - 1); relay->url[RELAY_URL_LEN - 1] = '\0'; relay->state = COORDINATOR_STATE_IDLE; - relay->ws_handle = NULL; g_ctx.relay_count++; ESP_LOGI(TAG, "Added relay: %s", url); @@ -331,9 +544,14 @@ int frost_coordinator_disconnect(void) { relay->ws_handle = NULL; } relay->state = COORDINATOR_STATE_IDLE; + memset(&relay->health, 0, sizeof(relay->health)); + memset(&relay->reconnect, 0, sizeof(relay->reconnect)); } #endif + clear_event_buffer(); + g_ctx.has_subscription = false; + g_ctx.disconnect_time = 0; g_ctx.state = COORDINATOR_STATE_IDLE; ESP_LOGI(TAG, "Disconnected from all relays"); return 0; @@ -350,9 +568,31 @@ int frost_coordinator_set_group(const frost_group_t *group) { return 0; } +static bool validate_subscription_id(const char *id) { + if (!id) + return false; + size_t len = strlen(id); + if (len == 0 || len > WS_MAX_SUBSCRIPTION_ID) + return false; + for (size_t i = 0; i < len; i++) { + char c = id[i]; + if (c == '"' || c == '\\' || c < 0x20 || c > 0x7e) + return false; + } + return true; +} + int frost_coordinator_subscribe(const char *subscription_id) { if (!g_initialized || !g_ctx.has_group) return -1; + if (!validate_subscription_id(subscription_id)) { + ESP_LOGE(TAG, "Invalid subscription ID"); + return -2; + } + + strncpy(g_ctx.current_subscription, subscription_id, sizeof(g_ctx.current_subscription) - 1); + g_ctx.current_subscription[sizeof(g_ctx.current_subscription) - 1] = '\0'; + g_ctx.has_subscription = true; char pubkey_hex[65]; bytes_to_hex(g_ctx.pubkey, 32, pubkey_hex, sizeof(pubkey_hex)); @@ -367,7 +607,8 @@ int frost_coordinator_subscribe(const char *subscription_id) { for (int i = 0; i < g_ctx.relay_count; i++) { relay_connection_t *relay = &g_ctx.relays[i]; if (relay->state == COORDINATOR_STATE_CONNECTED && relay->ws_handle) { - esp_websocket_client_send_text(relay->ws_handle, filter, strlen(filter), portMAX_DELAY); + esp_websocket_client_send_text(relay->ws_handle, filter, strlen(filter), + pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); ESP_LOGI(TAG, "Subscribed on %s", relay->url); } } @@ -380,6 +621,13 @@ int frost_coordinator_subscribe(const char *subscription_id) { int frost_coordinator_unsubscribe(const char *subscription_id) { if (!g_initialized) return -1; + if (!validate_subscription_id(subscription_id)) { + ESP_LOGE(TAG, "Invalid subscription ID"); + return -2; + } + + g_ctx.has_subscription = false; + memset(g_ctx.current_subscription, 0, sizeof(g_ctx.current_subscription)); char close_msg[128]; snprintf(close_msg, sizeof(close_msg), "[\"CLOSE\",\"%s\"]", subscription_id); @@ -389,7 +637,7 @@ int frost_coordinator_unsubscribe(const char *subscription_id) { relay_connection_t *relay = &g_ctx.relays[i]; if (relay->state == COORDINATOR_STATE_CONNECTED && relay->ws_handle) { esp_websocket_client_send_text(relay->ws_handle, close_msg, strlen(close_msg), - portMAX_DELAY); + pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); } } #endif @@ -401,7 +649,13 @@ static int publish_event(const char *event_json) { if (!g_initialized) return -1; - size_t msg_len = strlen(event_json) + 12; + size_t json_len = strlen(event_json); + if (json_len > WS_MAX_EVENT_JSON_LEN - 12) { + ESP_LOGE(TAG, "Event JSON too large: %zu bytes", json_len); + return -1; + } + + size_t msg_len = json_len + 12; char *msg = malloc(msg_len); if (!msg) return -1; @@ -409,18 +663,30 @@ static int publish_event(const char *event_json) { snprintf(msg, msg_len, "[\"EVENT\",%s]", event_json); int published = 0; + bool any_reconnecting = false; + #ifdef ESP_PLATFORM for (int i = 0; i < g_ctx.relay_count; i++) { relay_connection_t *relay = &g_ctx.relays[i]; if (relay->state == COORDINATOR_STATE_CONNECTED && relay->ws_handle) { - esp_websocket_client_send_text(relay->ws_handle, msg, strlen(msg), portMAX_DELAY); + esp_websocket_client_send_text(relay->ws_handle, msg, strlen(msg), + pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); published++; + } else if (relay->state == COORDINATOR_STATE_RECONNECTING) { + any_reconnecting = true; } } + + if (published == 0 && any_reconnecting) { + buffer_event(msg); + ESP_LOGI(TAG, "Buffered event during reconnection"); + } #endif free(msg); - ESP_LOGI(TAG, "Published to %d relays", published); + if (published > 0) { + ESP_LOGI(TAG, "Published to %d relays", published); + } return published; } @@ -491,17 +757,104 @@ int frost_coordinator_poll(int timeout_ms) { vTaskDelay(pdMS_TO_TICKS(timeout_ms > 0 ? timeout_ms : 10)); #endif + COORDINATOR_LOCK(); + + uint32_t now = coordinator_now_ms(); int connected = 0; + int reconnecting = 0; + for (int i = 0; i < g_ctx.relay_count; i++) { - if (g_ctx.relays[i].state == COORDINATOR_STATE_CONNECTED) { + relay_connection_t *relay = &g_ctx.relays[i]; + + if (relay->state == COORDINATOR_STATE_CONNECTED) { +#ifdef ESP_PLATFORM + if (now - relay->health.last_ping_sent >= WS_PING_INTERVAL_MS) { + send_ping(relay); + } + + if (now - relay->health.last_pong_received > WS_PONG_TIMEOUT_MS) { + relay->health.missed_pongs++; + relay->health.last_pong_received = now; + + if (relay->health.missed_pongs >= WS_MAX_MISSED_PONGS) { + ESP_LOGW(TAG, "Relay unhealthy (missed %d pongs): %s", + relay->health.missed_pongs, relay->url); + relay->health.healthy = false; + relay->reconnect.state_before_disconnect = relay->state; + relay->reconnect.had_subscription = g_ctx.has_subscription; + relay->state = COORDINATOR_STATE_RECONNECTING; + relay->fail_count++; + if (g_ctx.disconnect_time == 0) { + g_ctx.disconnect_time = now; + } + continue; + } + } +#endif connected++; + } else if (relay->state == COORDINATOR_STATE_RECONNECTING) { + reconnecting++; +#ifdef ESP_PLATFORM + if (relay->reconnect.attempt_count >= WS_RECONNECT_MAX_ATTEMPTS) { + ESP_LOGE(TAG, "Max reconnect attempts reached: %s", relay->url); + relay->state = COORDINATOR_STATE_ERROR; + continue; + } + + uint32_t elapsed = now - relay->reconnect.last_attempt_time; + if (elapsed >= relay->reconnect.next_retry_ms || relay->reconnect.attempt_count == 0) { + reconnect_relay(relay); + } +#endif } } - if (connected > 0 && g_ctx.state == COORDINATOR_STATE_CONNECTING) { - g_ctx.state = COORDINATOR_STATE_CONNECTED; + if (g_ctx.disconnect_time > 0 && connected == 0) { + uint32_t disconnect_elapsed = now - g_ctx.disconnect_time; + if (disconnect_elapsed > WS_SESSION_RECOVERY_MS) { + ESP_LOGE(TAG, "Session recovery timeout exceeded"); + g_ctx.state = COORDINATOR_STATE_ERROR; + clear_event_buffer(); + g_ctx.disconnect_time = 0; + COORDINATOR_UNLOCK(); + return -1; + } + } + + if (connected > 0) { + g_ctx.disconnect_time = 0; + +#ifdef ESP_PLATFORM + if (g_ctx.buffer_count > 0) { + ESP_LOGI(TAG, "Replaying %d buffered events", g_ctx.buffer_count); + uint8_t start = (g_ctx.buffer_head + WS_EVENT_BUFFER_SIZE - g_ctx.buffer_count) % + WS_EVENT_BUFFER_SIZE; + for (uint8_t j = 0; j < g_ctx.buffer_count; j++) { + uint8_t idx = (start + j) % WS_EVENT_BUFFER_SIZE; + if (g_ctx.event_buffer[idx].json) { + for (int k = 0; k < g_ctx.relay_count; k++) { + relay_connection_t *relay = &g_ctx.relays[k]; + if (relay->state == COORDINATOR_STATE_CONNECTED && relay->ws_handle) { + esp_websocket_client_send_text( + relay->ws_handle, g_ctx.event_buffer[idx].json, + g_ctx.event_buffer[idx].len, pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); + } + } + } + } + clear_event_buffer(); + } +#endif + + if (g_ctx.state == COORDINATOR_STATE_CONNECTING || + g_ctx.state == COORDINATOR_STATE_RECONNECTING) { + g_ctx.state = COORDINATOR_STATE_CONNECTED; + } + } else if (reconnecting > 0 && g_ctx.state != COORDINATOR_STATE_ERROR) { + g_ctx.state = COORDINATOR_STATE_RECONNECTING; } + COORDINATOR_UNLOCK(); return connected; } @@ -511,3 +864,47 @@ int frost_coordinator_get_pubkey(uint8_t pubkey[32]) { memcpy(pubkey, g_ctx.pubkey, 32); return 0; } + +int frost_coordinator_get_status(coordinator_status_t *status) { + if (!g_initialized || !status) + return -1; + + COORDINATOR_LOCK(); + memset(status, 0, sizeof(*status)); + status->state = g_ctx.state; + status->total_relays = g_ctx.relay_count; + status->session_active = g_ctx.has_subscription; + + for (int i = 0; i < g_ctx.relay_count; i++) { + relay_connection_t *relay = &g_ctx.relays[i]; + if (relay->state == COORDINATOR_STATE_CONNECTED) { + status->connected_relays++; + } + if (relay->state == COORDINATOR_STATE_RECONNECTING) { + status->reconnect_attempts += relay->reconnect.attempt_count; + } + status->relay_scores[i].relay_index = i; + status->relay_scores[i].success_count = relay->success_count; + status->relay_scores[i].fail_count = relay->fail_count; + } + + COORDINATOR_UNLOCK(); + return 0; +} + +bool frost_coordinator_is_healthy(void) { + if (!g_initialized) + return false; + + COORDINATOR_LOCK(); + int healthy_count = 0; + for (int i = 0; i < g_ctx.relay_count; i++) { + if (g_ctx.relays[i].state == COORDINATOR_STATE_CONNECTED && + g_ctx.relays[i].health.healthy) { + healthy_count++; + } + } + COORDINATOR_UNLOCK(); + + return healthy_count > 0; +} diff --git a/main/frost_coordinator.h b/main/frost_coordinator.h index e1462c0..aff16d0 100644 --- a/main/frost_coordinator.h +++ b/main/frost_coordinator.h @@ -12,15 +12,59 @@ #define COORDINATOR_MAX_GROUPS 4 #define COORDINATOR_TIMEOUT_MS 30000 +#define WS_PING_INTERVAL_MS 30000 +#define WS_PONG_TIMEOUT_MS 10000 +#define WS_MAX_MISSED_PONGS 3 +#define WS_RECONNECT_BASE_MS 1000 +#define WS_RECONNECT_MAX_MS 30000 +#define WS_RECONNECT_MAX_ATTEMPTS 5 +#define WS_SESSION_RECOVERY_MS 60000 +#define WS_EVENT_BUFFER_SIZE 8 +#define WS_SEND_TIMEOUT_MS 5000 +#define WS_MAX_EVENT_JSON_LEN 65536 +#define WS_MAX_SUBSCRIPTION_ID 63 + typedef enum { COORDINATOR_STATE_IDLE, COORDINATOR_STATE_CONNECTING, COORDINATOR_STATE_CONNECTED, COORDINATOR_STATE_SUBSCRIBING, COORDINATOR_STATE_ACTIVE, + COORDINATOR_STATE_RECONNECTING, COORDINATOR_STATE_ERROR } coordinator_state_t; +typedef struct { + uint32_t last_ping_sent; + uint32_t last_pong_received; + uint8_t missed_pongs; + bool healthy; +} ws_health_t; + +typedef struct { + uint8_t attempt_count; + uint32_t next_retry_ms; + uint32_t last_attempt_time; + coordinator_state_t state_before_disconnect; + char subscription_id[64]; + bool had_subscription; +} ws_reconnect_t; + +typedef struct { + uint8_t relay_index; + uint32_t success_count; + uint32_t fail_count; +} relay_health_score_t; + +typedef struct { + coordinator_state_t state; + uint8_t connected_relays; + uint8_t total_relays; + uint8_t reconnect_attempts; + bool session_active; + relay_health_score_t relay_scores[COORDINATOR_MAX_RELAYS]; +} coordinator_status_t; + typedef void (*frost_sign_request_cb)(const frost_sign_request_t *request, void *ctx); typedef void (*frost_sign_response_cb)(const frost_sign_response_t *response, void *ctx); typedef void (*frost_dkg_round1_cb)(const frost_dkg_round1_t *round1, void *ctx); @@ -61,4 +105,7 @@ int frost_coordinator_poll(int timeout_ms); int frost_coordinator_get_pubkey(uint8_t pubkey[32]); +int frost_coordinator_get_status(coordinator_status_t *status); +bool frost_coordinator_is_healthy(void); + #endif From 9807d287e09ceb891956466d3d2e2ac733741721 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Fri, 23 Jan 2026 09:55:23 -0400 Subject: [PATCH 2/5] refactor: simplify WebSocket coordinator helpers --- main/frost_coordinator.c | 141 ++++++++++++++++++++------------------- 1 file changed, 74 insertions(+), 67 deletions(-) diff --git a/main/frost_coordinator.c b/main/frost_coordinator.c index da27e17..83f8904 100644 --- a/main/frost_coordinator.c +++ b/main/frost_coordinator.c @@ -89,11 +89,8 @@ static uint32_t coordinator_now_ms(void) { } static uint32_t calculate_backoff(uint8_t attempt) { - uint32_t delay = WS_RECONNECT_BASE_MS; - for (uint8_t i = 0; i < attempt && delay < WS_RECONNECT_MAX_MS; i++) { - delay *= 2; - } - return (delay > WS_RECONNECT_MAX_MS) ? WS_RECONNECT_MAX_MS : delay; + uint32_t delay = WS_RECONNECT_BASE_MS << attempt; + return delay > WS_RECONNECT_MAX_MS ? WS_RECONNECT_MAX_MS : delay; } static void buffer_event(const char *event_json) { @@ -127,9 +124,38 @@ static void clear_event_buffer(void) { g_ctx.buffer_count = 0; } +#ifdef ESP_PLATFORM +static void replay_buffered_events(void) { + ESP_LOGI(TAG, "Replaying %d buffered events", g_ctx.buffer_count); + uint8_t start = + (g_ctx.buffer_head + WS_EVENT_BUFFER_SIZE - g_ctx.buffer_count) % WS_EVENT_BUFFER_SIZE; + + for (uint8_t j = 0; j < g_ctx.buffer_count; j++) { + uint8_t idx = (start + j) % WS_EVENT_BUFFER_SIZE; + if (!g_ctx.event_buffer[idx].json) + continue; + + for (int k = 0; k < g_ctx.relay_count; k++) { + relay_connection_t *relay = &g_ctx.relays[k]; + if (relay->state == COORDINATOR_STATE_CONNECTED && relay->ws_handle) { + esp_websocket_client_send_text(relay->ws_handle, g_ctx.event_buffer[idx].json, + g_ctx.event_buffer[idx].len, + pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); + } + } + } + clear_event_buffer(); +} +#endif + static int reconnect_relay(relay_connection_t *relay); static void send_ping(relay_connection_t *relay); +static void copy_subscription_id(char *dest, size_t dest_size, const char *src) { + strncpy(dest, src, dest_size - 1); + dest[dest_size - 1] = '\0'; +} + #ifdef ESP_PLATFORM static void dispatch_frost_event(int kind, const char *event_str) { if (kind == FROST_KIND_SIGN_REQUEST && g_ctx.callbacks.on_sign_request) { @@ -194,6 +220,9 @@ static void handle_nostr_message(const char *msg) { } static void handle_ws_connected(relay_connection_t *relay) { + char sub_id[64] = {0}; + bool needs_resubscribe = false; + COORDINATOR_LOCK(); ESP_LOGI(TAG, "Relay connected: %s", relay->url); relay->state = COORDINATOR_STATE_CONNECTED; @@ -202,11 +231,10 @@ static void handle_ws_connected(relay_connection_t *relay) { relay->health.last_pong_received = coordinator_now_ms(); relay->success_count++; relay->reconnect.attempt_count = 0; - bool needs_resubscribe = relay->reconnect.had_subscription && g_ctx.has_subscription; - char sub_id[64]; - if (needs_resubscribe) { - strncpy(sub_id, g_ctx.current_subscription, sizeof(sub_id) - 1); - sub_id[sizeof(sub_id) - 1] = '\0'; + + if (relay->reconnect.had_subscription && g_ctx.has_subscription) { + needs_resubscribe = true; + copy_subscription_id(sub_id, sizeof(sub_id), g_ctx.current_subscription); relay->reconnect.had_subscription = false; } COORDINATOR_UNLOCK(); @@ -216,14 +244,12 @@ static void handle_ws_connected(relay_connection_t *relay) { } } -static void handle_ws_disconnected(relay_connection_t *relay) { - COORDINATOR_LOCK(); - ESP_LOGW(TAG, "Relay disconnected: %s", relay->url); +static void save_reconnect_state(relay_connection_t *relay) { relay->reconnect.state_before_disconnect = relay->state; relay->reconnect.had_subscription = g_ctx.has_subscription; if (g_ctx.has_subscription) { - strncpy(relay->reconnect.subscription_id, g_ctx.current_subscription, 63); - relay->reconnect.subscription_id[63] = '\0'; + copy_subscription_id(relay->reconnect.subscription_id, + sizeof(relay->reconnect.subscription_id), g_ctx.current_subscription); } relay->state = COORDINATOR_STATE_RECONNECTING; relay->health.healthy = false; @@ -231,13 +257,22 @@ static void handle_ws_disconnected(relay_connection_t *relay) { if (g_ctx.disconnect_time == 0) { g_ctx.disconnect_time = coordinator_now_ms(); } +} + +static void handle_ws_disconnected(relay_connection_t *relay) { + COORDINATOR_LOCK(); + ESP_LOGW(TAG, "Relay disconnected: %s", relay->url); + save_reconnect_state(relay); COORDINATOR_UNLOCK(); } static void handle_ws_data(relay_connection_t *relay, esp_websocket_event_data_t *data) { + const uint8_t WS_OPCODE_PONG = 0x0A; + const uint8_t WS_OPCODE_TEXT = 0x01; + COORDINATOR_LOCK(); - if (data->op_code == 0x0A) { + if (data->op_code == WS_OPCODE_PONG) { relay->health.last_pong_received = coordinator_now_ms(); relay->health.missed_pongs = 0; relay->health.healthy = true; @@ -245,7 +280,7 @@ static void handle_ws_data(relay_connection_t *relay, esp_websocket_event_data_t return; } - if (data->op_code != 0x01 || data->data_len == 0) { + if (data->op_code != WS_OPCODE_TEXT || data->data_len == 0) { COORDINATOR_UNLOCK(); return; } @@ -272,19 +307,16 @@ static void handle_ws_data(relay_connection_t *relay, esp_websocket_event_data_t static void handle_ws_error(relay_connection_t *relay) { COORDINATOR_LOCK(); ESP_LOGE(TAG, "Relay error: %s", relay->url); - relay->fail_count++; - relay->health.healthy = false; + if (relay->reconnect.attempt_count >= WS_RECONNECT_MAX_ATTEMPTS) { + relay->fail_count++; + relay->health.healthy = false; relay->state = COORDINATOR_STATE_ERROR; COORDINATOR_UNLOCK(); return; } - relay->reconnect.state_before_disconnect = relay->state; - relay->reconnect.had_subscription = g_ctx.has_subscription; - relay->state = COORDINATOR_STATE_RECONNECTING; - if (g_ctx.disconnect_time == 0) { - g_ctx.disconnect_time = coordinator_now_ms(); - } + + save_reconnect_state(relay); COORDINATOR_UNLOCK(); } @@ -460,11 +492,9 @@ coordinator_state_t frost_coordinator_get_state(void) { } static bool validate_websocket_url(const char *url) { - if (!url || strlen(url) < 6) + if (!url) return false; - if (strncmp(url, "wss://", 6) == 0 || strncmp(url, "ws://", 5) == 0) - return true; - return false; + return strncmp(url, "wss://", 6) == 0 || strncmp(url, "ws://", 5) == 0; } int frost_coordinator_add_relay(const char *url) { @@ -569,14 +599,14 @@ int frost_coordinator_set_group(const frost_group_t *group) { } static bool validate_subscription_id(const char *id) { - if (!id) + if (!id || *id == '\0') return false; - size_t len = strlen(id); - if (len == 0 || len > WS_MAX_SUBSCRIPTION_ID) - return false; - for (size_t i = 0; i < len; i++) { - char c = id[i]; - if (c == '"' || c == '\\' || c < 0x20 || c > 0x7e) + + size_t len = 0; + for (const char *p = id; *p; p++, len++) { + if (len > WS_MAX_SUBSCRIPTION_ID) + return false; + if (*p == '"' || *p == '\\' || *p < 0x20 || *p > 0x7e) return false; } return true; @@ -590,8 +620,8 @@ int frost_coordinator_subscribe(const char *subscription_id) { return -2; } - strncpy(g_ctx.current_subscription, subscription_id, sizeof(g_ctx.current_subscription) - 1); - g_ctx.current_subscription[sizeof(g_ctx.current_subscription) - 1] = '\0'; + copy_subscription_id(g_ctx.current_subscription, sizeof(g_ctx.current_subscription), + subscription_id); g_ctx.has_subscription = true; char pubkey_hex[65]; @@ -779,14 +809,7 @@ int frost_coordinator_poll(int timeout_ms) { if (relay->health.missed_pongs >= WS_MAX_MISSED_PONGS) { ESP_LOGW(TAG, "Relay unhealthy (missed %d pongs): %s", relay->health.missed_pongs, relay->url); - relay->health.healthy = false; - relay->reconnect.state_before_disconnect = relay->state; - relay->reconnect.had_subscription = g_ctx.has_subscription; - relay->state = COORDINATOR_STATE_RECONNECTING; - relay->fail_count++; - if (g_ctx.disconnect_time == 0) { - g_ctx.disconnect_time = now; - } + save_reconnect_state(relay); continue; } } @@ -826,23 +849,7 @@ int frost_coordinator_poll(int timeout_ms) { #ifdef ESP_PLATFORM if (g_ctx.buffer_count > 0) { - ESP_LOGI(TAG, "Replaying %d buffered events", g_ctx.buffer_count); - uint8_t start = (g_ctx.buffer_head + WS_EVENT_BUFFER_SIZE - g_ctx.buffer_count) % - WS_EVENT_BUFFER_SIZE; - for (uint8_t j = 0; j < g_ctx.buffer_count; j++) { - uint8_t idx = (start + j) % WS_EVENT_BUFFER_SIZE; - if (g_ctx.event_buffer[idx].json) { - for (int k = 0; k < g_ctx.relay_count; k++) { - relay_connection_t *relay = &g_ctx.relays[k]; - if (relay->state == COORDINATOR_STATE_CONNECTED && relay->ws_handle) { - esp_websocket_client_send_text( - relay->ws_handle, g_ctx.event_buffer[idx].json, - g_ctx.event_buffer[idx].len, pdMS_TO_TICKS(WS_SEND_TIMEOUT_MS)); - } - } - } - } - clear_event_buffer(); + replay_buffered_events(); } #endif @@ -897,14 +904,14 @@ bool frost_coordinator_is_healthy(void) { return false; COORDINATOR_LOCK(); - int healthy_count = 0; - for (int i = 0; i < g_ctx.relay_count; i++) { + bool healthy = false; + for (int i = 0; i < g_ctx.relay_count && !healthy; i++) { if (g_ctx.relays[i].state == COORDINATOR_STATE_CONNECTED && g_ctx.relays[i].health.healthy) { - healthy_count++; + healthy = true; } } COORDINATOR_UNLOCK(); - return healthy_count > 0; + return healthy; } From 130519901f44de923be1d0180e28e18444130dc4 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Fri, 23 Jan 2026 10:22:46 -0400 Subject: [PATCH 3/5] fix: prevent silent truncation of URLs and subscription IDs --- main/frost_coordinator.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/main/frost_coordinator.c b/main/frost_coordinator.c index 83f8904..db17c68 100644 --- a/main/frost_coordinator.c +++ b/main/frost_coordinator.c @@ -506,11 +506,14 @@ int frost_coordinator_add_relay(const char *url) { ESP_LOGE(TAG, "Invalid WebSocket URL: %s", url); return -3; } + if (strlen(url) >= RELAY_URL_LEN) { + ESP_LOGE(TAG, "Relay URL too long: %s", url); + return -4; + } relay_connection_t *relay = &g_ctx.relays[g_ctx.relay_count]; memset(relay, 0, sizeof(*relay)); - strncpy(relay->url, url, RELAY_URL_LEN - 1); - relay->url[RELAY_URL_LEN - 1] = '\0'; + memcpy(relay->url, url, strlen(url) + 1); relay->state = COORDINATOR_STATE_IDLE; g_ctx.relay_count++; @@ -604,7 +607,7 @@ static bool validate_subscription_id(const char *id) { size_t len = 0; for (const char *p = id; *p; p++, len++) { - if (len > WS_MAX_SUBSCRIPTION_ID) + if (len >= WS_MAX_SUBSCRIPTION_ID) return false; if (*p == '"' || *p == '\\' || *p < 0x20 || *p > 0x7e) return false; From 33e4b820a119f7d8604fc18e0d69fd3da6e3b9c4 Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Fri, 23 Jan 2026 11:29:23 -0400 Subject: [PATCH 4/5] fix: add TLS certificate bundle for WebSocket connections --- main/frost_coordinator.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/main/frost_coordinator.c b/main/frost_coordinator.c index db17c68..281df3b 100644 --- a/main/frost_coordinator.c +++ b/main/frost_coordinator.c @@ -14,6 +14,7 @@ #ifdef ESP_PLATFORM #include "esp_log.h" #include "esp_websocket_client.h" +#include "esp_crt_bundle.h" #include #include #else @@ -372,6 +373,7 @@ static int reconnect_relay(relay_connection_t *relay) { esp_websocket_client_config_t ws_cfg = { .uri = relay->url, .buffer_size = 4096, + .crt_bundle_attach = esp_crt_bundle_attach, }; relay->ws_handle = esp_websocket_client_init(&ws_cfg); @@ -534,6 +536,7 @@ int frost_coordinator_connect(void) { esp_websocket_client_config_t ws_cfg = { .uri = relay->url, .buffer_size = 4096, + .crt_bundle_attach = esp_crt_bundle_attach, }; relay->ws_handle = esp_websocket_client_init(&ws_cfg); From 759b04b38ab8ca16e2c113504c80f9a0cc296a8b Mon Sep 17 00:00:00 2001 From: "William K. Santiago" Date: Fri, 23 Jan 2026 11:48:45 -0400 Subject: [PATCH 5/5] fix: add mutex protection for event buffer operations --- main/frost_coordinator.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) diff --git a/main/frost_coordinator.c b/main/frost_coordinator.c index 281df3b..fe8431e 100644 --- a/main/frost_coordinator.c +++ b/main/frost_coordinator.c @@ -95,6 +95,8 @@ static uint32_t calculate_backoff(uint8_t attempt) { } static void buffer_event(const char *event_json) { + COORDINATOR_LOCK(); + if (g_ctx.buffer_count >= WS_EVENT_BUFFER_SIZE) { uint8_t oldest = (g_ctx.buffer_head + WS_EVENT_BUFFER_SIZE - g_ctx.buffer_count) % WS_EVENT_BUFFER_SIZE; @@ -112,9 +114,11 @@ static void buffer_event(const char *event_json) { g_ctx.buffer_head = (g_ctx.buffer_head + 1) % WS_EVENT_BUFFER_SIZE; g_ctx.buffer_count++; } + + COORDINATOR_UNLOCK(); } -static void clear_event_buffer(void) { +static void clear_event_buffer_unlocked(void) { for (int i = 0; i < WS_EVENT_BUFFER_SIZE; i++) { if (g_ctx.event_buffer[i].json) { free(g_ctx.event_buffer[i].json); @@ -125,6 +129,12 @@ static void clear_event_buffer(void) { g_ctx.buffer_count = 0; } +static void clear_event_buffer(void) { + COORDINATOR_LOCK(); + clear_event_buffer_unlocked(); + COORDINATOR_UNLOCK(); +} + #ifdef ESP_PLATFORM static void replay_buffered_events(void) { ESP_LOGI(TAG, "Replaying %d buffered events", g_ctx.buffer_count); @@ -145,7 +155,7 @@ static void replay_buffered_events(void) { } } } - clear_event_buffer(); + clear_event_buffer_unlocked(); } #endif @@ -309,15 +319,25 @@ static void handle_ws_error(relay_connection_t *relay) { COORDINATOR_LOCK(); ESP_LOGE(TAG, "Relay error: %s", relay->url); + relay->fail_count++; + relay->health.healthy = false; + if (relay->reconnect.attempt_count >= WS_RECONNECT_MAX_ATTEMPTS) { - relay->fail_count++; - relay->health.healthy = false; relay->state = COORDINATOR_STATE_ERROR; COORDINATOR_UNLOCK(); return; } - save_reconnect_state(relay); + relay->reconnect.state_before_disconnect = relay->state; + relay->reconnect.had_subscription = g_ctx.has_subscription; + if (g_ctx.has_subscription) { + copy_subscription_id(relay->reconnect.subscription_id, + sizeof(relay->reconnect.subscription_id), g_ctx.current_subscription); + } + relay->state = COORDINATOR_STATE_RECONNECTING; + if (g_ctx.disconnect_time == 0) { + g_ctx.disconnect_time = coordinator_now_ms(); + } COORDINATOR_UNLOCK(); } @@ -843,7 +863,7 @@ int frost_coordinator_poll(int timeout_ms) { if (disconnect_elapsed > WS_SESSION_RECOVERY_MS) { ESP_LOGE(TAG, "Session recovery timeout exceeded"); g_ctx.state = COORDINATOR_STATE_ERROR; - clear_event_buffer(); + clear_event_buffer_unlocked(); g_ctx.disconnect_time = 0; COORDINATOR_UNLOCK(); return -1;