From 93e9d1b91eeca14846c8f3a25288b8bd91607f7d Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 15 Oct 2024 09:55:00 +0200 Subject: [PATCH 1/5] Treat failure as pending, log error --- relay-server/src/services/project_cache.rs | 38 +++++++++++++++++----- 1 file changed, 29 insertions(+), 9 deletions(-) diff --git a/relay-server/src/services/project_cache.rs b/relay-server/src/services/project_cache.rs index 815d265226..fe2432ca4d 100644 --- a/relay-server/src/services/project_cache.rs +++ b/relay-server/src/services/project_cache.rs @@ -1,4 +1,5 @@ use std::collections::{BTreeMap, BTreeSet}; +use std::convert::Infallible; use std::error::Error; use std::sync::Arc; use std::time::Duration; @@ -491,12 +492,11 @@ impl ProjectSource { project_key: ProjectKey, no_cache: bool, cached_state: ProjectFetchState, - ) -> Result { + ) -> Result { let state_opt = self .local_source .send(FetchOptionalProjectState { project_key }) - .await - .map_err(|_| ())?; + .await?; if let Some(state) = state_opt { return Ok(ProjectFetchState::new(state)); @@ -514,12 +514,11 @@ impl ProjectSource { if let Some(redis_source) = self.redis_source { let current_revision = current_revision.clone(); - let redis_permit = self.redis_semaphore.acquire().await.map_err(|_| ())?; + let redis_permit = self.redis_semaphore.acquire().await?; let state_fetch_result = tokio::task::spawn_blocking(move || { redis_source.get_config_if_changed(project_key, current_revision.as_deref()) }) - .await - .map_err(|_| ())?; + .await?; drop(redis_permit); match state_fetch_result { @@ -551,8 +550,7 @@ impl ProjectSource { current_revision, no_cache, }) - .await - .map_err(|_| ())?; + .await?; match state { UpstreamProjectState::New(state) => Ok(ProjectFetchState::new(state.sanitized())), @@ -561,6 +559,22 @@ impl ProjectSource { } } +#[derive(Debug, thiserror::Error)] +enum ProjectSourceError { + #[error("redis permit error {0}")] + RedisPermit(#[from] tokio::sync::AcquireError), + #[error("redis join error {0}")] + RedisJoin(#[from] tokio::task::JoinError), + #[error("upstream error {0}")] + Upstream(#[from] relay_system::SendError), +} + +impl From for ProjectSourceError { + fn from(value: Infallible) -> Self { + match value {} + } +} + /// Updates the cache with new project state information. struct UpdateProjectState { /// The public key to fetch the project by. @@ -775,7 +789,13 @@ impl ProjectCacheBroker { let state = source .fetch(project_key, no_cache, cached_state) .await - .unwrap_or_else(|()| ProjectFetchState::disabled()); + .unwrap_or_else(|e| { + relay_log::error!( + error = &e as &dyn Error, + "Failed to fetch project from source" + ); + ProjectFetchState::pending() + }); let message = UpdateProjectState { project_key, From 5b43d749f6f3e102c72964772c552d17478c494c Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 15 Oct 2024 10:20:17 +0200 Subject: [PATCH 2/5] extend test --- tests/integration/test_query.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index a934319c2c..48556e9896 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -154,17 +154,12 @@ def get_project_config(): mini_sentry.clear_test_failures() -def test_query_retry_maxed_out(mini_sentry, relay_with_processing, events_consumer): +def test_query_retry_maxed_out(mini_sentry, relay): """ Assert that a query is not retried an infinite amount of times. - - This is not specific to processing or store, but here we have the outcomes - consumer which we can use to assert that an event has been dropped. """ request_count = 0 - events_consumer = events_consumer() - original_get_project_config = mini_sentry.app.view_functions["get_project_config"] @mini_sentry.app.endpoint("get_project_config") @@ -184,9 +179,7 @@ def get_project_config(): for retry in range(RETRIES): # 1 retry query_timeout += 1 * 1.5 ** (retry + 1) - relay = relay_with_processing( - {"limits": {"query_timeout": math.ceil(query_timeout)}} - ) + relay = relay(mini_sentry, {"limits": {"query_timeout": math.ceil(query_timeout)}}) # No error messages yet assert mini_sentry.test_failures.empty() @@ -199,6 +192,12 @@ def get_project_config(): assert {str(e) for _, e in mini_sentry.current_test_failures()} == { "Relay sent us event: error fetching project states: upstream request returned error 500 Internal Server Error: no error details", } + + time.sleep(1) # Wait for project to be cached + + # Relay still accepts events for this project + next_response = relay.send_event(42) + assert "id" in next_response finally: mini_sentry.clear_test_failures() From 8a1e6da4ed867100867eeb83dd6082592830a987 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 15 Oct 2024 10:36:32 +0200 Subject: [PATCH 3/5] doc: changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index bc96f5f8af..b483a6098d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ - Report invalid spans with appropriate outcome reason. ([#4051](https://github.com/getsentry/relay/pull/4051)) - Use the duration reported by the profiler instead of the transaction. ([#4058](https://github.com/getsentry/relay/pull/4058)) - Incorrect pattern matches involving adjacent any and wildcard matchers. ([#4072](https://github.com/getsentry/relay/pull/4072)) +- Accept incoming requests even if there was an error fetching their project config. ([4140](https://github.com/getsentry/relay/pull/4140)) **Features:** From 688d23ea694a2625aa0e5511b7501d8844ca12c1 Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 15 Oct 2024 10:46:01 +0200 Subject: [PATCH 4/5] doc: typo --- CHANGELOG.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b483a6098d..9b75fe89cc 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,7 @@ - Report invalid spans with appropriate outcome reason. ([#4051](https://github.com/getsentry/relay/pull/4051)) - Use the duration reported by the profiler instead of the transaction. ([#4058](https://github.com/getsentry/relay/pull/4058)) - Incorrect pattern matches involving adjacent any and wildcard matchers. ([#4072](https://github.com/getsentry/relay/pull/4072)) -- Accept incoming requests even if there was an error fetching their project config. ([4140](https://github.com/getsentry/relay/pull/4140)) +- Accept incoming requests even if there was an error fetching their project config. ([#4140](https://github.com/getsentry/relay/pull/4140)) **Features:** From d3c7e130e720a769ce4755c543f582f1bf3e46cc Mon Sep 17 00:00:00 2001 From: Joris Bayer Date: Tue, 15 Oct 2024 13:09:25 +0200 Subject: [PATCH 5/5] revert to just logging --- CHANGELOG.md | 1 - relay-server/src/services/projects/cache.rs | 5 ++++- tests/integration/test_query.py | 17 +++++++++-------- 3 files changed, 13 insertions(+), 10 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 9b75fe89cc..bc96f5f8af 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,7 +11,6 @@ - Report invalid spans with appropriate outcome reason. ([#4051](https://github.com/getsentry/relay/pull/4051)) - Use the duration reported by the profiler instead of the transaction. ([#4058](https://github.com/getsentry/relay/pull/4058)) - Incorrect pattern matches involving adjacent any and wildcard matchers. ([#4072](https://github.com/getsentry/relay/pull/4072)) -- Accept incoming requests even if there was an error fetching their project config. ([#4140](https://github.com/getsentry/relay/pull/4140)) **Features:** diff --git a/relay-server/src/services/projects/cache.rs b/relay-server/src/services/projects/cache.rs index b49d62996d..6bab22f493 100644 --- a/relay-server/src/services/projects/cache.rs +++ b/relay-server/src/services/projects/cache.rs @@ -794,9 +794,12 @@ impl ProjectCacheBroker { .unwrap_or_else(|e| { relay_log::error!( error = &e as &dyn Error, + tags.project_key = %project_key, "Failed to fetch project from source" ); - ProjectFetchState::pending() + // TODO: change this to ProjectFetchState::pending() once we consider it safe to do so. + // see https://github.com/getsentry/relay/pull/4140. + ProjectFetchState::disabled() }); let message = UpdateProjectState { diff --git a/tests/integration/test_query.py b/tests/integration/test_query.py index 48556e9896..a934319c2c 100644 --- a/tests/integration/test_query.py +++ b/tests/integration/test_query.py @@ -154,12 +154,17 @@ def get_project_config(): mini_sentry.clear_test_failures() -def test_query_retry_maxed_out(mini_sentry, relay): +def test_query_retry_maxed_out(mini_sentry, relay_with_processing, events_consumer): """ Assert that a query is not retried an infinite amount of times. + + This is not specific to processing or store, but here we have the outcomes + consumer which we can use to assert that an event has been dropped. """ request_count = 0 + events_consumer = events_consumer() + original_get_project_config = mini_sentry.app.view_functions["get_project_config"] @mini_sentry.app.endpoint("get_project_config") @@ -179,7 +184,9 @@ def get_project_config(): for retry in range(RETRIES): # 1 retry query_timeout += 1 * 1.5 ** (retry + 1) - relay = relay(mini_sentry, {"limits": {"query_timeout": math.ceil(query_timeout)}}) + relay = relay_with_processing( + {"limits": {"query_timeout": math.ceil(query_timeout)}} + ) # No error messages yet assert mini_sentry.test_failures.empty() @@ -192,12 +199,6 @@ def get_project_config(): assert {str(e) for _, e in mini_sentry.current_test_failures()} == { "Relay sent us event: error fetching project states: upstream request returned error 500 Internal Server Error: no error details", } - - time.sleep(1) # Wait for project to be cached - - # Relay still accepts events for this project - next_response = relay.send_event(42) - assert "id" in next_response finally: mini_sentry.clear_test_failures()