Introduce a searcher config option to timeout get requests. (#5467)

* Introduce a searcher config option to timeout get requests. This is done at a storage instead of the s3 client as the s3 client only had a global operation timeout parameter. Here the timeout is proportional to the length (in byte) of the slice being downloaded. By default, if the searcher config does not define a timeout explicitly, this PR is no-op. Closes #5466 * Apply suggestions from code review Co-authored-by: trinity-1686a <trinity@quickwit.io> --------- Co-authored-by: trinity-1686a <trinity@quickwit.io>
quickwit-oss · Oct 9, 2024 · cc7a97f · cc7a97f
1 parent 184286b
commit cc7a97f
Show file tree

Hide file tree

Showing 10 changed files with 389 additions and 7 deletions.
diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.json b/quickwit/quickwit-config/resources/tests/node_config/quickwit.json
@@ -64,7 +64,12 @@
         "fast_field_cache_capacity": "10G",
         "split_footer_cache_capacity": "1G",
         "max_num_concurrent_split_streams": 120,
-        "max_num_concurrent_split_searches": 150
+        "max_num_concurrent_split_searches": 150,
+        "storage_timeout_policy": {
+            "min_throughtput_bytes_per_secs": 100000,
+            "timeout_millis": 2000,
+            "max_num_retries": 2
+        }
     },
     "jaeger": {
         "enable_endpoint": true,

diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.toml
@@ -56,6 +56,11 @@ split_footer_cache_capacity = "1G"
 max_num_concurrent_split_streams = 120
 max_num_concurrent_split_searches = 150
 
+[searcher.storage_timeout_policy]
+min_throughtput_bytes_per_secs = 100000
+timeout_millis = 2000
+max_num_retries = 2
+
 [jaeger]
 enable_endpoint = true
 lookback_period_hours = 24

diff --git a/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml b/quickwit/quickwit-config/resources/tests/node_config/quickwit.yaml
@@ -59,6 +59,10 @@ searcher:
   split_footer_cache_capacity: 1G
   max_num_concurrent_split_streams: 120
   max_num_concurrent_split_searches: 150
+  storage_timeout_policy:
+    min_throughtput_bytes_per_secs: 100000
+    timeout_millis: 2000
+    max_num_retries: 2
 
 jaeger:
   enable_endpoint: true

diff --git a/quickwit/quickwit-config/src/lib.rs b/quickwit/quickwit-config/src/lib.rs
@@ -75,7 +75,7 @@ pub use crate::metastore_config::{
 };
 pub use crate::node_config::{
     IndexerConfig, IngestApiConfig, JaegerConfig, NodeConfig, SearcherConfig, SplitCacheLimits,
-    DEFAULT_QW_CONFIG_PATH,
+    StorageTimeoutPolicy, DEFAULT_QW_CONFIG_PATH,
 };
 use crate::source_config::serialize::{SourceConfigV0_7, SourceConfigV0_8, VersionedSourceConfig};
 pub use crate::storage_config::{

diff --git a/quickwit/quickwit-config/src/node_config/mod.rs b/quickwit/quickwit-config/src/node_config/mod.rs
@@ -223,6 +223,42 @@ pub struct SearcherConfig {
     pub split_cache: Option<SplitCacheLimits>,
     #[serde(default = "SearcherConfig::default_request_timeout_secs")]
     request_timeout_secs: NonZeroU64,
+    #[serde(default)]
+    #[serde(skip_serializing_if = "Option::is_none")]
+    pub storage_timeout_policy: Option<StorageTimeoutPolicy>,
+}
+
+/// Configuration controlling how fast a searcher should timeout a `get_slice`
+/// request to retry it.
+///
+/// [Amazon's best practise](https://docs.aws.amazon.com/whitepapers/latest/s3-optimizing-performance-best-practices/timeouts-and-retries-for-latency-sensitive-applications.html)
+/// suggests that to ensure low latency, it is best to:
+/// - retry small GET request after 2s
+/// - retry large GET request when the throughput is below some percentile.
+///
+/// This policy is inspired by this guidance. It does not track instanteneous throughput, but
+/// computes an overall timeout using the following formula:
+/// `timeout_offset + num_bytes_get_request / min_throughtput`
+#[derive(Clone, Debug, Eq, PartialEq, Serialize, Deserialize)]
+pub struct StorageTimeoutPolicy {
+    pub min_throughtput_bytes_per_secs: u64,
+    pub timeout_millis: u64,
+    // Disclaimer: this is a number of retry, so the overall max number of
+    // attempts is `max_num_retries + 1``.
+    pub max_num_retries: usize,
+}
+
+impl StorageTimeoutPolicy {
+    pub fn compute_timeout(&self, num_bytes: usize) -> impl Iterator<Item = Duration> {
+        let min_download_time_secs: f64 = if self.min_throughtput_bytes_per_secs == 0 {
+            0.0f64
+        } else {
+            num_bytes as f64 / self.min_throughtput_bytes_per_secs as f64
+        };
+        let timeout = Duration::from_millis(self.timeout_millis)
+            + Duration::from_secs_f64(min_download_time_secs);
+        std::iter::repeat(timeout).take(self.max_num_retries + 1)
+    }
 }
 
 impl Default for SearcherConfig {
@@ -237,6 +273,7 @@ impl Default for SearcherConfig {
             aggregation_bucket_limit: 65000,
             split_cache: None,
             request_timeout_secs: Self::default_request_timeout_secs(),
+            storage_timeout_policy: None,
         }
     }
 }

diff --git a/quickwit/quickwit-config/src/node_config/serialize.rs b/quickwit/quickwit-config/src/node_config/serialize.rs
@@ -612,6 +612,11 @@ mod tests {
                 max_num_concurrent_split_streams: 120,
                 split_cache: None,
                 request_timeout_secs: NonZeroU64::new(30).unwrap(),
+                storage_timeout_policy: Some(crate::StorageTimeoutPolicy {
+                    min_throughtput_bytes_per_secs: 100_000,
+                    timeout_millis: 2_000,
+                    max_num_retries: 2
+                })
             }
         );
         assert_eq!(

diff --git a/quickwit/quickwit-search/src/leaf.rs b/quickwit/quickwit-search/src/leaf.rs
@@ -36,7 +36,7 @@ use quickwit_query::query_ast::{BoolQuery, QueryAst, QueryAstTransformer, RangeQ
 use quickwit_query::tokenizers::TokenizerManager;
 use quickwit_storage::{
     wrap_storage_with_cache, BundleStorage, MemorySizedCache, OwnedBytes, SplitCache, Storage,
-    StorageResolver,
+    StorageResolver, TimeoutAndRetryStorage,
 };
 use tantivy::aggregation::agg_req::{AggregationVariants, Aggregations};
 use tantivy::aggregation::AggregationLimitsGuard;
@@ -135,13 +135,34 @@ pub(crate) async fn open_index_with_caches(
     tokenizer_manager: Option<&TokenizerManager>,
     ephemeral_unbounded_cache: bool,
 ) -> anyhow::Result<Index> {
-    let (hotcache_bytes, bundle_storage) =
-        open_split_bundle(searcher_context, index_storage, split_and_footer_offsets).await?;
+    // Let's add a storage proxy to retry `get_slice` requests if they are taking too long,
+    // if configured in the searcher config.
+    //
+    // The goal here is too ensure a low latency.
+
+    let index_storage_with_retry_on_timeout = if let Some(storage_timeout_policy) =
+        &searcher_context.searcher_config.storage_timeout_policy
+    {
+        Arc::new(TimeoutAndRetryStorage::new(
+            index_storage,
+            storage_timeout_policy.clone(),
+        ))
+    } else {
+        index_storage
+    };
+
+    let (hotcache_bytes, bundle_storage) = open_split_bundle(
+        searcher_context,
+        index_storage_with_retry_on_timeout,
+        split_and_footer_offsets,
+    )
+    .await?;
 
     let bundle_storage_with_cache = wrap_storage_with_cache(
         searcher_context.fast_fields_cache.clone(),
         Arc::new(bundle_storage),
     );
+
     let directory = StorageDirectory::new(bundle_storage_with_cache);
 
     let hot_directory = if ephemeral_unbounded_cache {

diff --git a/quickwit/quickwit-storage/src/lib.rs b/quickwit/quickwit-storage/src/lib.rs
@@ -35,6 +35,7 @@ mod debouncer;
 mod file_descriptor_cache;
 mod metrics;
 mod storage;
+mod timeout_and_retry_storage;
 pub use debouncer::AsyncDebouncer;
 pub(crate) use debouncer::DebouncedStorage;
 
@@ -92,6 +93,7 @@ pub use self::test_suite::{
     storage_test_multi_part_upload, storage_test_single_part_upload, storage_test_suite,
     test_write_and_bulk_delete,
 };
+pub use self::timeout_and_retry_storage::TimeoutAndRetryStorage;
 pub use crate::error::{
     BulkDeleteError, DeleteFailure, StorageError, StorageErrorKind, StorageResolverError,
     StorageResult,

diff --git a/quickwit/quickwit-storage/src/metrics.rs b/quickwit/quickwit-storage/src/metrics.rs
@@ -21,7 +21,7 @@
 
 use once_cell::sync::Lazy;
 use quickwit_common::metrics::{
-    new_counter, new_counter_with_labels, new_gauge, IntCounter, IntGauge,
+    new_counter, new_counter_vec, new_counter_with_labels, new_gauge, IntCounter, IntGauge,
 };
 
 /// Counters associated to storage operations.
@@ -32,6 +32,8 @@ pub struct StorageMetrics {
     pub fast_field_cache: CacheMetrics,
     pub split_footer_cache: CacheMetrics,
     pub searcher_split_cache: CacheMetrics,
+    pub get_slice_timeout_successes: [IntCounter; 3],
+    pub get_slice_timeout_all_timeouts: IntCounter,
     pub object_storage_get_total: IntCounter,
     pub object_storage_put_total: IntCounter,
     pub object_storage_put_parts: IntCounter,
@@ -41,14 +43,30 @@ pub struct StorageMetrics {
 
 impl Default for StorageMetrics {
     fn default() -> Self {
+        let get_slice_timeout_outcome_total_vec = new_counter_vec(
+            "get_slice_timeout_outcome",
+            "Outcome of get_slice operations. success_after_1_timeout means the operation \
+             succeeded after a retry caused by a timeout.",
+            "storage",
+            &[],
+            ["outcome"],
+        );
+        let get_slice_timeout_successes = [
+            get_slice_timeout_outcome_total_vec.with_label_values(["success_after_0_timeout"]),
+            get_slice_timeout_outcome_total_vec.with_label_values(["success_after_1_timeout"]),
+            get_slice_timeout_outcome_total_vec.with_label_values(["success_after_2+_timeout"]),
+        ];
+        let get_slice_timeout_all_timeouts =
+            get_slice_timeout_outcome_total_vec.with_label_values(["all_timeouts"]);
         StorageMetrics {
             fast_field_cache: CacheMetrics::for_component("fastfields"),
             fd_cache_metrics: CacheMetrics::for_component("fd"),
             partial_request_cache: CacheMetrics::for_component("partial_request"),
             searcher_split_cache: CacheMetrics::for_component("searcher_split"),
             shortlived_cache: CacheMetrics::for_component("shortlived"),
             split_footer_cache: CacheMetrics::for_component("splitfooter"),
-
+            get_slice_timeout_successes,
+            get_slice_timeout_all_timeouts,
             object_storage_get_total: new_counter(
                 "object_storage_gets_total",
                 "Number of objects fetched.",