From 6f582fe21883ee8b59df19b46c45548b9aac5674 Mon Sep 17 00:00:00 2001 From: Filip Lazovic Date: Wed, 6 Sep 2023 16:29:12 +0200 Subject: [PATCH 1/5] [OCS-3013] Add retry limit for socket timeout --- lib/resty/cassandra/cluster.lua | 5 ++++- lib/resty/cassandra/policies/retry/init.lua | 1 + lib/resty/cassandra/policies/retry/simple.lua | 13 ++++++++++++- 3 files changed, 17 insertions(+), 2 deletions(-) diff --git a/lib/resty/cassandra/cluster.lua b/lib/resty/cassandra/cluster.lua index 32c1297..fbe8feb 100644 --- a/lib/resty/cassandra/cluster.lua +++ b/lib/resty/cassandra/cluster.lua @@ -920,7 +920,10 @@ local function handle_error(self, err, cql_code, coordinator, request) end elseif err == 'timeout' then if self.retry_on_timeout then - return self:send_retry(request, 'timeout') + local should_retry = self.retry_policy:on_socket_timeout(request) + if should_retry then + return self:send_retry(request, 'timeout') + end end else -- host seems down? diff --git a/lib/resty/cassandra/policies/retry/init.lua b/lib/resty/cassandra/policies/retry/init.lua index c143868..45bb2b5 100644 --- a/lib/resty/cassandra/policies/retry/init.lua +++ b/lib/resty/cassandra/policies/retry/init.lua @@ -6,6 +6,7 @@ function _M.new_policy(name) on_unavailable = function() error('on_unavailable() not implemented') end, on_read_timeout = function() error('on_read_timeout() not implemented') end, on_write_timeout = function() error('on_write_timeout() not implemented') end, + on_socket_timeout = function() error('on_socket_timeout() not implemented') end, } retry_mt.__index = retry_mt diff --git a/lib/resty/cassandra/policies/retry/simple.lua b/lib/resty/cassandra/policies/retry/simple.lua index 185afa8..2e2e740 100644 --- a/lib/resty/cassandra/policies/retry/simple.lua +++ b/lib/resty/cassandra/policies/retry/simple.lua @@ -24,14 +24,21 @@ local type = type -- -- @param[type=number] max_retries Maximum number of retries for a query -- before aborting and reporting the error. +-- @param[type=number] max_retries_socket_timeout Maximum number of retries when connection timeout happens +-- before aborting and reporting the error. -- @treturn table `policy`: A simple retry policy. -function _M.new(max_retries) +function _M.new(max_retries, max_retries_socket_timeout) if type(max_retries) ~= 'number' or max_retries < 1 then error('arg #1 max_retries must be a positive integer', 2) end + if max_retries_socket_timeout and type(max_retries_socket_timeout) ~= 'number' or max_retries < 1 then + error('arg #2 socket_timeout_max_retries must be a positive integer', 2) + end + local self = _M.super.new() self.max_retries = max_retries + self.max_retries_socket_timeout = max_retries_socket_timeout return self end @@ -47,4 +54,8 @@ function _M:on_write_timeout(request) return request.retries < self.max_retries end +function _M:on_socket_timeout(request) + return not self.max_retries_socket_timeout or request.retries < self.max_retries_socket_timeout +end + return _M From 43130760d93046c0de50b0de56312d73c5a063fd Mon Sep 17 00:00:00 2001 From: Filip Lazovic Date: Wed, 6 Sep 2023 17:14:00 +0200 Subject: [PATCH 2/5] rename from socket to connect --- lib/resty/cassandra/cluster.lua | 2 +- lib/resty/cassandra/policies/retry/init.lua | 2 +- lib/resty/cassandra/policies/retry/simple.lua | 12 ++++++------ 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/lib/resty/cassandra/cluster.lua b/lib/resty/cassandra/cluster.lua index fbe8feb..d889ca7 100644 --- a/lib/resty/cassandra/cluster.lua +++ b/lib/resty/cassandra/cluster.lua @@ -920,7 +920,7 @@ local function handle_error(self, err, cql_code, coordinator, request) end elseif err == 'timeout' then if self.retry_on_timeout then - local should_retry = self.retry_policy:on_socket_timeout(request) + local should_retry = self.retry_policy:on_connect_timeout(request) if should_retry then return self:send_retry(request, 'timeout') end diff --git a/lib/resty/cassandra/policies/retry/init.lua b/lib/resty/cassandra/policies/retry/init.lua index 45bb2b5..522f816 100644 --- a/lib/resty/cassandra/policies/retry/init.lua +++ b/lib/resty/cassandra/policies/retry/init.lua @@ -6,7 +6,7 @@ function _M.new_policy(name) on_unavailable = function() error('on_unavailable() not implemented') end, on_read_timeout = function() error('on_read_timeout() not implemented') end, on_write_timeout = function() error('on_write_timeout() not implemented') end, - on_socket_timeout = function() error('on_socket_timeout() not implemented') end, + on_connect_timeout = function() error('on_connect_timeout() not implemented') end, } retry_mt.__index = retry_mt diff --git a/lib/resty/cassandra/policies/retry/simple.lua b/lib/resty/cassandra/policies/retry/simple.lua index 2e2e740..305c6d7 100644 --- a/lib/resty/cassandra/policies/retry/simple.lua +++ b/lib/resty/cassandra/policies/retry/simple.lua @@ -24,21 +24,21 @@ local type = type -- -- @param[type=number] max_retries Maximum number of retries for a query -- before aborting and reporting the error. --- @param[type=number] max_retries_socket_timeout Maximum number of retries when connection timeout happens +-- @param[type=number] max_retries_timeout_connect Maximum number of retries when connection timeout happens -- before aborting and reporting the error. -- @treturn table `policy`: A simple retry policy. -function _M.new(max_retries, max_retries_socket_timeout) +function _M.new(max_retries, max_retries_timeout_connect) if type(max_retries) ~= 'number' or max_retries < 1 then error('arg #1 max_retries must be a positive integer', 2) end - if max_retries_socket_timeout and type(max_retries_socket_timeout) ~= 'number' or max_retries < 1 then + if max_retries_timeout_connect and type(max_retries_timeout_connect) ~= 'number' or max_retries < 1 then error('arg #2 socket_timeout_max_retries must be a positive integer', 2) end local self = _M.super.new() self.max_retries = max_retries - self.max_retries_socket_timeout = max_retries_socket_timeout + self.max_retries_timeout_connect = max_retries_timeout_connect return self end @@ -54,8 +54,8 @@ function _M:on_write_timeout(request) return request.retries < self.max_retries end -function _M:on_socket_timeout(request) - return not self.max_retries_socket_timeout or request.retries < self.max_retries_socket_timeout +function _M:on_connect_timeout(request) + return not self.max_retries_timeout_connect or request.retries < self.max_retries_timeout_connect end return _M From ccd23a3744afe53b1bac22971146f9a32af46eb7 Mon Sep 17 00:00:00 2001 From: Filip Lazovic Date: Thu, 7 Sep 2023 18:24:48 +0200 Subject: [PATCH 3/5] use max_retries for connect_timeout --- lib/resty/cassandra/policies/retry/simple.lua | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) diff --git a/lib/resty/cassandra/policies/retry/simple.lua b/lib/resty/cassandra/policies/retry/simple.lua index 305c6d7..57c671a 100644 --- a/lib/resty/cassandra/policies/retry/simple.lua +++ b/lib/resty/cassandra/policies/retry/simple.lua @@ -24,21 +24,14 @@ local type = type -- -- @param[type=number] max_retries Maximum number of retries for a query -- before aborting and reporting the error. --- @param[type=number] max_retries_timeout_connect Maximum number of retries when connection timeout happens --- before aborting and reporting the error. -- @treturn table `policy`: A simple retry policy. -function _M.new(max_retries, max_retries_timeout_connect) +function _M.new(max_retries) if type(max_retries) ~= 'number' or max_retries < 1 then error('arg #1 max_retries must be a positive integer', 2) end - if max_retries_timeout_connect and type(max_retries_timeout_connect) ~= 'number' or max_retries < 1 then - error('arg #2 socket_timeout_max_retries must be a positive integer', 2) - end - local self = _M.super.new() self.max_retries = max_retries - self.max_retries_timeout_connect = max_retries_timeout_connect return self end @@ -55,7 +48,7 @@ function _M:on_write_timeout(request) end function _M:on_connect_timeout(request) - return not self.max_retries_timeout_connect or request.retries < self.max_retries_timeout_connect + return request.retries < self.max_retries end return _M From 7746be512a62679fc9f4aa64af8df9245185425c Mon Sep 17 00:00:00 2001 From: Filip Lazovic Date: Fri, 8 Sep 2023 18:27:38 +0200 Subject: [PATCH 4/5] add logs for tests --- lib/resty/cassandra/cluster.lua | 3 +++ 1 file changed, 3 insertions(+) diff --git a/lib/resty/cassandra/cluster.lua b/lib/resty/cassandra/cluster.lua index d889ca7..79e18e6 100644 --- a/lib/resty/cassandra/cluster.lua +++ b/lib/resty/cassandra/cluster.lua @@ -922,7 +922,10 @@ local function handle_error(self, err, cql_code, coordinator, request) if self.retry_on_timeout then local should_retry = self.retry_policy:on_connect_timeout(request) if should_retry then + log(ERR, _log_prefix, 'sending retry request, retry count - ', request.retries) return self:send_retry(request, 'timeout') + else + log(ERR, _log_prefix, 'Retries stopped', request.retries) end end else From 42132f5c0cbe832abe9b16129384234098b35015 Mon Sep 17 00:00:00 2001 From: Filip Lazovic Date: Tue, 12 Sep 2023 13:04:33 +0200 Subject: [PATCH 5/5] remove logs --- lib/resty/cassandra/cluster.lua | 3 --- 1 file changed, 3 deletions(-) diff --git a/lib/resty/cassandra/cluster.lua b/lib/resty/cassandra/cluster.lua index 9febf63..e11ac7a 100644 --- a/lib/resty/cassandra/cluster.lua +++ b/lib/resty/cassandra/cluster.lua @@ -956,10 +956,7 @@ local function handle_error(self, err, cql_code, coordinator, request) if self.retry_on_timeout then local should_retry = self.retry_policy:on_connect_timeout(request) if should_retry then - log(ERR, _log_prefix, 'sending retry request, retry count - ', request.retries) return self:send_retry(request, 'timeout') - else - log(ERR, _log_prefix, 'Retries stopped', request.retries) end end else