Skip to content

Commit b4e423d

Browse files
committed
Add unit of measurement to base_delay and max_delay. Expand the retry_reading test
1 parent 6902ab0 commit b4e423d

File tree

2 files changed

+39
-25
lines changed

2 files changed

+39
-25
lines changed

tests/serialize/runstate/dynamodb_state_store_test.py

Lines changed: 36 additions & 22 deletions
Original file line numberDiff line numberDiff line change
@@ -8,7 +8,9 @@
88
from moto.dynamodb2.responses import dynamo_json_dump
99

1010
from testifycompat import assert_equal
11+
from testifycompat.assertions import assert_in
1112
from tron.serialize.runstate.dynamodb_state_store import DynamoDBStateStore
13+
from tron.serialize.runstate.dynamodb_state_store import MAX_UNPROCESSED_KEYS_RETRIES
1214

1315

1416
def mock_transact_write_items(self):
@@ -294,7 +296,8 @@ def test_delete_item_with_json_partitions(self, store, small_object, large_objec
294296
vals = store.restore([key])
295297
assert key not in vals
296298

297-
def test_retry_saving(self, store, small_object, large_object):
299+
@mock.patch("time.sleep", return_value=None)
300+
def test_retry_saving(self, mock_sleep, store, small_object, large_object):
298301
with mock.patch(
299302
"moto.dynamodb2.responses.DynamoHandler.transact_write_items",
300303
side_effect=KeyError("foo"),
@@ -307,45 +310,56 @@ def test_retry_saving(self, store, small_object, large_object):
307310
except Exception:
308311
assert_equal(mock_failed_write.call_count, 3)
309312

310-
def test_retry_reading(self, store, small_object, large_object):
313+
@mock.patch("time.sleep")
314+
@mock.patch("random.uniform")
315+
def test_retry_reading(self, mock_random_uniform, mock_sleep, store, small_object, large_object):
311316
unprocessed_value = {
312-
"Responses": {
313-
store.name: [
314-
{
315-
"index": {"N": "0"},
316-
"key": {"S": "job_state 0"},
317-
},
318-
],
319-
},
317+
"Responses": {},
320318
"UnprocessedKeys": {
321319
store.name: {
322-
"ConsistentRead": True,
323320
"Keys": [
324321
{
325-
"index": {"N": "0"},
326322
"key": {"S": "job_state 0"},
323+
"index": {"N": "0"},
327324
}
328325
],
329-
},
326+
"ConsistentRead": True,
327+
}
330328
},
331-
"ResponseMetadata": {},
332329
}
333330
keys = [store.build_key("job_state", i) for i in range(1)]
334331
value = small_object
335-
pairs = zip(keys, (value for i in range(len(keys))))
332+
pairs = zip(keys, [value] * len(keys))
336333
store.save(pairs)
334+
store._consume_save_queue()
335+
336+
# Mock random.uniform to return the upper limit of the range so that we are simulating max jitter
337+
def side_effect_random_uniform(a, b):
338+
return b
339+
340+
mock_random_uniform.side_effect = side_effect_random_uniform
341+
337342
with mock.patch.object(
338343
store.client,
339344
"batch_get_item",
340345
return_value=unprocessed_value,
341346
) as mock_failed_read:
342-
try:
343-
with mock.patch("tron.config.static_config.load_yaml_file", autospec=True), mock.patch(
344-
"tron.config.static_config.build_configuration_watcher", autospec=True
345-
):
346-
store.restore(keys)
347-
except Exception:
348-
assert_equal(mock_failed_read.call_count, 10)
347+
with pytest.raises(Exception) as exec_info, mock.patch(
348+
"tron.config.static_config.load_yaml_file", autospec=True
349+
), mock.patch("tron.config.static_config.build_configuration_watcher", autospec=True):
350+
store.restore(keys)
351+
assert_in("failed to retrieve items with keys", str(exec_info.value))
352+
assert_equal(mock_failed_read.call_count, MAX_UNPROCESSED_KEYS_RETRIES)
353+
354+
# We also need to verify that sleep was called with expected delays
355+
expected_delays = []
356+
base_delay_seconds = 0.5
357+
max_delay_seconds = 10
358+
for attempt in range(1, MAX_UNPROCESSED_KEYS_RETRIES + 1):
359+
expected_delay = min(base_delay_seconds * (2 ** (attempt - 1)), max_delay_seconds)
360+
expected_delays.append(expected_delay)
361+
actual_delays = [call.args[0] for call in mock_sleep.call_args_list]
362+
assert_equal(actual_delays, expected_delays)
349363

350364
def test_restore_exception_propagation(self, store, small_object):
351365
# This test is to ensure that restore propagates exceptions upwards: see DAR-2328

tron/serialize/runstate/dynamodb_state_store.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -107,8 +107,8 @@ def _get_items(self, table_keys: list) -> object:
107107
# let's avoid potentially mutating our input :)
108108
cand_keys_list = copy.copy(table_keys)
109109
attempts = 0
110-
base_delay = 0.5
111-
max_delay = 10
110+
base_delay_seconds = 0.5
111+
max_delay_seconds = 10
112112

113113
while len(cand_keys_list) != 0 and attempts < MAX_UNPROCESSED_KEYS_RETRIES:
114114
with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
@@ -141,7 +141,7 @@ def _get_items(self, table_keys: list) -> object:
141141
if cand_keys_list:
142142
attempts += 1
143143
# Exponential backoff for retrying unprocessed keys
144-
exponential_delay = min(base_delay * (2 ** (attempts - 1)), max_delay)
144+
exponential_delay = min(base_delay_seconds * (2 ** (attempts - 1)), max_delay_seconds)
145145
# Full jitter (i.e. from 0 to exponential_delay) will help minimize the number and length of calls
146146
jitter = random.uniform(0, exponential_delay)
147147
delay = jitter

0 commit comments

Comments
 (0)