Skip to content

Commit 9b970f7

Browse files
authored
Allow last event to be garbage collected in ConcurrentExecution (#550)
[ML-8799](https://iguazio.atlassian.net/browse/ML-8799)
1 parent dc7f8c1 commit 9b970f7

File tree

2 files changed

+49
-1
lines changed

2 files changed

+49
-1
lines changed

storey/flow.py

Lines changed: 4 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -840,9 +840,12 @@ def _init(self):
840840
self._lazy_init_complete = False
841841

842842
async def _worker(self):
843-
event = None
844843
try:
845844
while True:
845+
# Allow event to be garbage collected
846+
job = None # noqa
847+
event = None
848+
completed = None # noqa
846849
try:
847850
# If we don't handle the event before we remove it from the queue, the effective max_in_flight will
848851
# be 1 higher than requested. Hence, we peek.

tests/test_flow.py

Lines changed: 45 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -71,6 +71,7 @@
7171
build_flow,
7272
)
7373
from storey.flow import (
74+
ConcurrentExecution,
7475
Context,
7576
ParallelExecution,
7677
ParallelExecutionRunnable,
@@ -360,6 +361,50 @@ def test_async_offset_commit_before_termination_with_nosqltarget():
360361
asyncio.run(async_offset_commit_before_termination_with_nosqltarget())
361362

362363

364+
async def async_offset_commit_before_termination_with_concurrent_execution():
365+
platform = Committer()
366+
context = CommitterContext(platform)
367+
368+
max_wait_before_commit = 1
369+
370+
controller = build_flow(
371+
[
372+
AsyncEmitSource(context=context, explicit_ack=True, max_wait_before_commit=max_wait_before_commit),
373+
ConcurrentExecution(event_processor=lambda x: x + 1),
374+
Filter(lambda x: x < 3),
375+
FlatMap(lambda x: [x, x * 10]),
376+
Reduce(0, lambda acc, x: acc + x),
377+
]
378+
).run()
379+
380+
num_shards = 10
381+
num_records_per_shard = 10
382+
383+
for offset in range(1, num_records_per_shard + 1):
384+
for shard in range(num_shards):
385+
event = Event(shard)
386+
event.shard_id = shard
387+
event.offset = offset
388+
await controller.emit(event)
389+
390+
del event
391+
392+
await asyncio.sleep(max_wait_before_commit + 1)
393+
394+
try:
395+
offsets = copy.copy(platform.offsets)
396+
assert offsets == {("/", i): num_records_per_shard for i in range(num_shards)}
397+
finally:
398+
await controller.terminate()
399+
termination_result = await controller.await_termination()
400+
assert termination_result == 330
401+
402+
403+
# ML-8799
404+
def test_async_offset_commit_before_termination_with_concurrent_execution():
405+
asyncio.run(async_offset_commit_before_termination_with_concurrent_execution())
406+
407+
363408
def test_offset_not_committed_prematurely():
364409
platform = Committer()
365410
context = CommitterContext(platform)

0 commit comments

Comments (0)