-
Notifications
You must be signed in to change notification settings - Fork 217
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Mark partition as busy when a new batch is sent to it #281
base: master
Are you sure you want to change the base?
Changes from 7 commits
174b162
2f7e3cd
d4aa8db
0b646e4
dacd2b8
f7d2cd0
64b61de
58ff5f4
d2ef60e
a848c73
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,5 +1,5 @@ | ||
from frontera.core.messagebus import BaseMessageBus, BaseSpiderLogStream, BaseStreamConsumer, \ | ||
BaseScoringLogStream, BaseSpiderFeedStream | ||
BaseScoringLogStream, BaseSpiderFeedStream, BaseStreamProducer | ||
|
||
|
||
class Consumer(BaseStreamConsumer): | ||
|
@@ -27,7 +27,7 @@ def get_offset(self, partition_id): | |
return self.offset | ||
|
||
|
||
class Producer(object): | ||
class Producer(BaseStreamProducer): | ||
|
||
def __init__(self): | ||
self.messages = [] | ||
|
@@ -70,23 +70,28 @@ def consumer(self, partition_id, type): | |
class SpiderFeedStream(BaseSpiderFeedStream): | ||
|
||
def __init__(self, messagebus): | ||
self.ready_partitions = set(messagebus.spider_feed_partitions) | ||
self._producer = Producer() | ||
self.max_next_requests = messagebus.max_next_requests | ||
self.partitions_offset = {} | ||
for partition_id in messagebus.spider_feed_partitions: | ||
self.partitions_offset[partition_id] = 0 | ||
|
||
def producer(self): | ||
return Producer() | ||
return self._producer | ||
|
||
def consumer(self, partition_id): | ||
return Consumer() | ||
|
||
def available_partitions(self): | ||
return self.ready_partitions | ||
|
||
def mark_ready(self, partition_id): | ||
self.ready_partitions.add(partition_id) | ||
|
||
def mark_busy(self, partition_id): | ||
self.ready_partitions.discard(partition_id) | ||
|
||
partitions = [] | ||
for partition_id, last_offset in self.partitions_offset.items(): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Here, if a partition doesn't exist (yet) in this dict, it will not be returned as available, which is wrong. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Line 86 does create the keys for each partition. In the worst case, a new partition will first send an… There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Ok! |
||
lag = self._producer.get_offset(partition_id) - last_offset | ||
if lag < self.max_next_requests or last_offset == 0: | ||
partitions.append(partition_id) | ||
return partitions | ||
|
||
def set_spider_offset(self, partition_id, offset): | ||
self.partitions_offset[partition_id] = offset | ||
|
||
class FakeMessageBus(BaseMessageBus): | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This variable isn't defined.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Ha, good catch. Fixed.