Skip to content

Commit 66f9d62

Browse files
authored
Simplified partial preemption algorithm. (#730)
- Simplified the partial preemption algorithm for groups with multiple sequences. - Removed the split into separate single-sequence and multiple-sequence code paths.
1 parent 97a05e1 commit 66f9d62

File tree

3 files changed

+12
-66
lines changed

3 files changed

+12
-66
lines changed

src/cpp/src/block_manager.hpp

Lines changed: 5 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -251,52 +251,17 @@ class BlockManager {
251251
return m_block_table[seq_id];
252252
}
253253

254-
const size_t free_rightest_blocks(SequenceGroup::Ptr sequence_group) {
255-
size_t blocks_released = 0;
254+
const size_t free_group_partially(SequenceGroup::Ptr sequence_group, size_t num_required_blocks) {
255+
size_t blocks_num = std::ceil(num_required_blocks / sequence_group->get_not_finished_sequences().size());
256256
auto running_sequences = sequence_group->get_not_finished_sequences();
257257
std::set<size_t> blocks_released_indices;
258258
for (size_t idx = 0; idx < running_sequences.size(); ++idx) {
259259
auto seq_id = running_sequences[idx]->get_id();
260260
OPENVINO_ASSERT(m_block_table.count(seq_id) > 0, "Invalid sequence group.");
261261
auto block_table = m_block_table[seq_id];
262-
if (free_last_block(seq_id)) {
263-
blocks_released++;
264-
}
265-
}
266-
return blocks_released;
267-
}
268-
269-
const bool free_group_partially_multiple_runnning_sequence(SequenceGroup::Ptr sequence_group, size_t num_required_blocks, size_t& phisical_blocks_released, size_t& logical_blocks_released) {
270-
phisical_blocks_released = 0;
271-
logical_blocks_released = 0;
272-
while (num_required_blocks > phisical_blocks_released) {
273-
size_t released_count = free_rightest_blocks(sequence_group);
274-
logical_blocks_released += 1;
275-
if (get_number_of_blocks_occupied_by_sequence(sequence_group) == 0) {
276-
break;
277-
}
278-
phisical_blocks_released += released_count;
262+
free_sequence_partially(seq_id, blocks_num);
279263
}
280-
return num_required_blocks <= phisical_blocks_released;
281-
}
282-
283-
const bool free_group_partially_single_runnning_sequence(SequenceGroup::Ptr sequence_group, size_t num_required_blocks, size_t& phisical_blocks_released) {
284-
auto sequences = sequence_group->get_not_finished_sequences();
285-
OPENVINO_ASSERT(sequences.size() == 1);
286-
auto running_sequence = sequences[0];
287-
auto seq_id = running_sequence->get_id();
288-
if (!has_block_table(seq_id)) {
289-
// no blocks are allocated for this sequence, so it can't be preempted
290-
return false;
291-
}
292-
auto block_table = get_block_table(seq_id);
293-
auto prev_blocks_count = num_free_blocks();
294-
free_sequence_partially_single_runnning_sequence(seq_id, num_required_blocks);
295-
296-
// calculate the number of released blocks
297-
phisical_blocks_released = num_free_blocks() - prev_blocks_count;
298-
299-
return num_required_blocks <= phisical_blocks_released;
264+
return blocks_num;
300265
}
301266

302267
const size_t get_number_of_blocks_occupied_by_sequence(SequenceGroup::Ptr sequence_group) {
@@ -399,15 +364,13 @@ class BlockManager {
399364
return block_table[block_idx]->is_free();
400365
}
401366

402-
void free_sequence_partially_single_runnning_sequence(size_t seq_id, size_t block_num) {
403-
// this method is applicable only for groups with single sequences
367+
void free_sequence_partially(size_t seq_id, size_t block_num) {
404368

405369
auto block_table = m_block_table[seq_id];
406370
OPENVINO_ASSERT(block_table.size() >= block_num);
407371
for (size_t idx = 0; idx < block_num; idx++) {
408372
size_t block_idx = m_block_table[seq_id].size() - idx - 1;
409373
m_allocator.free(block_table[block_idx]);
410-
OPENVINO_ASSERT(block_table[block_idx]->is_free());
411374
}
412375
m_block_table[seq_id].resize(m_block_table[seq_id].size() - block_num);
413376

src/cpp/src/scheduler.hpp

Lines changed: 6 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -117,31 +117,14 @@ class Scheduler {
117117
return m_block_manager.num_free_blocks() > prev_blocks_count;
118118
}
119119

120-
if (num_running_sequences > 1) {
121-
size_t phisycal_blocks_released;
122-
size_t logical_blocks_released;
123-
m_block_manager.free_group_partially_multiple_runnning_sequence(sequence_group, blocks_needed, phisycal_blocks_released, logical_blocks_released);
124-
125-
// calculate the number of preempted tokens
126-
auto tokens_in_last_block = processed_tokens % block_size;
127-
if (tokens_in_last_block == 0) {
128-
tokens_in_last_block = block_size;
129-
}
130-
preempted_tokens = tokens_in_last_block + std::max<size_t>((int)logical_blocks_released - 1, 0) * block_size;
120+
size_t logical_blocks_released = m_block_manager.free_group_partially(sequence_group, blocks_needed);
131121

122+
// calculate the number of preempted tokens
123+
auto tokens_in_last_block = processed_tokens % block_size;
124+
if (tokens_in_last_block == 0) {
125+
tokens_in_last_block = block_size;
132126
}
133-
else {
134-
OPENVINO_ASSERT(num_running_sequences == 1);
135-
size_t phisycal_blocks_released;
136-
m_block_manager.free_group_partially_single_runnning_sequence(sequence_group, blocks_needed, phisycal_blocks_released);
137-
138-
// calculate the number of preempted tokens
139-
auto tokens_in_last_block = processed_tokens % block_size;
140-
if (tokens_in_last_block == 0) {
141-
tokens_in_last_block = block_size;
142-
}
143-
preempted_tokens = tokens_in_last_block + std::max<size_t>((int)phisycal_blocks_released - 1, 0) * block_size;
144-
}
127+
preempted_tokens = tokens_in_last_block + std::max<size_t>((int)logical_blocks_released - 1, 0) * block_size;
145128

146129
// case when preemption requires preempt prompt tokens
147130
if (!m_config.dynamic_split_fuse && processed_tokens - preempted_tokens < sequence_group->get_prompt_len()) {

tests/cpp/block_manager.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,7 @@ TEST(TestBlockManager, general_test) {
2626
EXPECT_EQ(bm.get_block_table(seq_id).size(), 6);
2727
EXPECT_EQ(bm.num_free_blocks(), 0);
2828

29-
bm.free_sequence_partially_single_runnning_sequence(seq_id, 4);
29+
bm.free_sequence_partially(seq_id, 4);
3030
EXPECT_EQ(bm.get_block_table(seq_id).size(), 2);
3131
EXPECT_EQ(bm.num_free_blocks(), 4);
3232

0 commit comments

Comments
 (0)