From e2eef0e2998fcbef65b2d96fd9a7d00eb509db2a Mon Sep 17 00:00:00 2001 From: Vasily Shamporov Date: Fri, 20 Dec 2024 15:11:56 +0100 Subject: [PATCH] Fix max sequence cache occupation length to be computed in blocks, not tokens --- src/cpp/src/continuous_batching_impl.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/cpp/src/continuous_batching_impl.cpp b/src/cpp/src/continuous_batching_impl.cpp index 63109a7fe5..c016f2611f 100644 --- a/src/cpp/src/continuous_batching_impl.cpp +++ b/src/cpp/src/continuous_batching_impl.cpp @@ -96,7 +96,7 @@ void ContinuousBatchingPipeline::ContinuousBatchingImpl::init( m_rotation_deltas_stores.push_back(store); } - size_t max_sequence_cache_occupation_length_in_blocks = scheduler_config.max_num_batched_tokens + 1; + size_t max_sequence_cache_occupation_length_in_blocks = (scheduler_config.max_num_batched_tokens) / m_scheduler->get_block_size() + 1; size_t embedding_size = device_config.get_head_size(); m_cache_rotation_calculator = std::make_shared( m_scheduler->get_block_size(),