Static llm pipeline: stateful model (#1240)
Related PRs:
- OpenVINO: openvinotoolkit/openvino#27651
- OpenVINO Unroll SDPA: openvinotoolkit/openvino#27891
- OpenVINO Prefill/Generate sections: openvinotoolkit/openvino#28154
- OpenVINO LLMCompiledModel refactoring: openvinotoolkit/openvino#28267

---------

Co-authored-by: Anatoliy Talamanov <anatoliy.talamanov@intel.com>
Co-authored-by: Ilya Lavrenov <ilya.lavrenov@intel.com>
3 people authored Jan 6, 2025
1 parent 9a27715 commit db71b36
Showing 3 changed files with 402 additions and 23 deletions.
12 changes: 6 additions & 6 deletions src/cpp/src/llm_pipeline.cpp
@@ -20,13 +20,13 @@ namespace {

 /*
  * NPU reads some properties from the config file, but when LLMPipeline is initialized
- * from the model_str and weights_tensor, there are not files.
+ * from the model_str and weights_tensor, there are no files.
  * In the later case ModelDesc is stored in properties.
  * This function pops ModelDescr from the the properties and returns a pair of updated properties and ModelDescr.
  */
-std::pair<ov::AnyMap, ov::genai::ModelConfigDesc> split_model_descr(const ov::AnyMap& properties) {
+std::pair<ov::AnyMap, ov::genai::static_llm::ModelConfigDesc> split_model_descr(const ov::AnyMap& properties) {
     ov::AnyMap main_properties = properties;
-    ov::genai::ModelConfigDesc model_descr;
+    ov::genai::static_llm::ModelConfigDesc model_descr;
 
     auto pop_property = [](ov::AnyMap& orig_propertis, const std::string& key, auto& value) {
         if (orig_propertis.find(key) != orig_propertis.end()) {
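
The hunk above is truncated mid-lambda, so here is a self-contained sketch of the same "pop a typed value out of an ov::AnyMap" pattern that split_model_descr uses. It assumes only the public ov::AnyMap / ov::Any API; the struct and key names are placeholders, not the real ov::genai::static_llm::ModelConfigDesc fields.

// Minimal sketch of the property-splitting pattern above.
// MyModelDesc and the key names are hypothetical, not the real GenAI ones.
#include <openvino/core/any.hpp>
#include <string>
#include <type_traits>
#include <utility>

struct MyModelDesc {                     // hypothetical stand-in for ModelConfigDesc
    std::string model_type;
    int num_key_value_heads = 0;
};

std::pair<ov::AnyMap, MyModelDesc> split_desc(const ov::AnyMap& properties) {
    ov::AnyMap remaining = properties;   // copy, so the caller's map is untouched
    MyModelDesc desc;

    // Pop a key out of the map if present, converting it to the target type.
    auto pop = [&remaining](const std::string& key, auto& out) {
        auto it = remaining.find(key);
        if (it != remaining.end()) {
            out = it->second.as<std::decay_t<decltype(out)>>();
            remaining.erase(it);
        }
    };

    pop("model_type", desc.model_type);                   // placeholder keys
    pop("num_key_value_heads", desc.num_key_value_heads);
    return {remaining, desc};
}
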
@@ -105,7 +105,7 @@ ov::genai::LLMPipeline::LLMPipeline(
         auto [plugin_config, scheduler_config] = utils::split_scheduler_config(properties);
         m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, tokenizer, scheduler_config, device, plugin_config);
     } else if (device == "NPU") {
-        m_pimpl = std::make_unique<StaticLLMPipeline>(models_path, tokenizer, device, properties);
+        m_pimpl = static_llm::LLMPipelineFactory::create(models_path, tokenizer, device, properties);
     } else {
         m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, tokenizer, device, properties);
     }
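
The change above swaps direct construction of StaticLLMPipeline for a factory call, so the NPU path can choose an implementation at runtime. A rough, illustrative-only sketch of that kind of indirection follows; the class names and the selection hint are made up for the example and are not the actual GenAI types.

// Illustrative factory sketch: create() hides which concrete pipeline is built.
#include <memory>
#include <string>

struct PipelineBase {
    virtual ~PipelineBase() = default;
};
struct StatefulImpl  : PipelineBase {};   // e.g. a stateful (KV-cache-in-model) variant
struct StatelessImpl : PipelineBase {};   // e.g. the previous stateless variant

struct PipelineFactory {
    static std::unique_ptr<PipelineBase> create(const std::string& mode_hint) {
        if (mode_hint == "STATEFUL") {
            return std::make_unique<StatefulImpl>();
        }
        return std::make_unique<StatelessImpl>();
    }
};

// Usage: auto pipeline = PipelineFactory::create("STATEFUL");
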
@@ -124,7 +124,7 @@ ov::genai::LLMPipeline::LLMPipeline(
         auto [device_properties, scheduler_config] = utils::split_scheduler_config(properties);
         m_pimpl = std::make_unique<ContinuousBatchingAdapter>(models_path, scheduler_config, device, device_properties);
     } else if (device == "NPU") {
-        m_pimpl = std::make_unique<StaticLLMPipeline>(models_path, device, properties);
+        m_pimpl = static_llm::LLMPipelineFactory::create(models_path, device, properties);
     } else {
         m_pimpl = std::make_unique<StatefulLLMPipeline>(models_path, device, properties);
     }
@@ -162,7 +162,7 @@ ov::genai::LLMPipeline::LLMPipeline(
         // This will convert from AnyMap to ModelDesc.
         auto [filtered_properties, model_descr] = split_model_descr(properties);
 
-        m_pimpl = std::make_unique<StaticLLMPipeline>(
+        m_pimpl = static_llm::LLMPipelineFactory::create(
             utils::singleton_core().read_model(model_str, weights_tensor),
             model_descr,
             tokenizer,
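Callers are not affected by this refactoring. For reference, a minimal usage sketch against the public LLMPipeline API is shown below; the model directory name is a placeholder, and exact headers and signatures should be checked against the installed GenAI release.

// Minimal usage sketch: the public API is unchanged by this commit; on "NPU" the
// pipeline is now built through static_llm::LLMPipelineFactory under the hood.
#include <iostream>
#include <string>
#include "openvino/genai/llm_pipeline.hpp"

int main() {
    std::string models_path = "TinyLlama-1.1B-Chat-v1.0";   // placeholder: any exported GenAI LLM folder

    ov::genai::LLMPipeline pipe(models_path, "NPU");

    ov::genai::GenerationConfig config;
    config.max_new_tokens = 64;

    std::string result = pipe.generate("What is OpenVINO?", config);
    std::cout << result << std::endl;
    return 0;
}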
