Skip to content

Commit 7b0b764

Browse files
Merge branch 'releases/2024/3' into ea/phi3_optimum
2 parents 1e32327 + 2a80828 commit 7b0b764

File tree

3 files changed

+19
-38
lines changed

3 files changed

+19
-38
lines changed

.github/workflows/genai_package.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -113,5 +113,6 @@ jobs:
113113
&& cmake --install "samples build" --config ${{ matrix.build-type }} --component samples_bin --prefix samples_install
114114
if: ${{ 'Release' != matrix.build-type }}
115115
- run: call ov\setupvars.bat && "${{ github.workspace }}/samples_install/samples_bin/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
116+
if: ${{ 'Release' == matrix.build-type }} # Tokenizers don't work in debug
116117
- run: call ov\setupvars.bat && python .\ov\samples\python\multinomial_causal_lm\multinomial_causal_lm.py .\TinyLlama-1.1B-Chat-v1.0\ 0
117118
if: ${{ 'Release' == matrix.build-type }} # Python bindings can be built in Release only

Dockerfile

Lines changed: 0 additions & 38 deletions
This file was deleted.

src/cpp/src/llm_pipeline_static.cpp

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,23 @@
1212

1313
namespace {
1414

15+
// Rewrites single-element u4 constants in `model` so that the stored byte
// carries only its low nibble.
//
// For every op that is a Constant whose output 0 has element type u4 and a
// shape holding exactly one element, a fresh Constant is built from a tensor
// whose first byte is the original first value masked with 0x0f. All consumers
// of the old constant's output 0 are then reconnected to the new constant.
//
// NOTE(review): "zp" in the name presumably stands for "zero point" - confirm
// against the callers / quantization scheme.
void align_u4_zp_constants(const std::shared_ptr<ov::Model>& model) {
    for (auto node : model->get_ops()) {
        if (!ov::op::util::is_constant(node)) {
            continue;
        }

        auto constant = std::dynamic_pointer_cast<ov::op::v0::Constant>(node);
        const auto constant_output = constant->output(0);

        // Only scalar-like (one element) u4 constants are rewritten.
        const bool is_u4 = constant_output.get_element_type() == ov::element::u4;
        if (!is_u4 || ov::shape_size(constant_output.get_shape()) != 1u) {
            continue;
        }

        // Build a replacement tensor of the same shape and write the first
        // value with everything above the low 4 bits cleared.
        ov::Tensor masked_tensor(ov::element::u4, constant_output.get_shape());
        auto* const first_byte = static_cast<uint8_t*>(masked_tensor.data());
        *first_byte = constant->get_vector<uint8_t>()[0] & 0x0f;

        auto replacement = std::make_shared<ov::op::v0::Constant>(masked_tensor);

        // Redirect every input that was fed by the old constant.
        for (auto consumer : constant_output.get_target_inputs()) {
            consumer.replace_source_output(replacement);
        }
    }
}
31+
1532
std::shared_ptr<ov::Model> add_slices_to_kvcache_inputs(const std::shared_ptr<ov::Model>& model) {
1633
const auto kvcache_name_pattern = "past_key_values";
1734
std::vector<std::shared_ptr<ov::opset13::Parameter>> new_params;
@@ -147,6 +164,7 @@ StaticLLMPipeline::StaticLLMPipeline(
147164
m_kvcache_model = core.read_model(path / "openvino_model.xml");
148165
// (2) Expose KV-cache input and output layers from kvcache model
149166
ov::pass::StatefulToStateless().run_on_model(m_kvcache_model);
167+
align_u4_zp_constants(m_kvcache_model);
150168
// (3) Clone the model - this will be prefill
151169
m_prefill_model = m_kvcache_model->clone();
152170
m_prefill_model->set_friendly_name(m_kvcache_model->get_friendly_name() + "_prefill");

0 commit comments

Comments
 (0)