File tree Expand file tree Collapse file tree 3 files changed +19
-38
lines changed Expand file tree Collapse file tree 3 files changed +19
-38
lines changed Original file line number Diff line number Diff line change @@ -113,5 +113,6 @@ jobs:
113
113
&& cmake --install "samples build" --config ${{ matrix.build-type }} --component samples_bin --prefix samples_install
114
114
if: ${{ 'Release' != matrix.build-type }}
115
115
- run : call ov\setupvars.bat && "${{ github.workspace }}/samples_install/samples_bin/greedy_causal_lm" .\TinyLlama-1.1B-Chat-v1.0\ ""
116
+ if : ${{ 'Release' == matrix.build-type }} # Tokenizers don't work in debug
116
117
- run : call ov\setupvars.bat && python .\ov\samples\python\multinomial_causal_lm\multinomial_causal_lm.py .\TinyLlama-1.1B-Chat-v1.0\ 0
117
118
if : ${{ 'Release' == matrix.build-type }} # Python bindings can be built in Release only
Load Diff This file was deleted.
Original file line number Diff line number Diff line change 12
12
13
13
namespace {
14
14
15
+ void align_u4_zp_constants (const std::shared_ptr<ov::Model>& model) {
16
+ for (auto op : model->get_ops ()) {
17
+ if (ov::op::util::is_constant (op)) {
18
+ auto cst_op = std::dynamic_pointer_cast<ov::op::v0::Constant>(op);
19
+ const auto cst_op_out = cst_op->output (0 );
20
+ if (cst_op_out.get_element_type () == ov::element::u4 && ov::shape_size (cst_op_out.get_shape ()) == 1u ) {
21
+ ov::Tensor cst_tensor (ov::element::u4, cst_op_out.get_shape ());
22
+ *static_cast <uint8_t *>(cst_tensor.data ()) = cst_op->get_vector <uint8_t >()[0 ] & 0x0f ;
23
+ auto new_cst_op = std::make_shared<ov::op::v0::Constant>(cst_tensor);
24
+ for (auto target_input : cst_op_out.get_target_inputs ()) {
25
+ target_input.replace_source_output (new_cst_op);
26
+ }
27
+ }
28
+ }
29
+ }
30
+ }
31
+
15
32
std::shared_ptr<ov::Model> add_slices_to_kvcache_inputs (const std::shared_ptr<ov::Model>& model) {
16
33
const auto kvcache_name_pattern = " past_key_values" ;
17
34
std::vector<std::shared_ptr<ov::opset13::Parameter>> new_params;
@@ -147,6 +164,7 @@ StaticLLMPipeline::StaticLLMPipeline(
147
164
m_kvcache_model = core.read_model (path / " openvino_model.xml" );
148
165
// (2) Expose KV-cache input and output layers from kvcache model
149
166
ov::pass::StatefulToStateless ().run_on_model (m_kvcache_model);
167
+ align_u4_zp_constants (m_kvcache_model);
150
168
// (3) Clone the model - this will be prefill
151
169
m_prefill_model = m_kvcache_model->clone ();
152
170
m_prefill_model->set_friendly_name (m_kvcache_model->get_friendly_name () + " _prefill" );
You can’t perform that action at this time.
0 commit comments