@@ -214,19 +214,6 @@ static_params_t::static_params_t(const Xbyak::Reg64 &param1,
214214 : static_params_t (param1, get_all_strategies_supported_by_injector(),
215215 rhs_arg_static_params) {}
216216
217- rhs_arg_static_params_t::rhs_arg_static_params_t (
218- std::size_t rhs_dt_helper_vmm_idx, const Xbyak::Reg64 &rhs_addr_reg,
219- const Xbyak::Reg64 &rhs_helper_reg,
220- const Xbyak::Reg64 &rhs_addr_cache_reg, bool preserve_gpr_helpers,
221- bool preserve_vmm_helper, std::size_t abi_param_offset,
222- const memory_desc_wrapper &dst_d, std::size_t tail_size,
223- bool use_exact_tail_scalar_bcast)
224- : rhs_arg_static_params_t (rhs_dt_helper_vmm_idx, rhs_addr_reg,
225- rhs_helper_reg, rhs_addr_cache_reg, preserve_gpr_helpers,
226- preserve_vmm_helper, abi_param_offset, 0 , dst_d, tail_size,
227- Xbyak::Opmask (2 ), use_exact_tail_scalar_bcast, rhs_helper_reg,
228- false /* is_opmask_set*/ , false /* is_dst_orig_set*/ ) {}
229-
230217rhs_arg_static_params_t::rhs_arg_static_params_t (
231218 std::size_t rhs_dt_helper_vmm_idx, const Xbyak::Reg64 &rhs_addr_reg,
232219 const Xbyak::Reg64 &rhs_helper_reg,
@@ -238,23 +225,7 @@ rhs_arg_static_params_t::rhs_arg_static_params_t(
238225 rhs_helper_reg, rhs_addr_cache_reg, preserve_gpr_helpers,
239226 preserve_vmm_helper, abi_param_offset, dst_orig_offset, dst_d,
240227 tail_size, Xbyak::Opmask(2 ), use_exact_tail_scalar_bcast,
241- rhs_helper_reg, false /* is_opmask_set*/ , true /* is_dst_orig_set*/ ) {
242- }
243-
244- rhs_arg_static_params_t::rhs_arg_static_params_t (
245- std::size_t rhs_dt_helper_vmm_idx, const Xbyak::Reg64 &rhs_addr_reg,
246- const Xbyak::Reg64 &rhs_helper_reg,
247- const Xbyak::Reg64 &rhs_addr_cache_reg, bool preserve_gpr_helpers,
248- bool preserve_vmm_helper, std::size_t abi_param_offset,
249- const memory_desc_wrapper &dst_d, std::size_t tail_size,
250- const Xbyak::Opmask &tail_opmask, bool use_exact_tail_scalar_bcast, std::size_t rhs_prelu_helper_vmm_idx)
251- : rhs_arg_static_params_t (rhs_dt_helper_vmm_idx, rhs_addr_reg,
252- rhs_helper_reg, rhs_addr_cache_reg, preserve_gpr_helpers,
253- preserve_vmm_helper, abi_param_offset, 0 , dst_d, tail_size,
254- tail_opmask, use_exact_tail_scalar_bcast, rhs_helper_reg,
255- true /* is_opmask_set*/ , false /* is_dst_orig_set*/ ) {
256- this ->rhs_prelu_helper_vmm_idx = rhs_prelu_helper_vmm_idx;
257- }
228+ rhs_helper_reg, false /* is_opmask_set*/ ) {}
258229
259230rhs_arg_static_params_t::rhs_arg_static_params_t (
260231 std::size_t rhs_dt_helper_vmm_idx, const Xbyak::Reg64 &rhs_addr_reg,
@@ -268,23 +239,7 @@ rhs_arg_static_params_t::rhs_arg_static_params_t(
268239 rhs_helper_reg, rhs_addr_cache_reg, preserve_gpr_helpers,
269240 preserve_vmm_helper, abi_param_offset, dst_orig_offset, dst_d,
270241 tail_size, tail_opmask, use_exact_tail_scalar_bcast, rhs_helper_reg,
271- true /* is_opmask_set*/ , true /* is_dst_orig_set*/ ) {
272- this ->rhs_prelu_helper_vmm_idx = rhs_prelu_helper_vmm_idx;
273- }
274-
275- rhs_arg_static_params_t::rhs_arg_static_params_t (
276- std::size_t rhs_dt_helper_vmm_idx, const Xbyak::Reg64 &rhs_addr_reg,
277- const Xbyak::Reg64 &rhs_helper_reg,
278- const Xbyak::Reg64 &rhs_addr_cache_reg, bool preserve_gpr_helpers,
279- bool preserve_vmm_helper, std::size_t abi_param_offset,
280- const memory_desc_wrapper &dst_d, std::size_t tail_size,
281- const Xbyak::Opmask &tail_opmask, const Xbyak::Reg64 &reg_tail_size,
282- bool use_exact_tail_scalar_bcast, std::size_t rhs_prelu_helper_vmm_idx)
283- : rhs_arg_static_params_t (rhs_dt_helper_vmm_idx, rhs_addr_reg,
284- rhs_helper_reg, rhs_addr_cache_reg, preserve_gpr_helpers,
285- preserve_vmm_helper, abi_param_offset, 0 , dst_d, tail_size,
286- tail_opmask, use_exact_tail_scalar_bcast, reg_tail_size,
287- true /* is_opmask_set*/ , false /* is_dst_orig_set*/ ) {
242+ true /* is_opmask_set*/ ) {
288243 this ->rhs_prelu_helper_vmm_idx = rhs_prelu_helper_vmm_idx;
289244}
290245
@@ -300,7 +255,7 @@ rhs_arg_static_params_t::rhs_arg_static_params_t(
300255 rhs_helper_reg, rhs_addr_cache_reg, preserve_gpr_helpers,
301256 preserve_vmm_helper, abi_param_offset, dst_orig_offset, dst_d,
302257 tail_size, tail_opmask, use_exact_tail_scalar_bcast, reg_tail_size,
303- true /* is_opmask_set*/ , true /* is_dst_orig_set */ ) {
258+ true /* is_opmask_set*/ ) {
304259 this ->rhs_prelu_helper_vmm_idx = rhs_prelu_helper_vmm_idx;
305260}
306261
@@ -312,7 +267,7 @@ rhs_arg_static_params_t::rhs_arg_static_params_t(
312267 std::size_t dst_orig_offset, const memory_desc_wrapper &dst_d,
313268 std::size_t tail_size, const Xbyak::Opmask &tail_opmask,
314269 bool use_exact_tail_scalar_bcast, const Xbyak::Reg64 &reg_tail_size,
315- bool is_opmask_set, bool is_dst_orig_set )
270+ bool is_opmask_set)
316271 : rhs_dt_helper_vmm_idx(rhs_dt_helper_vmm_idx)
317272 , rhs_addr_reg(rhs_addr_reg)
318273 , rhs_helper_reg(rhs_helper_reg)
@@ -327,8 +282,7 @@ rhs_arg_static_params_t::rhs_arg_static_params_t(
327282 , use_exact_tail_scalar_bcast(use_exact_tail_scalar_bcast)
328283 , reg_tail_size(reg_tail_size)
329284 , is_tail(tail_size)
330- , is_opmask_set_(is_opmask_set)
331- , is_dst_orig_set_(is_dst_orig_set) {}
285+ , is_opmask_set_(is_opmask_set) {}
332286
333287template <cpu_isa_t isa, typename Vmm>
334288jit_uni_binary_injector_t <isa, Vmm>::jit_uni_binary_injector_t (
@@ -354,45 +308,14 @@ static bool rhs_arg_params_differ(size_t vmm_idx1, size_t vmm_idx2,
354308
355309 const auto &out_addr = rhs_arg_params.vmm_idx_to_out_addr ;
356310 const auto &out_reg = rhs_arg_params.vmm_idx_to_out_reg ;
357-
358- const auto &out_elem_off_addr = rhs_arg_params.vmm_idx_to_out_elem_off_addr ;
359311 const auto &out_elem_off_val = rhs_arg_params.vmm_idx_to_out_elem_off_val ;
360- const auto &out_off_oprnd = rhs_arg_params.vmm_idx_to_out_off_oprnd ;
361- const auto &oc_off_addr = rhs_arg_params.vmm_idx_to_oc_elem_off_addr ;
362- const auto &oc_off_val = rhs_arg_params.vmm_idx_to_oc_elem_off_val ;
363- const auto &oc_off_oprnd = rhs_arg_params.vmm_idx_to_oc_off_oprnd ;
364- const auto &sp_off_addr = rhs_arg_params.vmm_idx_to_sp_elem_off_addr ;
365- const auto &sp_off_val = rhs_arg_params.vmm_idx_to_sp_elem_off_val ;
366- const auto &sp_off_oprnd = rhs_arg_params.vmm_idx_to_sp_off_oprnd ;
367-
368- if (rhs_broadcasting_strategy == broadcasting_strategy_t ::scalar) {
369- return false ;
370- } else if (rhs_broadcasting_strategy
371- == broadcasting_strategy_t ::no_broadcast) {
372- return params_differ (out_addr, vmm_idx1, vmm_idx2)
373- || params_differ (out_reg, vmm_idx1, vmm_idx2)
374- || params_differ (out_elem_off_addr, vmm_idx1, vmm_idx2)
375- || params_differ (out_elem_off_val, vmm_idx1, vmm_idx2)
376- || params_differ (out_off_oprnd, vmm_idx1, vmm_idx2);
377- } else if (rhs_broadcasting_strategy == broadcasting_strategy_t ::per_oc
378- || rhs_broadcasting_strategy
379- == broadcasting_strategy_t ::per_oc_spatial) {
380- return params_differ (out_addr, vmm_idx1, vmm_idx2)
381- || params_differ (out_reg, vmm_idx1, vmm_idx2)
382- || params_differ (out_elem_off_val, vmm_idx1, vmm_idx2)
383- || params_differ (oc_off_addr, vmm_idx1, vmm_idx2)
384- || params_differ (oc_off_val, vmm_idx1, vmm_idx2)
385- || params_differ (oc_off_oprnd, vmm_idx1, vmm_idx2);
386- } else if (rhs_broadcasting_strategy
387- == broadcasting_strategy_t ::per_mb_spatial) {
312+
313+ if (rhs_broadcasting_strategy != broadcasting_strategy_t ::scalar) {
388314 return params_differ (out_addr, vmm_idx1, vmm_idx2)
389315 || params_differ (out_reg, vmm_idx1, vmm_idx2)
390- || params_differ (out_elem_off_val, vmm_idx1, vmm_idx2)
391- || params_differ (sp_off_addr, vmm_idx1, vmm_idx2)
392- || params_differ (sp_off_val, vmm_idx1, vmm_idx2)
393- || params_differ (sp_off_oprnd, vmm_idx1, vmm_idx2);
316+ || params_differ (out_elem_off_val, vmm_idx1, vmm_idx2);
394317 }
395- return true ;
318+ return false ;
396319}
397320
398321template <cpu_isa_t isa, typename Vmm>
@@ -520,8 +443,7 @@ void jit_uni_binary_injector_t<isa, Vmm>::compute_vector_range(
520443 const int blk_size = dst_d.blocking_desc ().inner_blks [0 ];
521444 const bool use_offset_conversions
522445 = (!rhs_arg_params.vmm_idx_to_out_addr .empty ()
523- || !rhs_arg_params.vmm_idx_to_out_reg .empty ())
524- && rhs_arg_static_params_.is_dst_orig_set ();
446+ || !rhs_arg_params.vmm_idx_to_out_reg .empty ());
525447 const bool should_preserve_oc_offset_conversion_regs
526448 = use_offset_conversions
527449 && utils::one_of (rhs_broadcasting_strategy,
@@ -662,13 +584,6 @@ Xbyak::Address jit_uni_binary_injector_t<isa, Vmm>::prepare_rhs_arg_addr(
662584 switch (rhs_broadcasting_strategy) {
663585 case broadcasting_strategy_t ::scalar: return host_->ptr_b [rhs_addr_reg];
664586 case broadcasting_strategy_t ::no_broadcast: {
665- append_offset_from_operand (rhs_arg_params.vmm_idx_to_out_off_oprnd ,
666- vmm_idx, rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
667- append_offset_under_mem_addr (
668- rhs_arg_params.vmm_idx_to_out_elem_off_addr , vmm_idx,
669- rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
670- append_value_offset (rhs_arg_params.vmm_idx_to_out_elem_off_val ,
671- vmm_idx, rhs_addr_reg, rhs_arg_elem_size);
672587 append_no_broadcast_offset (rhs_arg_params.vmm_idx_to_out_addr ,
673588 rhs_arg_params.vmm_idx_to_out_reg ,
674589 rhs_arg_params.vmm_idx_to_out_elem_off_val , vmm_idx,
@@ -678,13 +593,6 @@ Xbyak::Address jit_uni_binary_injector_t<isa, Vmm>::prepare_rhs_arg_addr(
678593 }
679594 case broadcasting_strategy_t ::per_oc:
680595 case broadcasting_strategy_t ::per_oc_spatial: {
681- append_offset_from_operand (rhs_arg_params.vmm_idx_to_oc_off_oprnd ,
682- vmm_idx, rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
683- append_offset_under_mem_addr (
684- rhs_arg_params.vmm_idx_to_oc_elem_off_addr , vmm_idx,
685- rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
686- append_value_offset (rhs_arg_params.vmm_idx_to_oc_elem_off_val ,
687- vmm_idx, rhs_addr_reg, rhs_arg_elem_size);
688596 append_oc_offset (rhs_arg_params.vmm_idx_to_out_addr ,
689597 rhs_arg_params.vmm_idx_to_out_reg ,
690598 rhs_arg_params.vmm_idx_to_out_elem_off_val , vmm_idx,
@@ -696,13 +604,6 @@ Xbyak::Address jit_uni_binary_injector_t<isa, Vmm>::prepare_rhs_arg_addr(
696604 : host_->ptr [rhs_addr_reg];
697605 }
698606 case broadcasting_strategy_t ::per_mb_spatial: {
699- append_offset_from_operand (rhs_arg_params.vmm_idx_to_sp_off_oprnd ,
700- vmm_idx, rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
701- append_offset_under_mem_addr (
702- rhs_arg_params.vmm_idx_to_sp_elem_off_addr , vmm_idx,
703- rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
704- append_value_offset (rhs_arg_params.vmm_idx_to_sp_elem_off_val ,
705- vmm_idx, rhs_addr_reg, rhs_arg_elem_size);
706607 append_mb_sp_offset (rhs_arg_params.vmm_idx_to_out_addr ,
707608 rhs_arg_params.vmm_idx_to_out_reg ,
708609 rhs_arg_params.vmm_idx_to_out_elem_off_val , vmm_idx,
@@ -711,13 +612,6 @@ Xbyak::Address jit_uni_binary_injector_t<isa, Vmm>::prepare_rhs_arg_addr(
711612 return host_->ptr [rhs_addr_reg];
712613 }
713614 case broadcasting_strategy_t ::per_mb_w: {
714- append_offset_from_operand (rhs_arg_params.vmm_idx_to_mb_w_off_oprnd ,
715- vmm_idx, rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
716- append_offset_under_mem_addr (
717- rhs_arg_params.vmm_idx_to_mb_w_elem_off_addr , vmm_idx,
718- rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
719- append_value_offset (rhs_arg_params.vmm_idx_to_mb_w_elem_off_val ,
720- vmm_idx, rhs_addr_reg, rhs_arg_elem_size);
721615 append_mb_w_offset (rhs_arg_params.vmm_idx_to_out_addr ,
722616 rhs_arg_params.vmm_idx_to_out_reg ,
723617 rhs_arg_params.vmm_idx_to_out_elem_off_val , vmm_idx,
@@ -726,13 +620,6 @@ Xbyak::Address jit_uni_binary_injector_t<isa, Vmm>::prepare_rhs_arg_addr(
726620 return host_->ptr [rhs_addr_reg];
727621 }
728622 case broadcasting_strategy_t ::per_w: {
729- append_offset_from_operand (rhs_arg_params.vmm_idx_to_w_off_oprnd ,
730- vmm_idx, rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
731- append_offset_under_mem_addr (
732- rhs_arg_params.vmm_idx_to_w_elem_off_addr , vmm_idx,
733- rhs_addr_reg, rhs_helper_reg, rhs_arg_elem_size);
734- append_value_offset (rhs_arg_params.vmm_idx_to_w_elem_off_val ,
735- vmm_idx, rhs_addr_reg, rhs_arg_elem_size);
736623 append_w_offset (rhs_arg_params.vmm_idx_to_out_addr ,
737624 rhs_arg_params.vmm_idx_to_out_reg ,
738625 rhs_arg_params.vmm_idx_to_out_elem_off_val , vmm_idx,
@@ -746,57 +633,6 @@ Xbyak::Address jit_uni_binary_injector_t<isa, Vmm>::prepare_rhs_arg_addr(
746633 return host_->ptr [rhs_addr_reg];
747634}
748635
749- template <cpu_isa_t isa, typename Vmm>
750- void jit_uni_binary_injector_t <isa, Vmm>::append_offset_from_operand(
751- const std::map<int , Xbyak::Operand> &vmm_idx_to_elem_operand_off,
752- int vmm_idx, const Xbyak::Reg64 &addr_reg, const Xbyak::Reg64 &tmp_reg,
753- std::size_t elem_size_bytes) const {
754-
755- const auto it_operand_off = vmm_idx_to_elem_operand_off.find (vmm_idx);
756- if (it_operand_off != vmm_idx_to_elem_operand_off.end ()
757- && !rhs_arg_static_params_.is_dst_orig_set ()) {
758- if (elem_size_bytes == 1 ) {
759- host_->add (addr_reg, it_operand_off->second );
760- } else {
761- const int shift_val = std::log2 (elem_size_bytes);
762- host_->mov (tmp_reg, it_operand_off->second );
763- host_->sal (tmp_reg, shift_val);
764- host_->add (addr_reg, tmp_reg);
765- }
766- }
767- }
768-
769- template <cpu_isa_t isa, typename Vmm>
770- void jit_uni_binary_injector_t <isa, Vmm>::append_offset_under_mem_addr(
771- const std::map<int , Xbyak::Address> &vmm_idx_to_elem_addr_off,
772- int vmm_idx, const Xbyak::Reg64 &addr_reg, const Xbyak::Reg64 &tmp_reg,
773- std::size_t elem_size_bytes) const {
774-
775- const auto it_off_addr = vmm_idx_to_elem_addr_off.find (vmm_idx);
776- if (it_off_addr != vmm_idx_to_elem_addr_off.end ()
777- && !rhs_arg_static_params_.is_dst_orig_set ()) {
778- if (elem_size_bytes == 1 ) {
779- host_->add (addr_reg, it_off_addr->second );
780- } else {
781- const int shift_val = std::log2 (elem_size_bytes);
782- host_->mov (tmp_reg, it_off_addr->second );
783- host_->sal (tmp_reg, shift_val);
784- host_->add (addr_reg, tmp_reg);
785- }
786- }
787- }
788-
789- template <cpu_isa_t isa, typename Vmm>
790- void jit_uni_binary_injector_t <isa, Vmm>::append_value_offset(
791- const std::map<int , size_t > &vmm_idx_to_elem_val_off, int vmm_idx,
792- const Xbyak::Reg64 &addr_reg, std::size_t elem_size_bytes) const {
793-
794- const auto it_off_val = vmm_idx_to_elem_val_off.find (vmm_idx);
795- if (it_off_val != vmm_idx_to_elem_val_off.end ()
796- && !rhs_arg_static_params_.is_dst_orig_set ())
797- host_->add (addr_reg, it_off_val->second * elem_size_bytes);
798- }
799-
800636template <cpu_isa_t isa, typename Vmm>
801637void jit_uni_binary_injector_t <isa, Vmm>::append_no_broadcast_offset(
802638 const std::map<int , Xbyak::Address> &vmm_idx_to_out_addr,
@@ -811,8 +647,6 @@ void jit_uni_binary_injector_t<isa, Vmm>::append_no_broadcast_offset(
811647 const bool is_out_addr = it_out_addr != vmm_idx_to_out_addr.end ();
812648 const bool is_out_reg = it_out_reg != vmm_idx_to_out_reg.end ();
813649 if (is_out_addr || is_out_reg) {
814- assert (rhs_arg_static_params_.is_dst_orig_set ()
815- && " dst base addr offset not set" );
816650 Xbyak::Address out_addr = is_out_addr ? it_out_addr->second
817651 : host_->ptr [it_out_reg->second ];
818652 const auto it_off_val = vmm_idx_to_out_elem_off_val.find (vmm_idx);
@@ -875,8 +709,6 @@ void jit_uni_binary_injector_t<isa, Vmm>::append_oc_offset(
875709 const bool is_out_reg = it_out_reg != vmm_idx_to_out_reg.end ();
876710
877711 if (is_out_addr || is_out_reg) {
878- assert (rhs_arg_static_params_.is_dst_orig_set ()
879- && " dst base addr offset not set" );
880712 Xbyak::Address out_addr = is_out_addr ? it_out_addr->second
881713 : host_->ptr [it_out_reg->second ];
882714 const auto it_off_val = vmm_idx_to_out_elem_off_val.find (vmm_idx);
@@ -1105,8 +937,6 @@ void jit_uni_binary_injector_t<isa, Vmm>::append_mb_sp_offset(
1105937 const bool is_out_reg = it_out_reg != vmm_idx_to_out_reg.end ();
1106938
1107939 if (is_out_addr || is_out_reg) {
1108- assert (rhs_arg_static_params_.is_dst_orig_set ()
1109- && " dst base addr offset not set" );
1110940 Xbyak::Address out_addr = is_out_addr ? it_out_addr->second
1111941 : host_->ptr [it_out_reg->second ];
1112942 const auto it_off_val = vmm_idx_to_out_elem_off_val.find (vmm_idx);
@@ -1388,8 +1218,6 @@ void jit_uni_binary_injector_t<isa, Vmm>::append_mb_w_offset(
13881218 const bool is_out_reg = it_out_reg != vmm_idx_to_out_reg.end ();
13891219
13901220 if (is_out_addr || is_out_reg) {
1391- assert (rhs_arg_static_params_.is_dst_orig_set ()
1392- && " dst base addr offset not set" );
13931221 Xbyak::Address out_addr = is_out_addr ? it_out_addr->second
13941222 : host_->ptr [it_out_reg->second ];
13951223 const auto it_off_val = vmm_idx_to_out_elem_off_val.find (vmm_idx);
@@ -1700,8 +1528,6 @@ void jit_uni_binary_injector_t<isa, Vmm>::append_w_offset(
17001528 const bool is_out_reg = it_out_reg != vmm_idx_to_out_reg.end ();
17011529
17021530 if (is_out_addr || is_out_reg) {
1703- assert (rhs_arg_static_params_.is_dst_orig_set ()
1704- && " dst base addr offset not set" );
17051531 Xbyak::Address out_addr = is_out_addr ? it_out_addr->second
17061532 : host_->ptr [it_out_reg->second ];
17071533 const auto it_off_val = vmm_idx_to_out_elem_off_val.find (vmm_idx);
0 commit comments