We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent c1ccc5e commit dfb1df5 (Copy full SHA for dfb1df5)
benchmarks/kernels/benchmark_moe.py
@@ -155,7 +155,7 @@ def get_rocm_tuning_space(use_fp16):
155
# For now we see better perf with num_stages=0 for all gemm configs we care about.
156
# But keep this explicit so that we do not forget we may need to set it to
157
# other values in the future
158
- num_stage_range = [0]
+ num_stage_range = [2]
159
waves_per_eu_range = [0]
160
matrix_instr_nonkdim_range = [16, 32] if use_fp16 else []
161
kpack_range = [1, 2] if use_fp16 else []
0 commit comments