@@ -43,13 +43,17 @@ def print_metrics(
             f"[{iter_str}] First token latency: {iter_data['first_token_latency']:.2f} ms/{latency_unit}, "
             f"other tokens latency: {iter_data['other_tokens_avg_latency']:.2f} ms/{latency_unit}, len of tokens: {len(tms)} * {batch_size}",
         )
+    else:
+        log.warning(f'[{iter_str}] No hook data output for first token latency and other tokens latency')
     if len(tms_infer) > 0:
         iter_data['first_token_infer_latency'] = tms_infer[0] * 1000 if len(tms_infer) > 0 else -1
         iter_data['other_tokens_infer_avg_latency'] = sum(tms_infer[1:]) / (len(tms_infer) - 1) * 1000 if len(tms_infer) > 1 else -1
         log.info(
             f"[{iter_str}] First infer latency: {iter_data['first_token_infer_latency']:.2f} ms/infer, "
             f"other infers latency: {iter_data['other_tokens_infer_avg_latency']:.2f} ms/infer, inference count: {len(tms_infer)}",
         )
+    else:
+        log.warning(f'[{iter_str}] No hook data output for first infer latency and other infers latency')
     if stable_diffusion is not None:
         print_stable_diffusion_infer_latency(iter_str, iter_data, stable_diffusion)
     output_str = ''
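The new `else` branches make missing hook output visible instead of silently skipping the latency report. A minimal runnable sketch of the same guard, assuming a standard `logging` logger named `log` and made-up timing lists (the real script collects these from generation hooks):

    import logging

    logging.basicConfig(level=logging.INFO)
    log = logging.getLogger(__name__)

    def report_latencies(iter_str, tms, batch_size=1):
        # `tms` is a list of per-token times in seconds; may be empty when
        # the hooks produced no data.
        latency_unit = 'token' if batch_size == 1 else f'{batch_size}tokens'
        if len(tms) > 0:
            first = tms[0] * 1000
            # Average of the remaining tokens; -1 sentinel when only one token was timed.
            others = sum(tms[1:]) / (len(tms) - 1) * 1000 if len(tms) > 1 else -1
            log.info(f'[{iter_str}] First token latency: {first:.2f} ms/{latency_unit}, '
                     f'other tokens latency: {others:.2f} ms/{latency_unit}')
        else:
            # Empty hook output now warns instead of staying silent.
            log.warning(f'[{iter_str}] No hook data output for token latency')

    report_latencies('1', [0.120, 0.031, 0.029])  # normal run
    report_latencies('2', [])                     # hooks produced nothing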
@@ -118,15 +122,16 @@ def output_avg_statis_tokens(prompt_dict, prompt_idx_list, iter_data_list, batch
             if iter_data['iteration'] == 0:
                 continue
             if iter_data['prompt_idx'] == p_idx:
-                avg_1st_token_latency += iter_data['first_token_latency']
-                avg_2nd_tokens_latency += iter_data['other_tokens_avg_latency']
-                avg_input_size += iter_data['input_size']
+                avg_1st_token_latency += iter_data['first_token_latency'] if iter_data['first_token_latency'] != '' else 0
+                avg_2nd_tokens_latency += iter_data['other_tokens_avg_latency'] if iter_data['other_tokens_avg_latency'] != '' else 0
+                avg_input_size += iter_data['input_size'] if iter_data['input_size'] != '' else 0
                 index_num = index_num + 1
         if index_num > 0:
             avg_1st_token_latency = avg_1st_token_latency / index_num
             avg_2nd_tokens_latency = avg_2nd_tokens_latency / index_num
             avg_input_size = int(avg_input_size / index_num)
-            avg_2nd_token_tput = (1 / avg_2nd_tokens_latency) * batch_size * 1000
+            if avg_2nd_tokens_latency > 0:
+                avg_2nd_token_tput = (1 / avg_2nd_tokens_latency) * batch_size * 1000
             latency_unit = 'token'
             if batch_size > 1:
                 latency_unit = '{}tokens'.format(batch_size)
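This hunk tolerates metric fields that hold an empty string `''` (the value left behind when no hook data arrived) and skips the tokens/s division when the averaged latency is 0. A small self-contained sketch of that pattern, using hypothetical records rather than the script's real `iter_data` entries:

    def average_metrics(records, batch_size=1):
        total_latency = 0.0
        count = 0
        for rec in records:
            # Treat '' (no hook data) as contributing nothing to the sum.
            total_latency += rec['other_tokens_avg_latency'] if rec['other_tokens_avg_latency'] != '' else 0
            count += 1
        avg_latency = total_latency / count if count > 0 else 0
        # Guard the division: with only-empty records the average is 0 and
        # the tokens/s figure is undefined.
        avg_tput = (1 / avg_latency) * batch_size * 1000 if avg_latency > 0 else None
        return avg_latency, avg_tput

    print(average_metrics([{'other_tokens_avg_latency': 25.0},
                           {'other_tokens_avg_latency': ''}]))
    # -> (12.5, 80.0)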