@@ -238,6 +238,7 @@ def completion():
238238 latency = 0.0
239239 if simulator is not None :
240240 latency = simulator .execute (Request (arrived_at , input_tokens , output_tokens , arrived_next = arrived_next ))
241+ print (f"input_tokens { input_tokens } model metadata { simulator .model_metadata } " )
241242
242243 # Simulated response
243244 response = {
@@ -698,7 +699,9 @@ def metrics():
698699 if gpu_device != "disabled" :
699700 # Load the tokenizer for your model
700701 from transformers import AutoTokenizer
701-
702+ from transformers import AutoConfig
703+
704+ num_layers = num_heads = hidden_size = None
702705 default_model = 'bert-base-uncased'
703706 try :
704707 # can we make this as an application argument.
@@ -709,15 +712,30 @@ def metrics():
709712 token = HUGGINGFACE_TOKEN ,
710713 model_max_length = 16384 , # Suppress warning
711714 clean_up_tokenization_spaces = True )
715+ config = AutoConfig .from_pretrained (token_model )
716+ # Extract required details
717+ num_layers = getattr (config , "num_hidden_layers" , getattr (config , "n_layers" , None ))
718+ num_heads = getattr (config , "num_attention_heads" , getattr (config , "n_heads" , None ))
719+ hidden_size = getattr (config , "hidden_size" , getattr (config , "dim" , None ))
712720 except Exception as e :
713721 logger .error (f"Failed to initialize tokenizer, will use default tokenizer model: { e } " )
714722 tokenizer = AutoTokenizer .from_pretrained (
715723 default_model ,
716724 model_max_length = 16384 , # Suppress warning
717725 clean_up_tokenization_spaces = True )
726+ config = AutoConfig .from_pretrained (default_model )
727+ # Extract required details
728+ num_layers = getattr (config , "num_hidden_layers" , getattr (config , "n_layers" , None ))
729+ num_heads = getattr (config , "num_attention_heads" , getattr (config , "n_heads" , None ))
730+ hidden_size = getattr (config , "hidden_size" , getattr (config , "dim" , None ))
718731
719732 # TODO: check whether able to use argparse to build SimulationConfig
720733 simulator = Simulator (SimulationConfig .create_from_cli_args ())
734+
735+ simulator .model_metadata ['num_layers' ] = num_layers
736+ simulator .model_metadata ['num_heads' ] = num_heads
737+ simulator .model_metadata ['hidden_size' ] = hidden_size
738+
721739 overrides = {
722740 "total" : 100.0 ,
723741 "running" : 0 ,
0 commit comments