AI-Initiative-KAUST · jingwen0 · Dec 25, 2023
diff --git a/eval/utils/models.py b/eval/utils/models.py
@@ -88,9 +88,10 @@ def encode_sequences(self, sequences: List[str], response_with: str='') -> List[
 
         assert isinstance(sequences, list) and isinstance(sequences[0], str)
 
-        # Due to the model's maximum input length limitation of 2048, we need to ensure that the character count of the input text <= 2048.
-        max_sequence_length = 2048
-        sequences = [sentence[:max_sequence_length] if len(sentence) > max_sequence_length else sentence for sentence in sequences]
+        # The model's maximum input length is 2048, so any input text longer than 2048 characters is skipped and replaced with a padding placeholder.
+        max_sequence_length = 2048 
+        padding_token = "<PAD>"  # Choose an appropriate padding token
+        sequences = [sentence if len(sentence) <= max_sequence_length else [padding_token] for sentence in sequences]
 
 
         special_encoding = get_special_encoding()