From f6f35fb8b76a5ae84ec90669563c4bc0aecc94aa Mon Sep 17 00:00:00 2001 From: Michelle Yoo Date: Tue, 3 Oct 2023 18:59:41 +0000 Subject: [PATCH] Fix max_predict_length to 64 and test_decode to use ici_tensor_parallelism for correct sharding --- MaxText/configs/base.yml | 2 +- end_to_end/test_decode.sh | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/MaxText/configs/base.yml b/MaxText/configs/base.yml index beb1cbcb5..36484e0db 100644 --- a/MaxText/configs/base.yml +++ b/MaxText/configs/base.yml @@ -145,7 +145,7 @@ max_target_length: 2048 max_eval_target_length: 512 # Maximum length cutoff for predicted tokens. -max_predict_length: 50 +max_predict_length: 64 # Sampling temperature for language model inference. sampling_temperature: 0.6 # Top k cutoff for logit sampling. If 0 then no top-k cutoff is used. diff --git a/end_to_end/test_decode.sh b/end_to_end/test_decode.sh index c05d40ef6..094963b0f 100644 --- a/end_to_end/test_decode.sh +++ b/end_to_end/test_decode.sh @@ -17,6 +17,7 @@ fi #Train python3 MaxText/decode.py MaxText/configs/base.yml run_name=$RUN_NAME\ steps=50 enable_checkpointing=False metrics_file='metrics.txt'\ - base_output_directory=$OUTPUT_PATH dataset_path=$DATASET_PATH + base_output_directory=$OUTPUT_PATH dataset_path=$DATASET_PATH \ + ici_tensor_parallelism=4 python3 end_to_end/eval_assert.py metrics_average metrics.txt $NUM_TOKEN_THRESHOLD num_tokens