Skip to content

Commit 210b259

Browse files
author
Rafi Witten
authored
Merge branch 'main' into rwitten_v4_convergence
2 parents 48a25d1 + 010fb68 commit 210b259

File tree

3 files changed

+10
-9
lines changed

3 files changed

+10
-9
lines changed

.github/workflows/UnitTests.yml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@ on:
2323
branches: [ "main" ]
2424
workflow_dispatch:
2525
schedule:
26-
# Run the job every 60 mins
27-
- cron: '*/60 * * * *'
26+
# Run the job every 2 hours
27+
- cron: '0 */2 * * *'
2828

2929
jobs:
3030
# IF YOU MODIFY THIS, YOU SHOULD ALSO ADD CORRESPONDING MODICATIONS TO 'gpu' job

end_to_end/llama_load_and_test.sh

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
11
set -e
22
idx=$(date +%Y-%m-%d-%H-%M)
3-
pip install torch
4-
gsutil cp -r gs://maxtext-llama/llama2-7b/meta-ckpt /tmp/
5-
python3 MaxText/convert_llama_ckpt.py --base-model-path /tmp/meta-ckpt --model-size 7b --maxtext-model-path gs://maxtext-llama/test/${idx}/decode-ckpt-maxtext/
6-
python3 MaxText/decode.py MaxText/configs/base.yml run_name=runner_${idx} base_output_directory=gs://runner-maxtext-logs dataset_path=gs://maxtext-dataset load_from_other_directory=gs://maxtext-llama/test/${idx}/decode-ckpt-maxtext/ load_from_other_directory_step=0 per_device_batch_size=1 model_name='llama2-7b' assets_path=gs://maxtext-llama/llama2-7b ici_tensor_parallelism=4 steps=1 max_prefill_predict_length=4 max_target_length=16 async_checkpointing=false prompt="I love to" autoregressive_decode_assert="read. I love to write. I love to share." attention=dot_product
3+
#TODO(internal bug -- migrate to XLML)
4+
#pip install torch
5+
#gsutil cp -r gs://maxtext-llama/llama2-7b/meta-ckpt /tmp/
6+
#python3 MaxText/convert_llama_ckpt.py --base-model-path /tmp/meta-ckpt --model-size 7b --maxtext-model-path gs://maxtext-llama/test/${idx}/decode-ckpt-maxtext/
7+
#python3 MaxText/decode.py MaxText/configs/base.yml run_name=runner_${idx} base_output_directory=gs://runner-maxtext-logs dataset_path=gs://maxtext-dataset load_from_other_directory=gs://maxtext-llama/test/${idx}/decode-ckpt-maxtext/ load_from_other_directory_step=0 per_device_batch_size=1 model_name='llama2-7b' assets_path=gs://maxtext-llama/llama2-7b ici_tensor_parallelism=4 steps=1 max_prefill_predict_length=4 max_target_length=16 async_checkpointing=false prompt="I love to" autoregressive_decode_assert="read. I love to write. I love to share." attention=dot_product
8+
9+
python3 MaxText/decode.py MaxText/configs/base.yml run_name=runner_${idx} base_output_directory=gs://runner-maxtext-logs dataset_path=gs://maxtext-dataset load_from_other_directory=gs://maxtext-llama/test/2024-01-12-17-46/decode-ckpt-maxtext/ load_from_other_directory_step=0 per_device_batch_size=1 model_name='llama2-7b' assets_path=gs://maxtext-llama/llama2-7b ici_tensor_parallelism=4 steps=1 max_prefill_predict_length=4 max_target_length=16 async_checkpointing=false prompt="I love to" autoregressive_decode_assert="read. I love to write. I love to share." attention=dot_product

end_to_end/test_decode.sh

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,6 +25,4 @@ fi
2525
python3 MaxText/decode.py MaxText/configs/base.yml run_name=$RUN_NAME\
2626
steps=50 enable_checkpointing=False metrics_file=/tmp/${RUN_NAME}_metrics.txt \
2727
base_output_directory=$OUTPUT_PATH dataset_path=$DATASET_PATH \
28-
attention=dot_product ici_tensor_parallelism=${ICI_TENSOR_PARALLELISM} add_eos=False
29-
30-
python3 end_to_end/eval_assert.py metrics_average /tmp/${RUN_NAME}_metrics.txt $NUM_TOKEN_THRESHOLD num_tokens
28+
attention=dot_product ici_tensor_parallelism=${ICI_TENSOR_PARALLELISM} add_eos=False

0 commit comments

Comments
 (0)