-
Notifications
You must be signed in to change notification settings - Fork 1.1k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing 28 changed files with 1,379 additions and 937 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
transformers | ||
matplotlib | ||
deepspeed-mii>=0.2.0 | ||
vllm>=0.2.7 | ||
numpy |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,25 +1,15 @@ | ||
# Copyright (c) Microsoft Corporation.
# SPDX-License-Identifier: Apache-2.0

# DeepSpeed Team

# Run the full benchmark sweep: for each model, benchmark the default
# (DeepSpeed-MII) backend and the vLLM backend, then run two extra Mixtral
# configurations with non-default prompt/generation lengths.
set -euo pipefail

MODELS=(
  meta-llama/Llama-2-7b-hf
  meta-llama/Llama-2-13b-hf
  meta-llama/Llama-2-70b-hf
  tiiuae/falcon-40B
  tiiuae/falcon-180B
  microsoft/phi-2
  mistralai/Mixtral-8x7B-v0.1
)

for MODEL in "${MODELS[@]}"; do
  python ./run_benchmark.py --model "${MODEL}" --stream
  python ./run_benchmark.py --model "${MODEL}" --stream --vllm
done

# Extra runs for Mixtral with non-default settings
python ./run_benchmark.py --model mistralai/Mixtral-8x7B-v0.1 --stream --tp_size 4 --mean_prompt_length 500 --mean_max_new_tokens 150 500 1024
python ./run_benchmark.py --model mistralai/Mixtral-8x7B-v0.1 --stream --tp_size 4 --mean_prompt_length 500 --mean_max_new_tokens 150 500 1024 --vllm
This file was deleted.
Oops, something went wrong.
This file was deleted.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,40 @@ | ||
# Copyright (c) Microsoft Corporation. | ||
# SPDX-License-Identifier: Apache-2.0 | ||
|
||
# DeepSpeed Team | ||
|
||
from src.client import run_client | ||
from src.server import start_server, stop_server | ||
from src.utils import ( | ||
get_args_product, | ||
parse_args, | ||
print_summary, | ||
results_exist, | ||
save_json_results, | ||
CLIENT_PARAMS, | ||
SERVER_PARAMS, | ||
) | ||
|
||
|
||
def run_benchmark() -> None:
    """Run the full benchmark sweep.

    For every combination of server-side parameters, start a server once,
    then run the client for every combination of client-side parameters,
    printing a summary and saving the results of each run as JSON. The
    server is stopped before moving on to the next server configuration.
    """
    args = parse_args(server_args=True, client_args=True)

    for server_args in get_args_product(args, which=SERVER_PARAMS):
        start_server(server_args)

        for client_args in get_args_product(server_args, which=CLIENT_PARAMS):
            # Skip settings that already have saved results unless the user
            # explicitly asked to regenerate them.
            if results_exist(client_args) and not args.overwrite_results:
                print(
                    "Found existing results and skipping current setting. To ignore existing results, use --overwrite_results"
                )
                continue

            response_details = run_client(client_args)
            print_summary(client_args, response_details)
            save_json_results(client_args, response_details)

        stop_server(server_args)


if __name__ == "__main__":
    run_benchmark()
This file was deleted.
Oops, something went wrong.
Oops, something went wrong.