Commit baee508

Add new runtime images (#2181)
Signed-off-by: Tarun Kumar <takumar@redhat.com>
tarukumar authored Jan 20, 2025
1 parent c8a057d commit baee508
Showing 6 changed files with 29 additions and 29 deletions.
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/caikit-nlp@sha256:3c33185fda84d7bac6715c8743c446a6713cdbc0cb0ed831acc0df89bd8bab6b
+     image: quay.io/modh/caikit-nlp@sha256:d4b045f880cebc2b22de6c1203915be4a07909ca818f3be3df32c28e6abff526
      command: ["python", "-m", "caikit.runtime"]
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/caikit-nlp@sha256:3c33185fda84d7bac6715c8743c446a6713cdbc0cb0ed831acc0df89bd8bab6b
+     image: quay.io/modh/caikit-nlp@sha256:d4b045f880cebc2b22de6c1203915be4a07909ca818f3be3df32c28e6abff526
      command: ["python", "-m", "caikit.runtime"]
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/text-generation-inference@sha256:bb36bb41cc744a8ff94d537f74c228e8b4e17c2468c50ccd89fc21ecc3940a70
+     image: quay.io/modh/text-generation-inference@sha256:81e55b32d10848b403d6a267a8a5c565d6a025c6a395fc0d99787140fa0fbc88
      command: ["text-generation-launcher"]
      args: ["--model-name=/mnt/models/artifacts/"]
      env:
@@ -23,7 +23,7 @@ spec:
    ## Note: cannot add readiness/liveness probes to this container because knative will refuse them.
    # multi-container probing will be available after https://github.com/knative/serving/pull/14853 is merged
    - name: transformer-container
-     image: quay.io/modh/caikit-tgis-serving@sha256:fe0d1f1233d0b056ca7c690f765b20611e20837465674998e3d293df9b95e838
+     image: quay.io/modh/caikit-tgis-serving@sha256:a20e220608e9a5cb5ffc027bbe017c8d0e1715a3937285f8bc2a905939d57233
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
          value: /mnt/models
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/text-generation-inference@sha256:bb36bb41cc744a8ff94d537f74c228e8b4e17c2468c50ccd89fc21ecc3940a70
+     image: quay.io/modh/text-generation-inference@sha256:81e55b32d10848b403d6a267a8a5c565d6a025c6a395fc0d99787140fa0fbc88
      command: ["text-generation-launcher"]
      args: ["--model-name=/mnt/models/artifacts/"]
      env:
@@ -21,7 +21,7 @@ spec:
      #     cpu: 8
      #     memory: 16Gi
    - name: transformer-container
-     image: quay.io/modh/caikit-tgis-serving@sha256:fe0d1f1233d0b056ca7c690f765b20611e20837465674998e3d293df9b95e838
+     image: quay.io/modh/caikit-tgis-serving@sha256:a20e220608e9a5cb5ffc027bbe017c8d0e1715a3937285f8bc2a905939d57233
      env:
        - name: TRANSFORMERS_CACHE
          value: /tmp/transformers_cache
@@ -9,7 +9,7 @@ spec:
      name: pytorch
  containers:
    - name: kserve-container
-     image: quay.io/modh/text-generation-inference@sha256:bb36bb41cc744a8ff94d537f74c228e8b4e17c2468c50ccd89fc21ecc3940a70
+     image: quay.io/modh/text-generation-inference@sha256:81e55b32d10848b403d6a267a8a5c565d6a025c6a395fc0d99787140fa0fbc88
      command: ["text-generation-launcher"]
      args:
        - "--model-name=/mnt/models/"
@@ -218,10 +218,10 @@ Verify User Can Serve And Query A elyza/elyza-japanese-llama-2-7b-instruct Model
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=10
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=9
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
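This hunk, and each matching hunk below, makes the same one-token substitution: the `string_check_only` argument of the `Query Model Multiple Times` keyword is renamed to `validate_response`, with its value left at `${FALSE}` so these vLLM Serverless queries still skip response validation. As a hedged sketch only (the diff shows nothing beyond the arguments above, and the `${TRUE}` value is an assumption), enabling validation would presumably look like:

    # Hypothetical call with validation enabled; every argument name is taken
    # from the hunks above, only the ${TRUE} value is assumed.
    Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
    ...    inference_type=completions    n_times=1    query_idx=10
    ...    namespace=${test_namespace}    validate_response=${TRUE}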
@@ -417,10 +417,10 @@ Verify User Can Serve And Query A meta-llama/llama-2-13b-chat Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -542,10 +542,10 @@ Verify User Can Serve And Query A instructlab/merlinite-7b-lab Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -605,10 +605,10 @@ Verify User Can Serve And Query A ibm-granite/granite-8b-code-base Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -712,10 +712,10 @@ Verify User Can Serve And Query A meta-llama/llama-3-8B-Instruct Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -775,10 +775,10 @@ Verify User Can Serve And Query A ibm-granite/granite-3b-code-instruct Model
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -838,10 +838,10 @@ Verify User Can Serve And Query A ibm-granite/granite-8b-code-instruct Model
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -901,10 +901,10 @@ Verify User Can Serve And Query A ibm-granite/granite-7b-lab Model    # robocop:
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1142,10 +1142,10 @@ Verify User Can Serve And Query RHAL AI granite-7b-starter Model    # robocop:
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1275,10 +1275,10 @@ Verify User Can Serve And Query RHAL AI Granite-7b-redhat-lab Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1347,9 +1347,9 @@ Set Runtime Image
    [Arguments]    ${gpu_type}
    IF    "${RUNTIME_IMAGE}" == "${EMPTY}"
        IF    "${gpu_type}" == "nvidia"
-           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:c86ff1e89c86bc9821b75d7f2bbc170b3c13e3ccf538bf543b1110f23e056316
+           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:9689bffacabc38777555de87e0fce0dd95165de3716c68c1aa744358a592ee1f
        ELSE IF    "${gpu_type}" == "amd"
-           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:10f09eeca822ebe77e127aad7eca2571f859a5536a6023a1baffc6764bcadc6e
+           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:3719efefa24e6634b0cb4ccef25df521a26f363b724c23c37f56629df6111883
        ELSE
            FAIL    msg=Provided GPU type is not yet supported. Only nvidia and amd gpu type are supported
        END
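`Set Runtime Image` only picks a default vLLM digest when `${RUNTIME_IMAGE}` is empty, so a run can still pin its own image by setting that variable. A minimal usage sketch, assuming the keyword is called from inside a test case (since `Set Test Variable` scopes `${runtime_image}` to the running test); the `Log` line is illustrative and not part of this diff:

    # Resolve the default vLLM image for NVIDIA GPUs, then report the choice
    Set Runtime Image    nvidia
    Log    Selected runtime image: ${runtime_image}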
