Commit baee508

Add new runtime images (#2181)
Signed-off-by: Tarun Kumar <takumar@redhat.com>
tarukumar authored Jan 20, 2025
1 parent c8a057d commit baee508
Showing 6 changed files with 29 additions and 29 deletions.
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/caikit-nlp@sha256:3c33185fda84d7bac6715c8743c446a6713cdbc0cb0ed831acc0df89bd8bab6b
+     image: quay.io/modh/caikit-nlp@sha256:d4b045f880cebc2b22de6c1203915be4a07909ca818f3be3df32c28e6abff526
      command: ["python", "-m", "caikit.runtime"]
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/caikit-nlp@sha256:3c33185fda84d7bac6715c8743c446a6713cdbc0cb0ed831acc0df89bd8bab6b
+     image: quay.io/modh/caikit-nlp@sha256:d4b045f880cebc2b22de6c1203915be4a07909ca818f3be3df32c28e6abff526
      command: ["python", "-m", "caikit.runtime"]
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/text-generation-inference@sha256:bb36bb41cc744a8ff94d537f74c228e8b4e17c2468c50ccd89fc21ecc3940a70
+     image: quay.io/modh/text-generation-inference@sha256:81e55b32d10848b403d6a267a8a5c565d6a025c6a395fc0d99787140fa0fbc88
      command: ["text-generation-launcher"]
      args: ["--model-name=/mnt/models/artifacts/"]
      env:
@@ -23,7 +23,7 @@ spec:
    ## Note: cannot add readiness/liveness probes to this container because knative will refuse them.
    # multi-container probing will be available after https://github.com/knative/serving/pull/14853 is merged
    - name: transformer-container
-     image: quay.io/modh/caikit-tgis-serving@sha256:fe0d1f1233d0b056ca7c690f765b20611e20837465674998e3d293df9b95e838
+     image: quay.io/modh/caikit-tgis-serving@sha256:a20e220608e9a5cb5ffc027bbe017c8d0e1715a3937285f8bc2a905939d57233
      env:
        - name: RUNTIME_LOCAL_MODELS_DIR
          value: /mnt/models
@@ -10,7 +10,7 @@ spec:
      name: caikit
  containers:
    - name: kserve-container
-     image: quay.io/modh/text-generation-inference@sha256:bb36bb41cc744a8ff94d537f74c228e8b4e17c2468c50ccd89fc21ecc3940a70
+     image: quay.io/modh/text-generation-inference@sha256:81e55b32d10848b403d6a267a8a5c565d6a025c6a395fc0d99787140fa0fbc88
      command: ["text-generation-launcher"]
      args: ["--model-name=/mnt/models/artifacts/"]
      env:
@@ -21,7 +21,7 @@ spec:
      #     cpu: 8
      #     memory: 16Gi
    - name: transformer-container
-     image: quay.io/modh/caikit-tgis-serving@sha256:fe0d1f1233d0b056ca7c690f765b20611e20837465674998e3d293df9b95e838
+     image: quay.io/modh/caikit-tgis-serving@sha256:a20e220608e9a5cb5ffc027bbe017c8d0e1715a3937285f8bc2a905939d57233
      env:
        - name: TRANSFORMERS_CACHE
          value: /tmp/transformers_cache
@@ -9,7 +9,7 @@ spec:
      name: pytorch
  containers:
    - name: kserve-container
-     image: quay.io/modh/text-generation-inference@sha256:bb36bb41cc744a8ff94d537f74c228e8b4e17c2468c50ccd89fc21ecc3940a70
+     image: quay.io/modh/text-generation-inference@sha256:81e55b32d10848b403d6a267a8a5c565d6a025c6a395fc0d99787140fa0fbc88
      command: ["text-generation-launcher"]
      args:
        - "--model-name=/mnt/models/"
@@ -218,10 +218,10 @@ Verify User Can Serve And Query A elyza/elyza-japanese-llama-2-7b-instruct Model
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=10
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=9
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
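This hunk, and each matching hunk below, makes the same one-token substitution: the `string_check_only` argument of the `Query Model Multiple Times` keyword is renamed to `validate_response`, with its value left at `${FALSE}` so these vLLM Serverless queries still skip response validation. As a hedged sketch only (the diff shows nothing beyond the arguments above, and the `${TRUE}` value is an assumption), enabling validation would presumably look like:

    # Hypothetical call with validation enabled; every argument name is taken
    # from the hunks above, only the ${TRUE} value is assumed.
    Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
    ...    inference_type=completions    n_times=1    query_idx=10
    ...    namespace=${test_namespace}    validate_response=${TRUE}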
@@ -417,10 +417,10 @@ Verify User Can Serve And Query A meta-llama/llama-2-13b-chat Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -542,10 +542,10 @@ Verify User Can Serve And Query A instructlab/merlinite-7b-lab Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -605,10 +605,10 @@ Verify User Can Serve And Query A ibm-granite/granite-8b-code-base Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -712,10 +712,10 @@ Verify User Can Serve And Query A meta-llama/llama-3-8B-Instruct Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -775,10 +775,10 @@ Verify User Can Serve And Query A ibm-granite/granite-3b-code-instruct Model
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -838,10 +838,10 @@ Verify User Can Serve And Query A ibm-granite/granite-8b-code-instruct Model
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -901,10 +901,10 @@ Verify User Can Serve And Query A ibm-granite/granite-7b-lab Model    # robocop:
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1142,10 +1142,10 @@ Verify User Can Serve And Query RHAL AI granite-7b-starter Model    # robocop:
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1275,10 +1275,10 @@ Verify User Can Serve And Query RHAL AI Granite-7b-redhat-lab Model    # robocop
    ELSE IF    "${RUNTIME_NAME}" == "vllm-runtime" and "${KSERVE_MODE}" == "Serverless"
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=chat-completions    n_times=1    query_idx=12
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
        Query Model Multiple Times    model_name=${model_name}    runtime=${RUNTIME_NAME}    protocol=http
        ...    inference_type=completions    n_times=1    query_idx=11
-       ...    namespace=${test_namespace}    string_check_only=${FALSE}
+       ...    namespace=${test_namespace}    validate_response=${FALSE}
    END
    [Teardown]    Run Keywords
    ...    Clean Up Test Project    test_ns=${test_namespace}
@@ -1347,9 +1347,9 @@ Set Runtime Image
    [Arguments]    ${gpu_type}
    IF    "${RUNTIME_IMAGE}" == "${EMPTY}"
        IF    "${gpu_type}" == "nvidia"
-           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:c86ff1e89c86bc9821b75d7f2bbc170b3c13e3ccf538bf543b1110f23e056316
+           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:9689bffacabc38777555de87e0fce0dd95165de3716c68c1aa744358a592ee1f
        ELSE IF    "${gpu_type}" == "amd"
-           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:10f09eeca822ebe77e127aad7eca2571f859a5536a6023a1baffc6764bcadc6e
+           Set Test Variable    ${runtime_image}    quay.io/modh/vllm@sha256:3719efefa24e6634b0cb4ccef25df521a26f363b724c23c37f56629df6111883
        ELSE
            FAIL    msg=Provided GPU type is not yet supported. Only nvidia and amd gpu type are supported
        END
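`Set Runtime Image` only picks a default vLLM digest when `${RUNTIME_IMAGE}` is empty, so a run can still pin its own image by setting that variable. A minimal usage sketch, assuming the keyword is called from inside a test case (since `Set Test Variable` scopes `${runtime_image}` to the running test); the `Log` line is illustrative and not part of this diff:

    # Resolve the default vLLM image for NVIDIA GPUs, then report the choice
    Set Runtime Image    nvidia
    Log    Selected runtime image: ${runtime_image}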
