From e71727d8337db43c4adf71f45efee87d76c65567 Mon Sep 17 00:00:00 2001 From: Helber Belmiro Date: Tue, 28 Jan 2025 15:37:20 -0300 Subject: [PATCH] chore(components): Added kfp_deploy_model_to_kserve_demo Signed-off-by: Helber Belmiro docs(backend): improved backend README (#11511) * improved backend README Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com> * Update backend/README.md Co-authored-by: Helber Belmiro Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com> * Update backend/README.md Co-authored-by: Helber Belmiro Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com> * Update backend/README.md Co-authored-by: Helber Belmiro Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com> * Update backend/README.md Co-authored-by: Helber Belmiro Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com> --------- Signed-off-by: Daniel Dowler <12484302+dandawg@users.noreply.github.com> Co-authored-by: Helber Belmiro fix(CI): Use the correct image registry for replacements in integration tests (#11564) * Use the correct image registry for replacements in integration tests The image registry was changed to GitHub Container Registry in the 2.4 release. Signed-off-by: mprahl * Print the pod logs when the pods fail to start in integration tests Signed-off-by: mprahl * Fix the sample compilation in the API server container build Signed-off-by: mprahl * Show the output when building the container images in CI Signed-off-by: mprahl --------- Signed-off-by: mprahl feat(api): Add SemaphoreKey and MutexName fields to proto (#11384) Signed-off-by: ddalvi --- .../manifests/argo/kustomization.yaml | 6 +- .../manifests/tekton/kustomization.yaml | 6 +- .github/resources/scripts/build-images.sh | 10 +- .../scripts/kfp-readiness/wait_for_pods.py | 13 + .../go/pipelinespec/pipeline_spec.pb.go | 36 +- api/v2alpha1/pipeline_spec.proto | 6 +- backend/Dockerfile | 4 +- backend/README.md | 94 ++-- .../Containerfile | 15 + .../kfp_deploy_model_to_kserve_demo/README.md | 45 ++ .../component.yaml | 52 ++ .../kservedeployer.py | 462 ++++++++++++++++++ .../manifests/dspa.yaml | 45 ++ .../manifests/kserve-serving-runtime.yaml | 38 ++ .../manifests/kustomization.yaml | 5 + .../manifests/role-binding.yaml | 13 + .../manifests/role.yaml | 9 + .../pipeline.py | 24 + .../pipeline.yaml | 170 +++++++ .../requirements.in | 2 + .../requirements.txt | 200 ++++++++ 21 files changed, 1193 insertions(+), 62 deletions(-) create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/Containerfile create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/README.md create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/component.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/kservedeployer.py create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/dspa.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kserve-serving-runtime.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kustomization.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role-binding.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.py create mode 100644 
components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.yaml create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.in create mode 100644 components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.txt diff --git a/.github/resources/manifests/argo/kustomization.yaml b/.github/resources/manifests/argo/kustomization.yaml index cd2c6bdb0b35..825e2695db81 100644 --- a/.github/resources/manifests/argo/kustomization.yaml +++ b/.github/resources/manifests/argo/kustomization.yaml @@ -5,13 +5,13 @@ resources: - ../../../../manifests/kustomize/env/platform-agnostic images: -- name: gcr.io/ml-pipeline/api-server +- name: ghcr.io/kubeflow/kfp-api-server newName: kind-registry:5000/apiserver newTag: latest -- name: gcr.io/ml-pipeline/persistenceagent +- name: ghcr.io/kubeflow/kfp-persistence-agent newName: kind-registry:5000/persistenceagent newTag: latest -- name: gcr.io/ml-pipeline/scheduledworkflow +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller newName: kind-registry:5000/scheduledworkflow newTag: latest diff --git a/.github/resources/manifests/tekton/kustomization.yaml b/.github/resources/manifests/tekton/kustomization.yaml index a86686a70b1d..391a26b93672 100644 --- a/.github/resources/manifests/tekton/kustomization.yaml +++ b/.github/resources/manifests/tekton/kustomization.yaml @@ -14,13 +14,13 @@ resources: # when application is deleted. images: -- name: gcr.io/ml-pipeline/api-server +- name: ghcr.io/kubeflow/kfp-api-server newName: kind-registry:5000/apiserver newTag: latest -- name: gcr.io/ml-pipeline/persistenceagent +- name: ghcr.io/kubeflow/kfp-persistence-agent newName: kind-registry:5000/persistenceagent newTag: latest -- name: gcr.io/ml-pipeline/scheduledworkflow +- name: ghcr.io/kubeflow/kfp-scheduled-workflow-controller newName: kind-registry:5000/scheduledworkflow newTag: latest - name: '*/aipipeline/tekton-exithandler-controller' diff --git a/.github/resources/scripts/build-images.sh b/.github/resources/scripts/build-images.sh index a70d295c2910..7cb06b3a037e 100755 --- a/.github/resources/scripts/build-images.sh +++ b/.github/resources/scripts/build-images.sh @@ -25,35 +25,35 @@ EXIT_CODE=0 docker system prune -a -f -docker build -q -t "${REGISTRY}/apiserver:${TAG}" -f backend/Dockerfile . && docker push "${REGISTRY}/apiserver:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/apiserver:${TAG}" -f backend/Dockerfile . && docker push "${REGISTRY}/apiserver:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build apiserver image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/persistenceagent:${TAG}" -f backend/Dockerfile.persistenceagent . && docker push "${REGISTRY}/persistenceagent:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/persistenceagent:${TAG}" -f backend/Dockerfile.persistenceagent . && docker push "${REGISTRY}/persistenceagent:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build persistenceagent image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/scheduledworkflow:${TAG}" -f backend/Dockerfile.scheduledworkflow . && docker push "${REGISTRY}/scheduledworkflow:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/scheduledworkflow:${TAG}" -f backend/Dockerfile.scheduledworkflow . && docker push "${REGISTRY}/scheduledworkflow:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build scheduledworkflow image." 
exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/driver:${TAG}" -f backend/Dockerfile.driver . && docker push "${REGISTRY}/driver:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/driver:${TAG}" -f backend/Dockerfile.driver . && docker push "${REGISTRY}/driver:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build driver image." exit $EXIT_CODE fi -docker build -q -t "${REGISTRY}/launcher:${TAG}" -f backend/Dockerfile.launcher . && docker push "${REGISTRY}/launcher:${TAG}" || EXIT_CODE=$? +docker build --progress=plain -t "${REGISTRY}/launcher:${TAG}" -f backend/Dockerfile.launcher . && docker push "${REGISTRY}/launcher:${TAG}" || EXIT_CODE=$? if [[ $EXIT_CODE -ne 0 ]] then echo "Failed to build launcher image." diff --git a/.github/resources/scripts/kfp-readiness/wait_for_pods.py b/.github/resources/scripts/kfp-readiness/wait_for_pods.py index ebc7546a300f..cc405bcbe213 100644 --- a/.github/resources/scripts/kfp-readiness/wait_for_pods.py +++ b/.github/resources/scripts/kfp-readiness/wait_for_pods.py @@ -13,6 +13,17 @@ config.load_kube_config() v1 = client.CoreV1Api() +def log_pods(): + pods = v1.list_namespaced_pod(namespace=namespace) + + for pod in pods.items: + try: + logging.info( + f"---- Pod {namespace}/{pod.metadata.name} logs ----\n" + + v1.read_namespaced_pod_log(pod.metadata.name, namespace) + ) + except client.exceptions.ApiException: + continue def get_pod_statuses(): pods = v1.list_namespaced_pod(namespace=namespace) @@ -74,6 +85,8 @@ def check_pods(calm_time=10, timeout=600, retries_after_ready=5): logging.info(f"Pods are still stabilizing. Retrying in {calm_time} seconds...") time.sleep(calm_time) else: + log_pods() + raise Exception("Pods did not stabilize within the timeout period.") logging.info("Final pod statuses:") diff --git a/api/v2alpha1/go/pipelinespec/pipeline_spec.pb.go b/api/v2alpha1/go/pipelinespec/pipeline_spec.pb.go index b4bca0cec5ff..8a6b64ab5d87 100644 --- a/api/v2alpha1/go/pipelinespec/pipeline_spec.pb.go +++ b/api/v2alpha1/go/pipelinespec/pipeline_spec.pb.go @@ -2525,6 +2525,11 @@ type PipelineConfig struct { state protoimpl.MessageState sizeCache protoimpl.SizeCache unknownFields protoimpl.UnknownFields + + // Name of the semaphore key to control pipeline concurrency + SemaphoreKey string `protobuf:"bytes,1,opt,name=semaphore_key,json=semaphoreKey,proto3" json:"semaphore_key,omitempty"` + // Name of the mutex to ensure mutual exclusion + MutexName string `protobuf:"bytes,2,opt,name=mutex_name,json=mutexName,proto3" json:"mutex_name,omitempty"` } func (x *PipelineConfig) Reset() { @@ -2559,6 +2564,20 @@ func (*PipelineConfig) Descriptor() ([]byte, []int) { return file_pipeline_spec_proto_rawDescGZIP(), []int{30} } +func (x *PipelineConfig) GetSemaphoreKey() string { + if x != nil { + return x.SemaphoreKey + } + return "" +} + +func (x *PipelineConfig) GetMutexName() string { + if x != nil { + return x.MutexName + } + return "" +} + // The runtime config of a PipelineJob. 
type PipelineJob_RuntimeConfig struct { state protoimpl.MessageState @@ -6523,12 +6542,17 @@ var file_pipeline_spec_proto_rawDesc = []byte{ 0x6b, 0x65, 0x79, 0x12, 0x2d, 0x0a, 0x05, 0x76, 0x61, 0x6c, 0x75, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x17, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x53, 0x74, 0x72, 0x75, 0x63, 0x74, 0x52, 0x05, 0x76, 0x61, 0x6c, - 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x10, 0x0a, 0x0e, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, - 0x6e, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x42, 0x3c, 0x5a, 0x3a, 0x67, 0x69, 0x74, 0x68, - 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x66, 0x6c, 0x6f, 0x77, 0x2f, - 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x2f, 0x61, 0x70, 0x69, 0x2f, 0x76, 0x32, - 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x67, 0x6f, 0x2f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, - 0x6e, 0x65, 0x73, 0x70, 0x65, 0x63, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x75, 0x65, 0x3a, 0x02, 0x38, 0x01, 0x22, 0x54, 0x0a, 0x0e, 0x50, 0x69, 0x70, 0x65, 0x6c, 0x69, + 0x6e, 0x65, 0x43, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x65, 0x6d, 0x61, + 0x70, 0x68, 0x6f, 0x72, 0x65, 0x5f, 0x6b, 0x65, 0x79, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0c, 0x73, 0x65, 0x6d, 0x61, 0x70, 0x68, 0x6f, 0x72, 0x65, 0x4b, 0x65, 0x79, 0x12, 0x1d, 0x0a, + 0x0a, 0x6d, 0x75, 0x74, 0x65, 0x78, 0x5f, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x09, 0x6d, 0x75, 0x74, 0x65, 0x78, 0x4e, 0x61, 0x6d, 0x65, 0x42, 0x3c, 0x5a, 0x3a, + 0x67, 0x69, 0x74, 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x6b, 0x75, 0x62, 0x65, 0x66, + 0x6c, 0x6f, 0x77, 0x2f, 0x70, 0x69, 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x2f, 0x61, 0x70, + 0x69, 0x2f, 0x76, 0x32, 0x61, 0x6c, 0x70, 0x68, 0x61, 0x31, 0x2f, 0x67, 0x6f, 0x2f, 0x70, 0x69, + 0x70, 0x65, 0x6c, 0x69, 0x6e, 0x65, 0x73, 0x70, 0x65, 0x63, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x33, } var ( diff --git a/api/v2alpha1/pipeline_spec.proto b/api/v2alpha1/pipeline_spec.proto index 5393b2e222bf..f85c6b5e8984 100644 --- a/api/v2alpha1/pipeline_spec.proto +++ b/api/v2alpha1/pipeline_spec.proto @@ -1106,5 +1106,9 @@ message PlatformDeploymentConfig { // Spec for pipeline-level config options. See PipelineConfig DSL class. message PipelineConfig { - // TODO add pipeline-level configs + // Name of the semaphore key to control pipeline concurrency + string semaphore_key = 1; + + // Name of the mutex to ensure mutual exclusion + string mutex_name = 2; } diff --git a/backend/Dockerfile b/backend/Dockerfile index 22d917aa24d6..082f910305fe 100644 --- a/backend/Dockerfile +++ b/backend/Dockerfile @@ -54,9 +54,9 @@ COPY backend/src/apiserver/config/sample_config.json /samples/ # Compiling the preloaded samples. # The default image is replaced with the GCR-hosted python image. RUN set -e; \ - < /samples/sample_config.json jq .[].file --raw-output | while read pipeline_yaml; do \ + < /samples/sample_config.json jq ".pipelines[].file" --raw-output | while read pipeline_yaml; do \ pipeline_py="${pipeline_yaml%.yaml}"; \ - python3 "$pipeline_py"; \ + echo "Compiling: \"$pipeline_py\"" && python3 "$pipeline_py" && echo -n "Output: " && ls "$pipeline_py.yaml"; \ done # 3. 
Start api web server diff --git a/backend/README.md b/backend/README.md index 86cd4e896fc4..a6c0f9b82c46 100644 --- a/backend/README.md +++ b/backend/README.md @@ -1,6 +1,20 @@ +# Kubeflow Pipelines Backend + +## Overview + This directory contains code for the components that comprise the Kubeflow Pipelines backend. +This README will help you set up your coding environment in order to build and run the Kubeflow Pipelines backend. The KFP backend powers the core functionality of the KFP platform, handling API requests, workflow management, and data persistence. + +## Prerequisites +Before you begin, ensure you have: +- Go programming language installed +- [go-licenses tool](../hack/install-go-licenses.sh) +- Docker or Podman installed (for building container images) + +Note that you may need to restart your shell after installing these resources in order for the changes to take effect. + ## Building & Testing To run all unittests for backend: @@ -15,56 +29,38 @@ The API server itself can be built using: go build -o /tmp/apiserver backend/src/apiserver/*.go ``` -## Code Style - -Backend codebase follows the [Google's Go Style Guide](https://google.github.io/styleguide/go/). Please, take time to get familiar with the [best practices](https://google.github.io/styleguide/go/best-practices). It is not intended to be exhaustive, but it often helps minimizing guesswork among developers and keep codebase uniform and consistent. - -We use [golangci-lint](https://golangci-lint.run/) tool that can catch common mistakes locally (see detailed configuration [here](https://github.com/kubeflow/pipelines/blob/master/.golangci.yaml)). It can be [conveniently integrated](https://golangci-lint.run/usage/integrations/) with multiple popular IDEs such as VS Code or Vim. - -Finally, it is advised to install [pre-commit](https://pre-commit.com/) in order to automate linter checks (see configuration [here](https://github.com/kubeflow/pipelines/blob/master/.pre-commit-config.yaml)) - -## Building APIServer image locally - The API server image can be built from the root folder of the repo using: ``` export API_SERVER_IMAGE=api_server docker build -f backend/Dockerfile . --tag $API_SERVER_IMAGE ``` -## Deploy APIServer with the image you own build +### Deploying the APIServer (from the image you built) on Kubernetes -Run +First, push your image to a registry that is accessible from your Kubernetes cluster. + +Then, run: ``` kubectl edit deployment.v1.apps/ml-pipeline -n kubeflow ``` -You'll see the field reference the api server docker image. +You'll see the field reference the api server container image (`spec.containers[0].image: gcr.io/ml-pipeline/api-server:`). Change it to point to your own build, after saving and closing the file, apiserver will restart with your change. -## Building client library and swagger files +### Building client library and swagger files After making changes to proto files, the Go client libraries, Python client libraries and swagger files need to be regenerated and checked-in. Refer to [backend/api](./api/README.md) for details. -## Updating licenses info - -1. [Install go-licenses tool](../hack/install-go-licenses.sh) and refer to [its documentation](https://github.com/google/go-licenses) for how to use it. +### Updating licenses info +1. [Install go-licenses tool](../hack/install-go-licenses.sh) (if you haven't already) and refer to [its documentation](https://github.com/google/go-licenses) for how to use it. 2. 
Run the tool to update all licenses: ```bash - make all + make -C backend all ``` -## Updating python dependencies - -[pip-tools](https://github.com/jazzband/pip-tools) is used to manage python -dependencies. To update dependencies, edit [requirements.in](requirements.in) -and run `./update_requirements.sh` to update and pin the transitive -dependencies. - -# Visualization Server Instructions - -## Updating python dependencies +### Updating python dependencies [pip-tools](https://github.com/jazzband/pip-tools) is used to manage python dependencies. To update dependencies, edit [requirements.in](requirements.in) @@ -72,7 +68,7 @@ and run `./update_requirements.sh` to update and pin the transitive dependencies. -## Building conformance tests (WIP) +### Building conformance tests (WIP) Run ``` @@ -81,7 +77,7 @@ docker build . -f backend/Dockerfile.conformance -t ## API Server Development -### Run Locally With a Kind Cluster +### Run the KFP Backend Locally With a Kind Cluster This deploys a local Kubernetes cluster leveraging [kind](https://kind.sigs.k8s.io/), with all the components required to run the Kubeflow Pipelines API server. Note that the `ml-pipeline` `Deployment` (API server) has its replicas set to @@ -99,6 +95,7 @@ pods on the cluster using the `ml-pipeline` `Service`. network interface through Docker/Podman Desktop. See [kind #1200](https://github.com/kubernetes-sigs/kind/issues/1200#issuecomment-1304855791) for an example manifest. * Optional: VSCode is installed to leverage a sample `launch.json` file. + * This relies on dlv: (go install -v github.com/go-delve/delve/cmd/dlv@latest) #### Provisioning the Cluster @@ -111,15 +108,9 @@ make -C backend dev-kind-cluster This may take several minutes since there are many pods. Note that many pods will be in "CrashLoopBackOff" status until all the pods have started. -#### Deleting the Cluster - -Run the following to delete the cluster: +Also, note that the config in the `make` command above sets the `ml-pipeline` `Deployment` (api server) to have 0 replicas. The intent is to replace it with a locally running API server for debugging and faster development. See the following steps to run the API server locally, and connect it to the KFP backend on your Kind cluster. Note that other backend components (for example, the persistence agent) may show errors until the API server is brought up and connected to the cluster. -```bash -kind delete clusters dev-pipelines-api -``` - -#### Launch the API Server With VSCode +#### Launching the API Server With VSCode After the cluster is provisioned, you may leverage the following sample `.vscode/launch.json` file to run the API server locally: @@ -168,12 +159,12 @@ You can also directly connect to the MariaDB database server with: mysql -h 127.0.0.1 -u root ``` -## Remote Debug the Driver +### Remote Debug the Driver These instructions assume you are leveraging the Kind cluster in the [Run Locally With a Kind Cluster](#run-locally-with-a-kind-cluster) section. -### Build the Driver Image With Debug Prerequisites +#### Build the Driver Image With Debug Prerequisites Run the following to create the `backend/Dockerfile.driver-debug` file and build the container image tagged as `kfp-driver:debug`. This container image is based on `backend/Dockerfile.driver` but installs @@ -197,7 +188,7 @@ Alternatively, you can use this Make target that does both. 
make -C kind-build-and-load-driver-debug ``` -### Run the API Server With Debug Configuration +#### Run the API Server With Debug Configuration You may use the following VS Code `launch.json` file to run the API server which overrides the Driver command to use Delve and the Driver image to use debug image built previously. @@ -229,7 +220,7 @@ command to use Delve and the Driver image to use debug image built previously. } ``` -### Starting a Remote Debug Session +#### Starting a Remote Debug Session Start by launching a pipeline. This will eventually create a Driver pod that is waiting for a remote debug connection. @@ -273,3 +264,22 @@ For debugging a specific Driver pod, you'll need to continuously port forward an without a breakpoint so that Delve will continue execution until the Driver pod you are interested in starts up. At that point, you can set a break point, port forward, and connect to the remote debug session to debug that specific Driver pod. + +### Deleting the Kind Cluster + +Run the following to delete the cluster (once you are finished): + +```bash +kind delete clusters dev-pipelines-api +``` + +## Contributing +### Code Style + +Backend codebase follows the [Google's Go Style Guide](https://google.github.io/styleguide/go/). Please, take time to get familiar with the [best practices](https://google.github.io/styleguide/go/best-practices). It is not intended to be exhaustive, but it often helps minimizing guesswork among developers and keep codebase uniform and consistent. + +We use [golangci-lint](https://golangci-lint.run/) tool that can catch common mistakes locally (see detailed configuration [here](https://github.com/kubeflow/pipelines/blob/master/.golangci.yaml)). It can be [conveniently integrated](https://golangci-lint.run/usage/integrations/) with multiple popular IDEs such as VS Code or Vim. + +Finally, it is advised to install [pre-commit](https://pre-commit.com/) in order to automate linter checks (see configuration [here](https://github.com/kubeflow/pipelines/blob/master/.pre-commit-config.yaml)) + + diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/Containerfile b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/Containerfile new file mode 100644 index 000000000000..7fd7d1ef792c --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/Containerfile @@ -0,0 +1,15 @@ +FROM python:3.9-slim-bullseye +RUN apt-get update && apt-get install -y gcc python3-dev + +COPY requirements.txt . +RUN pip install --upgrade pip +RUN python3 -m pip install --upgrade -r \ + requirements.txt --quiet --no-cache-dir \ + && rm -f requirements.txt + +ENV APP_HOME /app +COPY kservedeployer.py $APP_HOME/kservedeployer.py +WORKDIR $APP_HOME + +ENTRYPOINT ["python"] +CMD ["kservedeployer.py"] diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/README.md b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/README.md new file mode 100644 index 000000000000..4155b4e88708 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/README.md @@ -0,0 +1,45 @@ +# Using Data Science Pipelines to deploy a model to KServe in OpenShift AI + +This example is based on https://github.com/kubeflow/pipelines/tree/b4ecbabbba1ac3c7cf0e762a48e9b8fcde239911/components/kserve. + +In a cluster with the following operators installed: + +* Red Hat OpenShift AI + * Create a `DataScienceCluster` instance +* Red Hat Authorino +* Red Hat OpenShift Service Mesh +* Red Hat OpenShift Serverless + +1. 
Set a namespace and deploy the manifests: + + ```shell + export NAMESPACE= + kustomize build manifests | envsubst | oc apply -f - + ``` + +2. Install the required Python dependencies + + ```shell + pip install -r requirements.txt + ``` + +3. Compile the pipeline + + ```shell + kfp dsl compile --py pipeline.py --output pipeline.yaml + ``` + +4. Deploy the compiled pipeline (`pipeline.yaml`) in the Red Hat OpenShift AI console +5. Run the pipeline in the Red Hat OpenShift AI console +6. When the pipeline completes you should be able to see the `example-precictor` pod and the `InferenceService` + + ```shell + oc get pods | grep 'example-predictor' + example-predictor-00001-deployment-7c5bf67574-p6rrs 2/2 Running 0 8m18s + ``` + + ```shell + oc get inferenceservice + NAME URL READY PREV LATEST PREVROLLEDOUTREVISION LATESTREADYREVISION AGE + example https://something.openshiftapps.com True 100 example-predictor-00001 12m + ``` \ No newline at end of file diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/component.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/component.yaml new file mode 100644 index 000000000000..abf16a7a9bf8 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/component.yaml @@ -0,0 +1,52 @@ +name: Serve a model with KServe +description: Serve Models using KServe +inputs: + - {name: Action, type: String, default: 'create', description: 'Action to execute on KServe'} + - {name: Model Name, type: String, default: '', description: 'Name to give to the deployed model'} + - {name: Model URI, type: String, default: '', description: 'Path of the S3 or GCS compatible directory containing the model.'} + - {name: Canary Traffic Percent, type: String, default: '100', description: 'The traffic split percentage between the candidate model and the last ready model'} + - {name: Namespace, type: String, default: '', description: 'Kubernetes namespace where the KServe service is deployed.'} + - {name: Framework, type: String, default: '', description: 'Machine Learning Framework for Model Serving.'} + - {name: Runtime Version, type: String, default: 'latest', description: 'Runtime Version of Machine Learning Framework'} + - {name: Resource Requests, type: String, default: '{"cpu": "0.5", "memory": "512Mi"}', description: 'CPU and Memory requests for Model Serving'} + - {name: Resource Limits, type: String, default: '{"cpu": "1", "memory": "1Gi"}', description: 'CPU and Memory limits for Model Serving'} + - {name: Custom Model Spec, type: String, default: '{}', description: 'Custom model runtime container spec in JSON'} + - {name: Autoscaling Target, type: String, default: '0', description: 'Autoscaling Target Number'} + - {name: Service Account, type: String, default: '', description: 'ServiceAccount to use to run the InferenceService pod'} + - {name: Enable Istio Sidecar, type: Bool, default: 'True', description: 'Whether to enable istio sidecar injection'} + - {name: InferenceService YAML, type: String, default: '{}', description: 'Raw InferenceService serialized YAML for deployment'} + - {name: Watch Timeout, type: String, default: '300', description: "Timeout seconds for watching until InferenceService becomes ready."} + - {name: Min Replicas, type: String, default: '-1', description: 'Minimum number of InferenceService replicas'} + - {name: Max Replicas, type: String, default: '-1', description: 'Maximum number of InferenceService replicas'} + - {name: Request Timeout, type: String, default: '60', description: 
"Specifies the number of seconds to wait before timing out a request to the component."} + - {name: Enable ISVC Status, type: Bool, default: 'True', description: "Specifies whether to store the inference service status as the output parameter"} + +outputs: + - {name: InferenceService Status, type: String, description: 'Status JSON output of InferenceService'} +implementation: + container: + image: quay.io/hbelmiro/kfp_deploy_model_to_kserve_demo:v0.0.3 + command: ['python'] + args: [ + -u, kservedeployer.py, + --action, {inputValue: Action}, + --model-name, {inputValue: Model Name}, + --model-uri, {inputValue: Model URI}, + --canary-traffic-percent, {inputValue: Canary Traffic Percent}, + --namespace, {inputValue: Namespace}, + --framework, {inputValue: Framework}, + --runtime-version, {inputValue: Runtime Version}, + --resource-requests, {inputValue: Resource Requests}, + --resource-limits, {inputValue: Resource Limits}, + --custom-model-spec, {inputValue: Custom Model Spec}, + --autoscaling-target, {inputValue: Autoscaling Target}, + --service-account, {inputValue: Service Account}, + --enable-istio-sidecar, {inputValue: Enable Istio Sidecar}, + --output-path, {outputPath: InferenceService Status}, + --inferenceservice-yaml, {inputValue: InferenceService YAML}, + --watch-timeout, {inputValue: Watch Timeout}, + --min-replicas, {inputValue: Min Replicas}, + --max-replicas, {inputValue: Max Replicas}, + --request-timeout, {inputValue: Request Timeout}, + --enable-isvc-status, {inputValue: Enable ISVC Status} + ] diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/kservedeployer.py b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/kservedeployer.py new file mode 100644 index 000000000000..1e635af007e7 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/kservedeployer.py @@ -0,0 +1,462 @@ +import argparse +from distutils.util import strtobool +import json +import os +import sys +import time +import yaml + +from kubernetes import client +from kubernetes.client.models import V1ResourceRequirements + +from kserve import constants +from kserve import KServeClient +from kserve import V1beta1InferenceService +from kserve import V1beta1InferenceServiceSpec +from kserve import V1beta1LightGBMSpec +from kserve import V1beta1ONNXRuntimeSpec +from kserve import V1beta1PMMLSpec +from kserve import V1beta1PredictorSpec +from kserve import V1beta1SKLearnSpec +from kserve import V1beta1TFServingSpec +from kserve import V1beta1TorchServeSpec +from kserve import V1beta1TritonSpec +from kserve import V1beta1XGBoostSpec +from kserve.api.watch import isvc_watch + + +AVAILABLE_FRAMEWORKS = { + 'tensorflow': V1beta1TFServingSpec, + 'pytorch': V1beta1TorchServeSpec, + 'sklearn': V1beta1SKLearnSpec, + 'xgboost': V1beta1XGBoostSpec, + 'onnx': V1beta1ONNXRuntimeSpec, + 'triton': V1beta1TritonSpec, + 'pmml': V1beta1PMMLSpec, + 'lightgbm': V1beta1LightGBMSpec +} + + +def create_predictor_spec(framework, runtime_version, resource_requests, resource_limits, + storage_uri, canary_traffic_percent, service_account, min_replicas, + max_replicas, containers, request_timeout): + """ + Create and return V1beta1PredictorSpec to be used in a V1beta1InferenceServiceSpec + object. 
+ """ + + predictor_spec = V1beta1PredictorSpec( + service_account_name=service_account, + min_replicas=(min_replicas + if min_replicas >= 0 + else None + ), + max_replicas=(max_replicas + if max_replicas > 0 and max_replicas >= min_replicas + else None + ), + containers=(containers or None), + canary_traffic_percent=canary_traffic_percent, + timeout=request_timeout + ) + # If the containers field was set, then this is custom model serving. + if containers: + return predictor_spec + + if framework not in AVAILABLE_FRAMEWORKS: + raise ValueError("Error: No matching framework: " + framework) + + setattr( + predictor_spec, + framework, + AVAILABLE_FRAMEWORKS[framework]( + storage_uri=storage_uri, + resources=V1ResourceRequirements( + requests=resource_requests, + limits=resource_limits + ), + runtime_version=runtime_version + ) + ) + return predictor_spec + + +def create_custom_container_spec(custom_model_spec): + """ + Given a JSON container spec, return a V1Container object + representing the container. This is used for passing in + custom server images. The expected format for the input is: + + { "image": "test/containerimage", + "port":5000, + "name": "custom-container" } + """ + + env = ( + [ + client.V1EnvVar(name=i["name"], value=i["value"]) + for i in custom_model_spec["env"] + ] + if custom_model_spec.get("env", "") + else None + ) + ports = ( + [client.V1ContainerPort(container_port=int(custom_model_spec.get("port", "")), protocol="TCP")] + if custom_model_spec.get("port", "") + else None + ) + resources = ( + client.V1ResourceRequirements( + requests=(custom_model_spec["resources"]["requests"] + if custom_model_spec.get('resources', {}).get('requests') + else None + ), + limits=(custom_model_spec["resources"]["limits"] + if custom_model_spec.get('resources', {}).get('limits') + else None + ), + ) + if custom_model_spec.get("resources", {}) + else None + ) + return client.V1Container( + name=custom_model_spec.get("name", "custom-container"), + image=custom_model_spec["image"], + env=env, + ports=ports, + command=custom_model_spec.get("command", None), + args=custom_model_spec.get("args", None), + image_pull_policy=custom_model_spec.get("image_pull_policy", None), + working_dir=custom_model_spec.get("working_dir", None), + resources=resources + ) + + +def create_inference_service(metadata, predictor_spec): + """ + Build and return V1beta1InferenceService object. + """ + return V1beta1InferenceService( + api_version=constants.KSERVE_V1BETA1, + kind=constants.KSERVE_KIND, + metadata=metadata, + spec=V1beta1InferenceServiceSpec( + predictor=predictor_spec + ), + ) + + +def submit_api_request(kserve_client, action, name, isvc, namespace=None, + watch=False, timeout_seconds=300): + """ + Creates or updates a Kubernetes custom object. This code is borrowed from the + KServeClient.create/patch methods as using those directly doesn't allow for + sending in dicts as the InferenceService object which is needed for supporting passing + in raw InferenceService serialized YAML. + """ + custom_obj_api = kserve_client.api_instance + args = [constants.KSERVE_GROUP, constants.KSERVE_V1BETA1_VERSION, + namespace, constants.KSERVE_PLURAL] + if action == 'update': + outputs = custom_obj_api.patch_namespaced_custom_object(*args, name, isvc) + else: + outputs = custom_obj_api.create_namespaced_custom_object(*args, isvc) + + if watch: + # Sleep 3 to avoid status still be True within a very short time. 
+ time.sleep(3) + isvc_watch( + name=outputs['metadata']['name'], + namespace=namespace, + timeout_seconds=timeout_seconds) + else: + return outputs + + +def perform_action(action, model_name, model_uri, canary_traffic_percent, namespace, framework, + runtime_version, resource_requests, resource_limits, custom_model_spec, + service_account, inferenceservice_yaml, request_timeout, autoscaling_target=0, + enable_istio_sidecar=True, watch_timeout=300, min_replicas=0, max_replicas=0): + """ + Perform the specified action. If the action is not 'delete' and `inferenceService_yaml` + was provided, the dict representation of the YAML will be sent directly to the + Kubernetes API. Otherwise, a V1beta1InferenceService object will be built using the + provided input and then sent for creation/update. + :return InferenceService JSON output + """ + kserve_client = KServeClient() + + if inferenceservice_yaml: + # Overwrite name and namespace if exists + if namespace: + inferenceservice_yaml['metadata']['namespace'] = namespace + + if model_name: + inferenceservice_yaml['metadata']['name'] = model_name + else: + model_name = inferenceservice_yaml['metadata']['name'] + + isvc = inferenceservice_yaml + + elif action != 'delete': + # Create annotations + annotations = {} + if int(autoscaling_target) != 0: + annotations["autoscaling.knative.dev/target"] = str(autoscaling_target) + if not enable_istio_sidecar: + annotations["sidecar.istio.io/inject"] = 'false' + if not annotations: + annotations = None + metadata = client.V1ObjectMeta( + name=model_name, namespace=namespace, annotations=annotations + ) + + # If a custom model container spec was provided, build the V1Container + # object using it. + containers = [] + if custom_model_spec: + containers = [create_custom_container_spec(custom_model_spec)] + + # Build the V1beta1PredictorSpec. + predictor_spec = create_predictor_spec( + framework, runtime_version, resource_requests, resource_limits, + model_uri, canary_traffic_percent, service_account, min_replicas, + max_replicas, containers, request_timeout + ) + + isvc = create_inference_service(metadata, predictor_spec) + + if action == "create": + submit_api_request(kserve_client, 'create', model_name, isvc, namespace, + watch=True, timeout_seconds=watch_timeout) + elif action == "update": + submit_api_request(kserve_client, 'update', model_name, isvc, namespace, + watch=True, timeout_seconds=watch_timeout) + elif action == "apply": + try: + submit_api_request(kserve_client, 'create', model_name, isvc, namespace, + watch=True, timeout_seconds=watch_timeout) + except Exception: + submit_api_request(kserve_client, 'update', model_name, isvc, namespace, + watch=True, timeout_seconds=watch_timeout) + elif action == "delete": + kserve_client.delete(model_name, namespace=namespace) + else: + raise ("Error: No matching action: " + action) + + model_status = kserve_client.get(model_name, namespace=namespace) + return model_status + + +def main(): + """ + This parses arguments passed in from the CLI and performs the corresponding action. 
+ """ + parser = argparse.ArgumentParser() + parser.add_argument( + "--action", type=str, help="Action to execute on KServe", default="create" + ) + parser.add_argument( + "--model-name", type=str, help="Name to give to the deployed model" + ) + parser.add_argument( + "--model-uri", + type=str, + help="Path of the S3, GCS or PVC directory containing the model", + ) + parser.add_argument( + "--canary-traffic-percent", + type=str, + help="The traffic split percentage between the candidate model and the last ready model", + default="100", + ) + parser.add_argument( + "--namespace", + type=str, + help="Kubernetes namespace where the KServe service is deployed", + default="", + ) + parser.add_argument( + "--framework", + type=str, + help="Model serving framework to use. Available frameworks: " + + str(list(AVAILABLE_FRAMEWORKS.keys())), + default="" + ) + parser.add_argument( + "--runtime-version", + type=str, + help="Runtime Version of Machine Learning Framework", + default="latest" + ) + parser.add_argument( + "--resource-requests", + type=json.loads, + help="CPU and Memory requests for Model Serving", + default='{"cpu": "0.5", "memory": "512Mi"}', + ) + parser.add_argument( + "--resource-limits", + type=json.loads, + help="CPU and Memory limits for Model Serving", + default='{"cpu": "1", "memory": "1Gi"}', + ) + parser.add_argument( + "--custom-model-spec", + type=json.loads, + help="The container spec for a custom model runtime", + default="{}", + ) + parser.add_argument( + "--autoscaling-target", type=str, help="Autoscaling target number", default="0" + ) + parser.add_argument( + "--service-account", + type=str, + help="Service account containing s3 credentials", + default="", + ) + parser.add_argument( + "--enable-istio-sidecar", + type=strtobool, + help="Whether to inject istio sidecar", + default="True" + ) + parser.add_argument( + "--inferenceservice-yaml", + type=yaml.safe_load, + help="Raw InferenceService serialized YAML for deployment", + default="{}" + ) + parser.add_argument("--output-path", type=str, help="Path to store URI output") + parser.add_argument("--watch-timeout", + type=str, + help="Timeout seconds for watching until InferenceService becomes ready.", + default="300") + parser.add_argument( + "--min-replicas", type=str, help="Minimum number of replicas", default="-1" + ) + parser.add_argument( + "--max-replicas", type=str, help="Maximum number of replicas", default="-1" + ) + parser.add_argument("--request-timeout", + type=str, + help="Specifies the number of seconds to wait before timing out a request to the component.", + default="60") + parser.add_argument("--enable-isvc-status", + type=strtobool, + help="Specifies whether to store the inference service status as the output parameter", + default="True") + + args = parser.parse_args() + + action = args.action.lower() + model_name = args.model_name + model_uri = args.model_uri + canary_traffic_percent = int(args.canary_traffic_percent) + namespace = args.namespace + framework = args.framework.lower() + runtime_version = args.runtime_version.lower() + resource_requests = args.resource_requests + resource_limits = args.resource_limits + output_path = args.output_path + custom_model_spec = args.custom_model_spec + autoscaling_target = int(args.autoscaling_target) + service_account = args.service_account + enable_istio_sidecar = args.enable_istio_sidecar + inferenceservice_yaml = args.inferenceservice_yaml + watch_timeout = int(args.watch_timeout) + min_replicas = int(args.min_replicas) + max_replicas = 
int(args.max_replicas) + request_timeout = int(args.request_timeout) + enable_isvc_status = args.enable_isvc_status + + # Default the namespace. + if not namespace: + namespace = 'anonymous' + # If no namespace was provided, but one is listed in the YAML, use that. + if inferenceservice_yaml and inferenceservice_yaml.get('metadata', {}).get('namespace'): + namespace = inferenceservice_yaml['metadata']['namespace'] + + # Only require model name when an Isvc YAML was not provided. + if not inferenceservice_yaml and not model_name: + parser.error('{} argument is required when performing "{}" action'.format( + 'model_name', action + )) + # If the action isn't a delete, require 'model-uri' and 'framework' only if an Isvc YAML + # or custom model container spec are not provided. + if action != 'delete': + if not inferenceservice_yaml and not custom_model_spec and not (model_uri and framework): + parser.error('Arguments for {} and {} are required when performing "{}" action'.format( + 'model_uri', 'framework', action + )) + + model_status = perform_action( + action=action, + model_name=model_name, + model_uri=model_uri, + canary_traffic_percent=canary_traffic_percent, + namespace=namespace, + framework=framework, + runtime_version=runtime_version, + resource_requests=resource_requests, + resource_limits=resource_limits, + custom_model_spec=custom_model_spec, + autoscaling_target=autoscaling_target, + service_account=service_account, + enable_istio_sidecar=enable_istio_sidecar, + inferenceservice_yaml=inferenceservice_yaml, + request_timeout=request_timeout, + watch_timeout=watch_timeout, + min_replicas=min_replicas, + max_replicas=max_replicas + ) + + print(model_status) + + if action != 'delete': + # Check whether the model is ready + for condition in model_status["status"]["conditions"]: + if condition['type'] == 'Ready': + if condition['status'] == 'True': + print('Model is ready\n') + break + print('Model is timed out, please check the InferenceService events for more details.') + sys.exit(1) + try: + print(model_status["status"]["url"] + " is the Knative domain.") + print("Sample test commands: \n") + # model_status['status']['url'] is like http://flowers-sample.kubeflow.example.com/v1/models/flowers-sample + print("curl -v -X GET %s" % model_status["status"]["url"]) + print("\nIf the above URL is not accessible, it's recommended to setup Knative with a configured DNS.\n" + "https://knative.dev/docs/install/installing-istio/#configuring-dns") + except Exception: + print("Model is not ready, check the logs for the Knative URL status.") + sys.exit(1) + + if output_path: + if not enable_isvc_status: + model_status = {} + else: + try: + # Remove some less needed fields to reduce output size. 
+ del model_status['metadata']['managedFields'] + del model_status['status']['conditions'] + if sys.getsizeof(model_status) > 3000: + del model_status['components']['predictor']['address']['url'] + del model_status['components']['predictor']['latestCreatedRevision'] + del model_status['components']['predictor']['latestReadyRevision'] + del model_status['components']['predictor']['latestRolledoutRevision'] + del model_status['components']['predictor']['url'] + del model_status['spec'] + except KeyError: + pass + + if not os.path.exists(os.path.dirname(output_path)): + os.makedirs(os.path.dirname(output_path)) + with open(output_path, "w") as report: + report.write(json.dumps(model_status, indent=4)) + + +if __name__ == "__main__": + main() diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/dspa.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/dspa.yaml new file mode 100644 index 000000000000..69d528604451 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/dspa.yaml @@ -0,0 +1,45 @@ +apiVersion: datasciencepipelinesapplications.opendatahub.io/v1alpha1 +kind: DataSciencePipelinesApplication +metadata: + name: pipelines-definition + namespace: ${NAMESPACE} +spec: + mlpipelineUI: + image: quay.io/opendatahub/ds-pipelines-frontend:latest + apiServer: + caBundleFileMountPath: "" + stripEOF: true + dbConfigConMaxLifetimeSec: 120 + applyTektonCustomResource: true + caBundleFileName: "" + deploy: true + artifactSignedURLExpirySeconds: 60 + enableSamplePipeline: false + autoUpdatePipelineDefaultVersion: true + archiveLogs: false + terminateStatus: Cancelled + enableOauth: true + trackArtifacts: true + collectMetrics: true + injectDefaultScript: true + database: + disableHealthCheck: false + mariaDB: + deploy: true + pipelineDBName: mlpipeline + pvcSize: 10Gi + username: mlpipeline + dspVersion: v2 + objectStorage: + minio: + bucket: data-science-pipelines + deploy: true + image: quay.io/minio/minio:RELEASE.2024-10-02T17-50-41Z + pvcSize: 10Gi + persistenceAgent: + deploy: true + numWorkers: 2 + podToPodTLS: true + scheduledWorkflow: + cronScheduleTimezone: UTC + deploy: true diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kserve-serving-runtime.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kserve-serving-runtime.yaml new file mode 100644 index 000000000000..cb06aa445a3a --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kserve-serving-runtime.yaml @@ -0,0 +1,38 @@ +apiVersion: serving.kserve.io/v1alpha1 +kind: ServingRuntime +metadata: + name: kserve-sklearnserver + namespace: ${NAMESPACE} +spec: + annotations: + prometheus.kserve.io/port: '8080' + prometheus.kserve.io/path: "/metrics" + supportedModelFormats: + - name: sklearn + version: "1" + autoSelect: true + priority: 1 + protocolVersions: + - v1 + - v2 + containers: + - name: kserve-container + image: docker.io/kserve/sklearnserver:latest + args: + - --model_name={{.Name}} + - --model_dir=/mnt/models + - --http_port=8080 + securityContext: + allowPrivilegeEscalation: false + privileged: false + runAsNonRoot: true + capabilities: + drop: + - ALL + resources: + requests: + cpu: "1" + memory: 2Gi + limits: + cpu: "1" + memory: 2Gi \ No newline at end of file diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kustomization.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kustomization.yaml 
new file mode 100644 index 000000000000..fc4897f38d35 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/kustomization.yaml @@ -0,0 +1,5 @@ +resources: + - kserve-serving-runtime.yaml + - dspa.yaml + - role.yaml + - role-binding.yaml \ No newline at end of file diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role-binding.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role-binding.yaml new file mode 100644 index 000000000000..6b00835d7681 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role-binding.yaml @@ -0,0 +1,13 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + name: patch-inferenceservices-binding + namespace: ${NAMESPACE} +subjects: + - kind: ServiceAccount + name: pipeline-runner-pipelines-definition + namespace: ${NAMESPACE} +roleRef: + kind: Role + name: patch-inferenceservices + apiGroup: rbac.authorization.k8s.io \ No newline at end of file diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role.yaml new file mode 100644 index 000000000000..a7eef5dd4217 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/manifests/role.yaml @@ -0,0 +1,9 @@ +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: patch-inferenceservices + namespace: ${NAMESPACE} +rules: + - apiGroups: ["serving.kserve.io"] + resources: ["inferenceservices"] + verbs: ["*"] diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.py b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.py new file mode 100644 index 000000000000..798b159456f6 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.py @@ -0,0 +1,24 @@ +"""Example of a simple pipeline creating a kserve inference service""" + +from kfp import dsl +import os + + +@dsl.pipeline( + name='KServe Pipeline', + description='A pipeline for creating a KServe inference service.' +) +def kserve_pipeline(): + from kfp import components + + namespace = os.getenv('NAMESPACE') + + kserve_op = components.load_component_from_url( + 'https://raw.githubusercontent.com/hbelmiro/kfp_deploy_model_to_kserve_demo/refs/heads/main/component.yaml') + kserve_op( + action='apply', + namespace=namespace, + model_name='example', + model_uri='gs://kfserving-examples/models/sklearn/1.0/model', + framework='sklearn' + ) diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.yaml b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.yaml new file mode 100644 index 000000000000..2287b5ef920f --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/pipeline.yaml @@ -0,0 +1,170 @@ +# PIPELINE DEFINITION +# Name: kserve-pipeline +# Description: A pipeline for creating a KServe inference service. 
+components: + comp-serve-a-model-with-kserve: + executorLabel: exec-serve-a-model-with-kserve + inputDefinitions: + parameters: + action: + defaultValue: create + isOptional: true + parameterType: STRING + autoscaling_target: + defaultValue: '0' + isOptional: true + parameterType: STRING + canary_traffic_percent: + defaultValue: '100' + isOptional: true + parameterType: STRING + custom_model_spec: + defaultValue: '{}' + isOptional: true + parameterType: STRING + enable_istio_sidecar: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + enable_isvc_status: + defaultValue: true + isOptional: true + parameterType: BOOLEAN + framework: + defaultValue: '' + isOptional: true + parameterType: STRING + inferenceservice_yaml: + defaultValue: '{}' + isOptional: true + parameterType: STRING + max_replicas: + defaultValue: '-1' + isOptional: true + parameterType: STRING + min_replicas: + defaultValue: '-1' + isOptional: true + parameterType: STRING + model_name: + defaultValue: '' + isOptional: true + parameterType: STRING + model_uri: + defaultValue: '' + isOptional: true + parameterType: STRING + namespace: + defaultValue: '' + isOptional: true + parameterType: STRING + request_timeout: + defaultValue: '60' + isOptional: true + parameterType: STRING + resource_limits: + defaultValue: '{"cpu": "1", "memory": "1Gi"}' + isOptional: true + parameterType: STRING + resource_requests: + defaultValue: '{"cpu": "0.5", "memory": "512Mi"}' + isOptional: true + parameterType: STRING + runtime_version: + defaultValue: latest + isOptional: true + parameterType: STRING + service_account: + defaultValue: '' + isOptional: true + parameterType: STRING + watch_timeout: + defaultValue: '300' + isOptional: true + parameterType: STRING + outputDefinitions: + parameters: + inferenceservice_status: + parameterType: STRING +deploymentSpec: + executors: + exec-serve-a-model-with-kserve: + container: + args: + - -u + - kservedeployer.py + - --action + - '{{$.inputs.parameters[''action'']}}' + - --model-name + - '{{$.inputs.parameters[''model_name'']}}' + - --model-uri + - '{{$.inputs.parameters[''model_uri'']}}' + - --canary-traffic-percent + - '{{$.inputs.parameters[''canary_traffic_percent'']}}' + - --namespace + - '{{$.inputs.parameters[''namespace'']}}' + - --framework + - '{{$.inputs.parameters[''framework'']}}' + - --runtime-version + - '{{$.inputs.parameters[''runtime_version'']}}' + - --resource-requests + - '{{$.inputs.parameters[''resource_requests'']}}' + - --resource-limits + - '{{$.inputs.parameters[''resource_limits'']}}' + - --custom-model-spec + - '{{$.inputs.parameters[''custom_model_spec'']}}' + - --autoscaling-target + - '{{$.inputs.parameters[''autoscaling_target'']}}' + - --service-account + - '{{$.inputs.parameters[''service_account'']}}' + - --enable-istio-sidecar + - '{{$.inputs.parameters[''enable_istio_sidecar'']}}' + - --output-path + - '{{$.outputs.parameters[''inferenceservice_status''].output_file}}' + - --inferenceservice-yaml + - '{{$.inputs.parameters[''inferenceservice_yaml'']}}' + - --watch-timeout + - '{{$.inputs.parameters[''watch_timeout'']}}' + - --min-replicas + - '{{$.inputs.parameters[''min_replicas'']}}' + - --max-replicas + - '{{$.inputs.parameters[''max_replicas'']}}' + - --request-timeout + - '{{$.inputs.parameters[''request_timeout'']}}' + - --enable-isvc-status + - '{{$.inputs.parameters[''enable_isvc_status'']}}' + command: + - python + image: quay.io/hbelmiro/kfp_deploy_model_to_kserve_demo:v0.0.3 +pipelineInfo: + description: A pipeline for creating a KServe 
inference service. + name: kserve-pipeline +root: + dag: + tasks: + serve-a-model-with-kserve: + cachingOptions: + enableCache: true + componentRef: + name: comp-serve-a-model-with-kserve + inputs: + parameters: + action: + runtimeValue: + constant: apply + framework: + runtimeValue: + constant: sklearn + model_name: + runtimeValue: + constant: example + model_uri: + runtimeValue: + constant: gs://kfserving-examples/models/sklearn/1.0/model + namespace: + runtimeValue: + constant: hbelmiro-kustomize + taskInfo: + name: serve-a-model-with-kserve +schemaVersion: 2.1.0 +sdkVersion: kfp-2.11.0 diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.in b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.in new file mode 100644 index 000000000000..569d2ccca24a --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.in @@ -0,0 +1,2 @@ +kfp==2.11.0 +kserve==0.14.0 \ No newline at end of file diff --git a/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.txt b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.txt new file mode 100644 index 000000000000..ed7328295596 --- /dev/null +++ b/components/openshift/kserve/kfp_deploy_model_to_kserve_demo/requirements.txt @@ -0,0 +1,200 @@ +# This file was autogenerated by uv via the following command: +# uv pip compile requirements.in -o requirements.txt +annotated-types==0.7.0 + # via pydantic +anyio==4.8.0 + # via + # httpx + # starlette + # watchfiles +cachetools==5.5.0 + # via google-auth +certifi==2024.12.14 + # via + # httpcore + # httpx + # kfp-server-api + # kubernetes + # requests +charset-normalizer==3.4.1 + # via requests +click==8.1.8 + # via + # kfp + # uvicorn +cloudevents==1.11.0 + # via kserve +deprecation==2.1.0 + # via cloudevents +docstring-parser==0.16 + # via kfp +exceptiongroup==1.2.2 + # via anyio +fastapi==0.115.6 + # via kserve +google-api-core==2.24.0 + # via + # google-cloud-core + # google-cloud-storage + # kfp +google-auth==2.37.0 + # via + # google-api-core + # google-cloud-core + # google-cloud-storage + # kfp + # kubernetes +google-cloud-core==2.4.1 + # via google-cloud-storage +google-cloud-storage==2.19.0 + # via kfp +google-crc32c==1.6.0 + # via + # google-cloud-storage + # google-resumable-media +google-resumable-media==2.7.2 + # via google-cloud-storage +googleapis-common-protos==1.66.0 + # via google-api-core +grpcio==1.69.0 + # via kserve +h11==0.14.0 + # via + # httpcore + # uvicorn +httpcore==1.0.7 + # via httpx +httptools==0.6.4 + # via uvicorn +httpx==0.27.2 + # via kserve +idna==3.10 + # via + # anyio + # httpx + # requests +kfp==2.11.0 + # via -r requirements.in +kfp-pipeline-spec==0.6.0 + # via kfp +kfp-server-api==2.3.0 + # via kfp +kserve==0.14.0 + # via -r requirements.in +kubernetes==30.1.0 + # via + # kfp + # kserve +numpy==1.26.4 + # via + # kserve + # pandas +oauthlib==3.2.2 + # via + # kubernetes + # requests-oauthlib +orjson==3.10.14 + # via kserve +packaging==24.2 + # via deprecation +pandas==2.2.3 + # via kserve +prometheus-client==0.20.0 + # via kserve +proto-plus==1.25.0 + # via google-api-core +protobuf==4.25.5 + # via + # google-api-core + # googleapis-common-protos + # kfp + # kfp-pipeline-spec + # kserve + # proto-plus +psutil==5.9.8 + # via kserve +pyasn1==0.6.1 + # via + # pyasn1-modules + # rsa +pyasn1-modules==0.4.1 + # via google-auth +pydantic==2.10.5 + # via + # fastapi + # kserve +pydantic-core==2.27.2 + # via pydantic +python-dateutil==2.9.0.post0 + # 
via + # kfp-server-api + # kserve + # kubernetes + # pandas +python-dotenv==1.0.1 + # via uvicorn +pytz==2024.2 + # via pandas +pyyaml==6.0.2 + # via + # kfp + # kserve + # kubernetes + # uvicorn +requests==2.32.3 + # via + # google-api-core + # google-cloud-storage + # kubernetes + # requests-oauthlib + # requests-toolbelt +requests-oauthlib==2.0.0 + # via kubernetes +requests-toolbelt==0.10.1 + # via kfp +rsa==4.9 + # via google-auth +six==1.17.0 + # via + # kfp-server-api + # kserve + # kubernetes + # python-dateutil +sniffio==1.3.1 + # via + # anyio + # httpx +starlette==0.41.3 + # via fastapi +tabulate==0.9.0 + # via + # kfp + # kserve +timing-asgi==0.3.1 + # via kserve +typing-extensions==4.12.2 + # via + # anyio + # fastapi + # pydantic + # pydantic-core + # starlette + # uvicorn +tzdata==2024.2 + # via pandas +urllib3==1.26.20 + # via + # kfp + # kfp-server-api + # kubernetes + # requests +uvicorn==0.30.6 + # via kserve +uvloop==0.21.0 + # via uvicorn +watchfiles==1.0.4 + # via uvicorn +websocket-client==1.8.0 + # via kubernetes +websockets==14.1 + # via uvicorn
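The `PipelineConfig` message in this patch gains two fields, `semaphore_key` and `mutex_name`, with nil-safe getters in the regenerated Go bindings. A minimal sketch of how code could construct and read the message through the `pipelinespec` package is shown below; the field and getter names come from the generated code in the diff, while the surrounding program and the example key and mutex values are illustrative assumptions.

```go
package main

import (
	"fmt"

	"github.com/kubeflow/pipelines/api/v2alpha1/go/pipelinespec"
)

func main() {
	// Build a PipelineConfig using the two fields introduced by this patch.
	cfg := &pipelinespec.PipelineConfig{
		// Runs that share a semaphore key contend for a limited number of
		// concurrent executions (illustrative value).
		SemaphoreKey: "training-gpu-pool",
		// Runs that share a mutex name execute one at a time (illustrative value).
		MutexName: "model-registry-writer",
	}

	// The generated getters are nil-safe and return "" on a nil receiver,
	// matching the rest of the generated package.
	fmt.Println("semaphore key:", cfg.GetSemaphoreKey())
	fmt.Println("mutex name:", cfg.GetMutexName())

	var unset *pipelinespec.PipelineConfig
	fmt.Println("unset semaphore key:", unset.GetSemaphoreKey())
}
```

How the backend consumes these fields is outside this patch; presumably they feed workflow-level synchronization (semaphore and mutex) when a pipeline is compiled, but only the proto definition and the generated Go code are changed here.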