Skip to content

Commit 1550b36

Browse files
feat(workflows): use built images in Github workflows (#11284)
* Patch deployments and include debugging info
  Signed-off-by: carter.fendley <carter.fendley@gmail.com>
* Build and use driver / launcher too!
  Signed-off-by: carter.fendley <carter.fendley@gmail.com>
* Modify waiting status message
  Signed-off-by: carter.fendley <carter.fendley@gmail.com>
* Fix typo
  Signed-off-by: carter.fendley <carter.fendley@gmail.com>
---------
Signed-off-by: carter.fendley <carter.fendley@gmail.com>
1 parent 753a2f1 commit 1550b36

File tree

3 files changed

+43
-6
lines changed

3 files changed

+43
-6
lines changed

scripts/deploy/github/build-images.sh

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,19 @@ then
4646
exit $EXIT_CODE
4747
fi
4848

49+
docker build -q -t "${REGISTRY}/driver:${TAG}" -f backend/Dockerfile.driver . && docker push "${REGISTRY}/driver:${TAG}" || EXIT_CODE=$?
50+
if [[ $EXIT_CODE -ne 0 ]]
51+
then
52+
echo "Failed to build driver image."
53+
exit $EXIT_CODE
54+
fi
55+
56+
docker build -q -t "${REGISTRY}/launcher:${TAG}" -f backend/Dockerfile.launcher . && docker push "${REGISTRY}/launcher:${TAG}" || EXIT_CODE=$?
57+
if [[ $EXIT_CODE -ne 0 ]]
58+
then
59+
echo "Failed to build launcher image."
60+
exit $EXIT_CODE
61+
fi
4962

5063
# clean up intermediate build caches to free up disk space
5164
docker system prune -a -f

scripts/deploy/github/deploy-kfp.sh

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -41,6 +41,18 @@ then
4141
exit 1
4242
fi
4343

44+
echo "Patching deployments to use built docker images..."
45+
# Patch API server
46+
kubectl patch deployment ml-pipeline -p '{"spec": {"template": {"spec": {"containers": [{"name": "ml-pipeline-api-server", "image": "kind-registry:5000/apiserver"}]}}}}' -n kubeflow
47+
# Patch persistence agent
48+
kubectl patch deployment.apps/ml-pipeline-persistenceagent -p '{"spec": {"template": {"spec": {"containers": [{"name": "ml-pipeline-persistenceagent", "image": "kind-registry:5000/persistenceagent"}]}}}}' -n kubeflow
49+
# Patch scheduled workflow
50+
kubectl patch deployment.apps/ml-pipeline-scheduledworkflow -p '{"spec": {"template": {"spec": {"containers": [{"name": "ml-pipeline-scheduledworkflow", "image": "kind-registry:5000/scheduledworkflow"}]}}}}' -n kubeflow
51+
52+
# Update environment variables to override driver / launcher
53+
kubectl set env deployments/ml-pipeline V2_DRIVER_IMAGE=kind-registry:5000/driver -n kubeflow
54+
kubectl set env deployments/ml-pipeline V2_LAUNCHER_IMAGE=kind-registry:5000/launcher -n kubeflow
55+
4456
# Check if all pods are running - (10 minutes)
4557
wait_for_pods || EXIT_CODE=$?
4658
if [[ $EXIT_CODE -ne 0 ]]

scripts/deploy/github/kfp-readiness/wait_for_pods.py

Lines changed: 18 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -21,15 +21,25 @@ def get_pod_statuses():
2121
pod_name = pod.metadata.name
2222
pod_status = pod.status.phase
2323
container_statuses = pod.status.container_statuses or []
24-
ready_containers = sum(1 for status in container_statuses if status.ready)
25-
total_containers = len(container_statuses)
26-
statuses[pod_name] = (pod_status, ready_containers, total_containers)
24+
ready = 0
25+
total = 0
26+
waiting_messages = []
27+
for status in container_statuses:
28+
total += 1
29+
if status.ready:
30+
ready += 1
31+
if status.state.waiting is not None:
32+
if status.state.waiting.message is not None:
33+
waiting_messages.append(f'Waiting on Container: {status.name} - {status.state.waiting.reason}: {status.state.waiting.message}')
34+
else:
35+
waiting_messages.append(f'Waiting on Container: {status.name} - {status.state.waiting.reason}')
36+
statuses[pod_name] = (pod_status, ready, total, waiting_messages)
2737
return statuses
2838

2939

3040
def all_pods_ready(statuses):
3141
return all(pod_status == 'Running' and ready == total
32-
for pod_status, ready, total in statuses.values())
42+
for pod_status, ready, total, _ in statuses.values())
3343

3444

3545
def check_pods(calm_time=10, timeout=600, retries_after_ready=5):
@@ -41,8 +51,10 @@ def check_pods(calm_time=10, timeout=600, retries_after_ready=5):
4151
current_statuses = get_pod_statuses()
4252

4353
logging.info("Checking pod statuses...")
44-
for pod_name, (pod_status, ready, total) in current_statuses.items():
54+
for pod_name, (pod_status, ready, total, waiting_messages) in current_statuses.items():
4555
logging.info(f"Pod {pod_name} - Status: {pod_status}, Ready: {ready}/{total}")
56+
for waiting_msg in waiting_messages:
57+
logging.info(waiting_msg)
4658

4759
if current_statuses == previous_statuses:
4860
if all_pods_ready(current_statuses):
@@ -65,7 +77,7 @@ def check_pods(calm_time=10, timeout=600, retries_after_ready=5):
6577
raise Exception("Pods did not stabilize within the timeout period.")
6678

6779
logging.info("Final pod statuses:")
68-
for pod_name, (pod_status, ready, total) in previous_statuses.items():
80+
for pod_name, (pod_status, ready, total, _) in previous_statuses.items():
6981
if pod_status == 'Running' and ready == total:
7082
logging.info(f"Pod {pod_name} is fully ready ({ready}/{total})")
7183
else:

0 commit comments

Comments
 (0)