Skip to content

Commit

Permalink
Merge pull request galaxyproject#18514 from Bioconductor/dev
Browse files Browse the repository at this point in the history
Improvements for K8S deployment (especially ITs)
  • Loading branch information
nuwang authored Sep 10, 2024
2 parents eabea28 + ceec2f0 commit 0d4cb10
Show file tree
Hide file tree
Showing 5 changed files with 150 additions and 32 deletions.
62 changes: 61 additions & 1 deletion .github/workflows/build_container_image.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,67 @@ concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
cancel-in-progress: true
jobs:
ghcrbuild:
name: Build container image for GHCR
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v4
# https://stackoverflow.com/questions/59810838/how-to-get-the-short-sha-for-the-github-workflow
- name: Set outputs
id: commit
run: echo "sha_short=$(git rev-parse --short HEAD)" >> $GITHUB_OUTPUT
- name: Set branch name
id: branch
run: |
if [[ "$GITHUB_REF" == "refs/tags/"* ]]; then
echo "name=${GITHUB_REF#refs/tags/v}" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF" == "refs/heads/dev" ]]; then
echo "name=dev" >> $GITHUB_OUTPUT
elif [[ "$GITHUB_REF" == "refs/heads/release_"* ]]; then
echo "name=${GITHUB_REF#refs/heads/release_}-auto" >> $GITHUB_OUTPUT
fi
shell: bash
- name: Extract metadata for container image
id: meta
uses: docker/metadata-action@v4
with:
images: ghcr.io/${{ github.repository }}
tags: |
type=raw,value=${{steps.branch.outputs.name}}
- name: Build args
id: buildargs
run: |
echo "gitcommit=$(git rev-parse HEAD)" >> $GITHUB_OUTPUT
echo "builddate=$(date -u +'%Y-%m-%dT%H:%M:%SZ')"
- name: Set up Docker Buildx
uses: docker/setup-buildx-action@v2
with:
platforms: linux/amd64

- name: Login to GHCR
uses: docker/login-action@v2
with:
registry: ghcr.io
username: ${{ github.repository_owner }}
password: ${{ secrets.GITHUB_TOKEN }}

- name: Build and push container image to ghcr
uses: docker/build-push-action@v4
with:
build-args: |
GIT_COMMIT=${{ steps.buildargs.outputs.gitcommit }}
BUILD_DATE=${{ steps.buildargs.outputs.builddate }}
IMAGE_TAG=${{ steps.branch.outputs.name }}
file: .k8s_ci.Dockerfile
push: true
context: .
tags: ${{ steps.meta.outputs.tags }}
labels: ${{ steps.meta.outputs.labels }}
platforms: linux/amd64

build:
name: Build container image
name: Build container image for Galaxy repos
runs-on: ubuntu-latest
if: github.repository_owner == 'galaxyproject'
steps:
Expand Down Expand Up @@ -58,3 +117,4 @@ jobs:
uses: actions-hub/docker@master
with:
args: push galaxy/galaxy-min:${{ steps.branch.outputs.name }}

2 changes: 1 addition & 1 deletion lib/galaxy/dependencies/conditional-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ custos-sdk
chronos-python==1.2.1

# Kubernetes job runner
pykube-ng==21.3.0
pykube-ng==23.6.0

# Synnefo / Pithos+ object store client
kamaki
Expand Down
107 changes: 79 additions & 28 deletions lib/galaxy/jobs/runners/kubernetes.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
)
from galaxy.jobs.runners.util.pykube_util import (
deduplicate_entries,
DEFAULT_INGRESS_API_VERSION,
DEFAULT_JOB_API_VERSION,
delete_ingress,
delete_job,
Expand All @@ -34,6 +35,7 @@
HTTPError,
Ingress,
ingress_object_dict,
is_pod_running,
is_pod_unschedulable,
Job,
job_object_dict,
Expand Down Expand Up @@ -109,6 +111,8 @@ def __init__(self, app, nworkers, **kwargs):
k8s_interactivetools_use_ssl=dict(map=bool, default=False),
k8s_interactivetools_ingress_annotations=dict(map=str),
k8s_interactivetools_ingress_class=dict(map=str, default=None),
k8s_interactivetools_tls_secret=dict(map=str, default=None),
k8s_ingress_api_version=dict(map=str, default=DEFAULT_INGRESS_API_VERSION),
)

if "runner_param_specs" not in kwargs:
Expand Down Expand Up @@ -249,6 +253,7 @@ def __configure_port_routing(self, ajs):
service = Service(self._pykube_api, k8s_service_obj)
service.create()
ingress = Ingress(self._pykube_api, k8s_ingress_obj)
ingress.version = self.runner_params["k8s_ingress_api_version"]
ingress.create()

def __get_overridable_params(self, job_wrapper, param_key):
Expand Down Expand Up @@ -429,6 +434,54 @@ def __get_k8s_service_spec(self, ajs):
}
return k8s_spec_template

def __get_k8s_ingress_rules_spec(self, ajs, entry_points):
"""This represents the template for the "rules" portion of the Ingress spec."""
if "v1beta1" in self.runner_params["k8s_ingress_api_version"]:
rules_spec = [
{
"host": ep["domain"],
"http": {
"paths": [
{
"backend": {
"serviceName": self.__get_k8s_job_name(
self.__produce_k8s_job_prefix(), ajs.job_wrapper
),
"servicePort": int(ep["tool_port"]),
},
"path": ep.get("entry_path", "/"),
"pathType": "Prefix",
}
]
},
}
for ep in entry_points
]
else:
rules_spec = [
{
"host": ep["domain"],
"http": {
"paths": [
{
"backend": {
"service": {
"name": self.__get_k8s_job_name(
self.__produce_k8s_job_prefix(), ajs.job_wrapper
),
"port": {"number": int(ep["tool_port"])},
}
},
"path": ep.get("entry_path", "/"),
"pathType": "ImplementationSpecific",
}
]
},
}
for ep in entry_points
]
return rules_spec

def __get_k8s_ingress_spec(self, ajs):
"""The k8s spec template is nothing but a Ingress spec, except that it is nested and does not have an apiversion
nor kind."""
Expand Down Expand Up @@ -466,39 +519,22 @@ def __get_k8s_ingress_spec(self, ajs):
},
"annotations": {"app.galaxyproject.org/tool_id": ajs.job_wrapper.tool.id},
},
"spec": {
"rules": [
{
"host": ep["domain"],
"http": {
"paths": [
{
"backend": {
"service": {
"name": self.__get_k8s_job_name(
self.__produce_k8s_job_prefix(), ajs.job_wrapper
),
"port": {"number": int(ep["tool_port"])},
}
},
"path": ep.get("entry_path", "/"),
"pathType": "Prefix",
}
]
},
}
for ep in entry_points
],
},
"spec": {"rules": self.__get_k8s_ingress_rules_spec(ajs, entry_points)},
}
default_ingress_class = self.runner_params.get("k8s_interactivetools_ingress_class")
if default_ingress_class:
k8s_spec_template["spec"]["ingressClassName"] = default_ingress_class
if self.runner_params.get("k8s_interactivetools_use_ssl"):
domains = list({e["domain"] for e in entry_points})
k8s_spec_template["spec"]["tls"] = [
{"hosts": [domain], "secretName": re.sub("[^a-z0-9-]", "-", domain)} for domain in domains
]
override_secret = self.runner_params.get("k8s_interactivetools_tls_secret")
if override_secret:
k8s_spec_template["spec"]["tls"] = [
{"hosts": [domain], "secretName": override_secret} for domain in domains
]
else:
k8s_spec_template["spec"]["tls"] = [
{"hosts": [domain], "secretName": re.sub("[^a-z0-9-]", "-", domain)} for domain in domains
]
if self.runner_params.get("k8s_interactivetools_ingress_annotations"):
new_ann = yaml.safe_load(self.runner_params.get("k8s_interactivetools_ingress_annotations"))
k8s_spec_template["metadata"]["annotations"].update(new_ann)
Expand Down Expand Up @@ -767,10 +803,14 @@ def check_watched_item(self, job_state):
pass
else:
pass
else:
elif self.__check_job_pod_running(job_state):
log.debug("Job set to running...")
job_state.running = True
job_state.job_wrapper.change_state(model.Job.states.RUNNING)
else:
log.debug(
f"Job id: {job_state.job_id} with k8s id: {job.name} scheduled and is waiting to start..."
)
return job_state
elif job_persisted_state == model.Job.states.DELETED:
# Job has been deleted via stop_job and job has not been deleted,
Expand Down Expand Up @@ -921,6 +961,17 @@ def __job_failed_due_to_low_memory(self, job_state):

return False

def __check_job_pod_running(self, job_state):
"""
checks the state of the pod to see if it is running.
"""
pods = find_pod_object_by_name(self._pykube_api, job_state.job_id, self.runner_params["k8s_namespace"])
if not pods.response["items"]:
return False

pod = Pod(self._pykube_api, pods.response["items"][0])
return is_pod_running(self._pykube_api, pod, self.runner_params["k8s_namespace"])

def __job_pending_due_to_unschedulable_pod(self, job_state):
"""
checks the state of the pod to see if it is unschedulable.
Expand Down
8 changes: 8 additions & 0 deletions lib/galaxy/jobs/runners/util/pykube_util.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,13 @@ def find_pod_object_by_name(pykube_api, job_name, namespace=None):
return Pod.objects(pykube_api).filter(selector=f"job-name={job_name}", namespace=namespace)


def is_pod_running(pykube_api, pod, namespace=None):
if pod.obj["status"].get("phase") == "Running":
return True

return False


def is_pod_unschedulable(pykube_api, pod, namespace=None):
is_unschedulable = any(c.get("reason") == "Unschedulable" for c in pod.obj["status"].get("conditions", []))
if pod.obj["status"].get("phase") == "Pending" and is_unschedulable:
Expand Down Expand Up @@ -311,6 +318,7 @@ def galaxy_instance_id(params):
"find_pod_object_by_name",
"galaxy_instance_id",
"HTTPError",
"is_pod_running",
"is_pod_unschedulable",
"Job",
"Service",
Expand Down
3 changes: 1 addition & 2 deletions lib/galaxy/model/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2917,8 +2917,7 @@ def __init__(
@property
def active(self):
if self.configured and not self.deleted:
# FIXME: don't included queued?
return not self.job.finished
return self.job.running
return False

@property
Expand Down

0 comments on commit 0d4cb10

Please sign in to comment.