Skip to content

New Production Deployment and Tool Tests #922

New Production Deployment and Tool Tests

New Production Deployment and Tool Tests #922

# Workflow header: display name and triggers for the GKE deployment + Galaxy
# tool-test pipeline defined by the jobs that follow.
name: New Production Deployment and Tool Tests
on:
  # Two scheduled runs per day (minute 0 of hour 0 and of hour 12).
  # GitHub evaluates schedule cron expressions in UTC.
  schedule:
    - cron: '0 0 * * *'
    - cron: '0 12 * * *'
  # Manual trigger. `chunk` is forwarded to .github/scripts/get_chunk.py by the
  # test job to choose which slice of the tool list is exercised.
  workflow_dispatch:
    inputs:
      chunk:
        description: >-
          Chunk to run. Valid range [0:13]. Out of range inputs will be ignored.
        required: true
        default: '3'
jobs:
# deploygke (a child of the workflow's `jobs:` mapping): creates the GKE
# cluster used by the later jobs and exposes the dated cluster name as the
# `prefix` job output.
deploygke:
  runs-on: ubuntu-latest
  # NOTE(review): `id-token: write` is presumably what the workload-identity
  # auth step below relies on — confirm against the auth action's docs.
  permissions:
    contents: 'read'
    id-token: 'write'
  env:
    PREFIX: prod
    GKE_ZONE: us-east1-b
    # Quoted so the version stays a string rather than a float.
    GKE_VERSION: "1.28"
    GXY_TMP: /tmp/gxy
  outputs:
    prefix: ${{ steps.prefix.outputs.prefix }}
  steps:
    # Builds "<PREFIX>-<yy-mm-dd-HH-MM>"; downstream jobs derive the cluster,
    # namespace and disk names from this value.
    - name: Set prefix with date
      id: prefix
      run: |
        echo "prefix=${PREFIX}-$(date +'%y-%m-%d-%H-%M')" >> "$GITHUB_OUTPUT"
    - uses: 'actions/checkout@v3'
    # NOTE(review): auth@v0, setup-gcloud@v1 and checkout@v3 are old major
    # versions — confirm whether newer majors can be adopted.
    - id: 'Auth'
      uses: 'google-github-actions/auth@v0'
      with:
        workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
        service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
    - name: 'Set up Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v1'
    - name: gcloud version
      run: gcloud --version
    # One-node cluster named after the dated prefix.
    - name: Create GKE cluster
      run: |
        gcloud container clusters create "${{steps.prefix.outputs.prefix}}" \
          --cluster-version="$GKE_VERSION" \
          --disk-size=100 \
          --num-nodes=1 \
          --machine-type=e2-custom-20-81920 \
          --zone "$GKE_ZONE"
# testgalaxy1 (a child of the workflow's `jobs:` mapping): deploys
# GalaxyKubeMan onto the cluster created by `deploygke`, runs one chunk of the
# tool tests against it and pushes the resulting reports.
# All resource names are derived from "<deploygke prefix>-1".
testgalaxy1:
  needs: deploygke
  env:
    GKE_ZONE: us-east1-b
    GXY_TMP: /tmp/gxy
    GKM_VERSION: "2.8.1"
    # Quoted: an unquoted True is a YAML boolean, not the string "True".
    USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
  runs-on: ubuntu-latest
  permissions:
    contents: 'read'
    id-token: 'write'
  steps:
    - uses: actions/checkout@v3
    # NOTE(review): auth@v0, setup-gcloud@v1 and checkout@v3 are old major
    # versions — confirm whether newer majors can be adopted.
    - id: 'Auth'
      uses: 'google-github-actions/auth@v0'
      with:
        workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
        service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
    - name: 'Set up Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v1'
    # The secret reaches the script through the environment instead of being
    # pasted into the script text.
    - name: Get cluster kubeconfig
      env:
        GCP_PROJECT_ID: ${{ secrets.GCP_PROJECT_ID }}
      run: |
        gcloud container clusters get-credentials "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE" --project "$GCP_PROJECT_ID"
    # These disks are referenced by pdName in the helm install below and are
    # removed again by the cleanup job.
    - name: Create two GCP Disks
      run: |
        gcloud compute disks create "${{needs.deploygke.outputs.prefix}}-1-postgres-pd" --size 10Gi --zone "$GKE_ZONE" \
          && gcloud compute disks create "${{needs.deploygke.outputs.prefix}}-1-nfs-pd" --size 250Gi --zone "$GKE_ZONE"
    - name: Setup gcloud auth plugin for kubectl and fetch creds
      run: |
        gcloud components install gke-gcloud-auth-plugin
        gcloud container clusters get-credentials "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE"
    - name: Install Kubectl
      run: |
        curl -LO "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl" \
          && chmod +x ./kubectl \
          && sudo mv ./kubectl /usr/local/bin/kubectl \
          && kubectl version
    - name: Install Helm
      run: curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash
    - name: Add anvil repository
      run: helm repo add anvil https://github.com/cloudve/helm-charts/raw/anvil
    # Installs the chart and records the combined, timed output in $GXY_TMP.
    # A failed install does not stop the job (continue-on-error); the captured
    # output is printed and reported by later steps.
    #
    # Layout notes:
    # * `run: |` keeps every line break, so each trailing backslash is a shell
    #   line continuation. The leading spaces on the continuation lines are
    #   what separate the arguments — do not remove them.
    # * stdout and stderr share one file description (`> file 2>&1`), so their
    #   output cannot overwrite each other.
    # * The database password is expanded by the inner `sh` from the
    #   environment rather than being pasted into the script text.
    - name: Deploy GalaxyKubeMan
      continue-on-error: true
      env:
        COMMON_PASSWORD: ${{ secrets.COMMON_PASSWORD }}
      run: |
        (time sh -c "helm install --create-namespace -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1 anvil/galaxykubeman\
          --wait\
          --wait-for-jobs\
          --timeout 720s\
          --version $GKM_VERSION\
          --set galaxy.service.type=LoadBalancer\
          --set galaxy.configs.\"galaxy\.yml\".galaxy.admin_users=\"tests@fake.org\"\
          --set galaxy.configs.\"galaxy\.yml\".galaxy.master_api_key=galaxypassword\
          --set galaxy.configs.\"galaxy\.yml\".galaxy.single_user=\"tests@fake.org\"\
          --set galaxy.configs.\"job_conf\.yml\".runners.k8s.k8s_node_selector=\"cloud.google.com/gke-nodepool: default-pool\"\
          --set 'galaxy.configs.job_conf\.yml.execution.environments.tpv_dispatcher.tpv_config_files={https://raw.githubusercontent.com/galaxyproject/tpv-shared-database/main/tools.yml,lib/galaxy/jobs/rules/tpv_rules_local.yml,https://gist.githubusercontent.com/afgane/68d1dbbe0af2468ba347dc74b6d3f7fa/raw/20edda50161bdcb74ff38935e7f76d79bfdaf303/tvp_rules_tests.yml}'\
          --set galaxy.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
          --set galaxy.persistence.storageClass=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
          --set galaxy.postgresql.galaxyDatabasePassword=\"\$COMMON_PASSWORD\"\
          --set galaxy.postgresql.master.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
          --set galaxy.postgresql.persistence.existingClaim=\"${{needs.deploygke.outputs.prefix}}-1-postgres-disk-pvc\"\
          --set galaxy.rabbitmq.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
          --set galaxy.rabbitmq.persistence.storageClassName=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
          --set galaxy.ingress.ingressClassName=\"\"\
          --set galaxy.tusd.ingress.ingressClassName=\"\"\
          --set cvmfs.cvmfscsi.cache.alien.pvc.storageClass=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
          --set nfs.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
          --set nfs.persistence.existingClaim=\"${{needs.deploygke.outputs.prefix}}-1-nfs-disk-pvc\"\
          --set nfs.storageClass.name=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
          --set persistence.nfs.name=\"${{needs.deploygke.outputs.prefix}}-1-nfs-disk\"\
          --set persistence.nfs.persistentVolume.extraSpec.gcePersistentDisk.pdName=\"${{needs.deploygke.outputs.prefix}}-1-nfs-pd\"\
          --set persistence.postgres.name=\"${{needs.deploygke.outputs.prefix}}-1-postgres-disk\"\
          --set persistence.postgres.persistentVolume.extraSpec.gcePersistentDisk.pdName=\"${{needs.deploygke.outputs.prefix}}-1-postgres-pd\"") > "$GXY_TMP" 2>&1
    - name: Print output
      run: cat "$GXY_TMP"
    - name: Helm get values
      run: helm get values -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1
    - name: Get pods, pvc, and pv, sc
      run: |
        kubectl get pods -n ${{needs.deploygke.outputs.prefix}}-1
        kubectl get pvc -n ${{needs.deploygke.outputs.prefix}}-1
        kubectl get pv -n ${{needs.deploygke.outputs.prefix}}-1
        kubectl get sc -n ${{needs.deploygke.outputs.prefix}}-1
    - name: Get events
      run: kubectl get events -n ${{needs.deploygke.outputs.prefix}}-1
    - uses: actions/setup-python@v4
      with:
        python-version: '3.11'
    - name: Install required system packages
      run: sudo apt-get update && sudo apt-get install -y python3-h5py pkg-config libhdf5-dev
    - name: Install dependencies
      run: python -m pip install planemo ephemeris pysam "galaxy-tool-util>=21.1.0.dev6" bioblend Jinja2 matplotlib gxabm
    # Second checkout, this time without persisting the default credentials.
    # The report scripts below are handed an explicit token instead.
    - uses: actions/checkout@v3
      with:
        persist-credentials: false
    # The token is passed through the environment, then forwarded as the same
    # positional argument the script received before.
    - name: Add report from this run
      env:
        REPORT_TOKEN: ${{ secrets.GIT_TOKEN }}
      run: bash ./.github/scripts/report_deployment.sh "$GXY_TMP" anvil-production "$REPORT_TOKEN"
    # Assembles http://<load balancer ip>:<service port><ingress path>/ from
    # the galaxy-nginx service and the galaxy ingress.
    # NOTE(review): the inner `echo \"...\"` appears to leave literal double
    # quotes around the URL in the output; later steps interpolate it unquoted
    # into their scripts, so the command is kept exactly as it was.
    - name: Get Galaxy IP and port
      id: galaxy
      run: |
        echo "url=$(echo \"http://$(kubectl get svc -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy-nginx -o jsonpath='{.status.loadBalancer.ingress[0].ip}' | sed -e 's/\"//g'):$(kubectl get svc -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy-nginx -o jsonpath='{.spec.ports[0].port}')$(kubectl get ingress -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy -o jsonpath='{.spec.rules[0].http.paths[0].path}')/\")" >> $GITHUB_OUTPUT
    - name: Create remote single user
      run: curl ${{steps.galaxy.outputs.url}}
    - name: Create reports dir for this date
      id: reports
      run: |
        DIR=reports/anvil-production/tool-tests/${{needs.deploygke.outputs.prefix}}-1
        mkdir -p $DIR
        echo "dir=$DIR" >> $GITHUB_OUTPUT
    # - name: Get tool list
    #   run: cp production/anvil/tools.yaml reports/anvil-production/tools.yaml
    - name: Create and get API key
      id: api
      run: echo "key=$(python .github/scripts/create_api_key.py ${{steps.galaxy.outputs.url}} galaxypassword)" >> $GITHUB_OUTPUT
    # Points abm at the deployed Galaxy and republishes the API key as this
    # step's `key` output. The previous `echo "key=$key"` read a shell variable
    # that was never assigned, so the output was always empty.
    - name: Configure ABM
      id: key
      run: |
        abm version
        cp -R .abm ~/.abm
        abm config url galaxy ${{ steps.galaxy.outputs.url }}
        echo "Key is ${{ steps.api.outputs.key }}"
        abm config key galaxy ${{ steps.api.outputs.key }}
        abm config show galaxy
        echo "key=${{ steps.api.outputs.key }}" >> $GITHUB_OUTPUT
    # Manual runs pick the chunk from the dispatch input. The input is
    # user-supplied text, so it is passed through the environment and quoted
    # instead of being interpolated into the script.
    - name: Split tool tests (workflow_dispatch)
      if: ${{ github.event_name == 'workflow_dispatch' }}
      env:
        CHUNK: ${{ github.event.inputs.chunk }}
      run: python .github/scripts/get_chunk.py --chunk "$CHUNK" -o ${{ steps.reports.outputs.dir }} reports/anvil-production/tools.yaml
    # Any other trigger runs get_chunk.py without an explicit --chunk.
    - name: Split tool tests (cron job)
      if: ${{ github.event_name != 'workflow_dispatch' }}
      run: python .github/scripts/get_chunk.py reports/anvil-production/tools.yaml -o ${{ steps.reports.outputs.dir }}
    - name: Generate a history name
      id: history
      run: echo "history_name=anvil-test-$(date '+%F-%H-%M')" >> $GITHUB_OUTPUT
    - name: Run tool tests
      continue-on-error: true
      run: sleep 30 && .github/scripts/run-galaxy-tests.sh ${{ steps.history.outputs.history_name }} ${{ steps.reports.outputs.dir }}
    # - name: Write job results
    #   run: abm galaxy jobs list --history ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/jobs.txt
    # Saves the job list, one JSON file per errored or paused job, and the
    # history summary into the reports directory.
    - name: Generate Jobs reports
      continue-on-error: true
      shell: bash
      run: |
        abm galaxy jobs list --history ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/jobs.txt
        echo "Saving jobs that had errors."
        abm galaxy jobs list --state error
        for job in $(abm galaxy jobs list --state error | awk '{print $1}') ; do
          if [[ ! -e ${{ steps.reports.outputs.dir }}/errors ]] ; then
            mkdir -p ${{ steps.reports.outputs.dir }}/errors
          fi
          abm galaxy jobs show $job > ${{ steps.reports.outputs.dir }}/errors/$job.json
        done
        echo "Looking for paused jobs"
        abm galaxy jobs list --state paused
        for job in $(abm galaxy jobs list --state paused | awk '{print $1}') ; do
          echo "Job $job was paused"
          if [[ ! -e ${{ steps.reports.outputs.dir }}/paused ]] ; then
            mkdir -p ${{ steps.reports.outputs.dir }}/paused
          fi
          abm galaxy jobs show $job > ${{ steps.reports.outputs.dir }}/paused/$job.json
        done
        abm galaxy history summarize ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/job-metrics.csv
    - name: Save job handler logs and node information
      continue-on-error: true
      shell: bash
      run: |
        pod=$(kubectl get pods -n ${{needs.deploygke.outputs.prefix}}-1 | grep galaxy-job | awk '{print $1}')
        kubectl logs -n ${{needs.deploygke.outputs.prefix}}-1 $pod > ${{ steps.reports.outputs.dir }}/job.log
        kubectl describe nodes > ${{ steps.reports.outputs.dir }}/node-info.txt
    - name: Create HTML report
      run: planemo test_reports ${{ steps.reports.outputs.dir }}/results.json --test_output_xunit ${{ steps.reports.outputs.dir }}/results.xunit --test_output ${{ steps.reports.outputs.dir }}/results.html
    - name: Update Readme
      run: python .github/scripts/update_readme.py ${{ steps.reports.outputs.dir }}/chunk.json reports/anvil-production/tool-tests/chunks.json reports/anvil-production/README.md
    # The token is passed through the environment; argument order is unchanged.
    - name: Add updated README from this run
      env:
        REPORT_TOKEN: ${{ secrets.GIT_TOKEN }}
      run: bash ./.github/scripts/report_tests.sh "$REPORT_TOKEN" "Updating anvil-production README for ${{needs.deploygke.outputs.prefix}}-1" reports/anvil-production
# cleanup (a child of the workflow's `jobs:` mapping): tears down the cluster
# and the two persistent disks. `if: always()` makes it run even when the
# deploy or test job failed, so cloud resources are not left behind.
cleanup:
  if: always()
  needs: [deploygke, testgalaxy1]
  env:
    GKE_ZONE: us-east1-b
  runs-on: ubuntu-latest
  permissions:
    contents: 'read'
    id-token: 'write'
  steps:
    - uses: 'actions/checkout@v3'
    # NOTE(review): auth@v0, setup-gcloud@v1 and checkout@v3 are old major
    # versions — confirm whether newer majors can be adopted.
    - id: 'Auth'
      uses: 'google-github-actions/auth@v0'
      with:
        workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
        service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
    - name: 'Set up Cloud SDK'
      uses: 'google-github-actions/setup-gcloud@v1'
    # A failed cluster deletion must not prevent the disk cleanup below.
    - name: Delete the GKE cluster
      continue-on-error: true
      run: gcloud container clusters delete "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE" --quiet
    # Each disk is deleted independently. Chaining the two deletions with `&&`
    # skipped the second disk whenever the first deletion failed, which leaked
    # it. The step still fails if either deletion failed.
    - name: Delete the two GCP Disks for first Galaxy
      run: |
        status=0
        for disk in postgres-pd nfs-pd; do
          gcloud compute disks delete "${{needs.deploygke.outputs.prefix}}-1-${disk}" --zone "$GKE_ZONE" --quiet || status=1
        done
        exit "$status"