# New Production Deployment and Tool Tests #985
# This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
# Learn more about bidirectional Unicode characters
# Workflow identity and triggers.
name: New Production Deployment and Tool Tests

on:
  # Scheduled runs: twice a day, at minute 0 of hours 0 and 12.
  schedule:
    - cron: '0 0 * * *'
    - cron: '0 12 * * *'
  # Manual runs: the caller chooses which chunk of the tool list to test.
  workflow_dispatch:
    inputs:
      chunk:
        description: 'Chunk to run. Valid range [0:13]. Out of range inputs will be ignored.'
        required: true
        # Quoted so the default stays a string rather than a YAML integer.
        default: '3'
jobs:
  # Creates a single-node GKE cluster named after a timestamped prefix and
  # publishes that prefix as a job output for the downstream jobs.
  deploygke:
    env:
      GKE_ZONE: us-east1-b
      GKE_VERSION: "1.28"
      GXY_TMP: /tmp/gxy
      PREFIX: prod
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    outputs:
      prefix: ${{ steps.prefix.outputs.prefix }}
    steps:
      - name: Set prefix with date
        id: prefix
        run: echo "prefix=$(echo $PREFIX-$(date +'%y-%m-%d-%H-%M'))" >> $GITHUB_OUTPUT
      - uses: 'actions/checkout@v3'
      - id: 'Auth'
        uses: 'google-github-actions/auth@v0'
        with:
          workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
          service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'
      - name: gcloud version
        run: gcloud --version
      - name: Create GKE cluster
        # Folded scalar: the lines below are joined with single spaces into
        # one gcloud command.
        run: >-
          gcloud container clusters create "${{steps.prefix.outputs.prefix}}"
          --cluster-version="$GKE_VERSION"
          --disk-size=100
          --num-nodes=1
          --machine-type=e2-custom-20-81920
          --zone "$GKE_ZONE"

  # Installs GalaxyKubeMan on the new cluster, runs one chunk of the tool
  # tests against it and pushes the resulting reports.
  testgalaxy1:
    needs: deploygke
    env:
      GKE_ZONE: us-east1-b
      GXY_TMP: /tmp/gxy
      GKM_VERSION: "2.8.1"
      # Quoted so the value stays the literal string "True" instead of being
      # read as a YAML boolean.
      USE_GKE_GCLOUD_AUTH_PLUGIN: "True"
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: actions/checkout@v3
      - id: 'Auth'
        uses: 'google-github-actions/auth@v0'
        with:
          workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
          service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'
      - name: Get cluster kubeconfig
        run: gcloud container clusters get-credentials "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE" --project ${{ secrets.GCP_PROJECT_ID }}
      - name: Create two GCP Disks
        run: >-
          gcloud compute disks create "${{needs.deploygke.outputs.prefix}}-1-postgres-pd" --size 10Gi --zone "$GKE_ZONE"
          &&
          gcloud compute disks create "${{needs.deploygke.outputs.prefix}}-1-nfs-pd" --size 250Gi --zone "$GKE_ZONE"
      - name: Setup gcloud auth plugin for kubectl and fetch creds
        run: |
          gcloud components install gke-gcloud-auth-plugin
          gcloud container clusters get-credentials "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE"
      - name: Install Kubectl
        run: >-
          curl -LO "https://storage.googleapis.com/kubernetes-release/release/$(curl -s https://storage.googleapis.com/kubernetes-release/release/stable.txt)/bin/linux/amd64/kubectl"
          && chmod +x ./kubectl
          && sudo mv ./kubectl /usr/local/bin/kubectl
          && kubectl version
      - name: Install Helm
        run: curl https://raw.githubusercontent.com/helm/helm/master/scripts/get-helm-3 | bash
      - name: Add anvil repository
        run: helm repo add anvil https://github.com/cloudve/helm-charts/raw/anvil
      - name: Deploy GalaxyKubeMan
        # A failed install must not stop the job: the following steps print
        # and publish the captured log either way.
        continue-on-error: true
        # Literal block (|) so every backslash-newline inside the double-quoted
        # sh -c string is a real shell line continuation.
        # The log file is opened once: stdout truncates it and stderr is
        # duplicated onto the same descriptor, so helm output and the `time`
        # summary cannot overwrite each other.
        run: |
          (time sh -c "helm install --create-namespace -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1 anvil/galaxykubeman\
            --wait\
            --wait-for-jobs\
            --timeout 720s\
            --version $GKM_VERSION\
            --set galaxy.service.type=LoadBalancer\
            --set galaxy.configs.\"galaxy\.yml\".galaxy.admin_users=\"tests@fake.org\"\
            --set galaxy.configs.\"galaxy\.yml\".galaxy.master_api_key=galaxypassword\
            --set galaxy.configs.\"galaxy\.yml\".galaxy.single_user=\"tests@fake.org\"\
            --set galaxy.configs.\"job_conf\.yml\".runners.k8s.k8s_node_selector=\"cloud.google.com/gke-nodepool: default-pool\"\
            --set 'galaxy.configs.job_conf\.yml.execution.environments.tpv_dispatcher.tpv_config_files={https://raw.githubusercontent.com/galaxyproject/tpv-shared-database/main/tools.yml,lib/galaxy/jobs/rules/tpv_rules_local.yml,https://gist.githubusercontent.com/afgane/68d1dbbe0af2468ba347dc74b6d3f7fa/raw/20edda50161bdcb74ff38935e7f76d79bfdaf303/tvp_rules_tests.yml}'\
            --set galaxy.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set galaxy.persistence.storageClass=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set galaxy.postgresql.galaxyDatabasePassword=${{secrets.COMMON_PASSWORD}}\
            --set galaxy.postgresql.master.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set galaxy.postgresql.persistence.existingClaim=\"${{needs.deploygke.outputs.prefix}}-1-postgres-disk-pvc\"\
            --set galaxy.rabbitmq.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set galaxy.rabbitmq.persistence.storageClassName=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set galaxy.ingress.ingressClassName=\"\"\
            --set galaxy.tusd.ingress.ingressClassName=\"\"\
            --set cvmfs.cvmfscsi.cache.alien.pvc.storageClass=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set nfs.nodeSelector.\"cloud\.google\.com\/gke-nodepool\"=\"default-pool\"\
            --set nfs.persistence.existingClaim=\"${{needs.deploygke.outputs.prefix}}-1-nfs-disk-pvc\"\
            --set nfs.storageClass.name=\"nfs-${{needs.deploygke.outputs.prefix}}-1\"\
            --set persistence.nfs.name=\"${{needs.deploygke.outputs.prefix}}-1-nfs-disk\"\
            --set persistence.nfs.persistentVolume.extraSpec.gcePersistentDisk.pdName=\"${{needs.deploygke.outputs.prefix}}-1-nfs-pd\"\
            --set persistence.postgres.name=\"${{needs.deploygke.outputs.prefix}}-1-postgres-disk\"\
            --set persistence.postgres.persistentVolume.extraSpec.gcePersistentDisk.pdName=\"${{needs.deploygke.outputs.prefix}}-1-postgres-pd\"") > "$GXY_TMP" 2>&1
      - name: Print output
        run: cat "$GXY_TMP"
      - name: Helm get values
        run: helm get values -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1
      - name: Get pods, pvc, and pv, sc
        run: kubectl get pods -n ${{needs.deploygke.outputs.prefix}}-1; kubectl get pvc -n ${{needs.deploygke.outputs.prefix}}-1; kubectl get pv -n ${{needs.deploygke.outputs.prefix}}-1; kubectl get sc -n ${{needs.deploygke.outputs.prefix}}-1
      - name: Get events
        run: kubectl get events -n ${{needs.deploygke.outputs.prefix}}-1
      - uses: actions/setup-python@v4
        with:
          python-version: '3.11'
      - name: Install required system packages
        run: sudo apt-get update && sudo apt-get install -y python3-h5py pkg-config libhdf5-dev
      - name: Install dependencies
        run: python -m pip install planemo ephemeris pysam "galaxy-tool-util>=21.1.0.dev6" bioblend Jinja2 matplotlib gxabm
      # Second checkout, this time without persisting the default credentials;
      # the report scripts below are handed GIT_TOKEN explicitly.
      - uses: actions/checkout@v3
        with:
          persist-credentials: false
      - name: Add report from this run
        run: bash ./.github/scripts/report_deployment.sh "$GXY_TMP" anvil-production ${{ secrets.GIT_TOKEN }}
      - name: Get Galaxy IP and port
        id: galaxy
        # The escaped quotes are intentional: the emitted URL is wrapped in
        # literal double quotes, which later steps paste into shell commands.
        run: |
          echo "url=$(echo \"http://$(kubectl get svc -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy-nginx -o jsonpath='{.status.loadBalancer.ingress[0].ip}' | sed -e 's/\"//g'):$(kubectl get svc -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy-nginx -o jsonpath='{.spec.ports[0].port}')$(kubectl get ingress -n ${{needs.deploygke.outputs.prefix}}-1 ${{needs.deploygke.outputs.prefix}}-1-galaxy -o jsonpath='{.spec.rules[0].http.paths[0].path}')/\")" >> $GITHUB_OUTPUT
      - name: Create remote single user
        run: curl ${{steps.galaxy.outputs.url}}
      - name: Create reports dir for this date
        id: reports
        run: |
          DIR=reports/anvil-production/tool-tests/${{needs.deploygke.outputs.prefix}}-1
          mkdir -p $DIR
          echo "dir=$DIR" >> $GITHUB_OUTPUT
      # - name: Get tool list
      #   run: cp production/anvil/tools.yaml reports/anvil-production/tools.yaml
      - name: Create and get API key
        run: echo "key=$(python .github/scripts/create_api_key.py ${{steps.galaxy.outputs.url}} galaxypassword)" >> $GITHUB_OUTPUT
        id: api
      - name: Configure ABM
        id: key
        env:
          # The key reaches the script through the environment so it is never
          # spliced into the shell source text.
          TEST_USER_API_KEY: ${{ steps.api.outputs.key }}
        run: |
          # Register the key as a masked value before any command can print it.
          if [ -n "$TEST_USER_API_KEY" ]; then
            echo "::add-mask::$TEST_USER_API_KEY"
          fi
          abm version
          cp -R .abm ~/.abm
          abm config url galaxy ${{ steps.galaxy.outputs.url }}
          abm config key galaxy "$TEST_USER_API_KEY"
          abm config show galaxy
          # Publish the key as this step's `key` output.
          echo "key=$TEST_USER_API_KEY" >> "$GITHUB_OUTPUT"
      - name: Split tool tests (workflow_dispatch)
        if: ${{ github.event_name == 'workflow_dispatch' }}
        env:
          # User-supplied input: passed as data, never interpolated into the
          # command line.
          CHUNK: ${{ github.event.inputs.chunk }}
        run: python .github/scripts/get_chunk.py --chunk "$CHUNK" -o ${{ steps.reports.outputs.dir }} reports/anvil-production/tools.yaml
      - name: Split tool tests (cron job)
        if: ${{ github.event_name != 'workflow_dispatch' }}
        run: python .github/scripts/get_chunk.py reports/anvil-production/tools.yaml -o ${{ steps.reports.outputs.dir }}
      - name: Generate a history name
        id: history
        run: echo "history_name=anvil-test-$(date '+%F-%H-%M')" >> $GITHUB_OUTPUT
      - name: Run tool tests
        # Test failures are reported by the later steps, not by failing here.
        continue-on-error: true
        run: sleep 30 && .github/scripts/run-galaxy-tests.sh ${{ steps.history.outputs.history_name }} ${{ steps.reports.outputs.dir }}
      # - name: Write job results
      #   run: abm galaxy jobs list --history ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/jobs.txt
      - name: Generate Jobs reports
        continue-on-error: true
        shell: bash
        run: |
          abm galaxy jobs list --history ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/jobs.txt
          echo "Saving jobs that had errors."
          abm galaxy jobs list --state error
          for job in $(abm galaxy jobs list --state error | awk '{print $1}') ; do
            if [[ ! -e ${{ steps.reports.outputs.dir }}/errors ]] ; then
              mkdir -p ${{ steps.reports.outputs.dir }}/errors
            fi
            abm galaxy jobs show $job > ${{ steps.reports.outputs.dir }}/errors/$job.json
          done
          echo "Looking for paused jobs"
          abm galaxy jobs list --state paused
          for job in $(abm galaxy jobs list --state paused | awk '{print $1}') ; do
            echo "Job $job was paused"
            if [[ ! -e ${{ steps.reports.outputs.dir }}/paused ]] ; then
              mkdir -p ${{ steps.reports.outputs.dir }}/paused
            fi
            abm galaxy jobs show $job > ${{ steps.reports.outputs.dir }}/paused/$job.json
          done
          abm galaxy history summarize ${{ steps.history.outputs.history_name }} > ${{ steps.reports.outputs.dir }}/job-metrics.csv
      - name: Save job handler logs and node information
        continue-on-error: true
        shell: bash
        run: |
          pod=$(kubectl get pods -n ${{needs.deploygke.outputs.prefix}}-1 | grep galaxy-job | awk '{print $1}')
          kubectl logs -n ${{needs.deploygke.outputs.prefix}}-1 $pod > ${{ steps.reports.outputs.dir }}/job.log
          kubectl describe nodes > ${{ steps.reports.outputs.dir }}/node-info.txt
      - name: Create HTML report
        run: planemo test_reports ${{ steps.reports.outputs.dir }}/results.json --test_output_xunit ${{ steps.reports.outputs.dir }}/results.xunit --test_output ${{ steps.reports.outputs.dir }}/results.html
      - name: Update Readme
        run: python .github/scripts/update_readme.py ${{ steps.reports.outputs.dir }}/chunk.json reports/anvil-production/tool-tests/chunks.json reports/anvil-production/README.md
      - name: Add updated README from this run
        run: bash ./.github/scripts/report_tests.sh ${{ secrets.GIT_TOKEN }} "Updating anvil-production README for ${{needs.deploygke.outputs.prefix}}-1" reports/anvil-production

  # Tears down the cluster and its two disks. `if: always()` makes this run
  # even when the deployment or test job failed.
  cleanup:
    if: always()
    needs: [deploygke, testgalaxy1]
    env:
      GKE_ZONE: us-east1-b
    runs-on: ubuntu-latest
    permissions:
      contents: 'read'
      id-token: 'write'
    steps:
      - uses: 'actions/checkout@v3'
      - id: 'Auth'
        uses: 'google-github-actions/auth@v0'
        with:
          workload_identity_provider: 'projects/526897014808/locations/global/workloadIdentityPools/galaxy-tests-identity-pool/providers/gxy-tests-provider'
          service_account: 'galaxy-tests-repo-actions-sa@anvil-and-terra-development.iam.gserviceaccount.com'
      - name: 'Set up Cloud SDK'
        uses: 'google-github-actions/setup-gcloud@v1'
      - name: Delete the GKE cluster
        # The disk deletion below must still run if the cluster is already gone.
        continue-on-error: true
        run: gcloud container clusters delete "${{needs.deploygke.outputs.prefix}}" --zone "$GKE_ZONE" --quiet
      - name: Delete the two GCP Disks for first Galaxy
        # Attempt both deletions independently so a failure on the first disk
        # does not leave the second one behind; the step still fails if either
        # deletion failed.
        run: |
          status=0
          gcloud compute disks delete "${{needs.deploygke.outputs.prefix}}-1-postgres-pd" --zone "$GKE_ZONE" --quiet || status=$?
          gcloud compute disks delete "${{needs.deploygke.outputs.prefix}}-1-nfs-pd" --zone "$GKE_ZONE" --quiet || status=$?
          exit "$status"