diff --git a/Dockerfile b/Dockerfile index 0b2492c..d9b0710 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,7 +1,7 @@ FROM australia-southeast1-docker.pkg.dev/cpg-common/images/cpg_hail_gcloud:0.2.137.cpg1-2 ENV PYTHONDONTWRITEBYTECODE=1 -ENV VERSION=0.1.16 +ENV VERSION=0.1.17 WORKDIR /cpg_seqr_loader diff --git a/README.md b/README.md index 5d71eb4..7d84af6 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ CPG-Flow workflows are operated entirely by defining input Cohorts (see [here](h ```bash analysis-runner \ --skip-repo-checkout \ - --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.16 \ + --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.17 \ --config src/cpg_seqr_loader/config_template.toml \ --config cohorts.toml \ # containing the inputs_cohorts and sequencing_type --dataset seqr \ @@ -70,7 +70,7 @@ analysis-runner \ ```bash analysis-runner \ --skip-repo-checkout \ - --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.16 \ + --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.17 \ --config src/cpg_seqr_loader/config_template.toml \ --config cohorts.toml \ # containing the inputs_cohorts and sequencing_type --dataset seqr \ diff --git a/pyproject.toml b/pyproject.toml index 5e10e13..94c34a8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description='Seqr-Loader (gVCF-combiner) implemented in CPG-Flow' readme = "README.md" # currently cpg-flow is pinned to this version requires-python = ">=3.10,<3.12" -version="0.1.16" +version="0.1.17" license={"file" = "LICENSE"} classifiers=[ 'Environment :: Console', @@ -120,7 +120,7 @@ hail = ["hail"] "src/cpg_seqr_loader/scripts/annotate_cohort.py" = ["E501"] [tool.bumpversion] -current_version = "0.1.16" +current_version = "0.1.17" parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)" serialize = ["{major}.{minor}.{patch}"] commit = true diff --git 
a/src/cpg_seqr_loader/config_template.toml b/src/cpg_seqr_loader/config_template.toml index 6fea6d2..14c7677 100644 --- a/src/cpg_seqr_loader/config_template.toml +++ b/src/cpg_seqr_loader/config_template.toml @@ -24,7 +24,7 @@ driver_memory = "highmem" # string, e.g. "4Gi" driver_storage = "10Gi" # integer -driver_cores = 2 +driver_cores = 8 # highmem, standard, or a string, e.g. "4Gi" worker_memory = "highmem" worker_cores = 1 @@ -49,7 +49,7 @@ worker_memory = "highmem" # highmem, standard, or a string, e.g. "4Gi" driver_memory = "highmem" # integer -driver_cores = 2 +driver_cores = 4 [vcf_from_mt] # highmem, standard, or a string, e.g. "4Gi" @@ -84,7 +84,6 @@ liftover_38_to_37 = "gs://cpg-common-main/references/liftover/grch38_to_grch37.o seqr_clinvar = "gs://cpg-common-main/references/seqr/v0/clinvar.GRCh38.ht" seqr_combined_reference_data = "gs://cpg-common-main/references/seqr/v0/combined_reference_data_grch38.ht" vep_mount = "gs://cpg-common-main/references/vep/110/mount" - # these are all related to VQSR axiom_poly_vcf = "gs://cpg-common-main/references/hg38/v0/Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz" axiom_poly_vcf_index = "gs://cpg-common-main/references/hg38/v0/Axiom_Exome_Plus.genotypes.all_populations.poly.hg38.vcf.gz.tbi" diff --git a/src/cpg_seqr_loader/scripts/annotate_cohort.py b/src/cpg_seqr_loader/scripts/annotate_cohort.py index 4e0662e..2dfd58a 100644 --- a/src/cpg_seqr_loader/scripts/annotate_cohort.py +++ b/src/cpg_seqr_loader/scripts/annotate_cohort.py @@ -238,6 +238,11 @@ def annotate_cohort( clinvar_data=clinvar_ht[mt.row_key], ref_data=ref_ht[mt.row_key], ) + if avi_table := config.config_retrieve(['references', 'avi_table'], None): + refavis_ht = hl.read_table(avi_table) + loguru.logger.info('Annotating with refavis data') + mt = mt.annotate_rows(avis=(refavis_ht[mt.row_key].normalised_avis,)) + mt.describe() # annotate all the gnomAD v4 fields in a separate function mt = annotate_gnomad4(mt)