diff --git a/pyproject.toml b/pyproject.toml
index dc96004..8ad05af 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -75,9 +75,7 @@ ignore = [
     "ANN401", # Dynamically typed expressions (typing.Any) are disallowed
     "ANN204", # Missing type annotation for special method `__init__`
     "COM812", # Trailing comma prohibited
-    "E731", # Do not assign a lambda expression, use a def
     "G004", # Logging statement uses f-string
-    "PLW0603", # Using the global statement to update `` is discouraged
     "Q000", # Single quotes found but double quotes preferred
     "Q003", # Single quotes found but double quotes preferred
     "PLR0912", # Too many branches (> 12)
@@ -104,8 +102,8 @@ commit_args = ""
 
 [[tool.bumpversion.files]]
 filename = "pyproject.toml"
-search = "version='{current_version}'"
-replace = "version='{new_version}'"
+search = 'version="{current_version}"'
+replace = 'version="{new_version}"'
 
 [[tool.bumpversion.files]]
 filename = "Dockerfile"
diff --git a/src/cpg_flow_mito/jobs/annotations_update.py b/src/cpg_flow_mito/jobs/annotations_update.py
new file mode 100644
index 0000000..8f85260
--- /dev/null
+++ b/src/cpg_flow_mito/jobs/annotations_update.py
@@ -0,0 +1,38 @@
+from cpg_utils import Path, config, hail_batch
+
+
+def download_latest_annotations(output_path: Path, job_attrs: dict[str, str]):
+    """
+    Queue a job which triggers the MitoMap download and writes the result to `output_path`.
+
+    Args:
+        output_path: destination for the downloaded annotations file
+        job_attrs: attributes for the new job; the 'tool' entry is set to 'mitoreport'
+
+    Returns:
+        the queued job
+    """
+
+    batch_instance = hail_batch.get_batch()
+    job = batch_instance.new_bash_job('Monthly annotation update', job_attrs | {'tool': 'mitoreport'})
+    job.image(config.config_retrieve(['images', 'mitoreport']))
+
+    # The download succeeds locally but may be fragile, so make up to 5 attempts, 20 seconds apart.
+    # If every attempt fails, exit non-zero explicitly: the loop alone would finish with the exit
+    # status of the final `sleep`, so the script would not report the failure itself.
+    job.command(f"""
+    n=0
+    until [ "$n" -ge 5 ]
+    do
+        java -jar mitoreport.jar mito-map-download --output {job.output} && break
+        n=$((n+1))
+        sleep 20
+    done
+    if [ "$n" -ge 5 ]
+    then
+        echo "MitoMap annotation download failed after 5 attempts" >&2
+        exit 1
+    fi
+    """)
+    batch_instance.write_output(job.output, str(output_path))
+    return job
diff --git a/src/cpg_flow_mito/jobs/mito.py b/src/cpg_flow_mito/jobs/mito.py
index 1f6f9a4..20529bc 100644
--- a/src/cpg_flow_mito/jobs/mito.py
+++ b/src/cpg_flow_mito/jobs/mito.py
@@ -703,6 +703,7 @@ def mitoreport(
     vcf_path: Path,
     cram_path: Path,
     output_path: Path,
+    annotations: Path,
     job_attrs: dict,
 ) -> Job:
     """
@@ -716,6 +717,7 @@ def mitoreport(
     res = resources.STANDARD.request_resources(ncpu=2)
     res.set_to_job(j)
 
+    localised_annotations = batch_instance.read_input(str(annotations))
     vcf = batch_instance.read_input_group(**{'vcf.gz': str(vcf_path)})
     cram = batch_instance.read_input_group(
         **{
@@ -732,7 +734,7 @@ def mitoreport(
 
         java -jar mitoreport.jar mito-report \
             -sample {sequencing_group.id} \
-            -mann resources/mito_map_annotations.json \
+            -mann {localised_annotations} \
             -gnomad resources/gnomad.genomes.v3.1.sites.chrM.vcf.bgz \
             -vcf {vcf['vcf.gz']} \
             {sequencing_group.id}.bam ./resources/controls/*.bam
diff --git a/src/cpg_flow_mito/stages.py b/src/cpg_flow_mito/stages.py
index 8ea7f68..3c1fcb2 100644
--- a/src/cpg_flow_mito/stages.py
+++ b/src/cpg_flow_mito/stages.py
@@ -5,10 +5,52 @@
 https://github.com/broadinstitute/gatk/blob/master/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl
 """
 
+import zoneinfo
+from datetime import datetime
+from functools import cache
+
 from cpg_flow import stage, targets, workflow
-from cpg_utils import Path, config, hail_batch
+from cpg_flow.stage import StageInput, StageOutput
+from cpg_flow.targets import MultiCohort
+from cpg_utils import Path, config, hail_batch, to_path
 
-from cpg_flow_mito.jobs import bcftools, mito, picard, vep
+from cpg_flow_mito.jobs import annotations_update, bcftools, mito, picard, vep
+
+
+@cache
+def get_path_to_mito_ref_data() -> Path:
+    """
+    Build the path to the expected MitoMap annotations for the current month.
+
+    The month is taken from the current date in the Australia/Brisbane timezone. The result is cached,
+    so repeated calls within one process return the same path.
+    """
+    tz = zoneinfo.ZoneInfo('Australia/Brisbane')
+    this_month_as_string = datetime.now(tz=tz).strftime('%Y-%m')
+    common_default = config.config_retrieve(['storage', 'common', 'default'])
+    return to_path(common_default) / 'mitoreport_ref' / this_month_as_string / 'mito_map_annotations.json'
+
+
+@stage.stage
+class DownloadMitoMapData(stage.MultiCohortStage):
+    """A once-monthly download of the MitoMap annotations used by mitoreport."""
+
+    def expected_outputs(self, _multicohort: MultiCohort) -> dict[str, Path]:
+        """Return the annotations path; it does not depend on the MultiCohort argument."""
+        return {'annotations': get_path_to_mito_ref_data()}
+
+    def queue_jobs(
+        self,
+        multicohort: MultiCohort,
+        _inputs: StageInput,
+    ) -> StageOutput:
+        """Queue the annotations download job, writing to the expected output path."""
+        output = self.expected_outputs(multicohort)
+        job = annotations_update.download_latest_annotations(
+            output['annotations'],
+            job_attrs=self.get_job_attrs(multicohort),
+        )
+        return self.make_outputs(multicohort, output, jobs=job)
 
 
 @stage.stage(
@@ -339,7 +381,7 @@ def queue_jobs(
 
 
 @stage.stage(
-    required_stages=[RealignMito, GenotypeMito],
+    required_stages=[DownloadMitoMapData, RealignMito, GenotypeMito],
     analysis_type='web',
     analysis_keys=['mitoreport'],
 )
@@ -370,6 +412,9 @@ def queue_jobs(
     ) -> stage.StageOutput:
         outputs = self.expected_outputs(sequencing_group)
 
+        multicohort = workflow.get_multicohort()
+        mitomap_annotations = inputs.as_path(multicohort, DownloadMitoMapData, 'annotations')
+
         jobs = []
 
         vep_j = vep.vep_one(
@@ -385,6 +430,7 @@ def queue_jobs(
             vcf_path=outputs['vep_vcf'],
             cram_path=inputs.as_path(sequencing_group, RealignMito, 'non_shifted_cram'),
             output_path=outputs['mitoreport'],
+            annotations=mitomap_annotations,
             job_attrs=self.get_job_attrs(sequencing_group),
         )
         if mitoreport_j: