Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 2 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -75,9 +75,7 @@ ignore = [
"ANN401", # Dynamically typed expressions (typing.Any) are disallowed
"ANN204", # Missing type annotation for special method `__init__`
"COM812", # Trailing comma missing
"E731", # Do not assign a lambda expression, use a def
"G004", # Logging statement uses f-string
"PLW0603", # Using the global statement to update `<VAR>` is discouraged
"Q000", # Single quotes found but double quotes preferred
"Q003", # Change outer quotes to avoid escaping inner quotes
"PLR0912", # Too many branches (> 12)
Expand All @@ -104,8 +102,8 @@ commit_args = ""

[[tool.bumpversion.files]]
filename = "pyproject.toml"
search = "version='{current_version}'"
replace = "version='{new_version}'"
search = 'version="{current_version}"'
replace = 'version="{new_version}"'

[[tool.bumpversion.files]]
filename = "Dockerfile"
Expand Down
22 changes: 22 additions & 0 deletions src/cpg_flow_mito/jobs/annotations_update.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from cpg_utils import Path, config, hail_batch


def download_latest_annotations(output_path: Path, job_attrs: dict[str, str]):
    """Queue a bash job that downloads the MitoMap annotations and writes them to storage.

    The job runs the ``mito-map-download`` command of ``mitoreport.jar`` inside the
    image configured at ``images.mitoreport``. The download is attempted up to five
    times, pausing 20 seconds after each failed attempt; if every attempt fails the
    job script exits non-zero instead of silently falling through.

    Args:
        output_path: destination the job's output file is written to.
        job_attrs: attributes attached to the job; ``{'tool': 'mitoreport'}`` is merged in.

    Returns:
        The job created on the current batch.
    """

    batch_instance = hail_batch.get_batch()
    job = batch_instance.new_bash_job('Monthly annotation update', job_attrs | {'tool': 'mitoreport'})
    job.image(config.config_retrieve(['images', 'mitoreport']))

    # The download succeeds locally but may be fragile, so it is wrapped in a retry loop.
    # Without the check after the loop, the script's final status would be that of
    # `sleep 20` (success) even when all five attempts failed.
    job.command(f"""
    n=0
    until [ "$n" -ge 5 ]
    do
        java -jar mitoreport.jar mito-map-download --output {job.output} && break
        n=$((n+1))
        sleep 20
    done
    if [ "$n" -ge 5 ]
    then
        echo "MitoMap download failed after 5 attempts" >&2
        exit 1
    fi
    """)
    batch_instance.write_output(job.output, output_path)
    return job
4 changes: 3 additions & 1 deletion src/cpg_flow_mito/jobs/mito.py
Original file line number Diff line number Diff line change
Expand Up @@ -703,6 +703,7 @@ def mitoreport(
vcf_path: Path,
cram_path: Path,
output_path: Path,
annotations: Path,
job_attrs: dict,
) -> Job:
"""
Expand All @@ -716,6 +717,7 @@ def mitoreport(
res = resources.STANDARD.request_resources(ncpu=2)
res.set_to_job(j)

localised_annotations = batch_instance.read_input(annotations)
vcf = batch_instance.read_input_group(**{'vcf.gz': str(vcf_path)})
cram = batch_instance.read_input_group(
**{
Expand All @@ -732,7 +734,7 @@ def mitoreport(

java -jar mitoreport.jar mito-report \
-sample {sequencing_group.id} \
-mann resources/mito_map_annotations.json \
-mann {localised_annotations} \
-gnomad resources/gnomad.genomes.v3.1.sites.chrM.vcf.bgz \
-vcf {vcf['vcf.gz']} \
{sequencing_group.id}.bam ./resources/controls/*.bam
Expand Down
45 changes: 42 additions & 3 deletions src/cpg_flow_mito/stages.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,45 @@
https://github.com/broadinstitute/gatk/blob/master/scripts/mitochondria_m2_wdl/MitochondriaPipeline.wdl
"""

import zoneinfo
from datetime import datetime
from functools import cache

from cpg_flow import stage, targets, workflow
from cpg_utils import Path, config, hail_batch
from cpg_flow.stage import StageInput, StageOutput
from cpg_flow.targets import MultiCohort
from cpg_utils import Path, config, hail_batch, to_path

from cpg_flow_mito.jobs import annotations_update, bcftools, mito, picard, vep


from cpg_flow_mito.jobs import bcftools, mito, picard, vep
@cache
def get_path_to_mito_ref_data():
    """Return the expected location of this month's MitoMap annotations file.

    The path is ``<storage.common.default>/mitoreport_ref/<YYYY-MM>/mito_map_annotations.json``,
    where the month is taken from the current time in the Australia/Brisbane timezone.
    The result is cached, so repeated calls within one process return the same path.
    """
    brisbane_now = datetime.now(tz=zoneinfo.ZoneInfo('Australia/Brisbane'))
    storage_root = to_path(config.config_retrieve(['storage', 'common', 'default']))
    month_folder = brisbane_now.strftime('%Y-%m')
    return storage_root / 'mitoreport_ref' / month_folder / 'mito_map_annotations.json'


@stage.stage
class DownloadMitoMapData(stage.MultiCohortStage):
    """Stage that downloads the MitoMap annotation data, stored under a per-month path."""

    def expected_outputs(self, _multicohort: MultiCohort) -> dict[str, Path]:
        """Map the single output key 'annotations' to this month's annotations path."""
        annotations_path = get_path_to_mito_ref_data()
        return {'annotations': annotations_path}

    def queue_jobs(
        self,
        multicohort: MultiCohort,
        _inputs: StageInput,
    ) -> StageOutput:
        """Queue the annotations download job and register its expected output."""
        expected = self.expected_outputs(multicohort)
        download_job = annotations_update.download_latest_annotations(
            expected['annotations'],
            job_attrs=self.get_job_attrs(multicohort),
        )
        return self.make_outputs(multicohort, expected, jobs=download_job)


@stage.stage(
Expand Down Expand Up @@ -339,7 +374,7 @@ def queue_jobs(


@stage.stage(
required_stages=[RealignMito, GenotypeMito],
required_stages=[DownloadMitoMapData, RealignMito, GenotypeMito],
analysis_type='web',
analysis_keys=['mitoreport'],
)
Expand Down Expand Up @@ -370,6 +405,9 @@ def queue_jobs(
) -> stage.StageOutput:
outputs = self.expected_outputs(sequencing_group)

multicohort = workflow.get_multicohort()
mitomap_annotations = inputs.as_path(multicohort, DownloadMitoMapData, 'annotations')

jobs = []

vep_j = vep.vep_one(
Expand All @@ -385,6 +423,7 @@ def queue_jobs(
vcf_path=outputs['vep_vcf'],
cram_path=inputs.as_path(sequencing_group, RealignMito, 'non_shifted_cram'),
output_path=outputs['mitoreport'],
annotations=mitomap_annotations,
job_attrs=self.get_job_attrs(sequencing_group),
)
if mitoreport_j:
Expand Down