From 2d92805dbe2e0cb6a330cced900dfa2da044765f Mon Sep 17 00:00:00 2001 From: MattWellie Date: Fri, 17 Oct 2025 10:11:02 +1000 Subject: [PATCH 1/6] annotate_cohort MT lives in long term storage --- src/cpg_seqr_loader/stages.py | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/src/cpg_seqr_loader/stages.py b/src/cpg_seqr_loader/stages.py index da9f9d1..fea0ec2 100644 --- a/src/cpg_seqr_loader/stages.py +++ b/src/cpg_seqr_loader/stages.py @@ -395,15 +395,9 @@ def expected_outputs(self, multicohort: targets.MultiCohort) -> Path: """ Expected to write a matrix table. """ - return self.tmp_prefix / 'annotate_cohort.mt' + return self.prefix / 'annotate_cohort.mt' def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput) -> stage.StageOutput: - """ - - Args: - multicohort (): - inputs (): - """ outputs = self.expected_outputs(multicohort) vep_ht_path = inputs.as_str(target=multicohort, stage=AnnotateVcfsWithVep) From b7515ffbad806bdd68595e97e26571d2e50708d5 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Fri, 17 Oct 2025 10:13:00 +1000 Subject: [PATCH 2/6] remove checking for existence of a temp dir (accidentally re-runs combining) --- src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py | 6 ++++-- src/cpg_seqr_loader/stages.py | 4 ++-- 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py b/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py index 4a3767b..34d877b 100644 --- a/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py +++ b/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py @@ -3,7 +3,7 @@ import loguru from cpg_flow import targets from cpg_flow import utils as cpg_flow_utils -from cpg_utils import Path, config, hail_batch +from cpg_utils import Path, config, hail_batch, to_path from cpg_seqr_loader import utils @@ -15,13 +15,15 @@ def create_combiner_jobs( multicohort: targets.MultiCohort, output_vds: Path, combiner_plan: Path, - temp_dir: Path, + temp_dir_string: str, 
job_attrs: dict[str, str], ) -> 'BashJob | None': vds_path: str | None = None sg_ids_in_vds: set[str] = set() sgs_to_remove: list[str] = [] + temp_dir = to_path(temp_dir_string) + # check for a VDS by ID - this is not the typical RD process if vds_id := config.config_retrieve(['workflow', 'use_specific_vds'], None): vds_result_or_none = utils.query_for_specific_vds(vds_id) diff --git a/src/cpg_seqr_loader/stages.py b/src/cpg_seqr_loader/stages.py index fea0ec2..d694ac4 100644 --- a/src/cpg_seqr_loader/stages.py +++ b/src/cpg_seqr_loader/stages.py @@ -35,7 +35,7 @@ class CombineGvcfsIntoVds(stage.MultiCohortStage): def expected_outputs(self, multicohort: targets.MultiCohort) -> dict[str, Path | str]: return { 'vds': self.prefix / f'{multicohort.name}.vds', - 'tmp': self.tmp_prefix / 'temp_dir', + 'tmp': str(self.tmp_prefix / 'temp_dir'), } def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput) -> stage.StageOutput: @@ -45,7 +45,7 @@ def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput) multicohort=multicohort, output_vds=outputs['vds'], combiner_plan=self.tmp_prefix / 'combiner_plan.json', - temp_dir=outputs['tmp'], + temp_dir_string=outputs['tmp'], job_attrs=self.get_job_attrs(multicohort), ) return self.make_outputs(multicohort, data=outputs, jobs=job) From 4c95c175c85c28bc76ab82c519e8a0bf1fc20bf5 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Fri, 17 Oct 2025 10:13:07 +1000 Subject: [PATCH 3/6] =?UTF-8?q?Bump=20version:=200.1.7=20=E2=86=92=200.1.8?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/docker.yaml | 2 +- README.md | 4 ++-- pyproject.toml | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml index 87e9861..accc503 100644 --- a/.github/workflows/docker.yaml +++ b/.github/workflows/docker.yaml @@ -15,7 +15,7 @@ on: permissions: {} env: - VERSION: 0.1.7 + 
VERSION: 0.1.8 IMAGE_NAME: cpg-flow-seqr-loader DOCKER_DEV: australia-southeast1-docker.pkg.dev/cpg-common/images-dev DOCKER_MAIN: australia-southeast1-docker.pkg.dev/cpg-common/images diff --git a/README.md b/README.md index 604739d..d272abf 100644 --- a/README.md +++ b/README.md @@ -55,7 +55,7 @@ CPG-Flow workflows are operated entirely by defining input Cohorts (see [here](h ```bash analysis-runner \ --skip-repo-checkout \ - --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.7 \ + --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.8 \ --config src/cpg_seqr_loader/config_template.toml \ --config cohorts.toml \ # containing the inputs_cohorts and sequencing_type --dataset seqr \ @@ -70,7 +70,7 @@ analysis-runner \ ```bash analysis-runner \ --skip-repo-checkout \ - --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.7 \ + --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.8 \ --config src/cpg_seqr_loader/config_template.toml \ --config cohorts.toml \ # containing the inputs_cohorts and sequencing_type --dataset seqr \ diff --git a/pyproject.toml b/pyproject.toml index faf2fbd..02f2517 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,7 @@ description='Seqr-Loader (gVCF-combiner) implemented in CPG-Flow' readme = "README.md" # currently cpg-flow is pinned to this version requires-python = ">=3.10,<3.11" -version="0.1.7" +version="0.1.8" license={"file" = "LICENSE"} classifiers=[ 'Environment :: Console', @@ -122,7 +122,7 @@ hail = ["hail"] "src/cpg_seqr_loader/scripts/annotate_cohort.py" = ["E501"] [tool.bumpversion] -current_version = "0.1.7" +current_version = "0.1.8" parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)" serialize = ["{major}.{minor}.{patch}"] commit = true From 4f1a53a29b89396cd2fcde55e60f6c01a3fc4489 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Fri, 17 Oct 2025 10:14:49 +1000 Subject: [PATCH 4/6] adjust 
to latest cpg-flow --- pyproject.toml | 3 +-- src/cpg_seqr_loader/first_workflow.py | 2 +- src/cpg_seqr_loader/full_workflow.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 02f2517..9caca5e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -21,10 +21,9 @@ classifiers=[ ] dependencies=[ - 'cpg-flow', + 'cpg-flow>=1', 'elasticsearch==8.*', 'hatchling', - 'loguru', ] [project.urls] diff --git a/src/cpg_seqr_loader/first_workflow.py b/src/cpg_seqr_loader/first_workflow.py index 39dee54..70eeeb1 100755 --- a/src/cpg_seqr_loader/first_workflow.py +++ b/src/cpg_seqr_loader/first_workflow.py @@ -16,7 +16,7 @@ def cli_main(): parser.add_argument('--dry_run', action='store_true', help='Dry run') args = parser.parse_args() - workflow.run_workflow(stages=[DeleteCombinerTemp, CreateDenseMtFromVdsWithHail], dry_run=args.dry_run) + workflow.run_workflow(name='seqr_loader', stages=[DeleteCombinerTemp, CreateDenseMtFromVdsWithHail], dry_run=args.dry_run) if __name__ == '__main__': diff --git a/src/cpg_seqr_loader/full_workflow.py b/src/cpg_seqr_loader/full_workflow.py index 626890f..bcf3342 100755 --- a/src/cpg_seqr_loader/full_workflow.py +++ b/src/cpg_seqr_loader/full_workflow.py @@ -16,7 +16,7 @@ def cli_main(): parser.add_argument('--dry_run', action='store_true', help='Dry run') args = parser.parse_args() - workflow.run_workflow(stages=[ExportMtAsEsIndex, AnnotatedDatasetMtToVcf], dry_run=args.dry_run) + workflow.run_workflow(name='seqr_loader', stages=[ExportMtAsEsIndex, AnnotatedDatasetMtToVcf], dry_run=args.dry_run) if __name__ == '__main__': From 883ca051beec97405a493ec77ea6e85c0efe53a4 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Fri, 17 Oct 2025 10:18:13 +1000 Subject: [PATCH 5/6] lint! 
--- src/cpg_seqr_loader/first_workflow.py | 6 +++++- src/cpg_seqr_loader/full_workflow.py | 6 +++++- src/cpg_seqr_loader/stages.py | 1 - 3 files changed, 10 insertions(+), 3 deletions(-) diff --git a/src/cpg_seqr_loader/first_workflow.py b/src/cpg_seqr_loader/first_workflow.py index 70eeeb1..b402a71 100755 --- a/src/cpg_seqr_loader/first_workflow.py +++ b/src/cpg_seqr_loader/first_workflow.py @@ -16,7 +16,11 @@ def cli_main(): parser.add_argument('--dry_run', action='store_true', help='Dry run') args = parser.parse_args() - workflow.run_workflow(name='seqr_loader', stages=[DeleteCombinerTemp, CreateDenseMtFromVdsWithHail], dry_run=args.dry_run) + workflow.run_workflow( + name='seqr_loader', + stages=[DeleteCombinerTemp, CreateDenseMtFromVdsWithHail], + dry_run=args.dry_run, + ) if __name__ == '__main__': diff --git a/src/cpg_seqr_loader/full_workflow.py b/src/cpg_seqr_loader/full_workflow.py index bcf3342..693804e 100755 --- a/src/cpg_seqr_loader/full_workflow.py +++ b/src/cpg_seqr_loader/full_workflow.py @@ -16,7 +16,11 @@ def cli_main(): parser.add_argument('--dry_run', action='store_true', help='Dry run') args = parser.parse_args() - workflow.run_workflow(name='seqr_loader', stages=[ExportMtAsEsIndex, AnnotatedDatasetMtToVcf], dry_run=args.dry_run) + workflow.run_workflow( + name='seqr_loader', + stages=[ExportMtAsEsIndex, AnnotatedDatasetMtToVcf], + dry_run=args.dry_run, + ) if __name__ == '__main__': diff --git a/src/cpg_seqr_loader/stages.py b/src/cpg_seqr_loader/stages.py index d694ac4..c97be48 100644 --- a/src/cpg_seqr_loader/stages.py +++ b/src/cpg_seqr_loader/stages.py @@ -398,7 +398,6 @@ def expected_outputs(self, multicohort: targets.MultiCohort) -> Path: return self.prefix / 'annotate_cohort.mt' def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput) -> stage.StageOutput: - outputs = self.expected_outputs(multicohort) vep_ht_path = inputs.as_str(target=multicohort, stage=AnnotateVcfsWithVep) vqsr_vcf = 
inputs.as_str(target=multicohort, stage=RunIndelVqsr) From 76ae1f692af20540b1b06897d24f84aef375ab95 Mon Sep 17 00:00:00 2001 From: MattWellie Date: Fri, 17 Oct 2025 10:20:16 +1000 Subject: [PATCH 6/6] hatchling out of dependencies --- pyproject.toml | 1 - 1 file changed, 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 9caca5e..f192bc3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,6 @@ classifiers=[ dependencies=[ 'cpg-flow>=1', 'elasticsearch==8.*', - 'hatchling', ] [project.urls]