diff --git a/.github/workflows/docker.yaml b/.github/workflows/docker.yaml
index 87e9861..accc503 100644
--- a/.github/workflows/docker.yaml
+++ b/.github/workflows/docker.yaml
@@ -15,7 +15,7 @@ on:
 permissions: {}
 
 env:
-  VERSION: 0.1.7
+  VERSION: 0.1.8
   IMAGE_NAME: cpg-flow-seqr-loader
   DOCKER_DEV: australia-southeast1-docker.pkg.dev/cpg-common/images-dev
   DOCKER_MAIN: australia-southeast1-docker.pkg.dev/cpg-common/images
diff --git a/README.md b/README.md
index 604739d..d272abf 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@ CPG-Flow workflows are operated entirely by defining input Cohorts (see [here](h
 ```bash
 analysis-runner \
   --skip-repo-checkout \
-  --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.7 \
+  --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.8 \
   --config src/cpg_seqr_loader/config_template.toml \
   --config cohorts.toml \  # containing the inputs_cohorts and sequencing_type
   --dataset seqr \
@@ -70,7 +70,7 @@ analysis-runner \
 ```bash
 analysis-runner \
   --skip-repo-checkout \
-  --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.7 \
+  --image australia-southeast1-docker.pkg.dev/cpg-common/images/cpg-flow-seqr-loader:0.1.8 \
   --config src/cpg_seqr_loader/config_template.toml \
   --config cohorts.toml \  # containing the inputs_cohorts and sequencing_type
   --dataset seqr \
diff --git a/pyproject.toml b/pyproject.toml
index faf2fbd..f192bc3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,7 @@ description='Seqr-Loader (gVCF-combiner) implemented in CPG-Flow'
 readme = "README.md"
 # currently cpg-flow is pinned to this version
 requires-python = ">=3.10,<3.11"
-version="0.1.7"
+version="0.1.8"
 license={"file" = "LICENSE"}
 classifiers=[
     'Environment :: Console',
@@ -21,10 +21,8 @@ classifiers=[
 ]
 
 dependencies=[
-    'cpg-flow',
+    'cpg-flow>=1',
     'elasticsearch==8.*',
-    'hatchling',
-    'loguru',
 ]
 
 [project.urls]
@@ -122,7 +120,7 @@ hail = ["hail"]
 "src/cpg_seqr_loader/scripts/annotate_cohort.py" = ["E501"]
 
 [tool.bumpversion]
-current_version = "0.1.7"
+current_version = "0.1.8"
 parse = "(?P<major>\\d+)\\.(?P<minor>\\d+)\\.(?P<patch>\\d+)"
 serialize = ["{major}.{minor}.{patch}"]
 commit = true
diff --git a/src/cpg_seqr_loader/first_workflow.py b/src/cpg_seqr_loader/first_workflow.py
index 39dee54..b402a71 100755
--- a/src/cpg_seqr_loader/first_workflow.py
+++ b/src/cpg_seqr_loader/first_workflow.py
@@ -16,7 +16,11 @@ def cli_main():
     parser.add_argument('--dry_run', action='store_true', help='Dry run')
     args = parser.parse_args()
 
-    workflow.run_workflow(stages=[DeleteCombinerTemp, CreateDenseMtFromVdsWithHail], dry_run=args.dry_run)
+    workflow.run_workflow(
+        name='seqr_loader',
+        stages=[DeleteCombinerTemp, CreateDenseMtFromVdsWithHail],
+        dry_run=args.dry_run,
+    )
 
 
 if __name__ == '__main__':
diff --git a/src/cpg_seqr_loader/full_workflow.py b/src/cpg_seqr_loader/full_workflow.py
index 626890f..693804e 100755
--- a/src/cpg_seqr_loader/full_workflow.py
+++ b/src/cpg_seqr_loader/full_workflow.py
@@ -16,7 +16,11 @@ def cli_main():
     parser.add_argument('--dry_run', action='store_true', help='Dry run')
     args = parser.parse_args()
 
-    workflow.run_workflow(stages=[ExportMtAsEsIndex, AnnotatedDatasetMtToVcf], dry_run=args.dry_run)
+    workflow.run_workflow(
+        name='seqr_loader',
+        stages=[ExportMtAsEsIndex, AnnotatedDatasetMtToVcf],
+        dry_run=args.dry_run,
+    )
 
 
 if __name__ == '__main__':
diff --git a/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py b/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py
index 4a3767b..34d877b 100644
--- a/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py
+++ b/src/cpg_seqr_loader/jobs/CombineGvcfsIntoVds.py
@@ -3,7 +3,7 @@ import loguru
 
 from cpg_flow import targets
 from cpg_flow import utils as cpg_flow_utils
-from cpg_utils import Path, config, hail_batch
+from cpg_utils import Path, config, hail_batch, to_path
 
 from cpg_seqr_loader import utils
 
@@ -15,13 +15,15 @@ def create_combiner_jobs(
     multicohort: targets.MultiCohort,
     output_vds: Path,
     combiner_plan: Path,
-    temp_dir: Path,
+    temp_dir_string: str,
     job_attrs: dict[str, str],
 ) -> 'BashJob | None':
     vds_path: str | None = None
     sg_ids_in_vds: set[str] = set()
     sgs_to_remove: list[str] = []
 
+    temp_dir = to_path(temp_dir_string)
+
     # check for a VDS by ID - this is not the typical RD process
     if vds_id := config.config_retrieve(['workflow', 'use_specific_vds'], None):
         vds_result_or_none = utils.query_for_specific_vds(vds_id)
diff --git a/src/cpg_seqr_loader/stages.py b/src/cpg_seqr_loader/stages.py
index da9f9d1..c97be48 100644
--- a/src/cpg_seqr_loader/stages.py
+++ b/src/cpg_seqr_loader/stages.py
@@ -35,7 +35,7 @@ class CombineGvcfsIntoVds(stage.MultiCohortStage):
     def expected_outputs(self, multicohort: targets.MultiCohort) -> dict[str, Path | str]:
         return {
             'vds': self.prefix / f'{multicohort.name}.vds',
-            'tmp': self.tmp_prefix / 'temp_dir',
+            'tmp': str(self.tmp_prefix / 'temp_dir'),
         }
 
     def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput) -> stage.StageOutput:
@@ -45,7 +45,7 @@ def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput)
             multicohort=multicohort,
             output_vds=outputs['vds'],
             combiner_plan=self.tmp_prefix / 'combiner_plan.json',
-            temp_dir=outputs['tmp'],
+            temp_dir_string=outputs['tmp'],
             job_attrs=self.get_job_attrs(multicohort),
         )
         return self.make_outputs(multicohort, data=outputs, jobs=job)
@@ -395,16 +395,9 @@ def expected_outputs(self, multicohort: targets.MultiCohort) -> Path:
         """
         Expected to write a matrix table.
         """
-        return self.tmp_prefix / 'annotate_cohort.mt'
+        return self.prefix / 'annotate_cohort.mt'
 
     def queue_jobs(self, multicohort: targets.MultiCohort, inputs: stage.StageInput) -> stage.StageOutput:
-        """
-
-        Args:
-            multicohort ():
-            inputs ():
-        """
-
         outputs = self.expected_outputs(multicohort)
         vep_ht_path = inputs.as_str(target=multicohort, stage=AnnotateVcfsWithVep)
         vqsr_vcf = inputs.as_str(target=multicohort, stage=RunIndelVqsr)
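
For reference, a minimal sketch of the pattern these changes introduce: the combiner's temp directory is now declared as a plain string in `expected_outputs` and converted back to a `Path` inside the job factory with `to_path`, which the diff imports from `cpg_utils`. The function name `demo_temp_dir_roundtrip` and the example bucket path below are illustrative, not from the repo.

```python
from cpg_utils import to_path


def demo_temp_dir_roundtrip(temp_dir_string: str) -> None:
    """Illustrative only: mirrors the temp_dir_string -> Path conversion in the diff."""
    # to_path accepts a local or gs:// string and returns a pathlib/cloudpathlib Path,
    # so the usual / operator works for building child paths
    temp_dir = to_path(temp_dir_string)
    print(temp_dir / 'combiner_plan.json')


demo_temp_dir_roundtrip('gs://example-tmp-bucket/seqr_loader/temp_dir')  # hypothetical path
```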