#!/usr/bin/env python3
"""Report the state of a SLURM job using the status words Snakemake polls for.

Usage: cluster_job_status.py JOBID

Exactly one word is printed to stdout:

* ``success`` - the job state reported by ``sacct`` contains ``COMPLETED``
* ``failed``  - the job ended in one of the terminal failure states below
* ``running`` - any other state (pending, running, completing, ...)
* ``unknown`` - ``sacct`` exited with an error or returned no rows for the job
"""

import subprocess
import sys

SACCT = "/usr/bin/sacct"

# Terminal SLURM states that mean the job will never complete successfully.
# Matching is by substring because sacct may decorate the state, e.g.
# "CANCELLED+" or "CANCELLED by <uid>".
FAILED_STATES = (
    "BOOT_FAIL",
    "CANCELLED",
    "DEADLINE",
    "FAILED",
    "NODE_FAIL",
    "OUT_OF_MEMORY",
    "TIMEOUT",
)


def normalize_state(state: str) -> str:
    """Map a raw SLURM state string to ``success``, ``failed`` or ``running``."""
    if "COMPLETED" in state:
        return "success"
    if any(failed_state in state for failed_state in FAILED_STATES):
        return "failed"
    return "running"


def status_from_sacct_output(output: bytes) -> str:
    """Translate raw ``sacct --format=State --noheader`` output into a status word.

    Only the first whitespace-separated token (the first row of the listing)
    is inspected. Empty output is reported as ``unknown`` instead of raising
    ``IndexError``; sacct can exit successfully without printing any row,
    e.g. when the job is not (or no longer) in the accounting records.
    """
    tokens = output.decode().split()
    if not tokens:
        return "unknown"
    return normalize_state(tokens[0])


def query_job_status(jobid: str) -> str:
    """Ask ``sacct`` for the state of *jobid* and return the status word."""
    try:
        output = subprocess.check_output(
            [SACCT, "-j", jobid, "--format=State", "--noheader"],
            stderr=subprocess.DEVNULL,
        )
    except subprocess.CalledProcessError:
        # Job not found, maybe it was purged
        return "unknown"
    return status_from_sacct_output(output)


def main(argv=None) -> int:
    """Command-line entry point; returns the process exit code.

    *argv* is the argument list without the program name and defaults to
    ``sys.argv[1:]``. Only the first argument (the job id) is used.
    """
    args = sys.argv[1:] if argv is None else argv
    if not args:
        # Fail with a clear message rather than an IndexError traceback.
        print("usage: cluster_job_status.py JOBID", file=sys.stderr)
        return 2
    print(query_job_status(args[0]))
    return 0


if __name__ == "__main__":
    sys.exit(main())
"""Scan SLURM log files for job IDs and report failed or cancelled jobs."""
import re
import subprocess
from pathlib import Path
from typing import List, Optional, Set, Tuple

import click

# Log file names are expected to end in ".<jobid>.out" or ".<jobid>.err".
_JOBID_PATTERN = re.compile(r"\.(\d+)\.(?:out|err)$")
# "StdErr=<path>" field of the `scontrol show job` output.
_STDERR_PATTERN = re.compile(r"StdErr=(\S+)")


def extract_job_ids(log_dir: Path) -> Set[str]:
    """Return the distinct job IDs embedded in the file names inside *log_dir*."""
    return {
        match.group(1)
        for file in log_dir.iterdir()
        if (match := _JOBID_PATTERN.search(file.name))
    }


def get_job_state(jobid: str) -> Optional[str]:
    """Return the stdout of ``scontrol show job JOBID``.

    Returns ``None`` (after echoing a notice) when scontrol exits with a
    non-zero status.
    """
    try:
        result = subprocess.run(
            ["/usr/bin/scontrol", "show", "job", jobid],
            capture_output=True,
            text=True,
            check=True,
        )
    except subprocess.CalledProcessError:
        click.echo(f"Could not check job {jobid} (may not exist).")
        return None
    return result.stdout


def extract_stderr_path(output_text: str) -> str:
    """Return the ``StdErr=`` path from scontrol output, or ``"N/A"`` if absent."""
    match = _STDERR_PATTERN.search(output_text)
    return match.group(1) if match else "N/A"


def categorize_job(
    jobid: str,
    output_text: str,
    failed: List[Tuple[str, str]],
    cancelled: List[Tuple[str, str]],
) -> None:
    """Append ``(jobid, stderr_path)`` to *failed* or *cancelled*.

    The decision is made from the ``JobState=`` field in *output_text*; jobs
    in any other state are left out of both lists. Both lists are mutated in
    place.
    """
    stderr_path = extract_stderr_path(output_text)
    if "JobState=FAILED" in output_text:
        failed.append((jobid, stderr_path))
    elif "JobState=CANCELLED" in output_text:
        cancelled.append((jobid, stderr_path))


def write_results(
    output_file: Path,
    failed: List[Tuple[str, str]],
    cancelled: List[Tuple[str, str]],
) -> None:
    """Append the failed and cancelled job listings to *output_file*.

    Each non-empty category gets its own section. The two categories are
    written independently, so cancelled jobs are still reported when other
    jobs have failed. The file is opened in append mode, so it is created
    even when there is nothing to report.
    """
    sections = (
        ("Failed jobs:\n", failed),
        ("Cancelled jobs:\n", cancelled),
    )
    with output_file.open("a") as out_f:
        for header, jobs in sections:
            if not jobs:
                continue
            out_f.write(header)
            out_f.writelines(f"{jobid}\t{stderr}\n" for jobid, stderr in jobs)

    if not failed and not cancelled:
        click.echo("All jobs completed successfully.")
    click.echo(f"Results written to {output_file}")


@click.command()
@click.argument(
    "log_dir", type=click.Path(exists=True, file_okay=False, path_type=Path)
)
@click.option(
    "--output",
    "-o",
    required=True,
    type=click.Path(writable=True, path_type=Path),
    help="Path to output file for failed jobs.",
)
def check_failed_jobs(log_dir: Path, output: Path):
    """
    Scan LOG_DIR for SLURM log files (*.out, *.err), extract job IDs,
    and check if any jobs have failed or been cancelled using `scontrol show job JOBID`.

    Results are appended to the file given by --output.
    """
    job_ids = extract_job_ids(log_dir)

    if not job_ids:
        click.echo("No job IDs found in log filenames.")
        return

    failed_jobs_info: List[Tuple[str, str]] = []
    cancelled_jobs_info: List[Tuple[str, str]] = []

    # Sorted so the report order is stable between runs.
    for jobid in sorted(job_ids):
        output_text = get_job_state(jobid)
        if output_text:
            categorize_job(jobid, output_text, failed_jobs_info, cancelled_jobs_info)

    write_results(output, failed_jobs_info, cancelled_jobs_info)


if __name__ == "__main__":
    check_failed_jobs()
file paths.") references_path: Path = Path(balsamic_cache, cache_version, genome_version) references: Dict[str, Path] = get_absolute_paths_dict( base_path=references_path, @@ -154,7 +175,6 @@ def case_config( if path is not None } ) - variants_observations = { "artefact_snv_observations": artefact_snv_observations, "clinical_snv_observations": clinical_snv_observations, @@ -172,10 +192,13 @@ def case_config( if path is not None } ) + LOG.info(f"Collected references: {references}") analysis_fastq_dir: str = get_analysis_fastq_files_directory( case_dir=Path(analysis_dir, case_id).as_posix(), fastq_path=fastq_path ) + LOG.info(f"Prepared analysis fastq-dir: {analysis_fastq_dir}") + result_dir: Path = Path(analysis_dir, case_id, "analysis") log_dir: Path = Path(analysis_dir, case_id, "logs") script_dir: Path = Path(analysis_dir, case_id, "scripts") @@ -186,6 +209,8 @@ def case_config( for directory in [result_dir, log_dir, script_dir, benchmark_dir]: directory.mkdir(exist_ok=True) + LOG.info("Created analysis and log directories.") + LOG.info("Validating configuration data in pydantic model.") config_collection_dict = ConfigModel( sentieon={ "sentieon_install_dir": sentieon_install_dir, diff --git a/BALSAMIC/commands/init/base.py b/BALSAMIC/commands/init/base.py index 59582e5eb..9ccfbebef 100644 --- a/BALSAMIC/commands/init/base.py +++ b/BALSAMIC/commands/init/base.py @@ -12,9 +12,7 @@ from BALSAMIC.commands.options import ( OPTION_CACHE_VERSION, OPTION_CLUSTER_ACCOUNT, - OPTION_CLUSTER_CONFIG, - OPTION_CLUSTER_MAIL, - OPTION_CLUSTER_MAIL_TYPE, + OPTION_CACHE_PROFILE, OPTION_CLUSTER_PROFILE, OPTION_CLUSTER_QOS, OPTION_COSMIC_KEY, @@ -31,16 +29,14 @@ from BALSAMIC.constants.cache import REFERENCE_FILES, GenomeVersion from BALSAMIC.constants.cluster import ( QOS, - ClusterConfigType, - ClusterMailType, - ClusterProfile, ) from BALSAMIC.models.cache import CacheConfig, ReferencesCanFam, ReferencesHg from BALSAMIC.models.snakemake import SnakemakeExecutable from 
BALSAMIC.utils.analysis import get_cache_singularity_bind_paths from BALSAMIC.utils.cache import get_containers -from BALSAMIC.utils.cli import get_config_path, get_snakefile +from BALSAMIC.utils.cli import get_snakefile from BALSAMIC.utils.io import generate_workflow_graph, write_json +from BALSAMIC.utils.rule import get_script_path LOG = logging.getLogger(__name__) @@ -51,10 +47,8 @@ @OPTION_OUT_DIR @OPTION_CACHE_VERSION @OPTION_CLUSTER_ACCOUNT -@OPTION_CLUSTER_CONFIG -@OPTION_CLUSTER_MAIL -@OPTION_CLUSTER_MAIL_TYPE @OPTION_CLUSTER_PROFILE +@OPTION_CACHE_PROFILE @OPTION_CLUSTER_QOS @OPTION_COSMIC_KEY @OPTION_FORCE_ALL @@ -69,14 +63,12 @@ def initialize( context: click.Context, account: Optional[str], cache_version: str, - cluster_config: Path, cosmic_key: str, force_all: bool, genome_version: GenomeVersion, - mail_type: Optional[ClusterMailType], - mail_user: Optional[str], out_dir: str, - profile: ClusterProfile, + cluster_profile: Path, + cache_profile: Path, qos: QOS, quiet: bool, run_analysis: bool, @@ -149,15 +141,12 @@ def initialize( snakemake_executable: SnakemakeExecutable = SnakemakeExecutable( account=account, case_id=cache_config.analysis.case_id, - cluster_config_path=cluster_config - if cluster_config - else get_config_path(ClusterConfigType.CACHE), config_path=config_path, force=force_all, log_dir=log_dir, - mail_type=mail_type, - mail_user=mail_user, - profile=profile, + cluster_profile=cluster_profile, + cluster_job_status_script=get_script_path("cluster_job_status.py"), + workflow_profile=cache_profile, qos=qos, quiet=quiet, run_analysis=run_analysis, diff --git a/BALSAMIC/commands/options.py b/BALSAMIC/commands/options.py index 39164a548..d5944f31c 100644 --- a/BALSAMIC/commands/options.py +++ b/BALSAMIC/commands/options.py @@ -1,9 +1,9 @@ """Balsamic command options.""" import click - from BALSAMIC import __version__ as balsamic_version from BALSAMIC.constants.analysis import ( + SubmitSnakemake, ANALYSIS_WORKFLOWS, PON_WORKFLOWS, RUN_MODES, 
@@ -14,15 +14,12 @@ ) from BALSAMIC.constants.cache import GENOME_VERSIONS, CacheVersion, GenomeVersion from BALSAMIC.constants.cluster import ( - CLUSTER_MAIL_TYPES, - CLUSTER_PROFILES, QOS, QOS_OPTIONS, - ClusterProfile, ) from BALSAMIC.constants.constants import LOG_LEVELS, LogLevel from BALSAMIC.constants.rules import DELIVERY_RULES -from BALSAMIC.constants.workflow_params import VCF_DICT +from BALSAMIC.constants.paths import CONSTANTS_DIR, WORKFLOW_PROFILE, CACHE_PROFILE from BALSAMIC.utils.cli import ( validate_cache_version, validate_exome_option, @@ -65,13 +62,6 @@ help="Path to BALSAMIC cache", ) -OPTION_BENCHMARK = click.option( - "--benchmark", - default=False, - is_flag=True, - help="Profile slurm jobs. Make sure you have slurm profiler enabled in your HPC.", -) - OPTION_CACHE_VERSION = click.option( "--cache-version", show_default=True, @@ -135,10 +125,11 @@ help="Cluster account to run jobs", ) -OPTION_CLUSTER_CONFIG = click.option( - "--cluster-config", - type=click.Path(), - help="Cluster configuration JSON file path", +OPTION_RUN_INTERACTIVELY = click.option( + "--run-interactively", + is_flag=True, + default=False, + help="Run Snakemake job submission interactively instead of submitting the submitter to cluster.", ) OPTION_SOFT_FILTER_NORMAL = click.option( @@ -148,25 +139,38 @@ help="Flag to disable hard-filtering on presence of variants in matched normal sample", ) -OPTION_CLUSTER_MAIL = click.option( - "--mail-user", - type=click.STRING, - help="User email to receive notifications from the cluster", +OPTION_CLUSTER_PROFILE = click.option( + "-p", + "--cluster-profile", + show_default=True, + type=click.Path(exists=True, resolve_path=True), + default=CONSTANTS_DIR, + help="Directory containing snakemake cluster profile", ) -OPTION_CLUSTER_MAIL_TYPE = click.option( - "--mail-type", - type=click.Choice(CLUSTER_MAIL_TYPES), - help="The mail type triggering cluster emails", +OPTION_MAX_RUN_HOURS = click.option( + "--max-run-hours", + required=False, 
+ show_default=True, + default=SubmitSnakemake.MAX_RUN_HOURS, + type=click.INT, + help="The maximum number of hours that the sbatch script for snakemake is allowed to run on the cluster.", ) -OPTION_CLUSTER_PROFILE = click.option( - "-p", - "--profile", +OPTION_WORKFLOW_PROFILE = click.option( + "--workflow-profile", + show_default=True, + type=click.Path(exists=True, resolve_path=True), + default=WORKFLOW_PROFILE, + help="Directory containing snakemake workflow profile specifying rule resources", +) + +OPTION_CACHE_PROFILE = click.option( + "--cache-profile", show_default=True, - default=ClusterProfile.SLURM, - type=click.Choice(CLUSTER_PROFILES), - help="Cluster profile to submit jobs", + type=click.Path(exists=True, resolve_path=True), + default=CACHE_PROFILE, + help="Directory containing snakemake cache profile specifying rule resources for cache workflow", ) OPTION_CLUSTER_QOS = click.option( @@ -185,12 +189,6 @@ help="Cosmic DB authentication key", ) -OPTION_DISABLE_VARIANT_CALLER = click.option( - "--disable-variant-caller", - help=f"Run workflow with selected variant caller(s) disable. Use comma to remove multiple variant callers. 
Valid " - f"values are: {list(VCF_DICT.keys())}", -) - OPTION_DRAGEN = click.option( "--dragen", is_flag=True, diff --git a/BALSAMIC/commands/run/analysis.py b/BALSAMIC/commands/run/analysis.py index b54105dfc..177340248 100644 --- a/BALSAMIC/commands/run/analysis.py +++ b/BALSAMIC/commands/run/analysis.py @@ -2,61 +2,61 @@ import json import logging import os +import re import subprocess +import textwrap import sys from pathlib import Path from typing import List import click +from BALSAMIC import __version__ as balsamic_version + from BALSAMIC.commands.options import ( - OPTION_BENCHMARK, OPTION_CLUSTER_ACCOUNT, - OPTION_CLUSTER_CONFIG, - OPTION_CLUSTER_MAIL, - OPTION_CLUSTER_MAIL_TYPE, + OPTION_WORKFLOW_PROFILE, OPTION_CLUSTER_PROFILE, + OPTION_MAX_RUN_HOURS, OPTION_CLUSTER_QOS, - OPTION_DISABLE_VARIANT_CALLER, OPTION_DRAGEN, OPTION_FORCE_ALL, OPTION_QUIET, OPTION_RUN_ANALYSIS, OPTION_RUN_MODE, + OPTION_RUN_INTERACTIVELY, OPTION_SAMPLE_CONFIG, OPTION_SNAKEFILE, OPTION_SNAKEMAKE_OPT, ) -from BALSAMIC.constants.analysis import RunMode +from BALSAMIC.constants.analysis import RunMode, LogFile from BALSAMIC.constants.cluster import ( QOS, - ClusterConfigType, - ClusterMailType, - ClusterProfile, ) from BALSAMIC.models.config import ConfigModel +from BALSAMIC.models.sbatchsubmitter import SbatchSubmitter from BALSAMIC.models.snakemake import SnakemakeExecutable from BALSAMIC.utils.analysis import get_singularity_bind_paths -from BALSAMIC.utils.cli import createDir, get_config_path, get_snakefile -from BALSAMIC.utils.io import write_sacct_to_yaml +from BALSAMIC.utils.cli import createDir, get_snakefile +from BALSAMIC.utils.io import write_json +from BALSAMIC.utils.logging import add_file_logging +from BALSAMIC.utils.rule import get_script_path LOG = logging.getLogger(__name__) @click.command("analysis", short_help="Run the analysis on a sample config-file") -@OPTION_BENCHMARK @OPTION_CLUSTER_ACCOUNT -@OPTION_CLUSTER_CONFIG -@OPTION_CLUSTER_MAIL 
-@OPTION_CLUSTER_MAIL_TYPE @OPTION_CLUSTER_PROFILE +@OPTION_MAX_RUN_HOURS +@OPTION_WORKFLOW_PROFILE @OPTION_CLUSTER_QOS -@OPTION_DISABLE_VARIANT_CALLER @OPTION_DRAGEN @OPTION_FORCE_ALL @OPTION_QUIET @OPTION_RUN_ANALYSIS @OPTION_RUN_MODE +@OPTION_RUN_INTERACTIVELY @OPTION_SAMPLE_CONFIG @OPTION_SNAKEFILE @OPTION_SNAKEMAKE_OPT @@ -66,22 +66,38 @@ def analysis( snakefile: Path, sample_config: Path, run_mode: RunMode, - cluster_config: Path, - benchmark: bool, dragen: bool, - profile: ClusterProfile, + cluster_profile: Path, + max_run_hours: int, + workflow_profile: Path, run_analysis: bool, + run_interactively: bool, qos: QOS, force_all: bool, snakemake_opt: List[str], account: str, - mail_user: str, - mail_type: ClusterMailType, quiet: bool, - disable_variant_caller: str, ): """Run BALSAMIC workflow on the provided sample's config file.""" + + LOG.info(f"Initializing balsamic config model from config JSON: {sample_config}.") + sample_config_path: Path = Path(sample_config).absolute() + with open(sample_config_path, "r") as sample_fh: + sample_config = json.load(sample_fh) + + config_model = ConfigModel.model_validate(sample_config) + case_id = config_model.analysis.case_id + + log_file = Path( + config_model.analysis.analysis_dir, case_id, LogFile.LOGNAME + ).as_posix() + LOG.info(f"Setting BALSAMIC logfile path to: {log_file}.") + add_file_logging(log_file, logger_name=__name__) + + LOG.info(f"Running BALSAMIC version {balsamic_version} -- RUN ANALYSIS") LOG.info(f"BALSAMIC started with log level {context.obj['log_level']}.") + LOG.info(f"Using case config file: {sample_config_path}") + LOG.info(f"Starting analysis on: {case_id}.") if run_mode == RunMode.CLUSTER and not run_analysis: LOG.info("Changing run-mode to local on dry-run") @@ -91,21 +107,13 @@ def analysis( LOG.info("An account is required for cluster run mode") raise click.Abort() - sample_config_path: Path = Path(sample_config).absolute() - with open(sample_config_path, "r") as sample_fh: - sample_config 
= json.load(sample_fh) - - # Initialize balsamic model to run validation tests - config_model = ConfigModel.model_validate(sample_config) - - case_name = config_model.analysis.case_id - # Create directories for results, logs, scripts and benchmark files result_path: Path = Path(config_model.analysis.result) log_path: Path = Path(config_model.analysis.log) script_path: Path = Path(config_model.analysis.script) benchmark_path: Path = Path(config_model.analysis.benchmark) + LOG.info("Creating analysis and log directories.") analysis_directories_list = [result_path, log_path, script_path, benchmark_path] for analysis_sub_dir in analysis_directories_list: @@ -118,8 +126,15 @@ def analysis( log_path = Path(createDir(log_path.as_posix(), [])) script_path = Path(createDir(script_path.as_posix(), [])) - for analysis_sub_dir in analysis_directories_list: - analysis_sub_dir.mkdir(exist_ok=True) + LOG.info(f"Updating config model with account: {account}, QOS: {qos}") + config_model.qos = qos + config_model.account = account + config_model.analysis.log = log_path.as_posix() + config_model.analysis.script = script_path.as_posix() + + config_model_dict: dict = config_model.model_dump(by_alias=True, exclude_none=True) + LOG.info(f"Dumping updated config model to JSON: {sample_config_path}") + write_json(json_obj=config_model_dict, path=sample_config_path) analysis_type = config_model.analysis.analysis_type analysis_workflow = config_model.analysis.analysis_workflow @@ -128,22 +143,17 @@ def analysis( snakefile if snakefile else get_snakefile(analysis_type, analysis_workflow) ) - LOG.info(f"Starting {analysis_workflow} workflow...") + LOG.info("Organizing snakemake run information") snakemake_executable: SnakemakeExecutable = SnakemakeExecutable( account=account, - benchmark=benchmark, - case_id=case_name, - cluster_config_path=cluster_config - if cluster_config - else get_config_path(ClusterConfigType.ANALYSIS), + case_id=case_id, config_path=sample_config_path, - 
disable_variant_caller=disable_variant_caller, dragen=dragen, force=force_all, log_dir=log_path.as_posix(), - mail_type=mail_type, - mail_user=mail_user, - profile=profile, + cluster_profile=cluster_profile, + cluster_job_status_script=get_script_path("cluster_job_status.py"), + workflow_profile=workflow_profile, qos=qos, quiet=quiet, run_analysis=run_analysis, @@ -152,18 +162,34 @@ def analysis( singularity_bind_paths=get_singularity_bind_paths(sample_config), snakefile=snakefile, snakemake_options=snakemake_opt, - working_dir=Path(analysis_dir, case_name, "BALSAMIC_run"), - ) - subprocess.run( - f"{sys.executable} -m {snakemake_executable.get_command()}", - shell=True, + working_dir=Path(analysis_dir, case_id, "BALSAMIC_run"), ) - if run_analysis and run_mode == "cluster": - sacct_file_path: Path = Path(log_path, f"{case_name}.sacct") - yaml_file_path: Path = Path(result_path, f"{profile}_jobids.yaml") - write_sacct_to_yaml( - case_id=case_name, - sacct_file_path=sacct_file_path, - yaml_file_path=yaml_file_path, + if not run_interactively: + LOG.info(f"Submitting {analysis_workflow} workflow to cluster.") + submitter = SbatchSubmitter( + case_id=case_id, + script_path=Path(script_path), + result_path=Path(result_path), + scan_finished_jobid_status=get_script_path("scan_finished_jobid_status.py"), + log_path=Path(log_path), + account=account, + qos=qos, + max_run_hours=max_run_hours, + snakemake_executable=snakemake_executable, + logger=LOG, + ) + submitter.create_sbatch_script() + job_id = submitter.submit_job() + + if job_id: + submitter.write_job_id_yaml(job_id) + else: + LOG.warning("Could not retrieve job id from SLURM.") + + else: + LOG.info(f"Starting {analysis_workflow} workflow interactively.") + subprocess.run( + f"{sys.executable} -m {snakemake_executable.get_command()}", + shell=True, ) diff --git a/BALSAMIC/constants/analysis.py b/BALSAMIC/constants/analysis.py index 8f1ec0b77..6767358c2 100644 --- a/BALSAMIC/constants/analysis.py +++ 
b/BALSAMIC/constants/analysis.py @@ -5,6 +5,18 @@ from BALSAMIC.constants.cache import DockerContainers +class LogFile: + """Logfile constants""" + + LOGNAME: str = "balsamic.log" + + +class SubmitSnakemake: + """Constants for sbatch script running snakemake on cluster""" + + MAX_RUN_HOURS: int = 120 + + class RunMode(StrEnum): """Balsamic workflow run mode.""" @@ -104,6 +116,7 @@ class BioinfoTools(StrEnum): COMPRESS: str = "compress" CSVKIT: str = "csvkit" DELLY: str = "delly" + D4TOOLS: str = "d4tools" VEP: str = "ensembl-vep" FASTP: str = "fastp" FASTQC: str = "fastqc" @@ -152,6 +165,7 @@ class PonParams: BioinfoTools.BEDTOOLS: DockerContainers.ALIGN_QC, BioinfoTools.BWA: DockerContainers.ALIGN_QC, BioinfoTools.COMPRESS: DockerContainers.ALIGN_QC, + BioinfoTools.D4TOOLS: DockerContainers.D4TOOLS, BioinfoTools.FASTQC: DockerContainers.ALIGN_QC, BioinfoTools.SAMTOOLS: DockerContainers.ALIGN_QC, BioinfoTools.PICARD: DockerContainers.ALIGN_QC, diff --git a/BALSAMIC/constants/cache.py b/BALSAMIC/constants/cache.py index a01188512..b0031a50d 100644 --- a/BALSAMIC/constants/cache.py +++ b/BALSAMIC/constants/cache.py @@ -48,6 +48,7 @@ class DockerContainers(StrEnum): CNVKIT: str = "cnvkit" CNVPYTOR: str = "cnvpytor" COVERAGE_QC: str = "coverage_qc" + D4TOOLS: str = "d4tools" DELLY: str = "delly" GATK: str = "gatk" HTSLIB: str = "htslib" diff --git a/BALSAMIC/snakemake_rules/misc/__init__.py b/BALSAMIC/constants/cache_profile/__init__.py similarity index 100% rename from BALSAMIC/snakemake_rules/misc/__init__.py rename to BALSAMIC/constants/cache_profile/__init__.py diff --git a/BALSAMIC/constants/cache_profile/config.yaml b/BALSAMIC/constants/cache_profile/config.yaml new file mode 100644 index 000000000..7a5def445 --- /dev/null +++ b/BALSAMIC/constants/cache_profile/config.yaml @@ -0,0 +1,49 @@ +set-resources: + all: + threads: 1 + runtime: 15 + mem_mb: 5000 + index_cadd: + threads: 12 + runtime: 120 + mem_mb: 15000 + convert_delly_exclusion_file: + threads: 2 + 
runtime: 60 + mem_mb: 2000 + download_references: + threads: 24 + runtime: 6000 + mem_mb: 50000 + picard_dict_reference_genome: + threads: 12 + runtime: 60 + mem_mb: 15000 + fasta_index_reference_genome: + threads: 24 + runtime: 60 + mem_mb: 15000 + bwa_index_reference_genome: + threads: 24 + runtime: 60 + mem_mb: 15000 + compress_vcfs: + threads: 12 + runtime: 60 + mem_mb: 15000 + index_vcfs: + threads: 12 + runtime: 60 + mem_mb: 15000 + preprocess_refseq: + threads: 12 + runtime: 60 + mem_mb: 15000 + download_containers: + threads: 12 + runtime: 60 + mem_mb: 15000 + download_vep: + threads: 24 + runtime: 120 + mem_mb: 30000 \ No newline at end of file diff --git a/BALSAMIC/constants/cluster.py b/BALSAMIC/constants/cluster.py index b2aaf8b49..17f8cd433 100644 --- a/BALSAMIC/constants/cluster.py +++ b/BALSAMIC/constants/cluster.py @@ -5,22 +5,6 @@ MAX_JOBS: int = 999 -class ClusterConfigType(StrEnum): - """Analysis workflow config type.""" - - ANALYSIS: str = "cluster_analysis" - CACHE: str = "cluster_cache" - - -class ClusterProfile(StrEnum): - """Profile to submit jobs to the cluster.""" - - SLURM: str = "slurm" - - -CLUSTER_PROFILES: List[ClusterProfile] = [profile for profile in ClusterProfile] - - class ClusterAccount(StrEnum): """Cluster job submission account.""" @@ -37,18 +21,3 @@ class QOS(StrEnum): QOS_OPTIONS: List[QOS] = [qos for qos in QOS] - - -class ClusterMailType(StrEnum): - """Cluster job mail type notification.""" - - ALL: str = "ALL" - BEGIN: str = "BEGIN" - END: str = "END" - FAIL: str = "FAIL" - NONE: str = "NONE" - REQUEUE: str = "REQUEUE" - TIME_LIMIT: str = "TIME_LIMIT" - - -CLUSTER_MAIL_TYPES: List[ClusterMailType] = [mail_type for mail_type in ClusterMailType] diff --git a/BALSAMIC/constants/cluster_analysis.json b/BALSAMIC/constants/cluster_analysis.json deleted file mode 100644 index 2a551a47b..000000000 --- a/BALSAMIC/constants/cluster_analysis.json +++ /dev/null @@ -1,490 +0,0 @@ -{ - "__default__": { - "name": 
"BALSAMIC.{rule}.{wildcards}", - "time": "12:00:00", - "n": 8, - "mail_type": "FAIL", - "partition": "core" - }, - "all": { - "mail_type": "END", - "time": "00:15:00", - "n": 1 - }, - "gens_preprocess": { - "time": "01:00:00", - "n": 4 - }, - "postprocess_bam": { - "time": "03:00:00", - "n": 12 - }, - "cap_base_quality": { - "time": "05:00:00", - "n": 10 - }, - "extend_short_bedregions": { - "time": "01:00:00", - "n": 1 - }, - "pad_bedfile": { - "time": "01:00:00", - "n": 1 - }, - "cnvkit_create_coverage": { - "time": "6:00:00", - "n": 18 - }, - "cnvkit_create_targets": { - "time": "6:00:00", - "n": 2 - }, - "finalize_gens_outputfiles": { - "time": "01:00:00", - "n": 2 - }, - "CollectAlignmentSummaryMetrics": { - "time": "03:30:00", - "n": 8 - }, - "CollectHsMetrics": { - "time": "03:30:00", - "n": 8 - }, - "CollectInsertSizeMetrics": { - "time": "03:30:00", - "n": 8 - }, - "IndelRealigner": { - "time": "15:00:00", - "n": 10 - }, - "picard_markduplicates": { - "time": "06:00:00", - "n": 8 - }, - "picard_qc": { - "time": "06:00:00", - "n": 8 - }, - "bwa_mem": { - "time": "08:00:00", - "n": 6 - }, - "concatenate": { - "time": "00:30:00", - "n": 1 - }, - "bcftools_merge_germlineSNV_research": { - "time": "4:00:00", - "n": 18 - }, - "bcftools_normalise_vcfs": { - "time": "2:00:00", - "n": 2 - }, - "bcftools_concatenate_vcfs": { - "time": "2:00:00", - "n": 2 - }, - "cnvkit_segment_CNV_research": { - "time": "6:00:00", - "n": 18 - }, - "purecn_call_CNV_research": { - "time": "6:00:00", - "n": 10 - }, - "cnvkit_call_CNV_research": { - "time": "6:00:00", - "n": 10 - }, - "bcftools_sort_cnvkitCNV_research": { - "time": "4:00:00", - "n": 10 - }, - "dragen_align_call_tumor_only": { - "time": "10:00:00", - "n": 36, - "partition": "dragen" - }, - "fastqc":{ - "time": "12:00:00", - "n": 4 - }, - "fastp_quality_trim":{ - "time": "24:00:00", - "n": 4 - }, - "fastp_remove_umi":{ - "time": "12:00:00", - "n": 4 - }, - "gatk_collectreadcounts":{ - "time": "10:00:00", - "n": 5 - }, - 
"gatk_denoise_read_counts":{ - "time": "10:00:00", - "n": 10 - }, - "gatk_create_readcount_pon":{ - "time": "60:00:00", - "n": 86 - }, - "genmod_score_snvs":{ - "time": "05:00:00", - "n": 8 - }, - "manta_germline": { - "time": "05:00:00", - "n": 16 - }, - "manta_tumor_only": { - "time": "10:00:00", - "n": 12 - }, - "manta_tumor_normal": { - "time": "10:00:00", - "n": 12 - }, - "mergeBam_normal_gatk": { - "time": "04:30:00", - "n": 8 - }, - "mergeBam_tumor_gatk": { - "time": "04:30:00", - "n": 8 - }, - "multiqc": { - "time": "00:15:00", - "n": 4 - }, - "sambamba_exon_depth": { - "time": "06:00:00", - "n": 8 - }, - "sambamba_panel_depth": { - "time": "02:30:00", - "n": 8 - }, - "samtools_fixmate": { - "time": "04:30:00", - "n": 16 - }, - "sentieon_qc_metrics": { - "time": "06:00:00", - "n": 8 - }, - "sentieon_plot_qc_metrics": { - "time": "03:00:00", - "n": 2 - }, - "sentieon_DNAscope": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_DNAscope_gnomad": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_DNAscope_gnomad_tga": { - "time": "24:00:00", - "n": 12 - }, - "sex_prediction": { - "time": "01:00:00", - "n": 2 - }, - "vardict_merge": { - "time": "01:30:00", - "n": 5 - }, - "vardict_sort": { - "time": "01:30:00", - "n": 1 - }, - "post_process_vardict": { - "time": "01:30:00", - "n": 2 - }, - "vardict_tumor_normal": { - "time": "12:00:00", - "n": 8, - "mem": 30000 - }, - "vardict_tumor_only": { - "time": "10:00:00", - "n": 8, - "mem": 30000 - }, - "sentieon_TNscope": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_tnscope_wgs_tumor_normal": { - "time": "24:00:00", - "n": 36, - "mem": 80000 - }, - "sentieon_TNscope_tumor_only": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_align_sort": { - "time": "24:00:00", - "n": 12 - }, - "sentieon_base_calibration": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_dedup": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_realign": { - "time": "24:00:00", - "n": 36 - }, - "sentieon_filter_TNscope": { - "time": 
"24:00:00", - "n": 36 - }, - "somatic_snv_indel_vcf_merge": { - "time": "06:00:00", - "n": 10 - }, - "sentieon_bwa_umiextract": { - "time": "8:00:00", - "n": 36 - }, - "sentieon_consensuscall_umi": { - "time": "6:00:00", - "n": 36 - }, - "sentieon_bwa_umiconsensus": { - "time": "8:00:00", - "n": 36 - }, - "sentieon_consensusfilter_umi": { - "time": "4:00:00", - "n": 10 - }, - "sentieon_tnscope_umi": { - "time": "4:00:00", - "n": 12 - }, - "sentieon_tnscope_tga_t_only": { - "time": "4:00:00", - "n": 16 - }, - "sentieon_tnscope_tga_tumor_normal": { - "time": "5:00:00", - "n": 32 - }, - "bcftools_get_somaticINDEL_research": { - "time": "1:00:00", - "n": 4 - }, - "bcftools_annotate_somaticINDEL_research": { - "time": "1:00:00", - "n": 4 - }, - "cadd_annotate_somaticINDEL_research": { - "time": "18:00:00", - "n": 4 - }, - "vep_annotate_somaticSNV_research": { - "time": "18:00:00", - "n": 12 - }, - "vcfanno_annotate_somaticSNV_clinical": { - "time": "18:00:00", - "n" : 12 - }, - "vep_somatic_research_sv": { - "time": "12:00:00", - "n": 36 - }, - "svdb_annotate_somatic_research_sv": { - "time": "12:00:00", - "n": 8 - }, - "svdb_annotate_clinical_obs_somatic_clinical_sv": { - "time": "10:00:00", - "n": 8 - }, - "svdb_annotate_somatic_obs_somatic_clinical_sv": { - "time": "10:00:00", - "n": 8 - }, - "vep_germline": { - "time": "06:00:00", - "n": 10 - }, - "picard_umiaware": { - "time": "4:00:00", - "n": 12 - }, - "bcftools_query_generatebackgroundaf_umitable": { - "time": "2:00:00", - "n": 8 - }, - "samtools_view_calculatemeanfamilydepth_umi": { - "time": "2:00:00", - "n": 8 - }, - "bcftools_query_calculatenoiseAF_umi": { - "time": "2:00:00", - "n": 8 - }, - "delly_tumor_normal": { - "time": "12:00:00", - "n": 36 - }, - "delly_tumor_only": { - "time": "10:00:00", - "n": 36 - }, - "ascat_tumor_normal": { - "time": "12:00:00", - "n": 36 - }, - "csv_to_pdf": { - "time": "00:15:00", - "n": 1 - }, - "txt_to_pdf": { - "time": "00:15:00", - "n": 1 - }, - "image_to_pdf": { - 
"time": "00:15:00", - "n": 1 - }, - "merge_cnv_pdf_reports": { - "time": "00:15:00", - "n": 1 - }, - "collect_custom_qc_metrics": { - "time": "00:15:00", - "n": 1 - }, - "create_final_vcf_namemap": { - "time": "00:15:00", - "n": 1 - }, - "svdb_merge_tumor_normal": { - "time": "01:00:00", - "n": 8 - }, - "svdb_merge_tumor_only": { - "time": "02:00:00", - "n": 8 - }, - "tiddit_sv_tumor_only": { - "time": "10:00:00", - "n": 24 - }, - "tiddit_sv_tumor_normal": { - "time": "12:00:00", - "n": 24 - }, - "cnvpytor_tumor_only": { - "time": "05:00:00", - "n": 24 - }, - "bcftools_process_SV_CNV": { - "time": "01:00:00", - "n": 8 - }, - "post_process_tnscope": { - "time": "01:00:00", - "n": 4 - }, - "vcf2cytosure_convert": { - "time": "02:00:00", - "n": 8 - }, - "bcftools_filter_svdb_research": { - "time": "02:00:00", - "n": 8 - }, - "bcftools_filter_svdb_clinical": { - "time": "01:00:00", - "n": 8 - }, - "vcfheader_rename_germline": { - "time": "01:00:00", - "n": 8 - }, - "bcftools_filter_tnscope_research_tumor_only": { - "time": "04:00:00", - "n": 8 - }, - "bcftools_filter_tnscope_clinical_tumor_only": { - "time": "03:00:00", - "n": 8 - }, - "bcftools_filter_tnscope_research_tumor_normal": { - "time": "04:00:00", - "n": 8 - }, - "bcftools_filter_tnscope_clinical_tumor_normal": { - "time": "03:00:00", - "n": 8 - }, - "bcftools_filter_tnscope_umi_research_tumor_only": { - "time": "04:00:00", - "n": 8 - }, - "bcftools_filter_tnscope_umi_tumor_only": { - "time": ":03:00:00", - "n": 8 - }, - "bcftools_filter_TNscope_umi_research_tumor_normal": { - "time": "04:00:00", - "n": 8 - }, - "bcftools_filter_TNscope_umi_clinical_tumor_normal": { - "time": "03:00:00", - "n": 8 - }, - "somalier_extract_normal": { - "time": "01:00:00", - "n": 1 - }, - "somalier_extract_tumor": { - "time": "01:00:00", - "n": 1 - }, - "somalier_relate": { - "time": "01:00:00", - "n": 1 - }, - "bedtools_merge": { - "time": "01:00:00", - "n": 1 - }, - "bam_compress": { - "time": "04:00:00", - "n": 20 - }, - 
"samtools_qc": { - "time": "04:00:00", - "n": 16 - }, - "igh_dux4_detection": { - "time": "02:00:00", - "n": 1 - }, - "msisensorpro_scan_reference": { - "time": "04:00:00", - "n": 1 - }, - "msisensorpro_msi_tumor_normal": { - "time": "08:00:00", - "n": 24 - } -} diff --git a/BALSAMIC/constants/cluster_cache.json b/BALSAMIC/constants/cluster_cache.json deleted file mode 100644 index a1d13e646..000000000 --- a/BALSAMIC/constants/cluster_cache.json +++ /dev/null @@ -1,86 +0,0 @@ -{ - "__default__": { - "n": 12, - "time": "02:00:00", - "mail_type": "FAIL", - "partition": "core" - }, - "all": { - "n": 1, - "time": "00:15:00", - "mail_type": "END", - "partition": "core" - }, - "download_containers": { - "n": 12, - "time": "00:15:00", - "mail_type": "FAIL", - "partition": "core" - }, - "download_references": { - "n": 24, - "time": "10:00:00", - "mail_type": "FAIL", - "partition": "core" - }, - "compress_vcfs": { - "n": 12, - "time": "00:30:00", - "mail_type": "FAIL", - "partition": "core" - }, - "index_vcfs": { - "n": 12, - "time": "00:30:00", - "mail_type": "FAIL", - "partition": "core" - }, - "index_cadd": { - "n": 12, - "time": "01:00:00", - "mail_type": "FAIL", - "partition": "core" - }, - "picard_dict_reference_genome": { - "n": 12, - "time": "00:30:00", - "mail_type": "FAIL", - "partition": "core" - }, - "convert_delly_exclusion_file": { - "n": 2, - "time": "00:15:00", - "mail_type": "FAIL", - "partition": "core" - }, - "fasta_index_reference_genome": { - "n": 24, - "time": "01:00:00", - "mail_type": "FAIL", - "partition": "core" - }, - "bwa_index_reference_genome": { - "n": 24, - "time": "01:00:00", - "mail_type": "FAIL", - "partition": "core" - }, - "preprocess_refseq": { - "n": 12, - "time": "00:30:00", - "mail_type": "FAIL", - "partition": "core" - }, - "preprocess_refseq_canfam": { - "n": 12, - "time": "00:30:00", - "mail_type": "FAIL", - "partition": "core" - }, - "download_vep": { - "n": 24, - "time": "01:00:00", - "mail_type": "FAIL", - "partition": "core" - 
} -} diff --git a/BALSAMIC/constants/config.yaml b/BALSAMIC/constants/config.yaml new file mode 100644 index 000000000..47a96f0d8 --- /dev/null +++ b/BALSAMIC/constants/config.yaml @@ -0,0 +1,9 @@ +cluster: "sbatch --parsable --account {config[account]} --partition core --qos {config[qos]} --time={resources.runtime} --mem={resources.mem_mb}M --cpus-per-task={resources.threads} --error {config[analysis][log]}/BALSAMIC.{config[analysis][case_id]}.{rulename}.%j.err --output {config[analysis][log]}/BALSAMIC.{config[analysis][case_id]}.{rulename}.%j.out" +jobs: 999 +default-resources: + runtime: 120 + mem_mb: 4000 + threads: 2 +max-status-checks-per-second: 10 +local-cores: 1 +restart-times: 2 \ No newline at end of file diff --git a/BALSAMIC/constants/paths.py b/BALSAMIC/constants/paths.py index 919830f37..142642803 100644 --- a/BALSAMIC/constants/paths.py +++ b/BALSAMIC/constants/paths.py @@ -5,11 +5,12 @@ # Balsamic working directory constants BALSAMIC_DIR: Path = Path(sys.modules["BALSAMIC"].__file__).parent.resolve() CONSTANTS_DIR: Path = Path(BALSAMIC_DIR, "constants") +WORKFLOW_PROFILE: Path = Path(CONSTANTS_DIR, "workflow_profile") +CACHE_PROFILE: Path = Path(CONSTANTS_DIR, "cache_profile") CONTAINERS_DIR: Path = Path(BALSAMIC_DIR, "containers") ASSETS_DIR: Path = Path(BALSAMIC_DIR, "assets") SCRIPT_DIR: Path = Path(ASSETS_DIR, "scripts") REFSEQ_SCRIPT_PATH: Path = Path(SCRIPT_DIR, "refseq_sql.awk") -IMMEDIATE_SUBMIT_PATH: Path = Path(SCRIPT_DIR, "immediate_submit.py") # Sentieon specific constants SENTIEON_MODELS_DIR: Path = Path(BALSAMIC_DIR, "assets", "sentieon_models") diff --git a/BALSAMIC/constants/rules.py b/BALSAMIC/constants/rules.py index fb6a8daeb..891370ecf 100644 --- a/BALSAMIC/constants/rules.py +++ b/BALSAMIC/constants/rules.py @@ -2,12 +2,6 @@ from typing import Dict, List from BALSAMIC.constants.cache import GenomeVersion -from BALSAMIC.constants.analysis import ( - AnalysisType, - AnalysisWorkflow, - SequencingType, - WorkflowSolution, -) 
common_cache_rules: List[str] = [ "snakemake_rules/cache/singularity_containers.rule", @@ -23,16 +17,12 @@ "snakemake_rules/cache/vep.rule", ] -canfam_cache_rules: List[str] = common_cache_rules + [ - "snakemake_rules/cache/refseq_canfam.rule" -] - - SNAKEMAKE_RULES: Dict[str, Dict[str, list]] = { "common": { - "misc": ["snakemake_rules/misc/sleep.rule"], "qc": [ "snakemake_rules/quality_control/fastqc.rule", + "snakemake_rules/quality_control/d4tools.rule", + "snakemake_rules/umi/d4tools_umi.rule", "snakemake_rules/quality_control/multiqc.rule", "snakemake_rules/quality_control/qc_metrics.rule", "snakemake_rules/quality_control/picard_common.rule", @@ -181,7 +171,6 @@ "cache": { GenomeVersion.HG19: hg_cache_rules, GenomeVersion.HG38: hg_cache_rules, - GenomeVersion.CanFam3: canfam_cache_rules, }, } @@ -189,6 +178,8 @@ # QC "multiqc", "collect_custom_qc_metrics", + "create_d4file", + "create_d4file_umi", # Alignment "bam_compress_tumor_umi", "bam_compress_normal_umi", diff --git a/BALSAMIC/constants/workflow_params.py b/BALSAMIC/constants/workflow_params.py index 7dc33672d..a435f4662 100644 --- a/BALSAMIC/constants/workflow_params.py +++ b/BALSAMIC/constants/workflow_params.py @@ -107,8 +107,6 @@ }, } -SLEEP_BEFORE_START = 1500 - WORKFLOW_PARAMS = { "bam_post_processing": { "manta_max_base_quality": 70, diff --git a/BALSAMIC/constants/workflow_profile/__init__.py b/BALSAMIC/constants/workflow_profile/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/BALSAMIC/constants/workflow_profile/config.yaml b/BALSAMIC/constants/workflow_profile/config.yaml new file mode 100644 index 000000000..d5ad4ddeb --- /dev/null +++ b/BALSAMIC/constants/workflow_profile/config.yaml @@ -0,0 +1,653 @@ +set-resources: + post_process_tnscope_info_fields_wgs: + threads: 1 + runtime: 60 + mem_mb: 5000 + post_process_tnscope_info_fields_tga: + threads: 1 + runtime: 60 + mem_mb: 5000 + tnscope_sort: + threads: 1 + runtime: 60 + mem_mb: 10000 + merge_tnscope_to_mnvs: + 
threads: 4 + runtime: 60 + mem_mb: 10000 + cnvpytor_tumor_only_wgs: + threads: 24 + runtime: 600 + mem_mb: 60000 + igh_dux4_detection_tumor_only_wgs: + threads: 2 + runtime: 120 + mem_mb: 5000 + igh_dux4_detection_tumor_normal_wgs: + threads: 2 + runtime: 120 + mem_mb: 5000 + tiddit_sv_tumor_only_wgs: + threads: 24 + runtime: 600 + mem_mb: 30000 + tiddit_sv_tumor_normal_wgs: + threads: 24 + runtime: 600 + mem_mb: 40000 + ascat_tumor_normal_wgs: + threads: 24 + runtime: 600 + mem_mb: 60000 + delly_cnv_tumor_only: + threads: 12 + runtime: 340 + mem_mb: 20000 + delly_cnv_tumor_normal: + threads: 12 + runtime: 340 + mem_mb: 20000 + delly_cnv_tumor_only_wgs: + threads: 24 + runtime: 600 + mem_mb: 30000 + delly_cnv_tumor_normal_wgs: + threads: 24 + runtime: 600 + mem_mb: 30000 + delly_sv_tumor_only_wgs: + threads: 24 + runtime: 600 + mem_mb: 30000 + delly_sv_tumor_normal_wgs: + threads: 24 + runtime: 600 + mem_mb: 30000 + manta_tumor_only_wgs: + threads: 24 + runtime: 600 + mem_mb: 10000 + manta_tumor_normal_wgs: + threads: 24 + runtime: 600 + mem_mb: 15000 + delly_sv_tumor_only_tga: + threads: 12 + runtime: 340 + mem_mb: 10000 + delly_sv_tumor_normal_tga: + threads: 12 + runtime: 340 + mem_mb: 10000 + manta_tumor_only_tga: + threads: 12 + runtime: 340 + mem_mb: 10000 + manta_tumor_normal_tga: + threads: 12 + runtime: 340 + mem_mb: 15000 + bcftools_quality_filter_svdb: + threads: 4 + runtime: 60 + mem_mb: 10000 + svdb_merge_tumor_only: + threads: 8 + runtime: 60 + mem_mb: 10000 + svdb_merge_tumor_normal: + threads: 8 + runtime: 60 + mem_mb: 10000 + bcftools_process_SV_CNV: + threads: 4 + runtime: 60 + mem_mb: 10000 + bcftools_sort_cnvkitCNV_research: + threads: 2 + runtime: 60 + mem_mb: 10000 + cnvkit_call_CNV_research: + threads: 10 + mem_mb: 5000 + runtime: 360 + purecn_call_CNV_research: + mem_mb: 20000 + runtime: 360 + threads: 10 + bedtools_pad_bedfile: + mem_mb: 1000 + threads: 1 + runtime: 20 + vardict_sort: + threads: 1 + mem_mb: 20000 + runtime: 30 + 
gatk_update_vcf_sequence_dictionary: + runtime: 600 + threads: 5 + mem_mb: 10000 + cnvkit_segment_CNV_research: + runtime: 360 + mem_mb: 5000 + threads: 18 + bcftools_merge_germlineSNV_research: + runtime: 280 + mem_mb: 10000 + threads: 2 + vardict_tumor_normal: + runtime: 280 + mem_mb: 40000 + threads: 16 + sentieon_tnscope_tga_tumor_normal: + runtime: 280 + mem_mb: 10000 + threads: 16 + post_process_vardict: + runtime: 280 + mem_mb: 5000 + threads: 2 + vardict_tumor_only: + runtime: 280 + mem_mb: 40000 + threads: 16 + sentieon_tnscope_tga_tumor_only: + runtime: 280 + mem_mb: 20000 + threads: 16 + bcftools_quality_filter_TNscope_umi_tumor_only: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_TNscope_umi_tumor_normal: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_vardict_tumor_normal: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_tnscope_tumor_normal_tga: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_vardict_tumor_only: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_tnscope_tumor_only_tga: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_tnscope_tumor_normal_wgs: + runtime: 100 + mem_mb: 10000 + threads: 8 + bcftools_quality_filter_tnscope_tumor_only_wgs: + runtime: 100 + mem_mb: 10000 + threads: 8 + sentieon_tnscope_wgs_tumor_normal: + runtime: 1440 + mem_mb: 40000 + threads: 36 + sentieon_tnscope_wgs_tumor_only: + runtime: 1440 + mem_mb: 45000 + threads: 36 + merge_snv_vcfs: + runtime: 120 + mem_mb: 20000 + threads: 2 + bcftools_normalise_vcfs: + runtime: 120 + mem_mb: 4000 + threads: 2 + manta_germline_wgs: + runtime: 1440 + mem_mb: 60000 + threads: 36 + sentieon_DNAscope_wgs: + runtime: 1440 + mem_mb: 50000 + threads: 36 + manta_germline_tga: + runtime: 300 + mem_mb: 40000 + threads: 16 + sentieon_DNAscope_tga: + runtime: 1440 + mem_mb: 25000 + threads: 36 + finalize_gens_outputfiles: + runtime: 60 + mem_mb: 2000 + threads: 1 
+ gens_preprocess_tga: + runtime: 60 + mem_mb: 10000 + threads: 1 + sentieon_DNAscope_gnomad_tga: + runtime: 1440 + mem_mb: 20000 + threads: 12 + gens_preprocess_wgs: + runtime: 60 + mem_mb: 10000 + threads: 1 + gatk_denoise_read_counts: + runtime: 600 + mem_mb: 70000 + threads: 10 + sentieon_DNAscope_gnomad: + runtime: 1440 + mem_mb: 60000 + threads: 36 + gatk_collectreadcounts: + runtime: 350 + mem_mb: 40000 + threads: 5 + bedtools_sort_and_merge: + runtime: 20 + mem_mb: 400 + threads: 1 + extend_short_bedregions: + runtime: 20 + mem_mb: 400 + threads: 1 + create_coverage: + runtime: 360 + mem_mb: 30000 + threads: 18 + create_target: + runtime: 360 + mem_mb: 5000 + threads: 2 + create_d4file: + runtime: 360 + mem_mb: 40000 + threads: 2 + create_d4file_umi: + runtime: 360 + mem_mb: 20000 + threads: 2 + sentieon_align_sort_umireads_umi: + runtime: 1440 + mem_mb: 26000 + threads: 12 + sentieon_tnscope_umi_tn: + runtime: 280 + mem_mb: 10000 + threads: 12 + sentieon_tnscope_umi: + runtime: 280 + mem_mb: 10000 + threads: 12 + bam_compress_normal_umi: + runtime: 280 + mem_mb: 16000 + threads: 20 + bam_compress_tumor_umi: + runtime: 280 + mem_mb: 16000 + threads: 20 + sentieon_consensusfilter_umi: + runtime: 280 + mem_mb: 16000 + threads: 10 + sentieon_bwa_umiconsensus: + runtime: 480 + mem_mb: 30000 + threads: 36 + sentieon_consensuscall_umi: + runtime: 360 + mem_mb: 16000 + threads: 36 + samtools_view_calculatemeanfamilydepth_umi: + runtime: 280 + mem_mb: 16000 + threads: 8 + picard_collecthsmetrics_umi: + runtime: 280 + mem_mb: 16000 + threads: 8 + picard_umiaware: + runtime: 280 + mem_mb: 16000 + threads: 12 + modify_tnscope_infofield_umi: + runtime: 60 + mem_mb: 10000 + threads: 2 + bcftools_query_generatebackgroundaf_umitable: + runtime: 60 + mem_mb: 10000 + threads: 8 + merge_cnv_pdf_reports: + runtime: 60 + mem_mb: 5000 + threads: 1 + image_to_pdf: + runtime: 60 + mem_mb: 5000 + threads: 1 + msi_to_pdf: + runtime: 60 + mem_mb: 5000 + threads: 1 + txt_to_pdf: + 
runtime: 60 + mem_mb: 5000 + threads: 1 + csv_to_pdf: + runtime: 60 + mem_mb: 5000 + threads: 1 + somalier_relate: + runtime: 60 + mem_mb: 5000 + threads: 1 + somalier_extract_tumor: + runtime: 60 + mem_mb: 5000 + threads: 1 + somalier_extract_normal: + runtime: 60 + mem_mb: 5000 + threads: 1 + sex_prediction_tga_tumor_only: + runtime: 60 + mem_mb: 5000 + threads: 2 + sex_prediction_tga_tumor_normal: + runtime: 60 + mem_mb: 5000 + threads: 2 + sex_prediction_wgs_tumor_only: + runtime: 60 + mem_mb: 5000 + threads: 2 + sex_prediction_wgs_tumor_normal: + runtime: 60 + mem_mb: 5000 + threads: 2 + sentieon_plot_qc_metrics_tga: + runtime: 30 + mem_mb: 2000 + threads: 4 + sentieon_plot_qc_metrics_wgs: + runtime: 30 + mem_mb: 5000 + threads: 4 + sentieon_qc_metrics_tga: + runtime: 280 + mem_mb: 30000 + threads: 8 + sentieon_qc_metric_wgs: + runtime: 280 + mem_mb: 25000 + threads: 8 + sentieon_wgs_metrics: + runtime: 280 + mem_mb: 20000 + threads: 8 + picard_CollectWgsMetrics: + runtime: 600 + mem_mb: 20000 + threads: 8 + samtools_qc_umi_collapsed: + runtime: 280 + mem_mb: 8000 + threads: 16 + samtools_qc_tga: + runtime: 280 + mem_mb: 8000 + threads: 16 + samtools_qc_wgs: + runtime: 280 + mem_mb: 8000 + threads: 16 + sambamba_exon_depth: + runtime: 150 + mem_mb: 12000 + threads: 8 + sambamba_panel_depth: + runtime: 150 + mem_mb: 8000 + threads: 8 + cnv_report: + runtime: 20 + mem_mb: 100 + threads: 1 + collect_custom_qc_metrics: + runtime: 20 + mem_mb: 1000 + threads: 1 + picard_CollectHsMetrics_WGS: + runtime: 360 + mem_mb: 25000 + threads: 8 + picard_CollectAlignmentSummaryMetrics: + runtime: 500 + mem_mb: 25000 + threads: 8 + picard_CollectHsMetrics: + runtime: 360 + mem_mb: 25000 + threads: 8 + multiqc: + runtime: 60 + mem_mb: 5000 + threads: 4 + mosdepth_coverage: + runtime: 280 + mem_mb: 5000 + threads: 12 + gatk_create_readcount_pon: + runtime: 3600 + mem_mb: 170000 + threads: 86 + create_reference: + runtime: 120 + mem_mb: 15000 + threads: 8 + 
dragen_align_call_tumor_only: + runtime: 1200 + mem_mb: 120000 + threads: 36 + concatenate_fastqs: + runtime: 60 + mem_mb: 5000 + threads: 1 + vcfheader_rename_germline: + runtime: 120 + mem_mb: 1000 + threads: 4 + vcf2cytosure_convert_tumor_normal: + runtime: 120 + mem_mb: 10000 + threads: 8 + vcf2cytosure_convert_tumor_only: + runtime: 120 + mem_mb: 10000 + threads: 8 + vcf2cytosure_convert: + runtime: 120 + mem_mb: 10000 + threads: 8 + bcftools_filter_sv_research: + runtime: 120 + mem_mb: 10000 + threads: 8 + bcftools_filter_sv_clinical: + runtime: 120 + mem_mb: 10000 + threads: 8 + bcftools_filter_tnscope_clinical_tumor_only: + runtime: 280 + mem_mb: 15000 + threads: 8 + bcftools_filter_tnscope_research_tumor_only: + runtime: 280 + mem_mb: 15000 + threads: 8 + bcftools_filter_TNscope_umi_clinical_tumor_only: + runtime: 280 + mem_mb: 5000 + threads: 4 + bcftools_filter_TNscope_umi_research_tumor_only: + runtime: 280 + mem_mb: 5000 + threads: 4 + bcftools_filter_merged_clinical_tumor_only: + runtime: 280 + mem_mb: 15000 + threads: 8 + bcftools_filter_merged_research_tumor_only: + runtime: 280 + mem_mb: 15000 + threads: 8 + bcftools_filter_tnscope_clinical_tumor_normal: + runtime: 280 + mem_mb: 10000 + threads: 8 + bcftools_filter_tnscope_research_tumor_normal: + runtime: 280 + mem_mb: 10000 + threads: 8 + bcftools_filter_TNscope_umi_clinical_tumor_normal: + runtime: 280 + mem_mb: 5000 + threads: 4 + bcftools_filter_TNscope_umi_research_tumor_normal: + runtime: 280 + mem_mb: 5000 + threads: 4 + bcftools_filter_merged_clinical_tumor_normal: + runtime: 280 + mem_mb: 10000 + threads: 8 + bcftools_filter_merged_research_tumor_normal: + runtime: 280 + mem_mb: 10000 + threads: 8 + svdb_annotate_clinical_obs_somatic_clinical_sv: + runtime: 600 + mem_mb: 15000 + threads: 8 + svdb_annotate_somatic_obs_somatic_clinical_sv: + runtime: 600 + mem_mb: 15000 + threads: 8 + svdb_annotate_somatic_research_sv: + runtime: 720 + mem_mb: 15000 + threads: 36 + vep_somatic_research_sv: 
+ runtime: 720 + mem_mb: 120000 + threads: 36 + vcfanno_annotate_somaticSNV_clinical: + runtime: 1080 + mem_mb: 10000 + threads: 12 + vep_annotate_somaticSNV_research: + runtime: 1080 + mem_mb: 50000 + threads: 12 + bcftools_annotate_somaticINDEL_research: + runtime: 60 + mem_mb: 4000 + threads: 4 + cadd_annotate_somaticINDEL_research: + runtime: 1080 + mem_mb: 20000 + threads: 4 + bcftools_get_somaticINDEL_research: + runtime: 60 + mem_mb: 5000 + threads: 4 + tmb_calculation: + runtime: 280 + mem_mb: 5000 + threads: 4 + genmod_score_snvs: + runtime: 280 + mem_mb: 5000 + threads: 8 + msisensorpro_msi_tumor_normal: + runtime: 280 + mem_mb: 10000 + threads: 24 + msisensorpro_scan_reference: + runtime: 280 + mem_mb: 1000 + threads: 1 + vep_annotate_germlineVAR_normal: + runtime: 360 + mem_mb: 30000 + threads: 10 + vep_annotate_germlineVAR_tumor: + runtime: 360 + mem_mb: 30000 + threads: 10 + sentieon_align_sort: + runtime: 1440 + mem_mb: 40000 + threads: 12 + sentieon_realign: + runtime: 1440 + mem_mb: 80000 + threads: 36 + sentieon_align_sort_umireads: + runtime: 1440 + mem_mb: 40000 + threads: 12 + cap_base_quality: + mem_mb: 10000 + runtime: 300 + threads: 10 + sentieon_dedup: + runtime: 1440 + mem_mb: 80000 + threads: 36 + sentieon_dedup_consensus: + runtime: 1440 + mem_mb: 60000 + threads: 36 + sentieon_base_calibration: + mem_mb: 25000 + runtime: 1440 + threads: 36 + samtools_fixmate: + mem_mb: 80000 + runtime: 560 + threads: 16 + bam_compress_tumor: + mem_mb: 8000 + runtime: 280 + threads: 20 + bam_compress_normal: + mem_mb: 8000 + runtime: 280 + threads: 20 + fastqc: + threads: 4 + mem_mb: 6000 + runtime: 280 + fastp_quality_and_adapter_trim_wgs: + mem_mb: 6000 + runtime: 350 + threads: 12 + fastp_adapter_trim_tga: + mem_mb: 6000 + runtime: 400 + threads: 4 + fastp_quality_trim_tga: + mem_mb: 16000 + runtime: 350 + threads: 8 + sentieon_umiextract_tga: + mem_mb: 15000 + runtime: 350 + threads: 8 \ No newline at end of file diff --git 
a/BALSAMIC/containers/d4tools/Dockerfile b/BALSAMIC/containers/d4tools/Dockerfile new file mode 100644 index 000000000..9f5badaac --- /dev/null +++ b/BALSAMIC/containers/d4tools/Dockerfile @@ -0,0 +1,52 @@ +FROM python:3.10-slim + +LABEL base.image="python:3.10-slim" \ + maintainer="Clinical Genomics" \ + about.contact="support@clinicalgenomics.se" \ + software="D4tools" \ + software.version="0.3.10" \ + about.summary="Efficient storage and query format for genomic depth data" \ + about.home="https://github.com/38/d4-format" \ + about.documentation="https://github.com/38/d4-format" \ + about.license="MIT License" + +ENV DEBIAN_FRONTEND=noninteractive +ENV VENV /opt/venv +ENV PATH="${VENV}/bin:$PATH" + +# Install required packages and Rust toolchain +RUN apt-get update && \ + apt-get install -y --no-install-recommends \ + build-essential \ + ca-certificates \ + curl \ + git \ + liblzma-dev \ + pkg-config \ + zlib1g-dev && \ + apt-get clean && rm -rf /var/lib/apt/lists/* && \ + curl https://sh.rustup.rs -sSf | bash -s -- -y + +ENV PATH="/root/.cargo/bin:$PATH" + +# Clone, build, and install d4tools v0.3.10 +RUN git clone --depth 1 --branch v0.3.10 https://github.com/38/d4-format.git /tmp/d4-format && \ + cd /tmp/d4-format && \ + cargo build --release && \ + cp target/release/d4tools /usr/local/bin/d4tools && \ + rm -rf /tmp/d4-format + +# Optional Python venv +RUN python -m venv ${VENV} && \ + pip install --upgrade pip + +# Create user and working directory +RUN adduser --disabled-password --gecos "" ubuntu && \ + chsh -s /bin/bash ubuntu && \ + mkdir -p /home/ubuntu + +USER ubuntu +WORKDIR /home/ubuntu + +# Default to interactive shell +CMD ["/bin/bash"] \ No newline at end of file diff --git a/BALSAMIC/containers/d4tools/__init__.py b/BALSAMIC/containers/d4tools/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/BALSAMIC/containers/d4tools/d4tools.yaml b/BALSAMIC/containers/d4tools/d4tools.yaml new file mode 100644 index 000000000..84c5a5df6 
--- /dev/null +++ b/BALSAMIC/containers/d4tools/d4tools.yaml @@ -0,0 +1 @@ +- d4tools=0.3.10 diff --git a/BALSAMIC/models/config.py b/BALSAMIC/models/config.py index 67cc2ace0..2a6a96092 100644 --- a/BALSAMIC/models/config.py +++ b/BALSAMIC/models/config.py @@ -8,6 +8,7 @@ from pydantic import AfterValidator, BaseModel, field_validator, model_validator from BALSAMIC import __version__ as balsamic_version +from BALSAMIC.constants.cluster import QOS from BALSAMIC.constants.analysis import ( AnalysisType, AnalysisWorkflow, @@ -215,6 +216,8 @@ class ConfigModel(BaseModel): analysis: Field(AnalysisModel); Pydantic model containing workflow variables custom_filters: Field(CustomFilters); custom parameters for variant filtering sentieon: Field(required); Sentieon model attributes + qos: Field(QOS); + account: Cluster account to run jobs This class also contains functions that help retrieve sample and file information, facilitating BALSAMIC run operations in Snakemake. @@ -242,6 +245,8 @@ class ConfigModel(BaseModel): analysis: AnalysisModel custom_filters: CustomFilters | None = None sentieon: Sentieon + qos: Optional[QOS] = None + account: Optional[str] = None @field_validator("reference") def abspath_as_str(cls, reference: Dict[str, Path]): diff --git a/BALSAMIC/models/sbatchsubmitter.py b/BALSAMIC/models/sbatchsubmitter.py new file mode 100644 index 000000000..99bb283e2 --- /dev/null +++ b/BALSAMIC/models/sbatchsubmitter.py @@ -0,0 +1,148 @@ +import os +import re +import textwrap +import subprocess +from pathlib import Path +from typing import Optional +from BALSAMIC.utils.io import write_yaml + + +class SbatchSubmitter: + """SLURM job submission model for running a Snakemake workflow. + + Attributes: + case_id (str) : Identifier for the analysis case. + script_path (Path) : Directory where the sbatch script will be created. + result_path (Path) : Directory where the job ID YAML file will be written. 
+ check_jobid_status_script (str): Python script for reporting failed or cancelled statuses of jobs in logdir + log_path (Path) : Directory where SLURM output and error logs will be written. + account (str) : SLURM account to charge for the job. + qos (str) : SLURM quality of service level. + max_run_hours (int) : Maximum allowed run time for the job, in hours. + snakemake_executable : Object representing the Snakemake command to be executed. + logger : Logger instance for capturing logs. + conda_env_path (str) : Path to the active conda environment, from $CONDA_PREFIX. + sbatch_script_path (Path) : Path to the generated sbatch script file. + """ + + def __init__( + self, + case_id: str, + script_path: Path, + result_path: Path, + scan_finished_jobid_status: str, + log_path: Path, + account: str, + qos: str, + max_run_hours: int, + snakemake_executable, + logger, + ): + self.case_id = case_id + self.script_path = script_path + self.result_path = result_path + self.scan_finished_jobid_status = scan_finished_jobid_status + self.log_path = log_path + self.account = account + self.qos = qos + self.max_run_hours = max_run_hours + self.snakemake_executable = snakemake_executable + self.log = logger + + self.conda_env_path = os.environ.get("CONDA_PREFIX", "") + self.sbatch_script_path = self.script_path / "BALSAMIC_snakemake_submit.sh" + + def create_sbatch_script(self) -> None: + self.log.info("Creating sbatch script to submit jobs.") + self.log.info(f"Using conda environment: {self.conda_env_path}") + + sbatch_header = textwrap.dedent( + f"""\ + #!/bin/bash -l + #SBATCH --account={self.account} + #SBATCH --job-name=BALSAMIC_snakemake_submit.{self.case_id}.%j + #SBATCH --output={self.log_path}/BALSAMIC_snakemake_submit.{self.case_id}.%j.out + #SBATCH --error={self.log_path}/BALSAMIC_snakemake_submit.{self.case_id}.%j.err + #SBATCH --ntasks=1 + #SBATCH --mem=5G + #SBATCH --time={self.max_run_hours}:00:00 + #SBATCH --qos={self.qos} + #SBATCH --cpus-per-task=1 + """ + ) + + 
# Run snakemake workflow + sbatch_command = f"\nconda run -p {self.conda_env_path} {self.snakemake_executable.get_command()}\n" + + # Check the status of submitted jobs + job_status_check = f"\nconda run -p {self.conda_env_path} python {self.scan_finished_jobid_status} {self.log_path} --output {self.result_path}/analysis_status.txt\n" + + # Check the final success status of the workflow + success_status_check = textwrap.dedent( + f"""\n + if [[ -f "{self.result_path}/analysis_status.txt" ]]; then + STATUS=$(cat "{self.result_path}/analysis_status.txt") + echo "Snakemake analysis status: $STATUS" + if [[ "$STATUS" != "SUCCESS" ]]; then + echo "Analysis failed: $STATUS" + exit 1 + fi + else + echo "No status file found; assuming failure" + exit 2 + fi \n + """ + ) + + full_script = ( + sbatch_header + sbatch_command + job_status_check + success_status_check + ) + + with open(self.sbatch_script_path, "w") as f: + f.write(full_script) + + self.log.info(f"Sbatch script written to: {self.sbatch_script_path}") + + def submit_job(self) -> Optional[str]: + """Submit the generated sbatch script to the SLURM scheduler. + + Returns: + Optional[str]: The SLURM job ID if the submission is successful, otherwise None. + """ + command = ["sbatch", str(self.sbatch_script_path)] + self.log.info(f"Submitting job with command: {' '.join(command)}") + + try: + result = subprocess.run( + command, + capture_output=True, + text=True, + check=True, + ) + output = result.stdout.strip() + match = re.search(r"Submitted batch job (\d+)", output) + if match: + job_id = match.group(1) + self.log.info(f"Job submitted successfully with Job ID: {job_id}") + return job_id + else: + self.log.warning( + f"Could not extract Job ID from sbatch output: {output}" + ) + except subprocess.CalledProcessError as e: + self.log.error(f"sbatch submission failed: {e.stderr.strip()}") + + return None + + def write_job_id_yaml(self, job_id: str) -> None: + """Write the submitted job ID to a YAML file. 
+ + The file is saved at `case_id/analysis/slurm_jobids.yaml` and stores the job ID + under the corresponding `case_id`. + + Args: + job_id (str): The SLURM job ID to record. + """ + yaml_path = self.result_path / "slurm_jobids.yaml" + write_yaml({self.case_id: [job_id]}, yaml_path) + self.log.info(f"Job ID written to {yaml_path}") diff --git a/BALSAMIC/models/scheduler.py b/BALSAMIC/models/scheduler.py deleted file mode 100644 index 4a4e77247..000000000 --- a/BALSAMIC/models/scheduler.py +++ /dev/null @@ -1,199 +0,0 @@ -"""Scheduler models.""" -import logging -import subprocess -import sys -from pathlib import Path -from re import Match, search -from typing import Any, Dict, List, Optional - -from pydantic import BaseModel, DirectoryPath, Field, FilePath, field_validator -from pydantic_core.core_schema import ValidationInfo - -from BALSAMIC.constants.cluster import QOS, ClusterMailType, ClusterProfile -from BALSAMIC.utils.utils import remove_unnecessary_spaces - -LOG = logging.getLogger(__name__) - - -class Scheduler(BaseModel): - """ - Scheduler model handling cluster job submissions. - - Attributes: - account (str) : Cluster account to run jobs. - benchmark (Optional[bool]) : Flag to profile slurm jobs. - case_id (str) : Case identifier. - dependencies (Optional[List[str]]) : List of job dependencies. - job_properties (Dict[str, Any]) : Job properties defined in a snakemake jobscript. - job_script (FilePath) : Snakemake job script path. - log_dir (DirectoryPath) : Logging directory. - mail_type (Optional[ClusterMailType]) : Email type triggering job status notifications. - mail_user (Optional[str]) : User email to receive job status notifications. - profile (ClusterProfile) : Cluster profile to submit jobs. - profiling_interval (Optional[int]) : Sampling interval for a profiling type. - profiling_type (Optional[str]) : Collected data types. - qos (Optional[QOS]) : QOS for sbatch jobs. 
- """ - - account: str - benchmark: Optional[bool] = False - case_id: str - dependencies: Optional[List[str]] = None - job_properties: Dict[str, Any] - job_script: FilePath - log_dir: DirectoryPath - mail_type: Optional[ClusterMailType] = Field(default=None, validate_default=True) - mail_user: Optional[str] = Field(default=None, validate_default=True) - profile: ClusterProfile - profiling_interval: int = 10 - profiling_type: str = "task" - qos: Optional[QOS] = QOS.LOW - - @field_validator("account") - def get_account_option(cls, account: str) -> str: - """Return string representation of the account option.""" - return f"--account {account}" - - @field_validator("mail_type") - def get_mail_type_option( - cls, mail_type: Optional[ClusterMailType], info: ValidationInfo - ) -> str: - """Return string representation of the mail_type option.""" - if mail_type: - return f"--mail-type {mail_type}" - cluster_mail_type: Optional[ClusterMailType] = ( - info.data["job_properties"]["cluster"].get("mail_type") - if info.data.get("job_properties") - else None - ) - if cluster_mail_type: - return f"--mail-type {cluster_mail_type}" - return "" - - @field_validator("mail_user") - def get_mail_user_option(cls, mail_user: Optional[str]) -> str: - """Return string representation of the mail_user option.""" - if mail_user: - return f"--mail-user {mail_user}" - return "" - - @field_validator("qos") - def get_qos_option(cls, qos: Optional[QOS]) -> str: - """Return string representation of the mail_user option.""" - if qos: - return f"--qos {qos}" - return "" - - def get_dependency_option(self) -> str: - """Return string representation of the dependency option.""" - if self.dependencies: - dependencies: str = ",".join( - [job for job in self.dependencies if job.isdigit()] - ) - return f"--dependency afterok:{dependencies}" - return "" - - def get_error_option(self) -> str: - """Return the standard error file path.""" - return f"--error {Path(self.log_dir, 
f'{Path(self.job_script).name}.%j.err').as_posix()}" - - def get_output_option(self) -> str: - """Return the standard output file path.""" - return f"--output {Path(self.log_dir, f'{Path(self.job_script).name}.%j.out').as_posix()}" - - def get_profile_option(self) -> str: - """Return string representation of the slurm profile option.""" - if self.benchmark and self.profile == ClusterProfile.SLURM: - return f"--profile {self.profiling_type}" - return "" - - def get_acctg_freq_option(self) -> str: - """Return string representation of the profiling sampling intervals in seconds option.""" - if self.benchmark and self.profile == ClusterProfile.SLURM: - return f"--acctg-freq {self.profiling_type}={self.profiling_interval}" - return "" - - def get_ntasks_option(self) -> str: - """Return the maximum number of tasks for allocation.""" - ntasks: str = self.job_properties["cluster"].get("n") - if ntasks: - return f"--ntasks {ntasks}" - return "" - - def get_memory_option(self) -> str: - """Return the maximum memory allocation.""" - mem: str = self.job_properties["cluster"].get("mem") - if mem: - return f"--mem {mem}MB" - return "" - - def get_time_option(self) -> str: - """Return the allocation time.""" - time: str = self.job_properties["cluster"].get("time") - if time: - return f"--time {time}" - return "" - - def get_partition_option(self) -> str: - """Return the specific partition for the resource allocation.""" - partition: str = self.job_properties["cluster"].get("partition") - if partition: - return f"--partition {partition}" - return "" - - @staticmethod - def get_job_id_from_stdout(stdout: str) -> str: - """Return job ID from the standard output.""" - job_id_match: Match[str] = search("Submitted batch job (\d+)", stdout) - if job_id_match: - job_id: str = job_id_match.group(1) - LOG.info(f"Submitted job with ID: {job_id}") - return job_id - LOG.error("Failed to extract job ID from the submission result") - raise ValueError - - def write_job_log_data(self, job_id: 
str, command: str) -> None: - """Write accounting information for jobs.""" - log_path: Path = Path(self.log_dir, f"{self.case_id}.sacct") - with open(log_path, "a") as file: - file.write(f"{job_id},{command}\n") - - def get_command(self) -> str: - """Return the command to submit a specific job to the cluster.""" - command: str = ( - f"sbatch " - f"{self.account} " - f"{self.mail_type} " - f"{self.mail_user} " - f"{self.qos} " - f"{self.get_dependency_option()} " - f"{self.get_error_option()} " - f"{self.get_output_option()} " - f"{self.get_profile_option()} " - f"{self.get_acctg_freq_option()} " - f"{self.get_ntasks_option()} " - f"{self.get_time_option()} " - f"{self.get_memory_option()} " - f"{self.get_partition_option()} " - f"{self.job_script} " - ) - return remove_unnecessary_spaces(command) - - def submit_job(self) -> None: - """Submit a job to the cluster.""" - cluster_command: str = self.get_command() - try: - result: subprocess.CompletedProcess = subprocess.run( - cluster_command, - check=True, - shell=True, - stdout=subprocess.PIPE, - text=True, - ) - job_id: str = self.get_job_id_from_stdout(result.stdout) - self.write_job_log_data(job_id=job_id, command=cluster_command) - # Send job ID to stdout for dependency parsing - print(job_id, file=sys.stdout) - except Exception: - LOG.error(f"Failed to submit: {cluster_command}") - raise diff --git a/BALSAMIC/models/snakemake.py b/BALSAMIC/models/snakemake.py index 577e7387a..921df5c4b 100644 --- a/BALSAMIC/models/snakemake.py +++ b/BALSAMIC/models/snakemake.py @@ -3,11 +3,10 @@ from pathlib import Path from typing import List, Optional -from pydantic import BaseModel, DirectoryPath, Field, FilePath, field_validator +from pydantic import BaseModel, DirectoryPath, FilePath from BALSAMIC.constants.analysis import RunMode -from BALSAMIC.constants.cluster import MAX_JOBS, QOS, ClusterMailType, ClusterProfile -from BALSAMIC.constants.paths import IMMEDIATE_SUBMIT_PATH +from BALSAMIC.constants.cluster import MAX_JOBS, 
QOS from BALSAMIC.utils.utils import remove_unnecessary_spaces @@ -28,17 +27,14 @@ class SnakemakeExecutable(BaseModel): Attributes: account (Optional[str]) : Scheduler account. - benchmark (Optional[bool]) : Slurm jobs profiling option. case_id (str) : Analysis case name. - cluster_config_path (Optional[FilePath]) : Cluster configuration file path. config_path (FilePath) : Sample configuration file. - disable_variant_caller (Optional[str]) : Disable variant caller. - dragen (Optional[bool]) : FLag for enabling or disabling Dragen suite. + dragen (Optional[bool]) : Flag for enabling or disabling Dragen suite. force (bool) : Force snakemake execution. log_dir (Optional[DirectoryPath]) : Logging directory. - mail_type (Optional[ClusterMailType]) : Email type triggering job status notifications. - mail_user (Optional[str]) : User email to receive job status notifications. - profile (Optional[ClusterProfile]) : Cluster profile to submit jobs. + cluster_profile (Path) : Directory containing snakemake cluster profile. + cluster_job_status_script (FilePath) : Path to script for snakemake to parse more slurm job-statuses. + workflow_profile (Path) : Directory containing snakemake workflow profile specifying rule resources. qos (Optional[QOS]) : QOS for sbatch jobs. quiet (Optional[bool]) : Quiet mode for snakemake. run_analysis (bool) : Flag to run the actual analysis. 
@@ -52,17 +48,14 @@ class SnakemakeExecutable(BaseModel): """ account: Optional[str] = None - benchmark: bool = False case_id: str - cluster_config_path: Optional[FilePath] = None config_path: FilePath - disable_variant_caller: Optional[str] = Field(default=None, validate_default=True) dragen: bool = False force: bool = False log_dir: Optional[DirectoryPath] = None - mail_type: Optional[ClusterMailType] = None - mail_user: Optional[str] = None - profile: Optional[ClusterProfile] = None + cluster_profile: Path + cluster_job_status_script: FilePath + workflow_profile: Path qos: Optional[QOS] = None quiet: bool = False run_analysis: bool = False @@ -73,26 +66,20 @@ class SnakemakeExecutable(BaseModel): snakemake_options: Optional[List[str]] = None working_dir: Path - @field_validator("disable_variant_caller") - def get_disable_variant_caller_option(cls, disable_variant_caller: str) -> str: - """Return string representation of the disable_variant_caller option.""" - if disable_variant_caller: - return f"disable_variant_caller={disable_variant_caller}" - return "" - - def get_config_files_option(self) -> str: - """Return string representation of the config files.""" - config_files_option: str = f"--configfiles {self.config_path.as_posix()}" - if self.cluster_config_path: - config_files_option += f" {self.cluster_config_path.as_posix()}" - return config_files_option + def get_config_file_option(self) -> str: + """Return string representation of the config file.""" + return f"--configfile {self.config_path.as_posix()}" def get_config_options(self) -> str: """Return Snakemake config options to be submitted.""" return remove_unnecessary_spaces( - f"--config {self.disable_variant_caller} {self.get_dragen_flag()}" + f"{f'--config {self.get_dragen_flag()}' if self.get_dragen_flag() else ''} " ) + def get_cluster_status_script(self) -> str: + """Return cluster-status argument.""" + return f'--cluster-status "python {self.cluster_job_status_script.as_posix()}"' + def 
get_dragen_flag(self) -> str: """Return string representation of the dragen flag.""" if self.dragen: @@ -140,13 +127,14 @@ def get_command(self) -> str: f"snakemake --notemp -p --rerun-trigger mtime " f"--directory {self.working_dir.as_posix()} " f"--snakefile {self.snakefile.as_posix()} " - f"{self.get_config_files_option()} " + f"{self.get_config_file_option()} " + f"{self.get_config_options()} " f"{self.get_singularity_bind_paths_option()} " f"{self.get_quiet_flag()} " f"{self.get_force_flag()} " f"{self.get_run_analysis_flag()} " f"{self.get_snakemake_cluster_options()} " - f"{self.get_config_options()} " + f"{self.get_cluster_status_script()} " f"{self.get_snakemake_options_command()}" ) return remove_unnecessary_spaces(snakemake_command) @@ -155,27 +143,9 @@ def get_snakemake_cluster_options(self) -> str: """Return Snakemake cluster options to be submitted.""" if self.run_mode == RunMode.CLUSTER: snakemake_cluster_options: str = ( - f"--immediate-submit -j {MAX_JOBS} " + f"-j {MAX_JOBS} " f"--jobname BALSAMIC.{self.case_id}.{{rulename}}.{{jobid}}.sh " - f"--cluster-config {self.cluster_config_path.as_posix()} " - f"--cluster {self.get_cluster_submit_command()}" + f"--profile {self.cluster_profile} --workflow-profile {self.workflow_profile}" ) return remove_unnecessary_spaces(snakemake_cluster_options) - return "" - - def get_cluster_submit_command(self) -> str: - """Get cluster command to be submitted by Snakemake.""" - cluster_submit_command: str = ( - f"'{sys.executable} {IMMEDIATE_SUBMIT_PATH.as_posix()} " - f"--account {self.account} " - f"{'--benchmark' if self.benchmark else ''} " - f"--log-dir {self.log_dir.as_posix()} " - f"{f'--mail-type {self.mail_type}' if self.mail_type else ''} " - f"{f'--mail-user {self.mail_user}' if self.mail_user else ''} " - f"--profile {self.profile} " - f"--qos {self.qos} " - f"--script-dir {self.script_dir.as_posix()} " - f"{self.case_id} " - "{dependencies}'" - ) - return remove_unnecessary_spaces(cluster_submit_command) 
+ return "--default-resources mem_mb=32000 threads=8" diff --git a/BALSAMIC/snakemake_rules/align/bam_compress.rule b/BALSAMIC/snakemake_rules/align/bam_compress.rule index 28f793a18..9c7c32a0c 100644 --- a/BALSAMIC/snakemake_rules/align/bam_compress.rule +++ b/BALSAMIC/snakemake_rules/align/bam_compress.rule @@ -14,13 +14,11 @@ rule bam_compress_tumor: params: sample_id = "{sample}", housekeeper_id= {"id": "{sample}", "tags": "tumor"} - threads: - get_threads(cluster_config, "bam_compress") message: "Compressing bam to cram for {params.sample_id}" shell: """ -samtools view -h -T {input.fasta} --threads {threads} -C -o {output.cram} {input.bam}; +samtools view -h -T {input.fasta} --threads {resources.threads} -C -o {output.cram} {input.bam}; samtools index {output.cram}; """ @@ -37,12 +35,10 @@ rule bam_compress_normal: params: sample_id = "{sample}", housekeeper_id= {"id": "{sample}", "tags": "normal"} - threads: - get_threads(cluster_config, "bam_compress") message: "Compressing bam to cram for {params.sample_id}" shell: """ -samtools view -h -T {input.fasta} --threads {threads} -C -o {output.cram} {input.bam}; +samtools view -h -T {input.fasta} --threads {resources.threads} -C -o {output.cram} {input.bam}; samtools index {output.cram}; """ diff --git a/BALSAMIC/snakemake_rules/align/tga_bam_postprocess.rule b/BALSAMIC/snakemake_rules/align/tga_bam_postprocess.rule index 3584ab173..f4b310cc5 100644 --- a/BALSAMIC/snakemake_rules/align/tga_bam_postprocess.rule +++ b/BALSAMIC/snakemake_rules/align/tga_bam_postprocess.rule @@ -17,8 +17,6 @@ rule sentieon_dedup_consensus: sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, sample_id = "{sample}" - threads: - get_threads(cluster_config, 'sentieon_dedup') message: ("Collects read information using sentieon LocusCollector and collapse duplicate reads based on UMIs. 
" "Current sample: {params.sample_id}") @@ -29,7 +27,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ --algo LocusCollector \ @@ -37,7 +35,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; --consensus --umi_tag XR --fun score_info {output.score} ; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ --algo Dedup \ @@ -69,8 +67,6 @@ rule samtools_fixmate: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), postprocess_fixmate_script = get_script_path("postprocess_fixmate_bam.awk"), sample_id="{sample}" - threads: - get_threads(cluster_config,"samtools_fixmate") message: "Running samtools collate and fixmate for: {params.sample_id}" shell: @@ -79,11 +75,11 @@ export TMPDIR={params.tmpdir} ; mkdir -p {params.tmpdir}/collate ; mkdir -p {params.tmpdir}/sort ; -samtools collate -@ {threads} -O -u {input.bam} {params.tmpdir}/collate/{wildcards.sample}_collate_tmp | \ -samtools fixmate -O SAM --reference {input.ref} -@ {threads} - - | \ +samtools collate -@ {resources.threads} -O -u {input.bam} {params.tmpdir}/collate/{wildcards.sample}_collate_tmp | \ +samtools fixmate -O SAM --reference {input.ref} -@ {resources.threads} - - | \ awk -f {params.postprocess_fixmate_script} - | \ -samtools sort -@ {threads} -m 4G -O BAM -T {params.tmpdir}/sort/{wildcards.sample}_sort_tmp - | \ -samtools calmd -@ {threads} -b - {input.ref} > {output.bam} ; +samtools sort -@ {resources.threads} -m 4G -O BAM -T {params.tmpdir}/sort/{wildcards.sample}_sort_tmp - | \ +samtools calmd -@ {resources.threads} -b - {input.ref} > {output.bam} ; samtools index {output.bam} ; rm -rf {params.tmpdir} @@ -102,12 +98,10 @@ rule cap_base_quality: sample_id = "{sample}", cap_base_qualities = get_script_path("cap_base_quality_in_bam.py"), max_quality = params.bam_post_processing.manta_max_base_quality - threads: - 
get_threads(cluster_config, "cap_base_quality") message: "Capping base qualities to {params.max_quality} in bamfile using pysam in sample: {params.sample_id}" shell: """ python {params.cap_base_qualities} --max-quality {params.max_quality} {input.bam} {output.bam} ; -samtools index -@ {threads} {output.bam} +samtools index -@ {resources.threads} {output.bam} """ diff --git a/BALSAMIC/snakemake_rules/align/tga_sentieon_alignment.rule b/BALSAMIC/snakemake_rules/align/tga_sentieon_alignment.rule index 2fc85d04e..609800b84 100644 --- a/BALSAMIC/snakemake_rules/align/tga_sentieon_alignment.rule +++ b/BALSAMIC/snakemake_rules/align/tga_sentieon_alignment.rule @@ -18,8 +18,6 @@ rule sentieon_align_sort_umireads: sample_id = "{sample}", sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True), ip_bases = params.umicommon.align_intbases, - threads: - get_threads(cluster_config, 'sentieon_align_sort') message: ("Align fastq reads using sentieon bwa-mem and sort reads using samtools for sample type: " "{params.sample_type} : {params.sample_id}") @@ -32,7 +30,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} bwa mem \ -R '@RG\\tID:{params.sample_id}\\tSM:{params.sample_type}\\tPL:ILLUMINA' \ -K {params.ip_bases} \ --t {threads} -C \ +-t {resources.threads} -C \ {input.ref} {input.fastq_r1} {input.fastq_r2} | \ {params.sentieon_exec} util sort \ -r {input.ref} \ diff --git a/BALSAMIC/snakemake_rules/align/wgs_bam_postprocess.rule b/BALSAMIC/snakemake_rules/align/wgs_bam_postprocess.rule index 420ba3528..b5a5d64ef 100644 --- a/BALSAMIC/snakemake_rules/align/wgs_bam_postprocess.rule +++ b/BALSAMIC/snakemake_rules/align/wgs_bam_postprocess.rule @@ -14,8 +14,6 @@ rule sentieon_dedup: sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, sample_id = "{sample}" - threads: - get_threads(cluster_config, 'sentieon_dedup') message: ("Collects read information using 
sentieon LocusCollector and mark duplicated reads. " "Current sample: {params.sample_id}") @@ -28,13 +26,13 @@ export SENTIEON_LICENSE={params.sentieon_lic}; shell_bam_files=$(echo {input.bam_files} | sed 's/ / -i /g') ; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -i $shell_bam_files \ --algo LocusCollector \ --fun score_info {output.score} ; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -i $shell_bam_files \ --algo Dedup \ --score_info {output.score} \ @@ -59,8 +57,6 @@ rule sentieon_realign: sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, sample_id = "{sample}" - threads: - get_threads(cluster_config, 'sentieon_realign') message: "INDEL realignment using sentieon realigner for sample: {params.sample_id}" shell: @@ -71,7 +67,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ -r {input.ref} \ --t {threads} \ +-t {resources.threads} \ -i {input.bam} \ --algo Realigner \ -k {input.mills} \ @@ -99,8 +95,6 @@ rule sentieon_base_calibration: sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, sample = "{sample_type}" - threads: - get_threads(cluster_config, 'sentieon_base_calibration') message: "Base recalibration using Sentieon tools for {params.sample}" shell: @@ -111,7 +105,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ -r {input.ref} \ --t {threads} \ +-t {resources.threads} \ -i {input.bam} \ --algo QualCal \ -k {input.dbsnp} \ @@ -120,7 +114,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ -r {input.ref} \ --t {threads} \ +-t {resources.threads} \ -i {input.bam} \ -q {output.recal_data_table} \ --algo QualCal \ @@ -129,7 +123,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; -k {input.indel_1kg} {output.recal_data_table}.post; {params.sentieon_exec} driver \ --t {threads} \ +-t 
{resources.threads} \ --algo QualCal \ --plot --before {output.recal_data_table} \ --after {output.recal_data_table}.post {output.qual_recal}; diff --git a/BALSAMIC/snakemake_rules/align/wgs_sentieon_alignment.rule b/BALSAMIC/snakemake_rules/align/wgs_sentieon_alignment.rule index 4a49d209b..1af69f283 100644 --- a/BALSAMIC/snakemake_rules/align/wgs_sentieon_alignment.rule +++ b/BALSAMIC/snakemake_rules/align/wgs_sentieon_alignment.rule @@ -17,8 +17,6 @@ rule sentieon_align_sort: sample_id = "{sample}", sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True), fastq_pattern = "{fastq_pattern}" - threads: - get_threads(cluster_config, 'sentieon_align_sort') message: ("Align fastq reads using sentieon bwa-mem and sort reads using samtools for sample type: " "{params.sample_type} : {params.sample_id}, {params.fastq_pattern}") @@ -30,12 +28,12 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} bwa mem -M \ -R '@RG\\tID:{wildcards.fastq_pattern}\\tSM:{params.sample_type}\\tPL:ILLUMINA' \ --t {threads} \ +-t {resources.threads} \ -K 50000000 \ {input.ref} {input.fastq_r1} {input.fastq_r2} \ | {params.sentieon_exec} util sort \ -o {output.bam_out} \ --t {threads} \ +-t {resources.threads} \ --block_size 3G \ --sam2bam -i -; diff --git a/BALSAMIC/snakemake_rules/annotation/final_vcf_reheader.rule b/BALSAMIC/snakemake_rules/annotation/final_vcf_reheader.rule index 0cac24155..e3173adb6 100644 --- a/BALSAMIC/snakemake_rules/annotation/final_vcf_reheader.rule +++ b/BALSAMIC/snakemake_rules/annotation/final_vcf_reheader.rule @@ -10,8 +10,6 @@ rule create_final_vcf_namemap: status_to_sample_id = status_to_sample_id message: "Creating final vcf namemap." 
- threads: - get_threads(cluster_config, "create_final_vcf_namemap") shell: """ echo -e {params.status_to_sample_id} > {output.namemap}; diff --git a/BALSAMIC/snakemake_rules/annotation/germline_annotation.rule b/BALSAMIC/snakemake_rules/annotation/germline_annotation.rule index ccfcebf5b..ed37c99c8 100644 --- a/BALSAMIC/snakemake_rules/annotation/germline_annotation.rule +++ b/BALSAMIC/snakemake_rules/annotation/germline_annotation.rule @@ -18,8 +18,6 @@ rule vep_annotate_germlineVAR_tumor: sample = 'tumor', vep_cache = config["reference"]["vep_dir"], vep_defaults = params.vep.vep_filters - threads: - get_threads(cluster_config, 'vep_germline') message: "Running vep annotation on germline variants for {params.sample} sample" shell: @@ -33,7 +31,7 @@ vep \ --dir_plugins $vep_path \ --input_file {input.vcf} \ --output_file {output.vcf_tumor} \ ---fork {threads} \ +--fork {resources.threads} \ {params.vep_defaults} \ --custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA; @@ -57,8 +55,6 @@ rule vep_annotate_germlineVAR_normal: sample = 'normal', vep_cache = config["reference"]["vep_dir"], vep_defaults = params.vep.vep_filters - threads: - get_threads(cluster_config, 'vep_germline') message: "Running vep annotation on germline variants for {params.sample} sample" shell: @@ -72,7 +68,7 @@ vep \ --dir_plugins $vep_path \ --input_file {input.vcf} \ --output_file {output.vcf_normal} \ ---fork {threads} \ +--fork {resources.threads} \ {params.vep_defaults} \ --custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA; diff --git a/BALSAMIC/snakemake_rules/annotation/msi_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/msi_tumor_normal.rule index 167b188a6..9f04458c6 100644 --- a/BALSAMIC/snakemake_rules/annotation/msi_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/annotation/msi_tumor_normal.rule @@ -5,15 +5,12 @@ rule msisensorpro_scan_reference: input: fa = config["reference"]["reference_genome"], - wake_up = result_dir + "start_analysis" 
output: msi_scan = f"{vcf_dir}MSI.somatic.{config['analysis']['case_id']}.msisensorpro.list" benchmark: Path(f"{benchmark_dir}msisensorpro_scan_reference_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("msisensorpro") + ".sif").as_posix() - threads: - get_threads(cluster_config, "msisensorpro_scan_reference") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), case_id = config["analysis"]["case_id"], @@ -37,8 +34,6 @@ rule msisensorpro_msi_tumor_normal: Path(f"{benchmark_dir}/msisensorpro_msi_tumor_normal_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("msisensorpro") + ".sif").as_posix() - threads: - get_threads(cluster_config,"msisensorpro_msi_tumor_normal") params: tmpdir=tempfile.mkdtemp(prefix=tmp_dir), case_id=config["analysis"]["case_id"], @@ -47,7 +42,7 @@ rule msisensorpro_msi_tumor_normal: "Analysing MSI using msisensor-pro for {params.case_id}" shell: """ -msisensor-pro msi -b {threads} -z 1 -d {input.msi_list} -t {input.bamT} -n {input.bamN} -o {params.tmpdir}/msi_{params.case_id}; +msisensor-pro msi -b {resources.threads} -z 1 -d {input.msi_list} -t {input.bamT} -n {input.bamN} -o {params.tmpdir}/msi_{params.case_id}; sed 's/\%/MSI/g' {params.tmpdir}/msi_{params.case_id} > {output.msi_result}; diff --git a/BALSAMIC/snakemake_rules/annotation/rankscore.rule b/BALSAMIC/snakemake_rules/annotation/rankscore.rule index 31850ffd1..d425f6c09 100644 --- a/BALSAMIC/snakemake_rules/annotation/rankscore.rule +++ b/BALSAMIC/snakemake_rules/annotation/rankscore.rule @@ -16,8 +16,6 @@ rule genmod_score_snvs: params: case_name = "{case_name}", housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "scored"} - threads: - get_threads(cluster_config, 'genmod_score_snvs') message: ("Scoring annotated SNV variants using genmod for {params.case_name}") shell: diff --git a/BALSAMIC/snakemake_rules/annotation/somatic_computations.rule 
b/BALSAMIC/snakemake_rules/annotation/somatic_computations.rule index a5c935857..7b558d6b9 100644 --- a/BALSAMIC/snakemake_rules/annotation/somatic_computations.rule +++ b/BALSAMIC/snakemake_rules/annotation/somatic_computations.rule @@ -18,8 +18,6 @@ rule tmb_calculation: bed = config["panel"]["capture_kit"] if "panel" in config else "", message_text = "{var_type}.somatic.{case_name}.{var_caller}.research", tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - threads: - get_threads(cluster_config, "vep") message: "Calculating TMB score for {params.message_text}" shell: diff --git a/BALSAMIC/snakemake_rules/annotation/somatic_snv_annotation.rule b/BALSAMIC/snakemake_rules/annotation/somatic_snv_annotation.rule index 7ec5adf8f..830fac2be 100644 --- a/BALSAMIC/snakemake_rules/annotation/somatic_snv_annotation.rule +++ b/BALSAMIC/snakemake_rules/annotation/somatic_snv_annotation.rule @@ -13,13 +13,11 @@ rule bcftools_get_somaticINDEL_research: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: message_text = "SNV.somatic.{case_name}.{var_caller}.research.vcf.gz", - threads: - get_threads(cluster_config, "bcftools_get_somaticINDEL_research") message: "Running bcftools to get INDELs from {params.message_text}" shell: """ -bcftools view --threads {threads} --output-type v --output-file {output.vcf_indel_research} --types indels {input.vcf_research}; +bcftools view --threads {resources.threads} --output-type v --output-file {output.vcf_indel_research} --types indels {input.vcf_research}; tabix -p vcf -f {output.vcf_indel_research} """ @@ -34,8 +32,6 @@ rule cadd_annotate_somaticINDEL_research: Path(singularity_image, config["bioinfo_tools"].get("cadd") + ".sif").as_posix() params: message_text = "SNV.somatic.{case_name}.{var_caller}.research.vcf.gz", - threads: - get_threads(cluster_config, "cadd_annotate_somaticINDEL_research") message: "Running cadd annotation for INDELs on {params.message_text}" shell: @@ -58,8 +54,6 @@ rule 
bcftools_annotate_somaticINDEL_research: params: header_line = temp(f"{vep_dir}cadd_header_line.txt"), message_text = "SNV.somatic.{case_name}.{var_caller}.research.vcf.gz", - threads: - get_threads(cluster_config, "bcftools_annotate_somaticINDEL_research") message: "Running bcftools to annotate INDELs on {params.message_text}" shell: @@ -69,7 +63,7 @@ echo '##INFO= {output.vcfanno_research_toml}; -vcfanno -p {threads} {output.vcfanno_research_toml} {input.vcf_snv_research} \ -| bcftools view --threads {threads} -O z -o {params.tmp_vcf_research} ; +vcfanno -p {resources.threads} {output.vcfanno_research_toml} {input.vcf_snv_research} \ +| bcftools view --threads {resources.threads} -O z -o {params.tmp_vcf_research} ; vep \ --dir $vep_path \ @@ -118,7 +110,7 @@ vep \ --dir_plugins $vep_path \ --input_file {params.tmp_vcf_research} \ --output_file {output.vcf_snv_unfiltered} \ ---fork {threads} \ +--fork {resources.threads} \ {params.vep_defaults} \ --custom {input.cosmic},COSMIC,vcf,exact,0,CDS,GENE,STRAND,CNT,AA ; @@ -144,16 +136,14 @@ rule vcfanno_annotate_somaticSNV_clinical: cancer_somatic_snv = cancer_somatic_snv_obs, vcfanno_clinical_annotations = dump_toml(clinical_annotations), vcfanno_clinical_toml = temp(vep_dir + "SNV.somatic.{case_name}.{var_caller}_vcfanno_clinical.toml"), - threads: - get_threads(cluster_config, "vcfanno_annotate_somaticSNV_clinical") message: "Running vcfanno annotation for single nuceotide variants on {params.message_text}" shell: """ if [[ -f "{params.clinical_snv}" || -f "{params.cancer_germline_snv}" || -f "{params.cancer_somatic_snv}" || -f "{params.artefact_snv}" ]]; then echo \'{params.vcfanno_clinical_annotations}\' > {params.vcfanno_clinical_toml}; - vcfanno -p {threads} {params.vcfanno_clinical_toml} {input.vcf_snv_research} | \ - bcftools view --threads {threads} -O z -o {output.vcf_snv_clinical}; + vcfanno -p {resources.threads} {params.vcfanno_clinical_toml} {input.vcf_snv_research} | \ + bcftools view --threads 
{resources.threads} -O z -o {output.vcf_snv_clinical}; else cp {input.vcf_snv_research} {output.vcf_snv_clinical}; fi diff --git a/BALSAMIC/snakemake_rules/annotation/somatic_sv_annotation.rule b/BALSAMIC/snakemake_rules/annotation/somatic_sv_annotation.rule index 3f1bfe352..6494884aa 100644 --- a/BALSAMIC/snakemake_rules/annotation/somatic_sv_annotation.rule +++ b/BALSAMIC/snakemake_rules/annotation/somatic_sv_annotation.rule @@ -4,35 +4,33 @@ rule vep_somatic_research_sv: - input: - vcf_research = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vcf.gz", - header = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map", - output: - vcf_research_vep = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vep.vcf.gz", - benchmark: - Path(benchmark_dir, "vep_somatic_research_sv." + config["analysis"]["case_id"] + ".svdb.tsv").as_posix() - singularity: - Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() - params: - message_text = "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vcf.gz", - vep_cache = config["reference"]["vep_dir"], - vep_defaults = params.vep.vep_filters - threads: - get_threads(cluster_config, "vep_somatic_research_sv") - message: - "Running vep annotation for structural and copy number variants on {params.message_text}" - shell: + input: + vcf_research = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vcf.gz", + header = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map", + output: + vcf_research_vep = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vep.vcf.gz", + benchmark: + Path(benchmark_dir, "vep_somatic_research_sv." + config["analysis"]["case_id"] + ".svdb.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("ensembl-vep") + ".sif").as_posix() + params: + message_text = "SV.somatic." 
+ config["analysis"]["case_id"] + ".svdb.research.vcf.gz", + vep_cache = config["reference"]["vep_dir"], + vep_defaults = params.vep.vep_filters + message: + "Running vep annotation for structural and copy number variants on {params.message_text}" + shell: """ vep_path=$(dirname $(readlink -f $(which vep))); export PERL5LIB=; -bcftools reheader --threads {threads} -s {input.header} {input.vcf_research} | \ -bcftools view --threads {threads} -O v | \ +bcftools reheader --threads {resources.threads} -s {input.header} {input.vcf_research} | \ +bcftools view --threads {resources.threads} -O v | \ vep \ --dir $vep_path \ --dir_cache {params.vep_cache} \ --dir_plugins $vep_path \ --output_file {output.vcf_research_vep} \ ---fork {threads} \ +--fork {resources.threads} \ {params.vep_defaults} \ tabix -p vcf -f {output.vcf_research_vep}; @@ -40,24 +38,22 @@ tabix -p vcf -f {output.vcf_research_vep}; rule svdb_annotate_somatic_research_sv: - input: - vcf_sv_research_vep = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vep.vcf.gz", - header = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map", - output: - vcf_research = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vcf.gz", - benchmark: - Path(benchmark_dir, "svdb_annotate_somatic_research_sv." + config["analysis"]["case_id"] + ".svdb.tsv").as_posix() - singularity: - Path(singularity_image, config["bioinfo_tools"].get("svdb") + ".sif").as_posix() - params: - swegen_sv_frequency = swegen_sv, - tmp_vcf = temp(vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.tmp.swegen.vcf"), - message_text = "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vep.vcf.gz", - threads: - get_threads(cluster_config, "svdb_annotate_somatic_research_sv") - message: - "Running SVDB for annotating research SVs with Swegen database on {params.message_text}" - shell: + input: + vcf_sv_research_vep = vep_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".svdb.research.vep.vcf.gz", + header = vcf_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.sample_name_map", + output: + vcf_research = vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vcf.gz", + benchmark: + Path(benchmark_dir, "svdb_annotate_somatic_research_sv." + config["analysis"]["case_id"] + ".svdb.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("svdb") + ".sif").as_posix() + params: + swegen_sv_frequency = swegen_sv, + tmp_vcf = temp(vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.tmp.swegen.vcf"), + message_text = "SV.somatic." + config["analysis"]["case_id"] + ".svdb.research.vep.vcf.gz", + message: + "Running SVDB for annotating research SVs with Swegen database on {params.message_text}" + shell: """ if [[ -f "{params.swegen_sv_frequency}" ]]; then svdb --query --bnd_distance 5000 --overlap 0.80 \ @@ -69,7 +65,6 @@ else fi tabix -p vcf -f {output.vcf_research}; - """ @@ -86,8 +81,6 @@ rule svdb_annotate_clinical_obs_somatic_clinical_sv: case_name = config["analysis"]["case_id"], clinical_sv_observations = clinical_sv, vcf_clinical_obs = temp(vep_dir + "SV.somatic." + config["analysis"]["case_id"] + ".svdb.clinical_obs.vcf"), - threads: - get_threads(cluster_config, "svdb_annotate_clinical_obs_somatic_clinical_sv") message: "Annotating structural and copy number variants with clinical observations using SVDB for {params.case_name}", shell: @@ -120,8 +113,6 @@ rule svdb_annotate_somatic_obs_somatic_clinical_sv: case_name = config["analysis"]["case_id"], somatic_sv_observations = somatic_sv, vcf_somatic_obs = temp(vep_dir + "SV.somatic." 
+ config["analysis"]["case_id"] + ".svdb.somatic_obs.vcf"), - threads: - get_threads(cluster_config, "svdb_annotate_somatic_obs_somatic_clinical_sv") message: "Annotating structural and copy number variants with clinical observations using SVDB for {params.case_name}", shell: diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule index c08b9dd42..a85898f80 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal.rule @@ -18,16 +18,14 @@ rule bcftools_filter_merged_research_tumor_normal: swegen_freq = get_tag_and_filtername(snv_research_filters, "SWEGENAF"), hard_filters = TgaSNVFilters.get_bcftools_filter_string(category="research", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_research_tumor_normal') message: "Filtering TGA tumor-normal merged annotated research variants using bcftools for {params.case_name}" shell: """ -bcftools view --threads {threads} {input.vcf_snv_research} |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_merged} -O z; +bcftools view --threads {resources.threads} {input.vcf_snv_research} |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= 
{params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_merged} -O z; tabix -p vcf -f {output.vcf_pass_merged}; @@ -54,18 +52,16 @@ rule bcftools_filter_merged_clinical_tumor_normal: hard_filters = TgaSNVFilters.get_bcftools_filter_string(category="clinical", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, case_name = '{case_name}', - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_clinical_tumor_normal') message: "Filtering TGA tumor-normal merged annotated clinical variants using bcftools for {params.case_name}" shell: """ -bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_snv_clinical} | \ -bcftools filter --threads {threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_merged}; +bcftools reheader --threads {resources.threads} -s {input.namemap} {input.vcf_snv_clinical} | \ +bcftools filter --threads {resources.threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ +bcftools filter 
--threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_merged}; tabix -p vcf -f {output.vcf_pass_merged}; diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_umi.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_umi.rule index 619f089bc..7d90b0aba 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_umi.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_umi.rule @@ -20,24 +20,22 @@ rule bcftools_filter_TNscope_umi_research_tumor_normal: case_name = '{case_name}', edit_vcf_script = get_script_path("edit_vcf_info.py"), variant_caller = "tnscope_umi" - threads: - get_threads(cluster_config, 'bcftools_filter_TNscope_umi_research_tumor_normal') message: "Filtering TNscope_umi tumor-normal annotated research variants using bcftools and " "adding FOUND_IN tags to the output VCF file for {params.case_name} " shell: """ -bcftools view --threads {threads} {input.vcf_snv_research} | \ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' 
-o {output.vcf_pass_tnscope_umi}.temp1 -O z; +bcftools view --threads {resources.threads} {input.vcf_snv_research} | \ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope_umi}.temp1 -O z; python {params.edit_vcf_script} \ --input_vcf {output.vcf_pass_tnscope_umi}.temp1 \ --output_vcf {output.vcf_pass_tnscope_umi}.temp2 \ --variant_caller {params.variant_caller}; -bgzip -@ {threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} +bgzip -@ {resources.threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} tabix -p vcf -f {output.vcf_pass_tnscope_umi}; @@ -70,26 +68,24 @@ rule bcftools_filter_TNscope_umi_clinical_tumor_normal: hard_filters = TgaUmiSNVFilters.get_bcftools_filter_string(category="clinical", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), edit_vcf_script = get_script_path("edit_vcf_info.py"), variant_caller = "tnscope_umi" - threads: - get_threads(cluster_config, 'bcftools_filter_TNscope_umi_clinical_tumor_normal') message: "Filtering TNscope_umi tumor-normal annotated clinical variants using bcftools and " "adding FOUND_IN tags to the output VCF file for {params.case_name} " shell: """ -bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_snv_clinical} |\ -bcftools filter --threads {threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || 
INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_tnscope_umi}.temp1; +bcftools reheader --threads {resources.threads} -s {input.namemap} {input.vcf_snv_clinical} |\ +bcftools filter --threads {resources.threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_tnscope_umi}.temp1; python {params.edit_vcf_script} \ --input_vcf {output.vcf_pass_tnscope_umi}.temp1 \ --output_vcf {output.vcf_pass_tnscope_umi}.temp2 \ --variant_caller {params.variant_caller}; -bgzip -@ {threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} +bgzip -@ {resources.threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} tabix -p vcf -f {output.vcf_pass_tnscope_umi}; diff --git 
a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_wgs.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_wgs.rule index 6451e2900..48dee40e9 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_wgs.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_normal_wgs.rule @@ -18,16 +18,14 @@ rule bcftools_filter_tnscope_research_tumor_normal: swegen_freq = get_tag_and_filtername(snv_research_filters, "SWEGENAF"), hard_filters = WgsSNVFilters.get_bcftools_filter_string(category="research", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_research_tumor_normal') message: "Filtering WGS tumor-normal tnscope annotated research variants using bcftools for {params.case_name}" shell: """ -bcftools view --threads {threads} {input.vcf_snv_research} |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope} -O z; +bcftools view --threads {resources.threads} {input.vcf_snv_research} |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope} -O z; tabix -p vcf -f {output.vcf_pass_tnscope}; @@ 
-54,18 +52,16 @@ rule bcftools_filter_tnscope_clinical_tumor_normal: hard_filters = WgsSNVFilters.get_bcftools_filter_string(category="clinical", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, case_name = '{case_name}', - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_clinical_tumor_normal') message: "Filtering WGS tumor-normal tnscope annotated clinical variants using bcftools for {params.case_name}" shell: """ -bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_snv_clinical} | \ -bcftools filter --threads {threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_tnscope}; +bcftools reheader --threads {resources.threads} -s {input.namemap} {input.vcf_snv_clinical} | \ +bcftools filter --threads {resources.threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= 
{params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_tnscope}; tabix -p vcf -f {output.vcf_pass_tnscope}; diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule index ebd1592de..ddae06fe4 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only.rule @@ -19,17 +19,15 @@ rule bcftools_filter_merged_research_tumor_only: hard_filters = TgaSNVFilters.get_bcftools_filter_string(category="research", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "research"}, case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_research_tumor_only') message: "Filtering TGA tumor-only merged annotated research variants using bcftools for {params.case_name}" shell: """ -bcftools view --threads {threads} {input.vcf_snv_research} | \ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_merged} -O z; +bcftools view --threads {resources.threads} {input.vcf_snv_research} | \ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= 
{params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_merged} -O z; tabix -p vcf -f {output.vcf_pass_merged}; @@ -55,20 +53,18 @@ rule bcftools_filter_merged_clinical_tumor_only: hard_filters = TgaSNVFilters.get_bcftools_filter_string(category="clinical", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "clinical"}, case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_clinical_tumor_only') message: "Filtering TGA tumor-only merged annotated clinical variants using bcftools for {params.case_name}" shell: """ bcftools view {input.vcf_snv_clinical} | \ -bcftools reheader --threads {threads} -s {input.namemap} | \ -bcftools filter --threads {threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' | \ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_merged} -O z; +bcftools reheader --threads {resources.threads} -s {input.namemap} | \ +bcftools filter --threads 
{resources.threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' | \ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_merged} -O z; tabix -p vcf -f {output.vcf_pass_merged}; diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_umi.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_umi.rule index b3bb170f4..b712d191d 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_umi.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_umi.rule @@ -20,24 +20,22 @@ rule bcftools_filter_TNscope_umi_research_tumor_only: case_name = '{case_name}', edit_vcf_script = get_script_path("edit_vcf_info.py"), variant_caller = "tnscope_umi" - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_umi_research_tumor_only') message: "Filtering tnscope_umi tumor-only annotated research variants using bcftools and " "adding FOUND_IN tags to the output VCF for {params.case_name}" shell: """ -bcftools view --threads {threads} {input.vcf_snv_research} | \ +bcftools view --threads {resources.threads} {input.vcf_snv_research} | \ bcftools filter --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter 
--threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope_umi}.temp1 -O z; +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope_umi}.temp1 -O z; python {params.edit_vcf_script} \ --input_vcf {output.vcf_pass_tnscope_umi}.temp1 \ --output_vcf {output.vcf_pass_tnscope_umi}.temp2 \ --variant_caller {params.variant_caller}; -bgzip -@ {threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} +bgzip -@ {resources.threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} tabix -p vcf -f {output.vcf_pass_tnscope_umi}; @@ -70,26 +68,24 @@ rule bcftools_filter_TNscope_umi_clinical_tumor_only: case_name='{case_name}', edit_vcf_script=get_script_path("edit_vcf_info.py"), variant_caller="tnscope_umi" - threads: - get_threads(cluster_config,'bcftools_filter_tnscope_umi_tumor_only') message: "Filtering tnscope_umi tumor-only annotated clinical variants using bcftools and " "adding FOUND_IN tags to the output VCF for {params.case_name}" shell: """ -bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_snv_clinical} |\ -bcftools filter --threads {threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' 
--soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_tnscope_umi}.temp1; +bcftools reheader --threads {resources.threads} -s {input.namemap} {input.vcf_snv_clinical} |\ +bcftools filter --threads {resources.threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -O z -o {output.vcf_pass_tnscope_umi}.temp1; python {params.edit_vcf_script} \ --input_vcf {output.vcf_pass_tnscope_umi}.temp1 \ --output_vcf {output.vcf_pass_tnscope_umi}.temp2 \ --variant_caller {params.variant_caller}; -bgzip -@ {threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} +bgzip -@ {resources.threads} -l 9 -c {output.vcf_pass_tnscope_umi}.temp2 > {output.vcf_pass_tnscope_umi} tabix -p vcf -f {output.vcf_pass_tnscope_umi}; diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_wgs.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_wgs.rule index ab913e5b2..1eaa771a9 100644 --- 
a/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_wgs.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_filter_tumor_only_wgs.rule @@ -19,18 +19,16 @@ rule bcftools_filter_tnscope_research_tumor_only: swegen_freq = get_tag_and_filtername(snv_research_filters, "SWEGENAF"), hard_filters = WgsSNVFilters.get_bcftools_filter_string(category="research", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_research_tumor_only') message: "Filtering WGS tumor-only tnscope annotated research variants using bcftools for {params.case_name}" shell: """ grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed -bcftools view --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf_snv_research} | \ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope} -O z; +bcftools view --threads {resources.threads} --regions-file {input.wgs_calling_file}.bed {input.vcf_snv_research} | \ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope} -O z; tabix -p vcf -f {output.vcf_pass_tnscope}; @@ -57,8 +55,6 @@ rule 
bcftools_filter_tnscope_clinical_tumor_only: hard_filters = WgsSNVFilters.get_bcftools_filter_string(category="clinical", analysis_type=analysis_type, soft_filter_normals=soft_filter_normal), housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "clinical"}, case_name = '{case_name}' - threads: - get_threads(cluster_config, 'bcftools_filter_tnscope_clinical_tumor_only') message: "Filtering WGS tumor-only tnscope annotated clinical variants using bcftools for {params.case_name}" shell: @@ -66,12 +62,12 @@ rule bcftools_filter_tnscope_clinical_tumor_only: grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed; bcftools view --regions-file {input.wgs_calling_file}.bed {input.vcf_snv_clinical} | \ -bcftools reheader --threads {threads} -s {input.namemap} | \ -bcftools filter --threads {threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' --soft-filter '{params.pop_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' | \ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' | \ -bcftools view --threads {threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope} -O z; +bcftools reheader --threads {resources.threads} -s {input.namemap} | \ +bcftools filter --threads {resources.threads} --include 'INFO/ArtefactFrq <= {params.artefact_freq[0]} || INFO/ArtefactFrq == \".\"' --soft-filter '{params.artefact_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/GNOMADAF_popmax <= {params.pop_freq[0]} || INFO/GNOMADAF_popmax == \".\"' 
--soft-filter '{params.pop_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' | \ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' | \ +bcftools view --threads {resources.threads} --exclude '{params.hard_filters}' -o {output.vcf_pass_tnscope} -O z; tabix -p vcf -f {output.vcf_pass_tnscope}; diff --git a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule index 6215b5c16..52b6a92a8 100644 --- a/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule +++ b/BALSAMIC/snakemake_rules/annotation/varcaller_sv_filter.rule @@ -17,14 +17,12 @@ rule bcftools_filter_sv_research: case_name = "{case_name}", swegen_freq = [SVDB_FILTERS.swegen_sv_freq.tag_value, SVDB_FILTERS.swegen_sv_freq.filter_name], housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "research"}, - threads: - get_threads(cluster_config, "bcftools_filter_svdb_research") message: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' {input.vcf_sv_research} |\ -bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' {input.vcf_sv_research} |\ +bcftools view --threads {resources.threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; @@ -48,16 +46,14 @@ rule bcftools_filter_sv_clinical: 
swegen_freq = [SVDB_FILTERS.swegen_sv_freq.tag_value, SVDB_FILTERS.swegen_sv_freq.filter_name], loqusdb_clinical_freq = [SVDB_FILTERS.loqusdb_clinical_sv_freq.tag_value, SVDB_FILTERS.loqusdb_clinical_sv_freq.filter_name], housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, - threads: - get_threads(cluster_config, "bcftools_filter_svdb_clinical") message: "Filtering merged clinical structural and copy number variants using bcftools for {params.case_name}" shell: """ -bcftools reheader --threads {threads} -s {input.namemap} {input.vcf_sv_clinical} |\ -bcftools filter --threads {threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ -bcftools filter --threads {threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ -bcftools view --threads {threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; +bcftools reheader --threads {resources.threads} -s {input.namemap} {input.vcf_sv_clinical} |\ +bcftools filter --threads {resources.threads} --include 'INFO/SWEGENAF <= {params.swegen_freq[0]} || INFO/SWEGENAF == \".\"' --soft-filter '{params.swegen_freq[1]}' --mode '+' |\ +bcftools filter --threads {resources.threads} --include 'INFO/Frq <= {params.loqusdb_clinical_freq[0]} || INFO/Frq == \".\"' --soft-filter '{params.loqusdb_clinical_freq[1]}' --mode '+' |\ +bcftools view --threads {resources.threads} -f .,PASS -O z -o {output.vcf_pass_svdb}; tabix -p vcf -f {output.vcf_pass_svdb}; diff --git a/BALSAMIC/snakemake_rules/annotation/vcf2cytosure_convert.rule b/BALSAMIC/snakemake_rules/annotation/vcf2cytosure_convert.rule index f1bc991ed..79ea68fe0 100644 --- a/BALSAMIC/snakemake_rules/annotation/vcf2cytosure_convert.rule +++ b/BALSAMIC/snakemake_rules/annotation/vcf2cytosure_convert.rule @@ -13,8 +13,6 @@ if config["analysis"]["sequencing_type"] != 'wgs': Path(benchmark_dir, 
'vcf2cytosure_convert_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("vcf2cytosure") + ".sif").as_posix() - threads: - get_threads(cluster_config, "vcf2cytosure_convert") params: case_name = config["analysis"]["case_id"], gender = config["analysis"]["gender"], @@ -36,8 +34,6 @@ elif config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["anal Path(benchmark_dir, 'vcf2cytosure_convert_tumor_only_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("vcf2cytosure") + ".sif").as_posix() - threads: - get_threads(cluster_config, "vcf2cytosure_convert") params: case_name = config["analysis"]["case_id"], gender = config["analysis"]["gender"], @@ -62,8 +58,6 @@ elif config["analysis"]["sequencing_type"] == "wgs" and config["analysis"]["anal Path(benchmark_dir, 'vcf2cytosure_convert_tumor_normal_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("vcf2cytosure") + ".sif").as_posix() - threads: - get_threads(cluster_config, "vcf2cytosure_convert") params: case_name = config["analysis"]["case_id"], gender= config["analysis"]["gender"], diff --git a/BALSAMIC/snakemake_rules/annotation/vcfheader_rename.rule b/BALSAMIC/snakemake_rules/annotation/vcfheader_rename.rule index 49f38d19d..4bc985214 100644 --- a/BALSAMIC/snakemake_rules/annotation/vcfheader_rename.rule +++ b/BALSAMIC/snakemake_rules/annotation/vcfheader_rename.rule @@ -16,15 +16,13 @@ rule vcfheader_rename_germline: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "genotype"}, sample_name = "NORMAL", sample_id = config_model.get_sample_name_by_type(SampleType.NORMAL) - threads: - get_threads(cluster_config,'vcfheader_rename_germline') message: "Renaming header in DNAscope germline VCF output from {params.sample_name} to {params.sample_id}" shell: """ echo -e 
\"{params.sample_name}\\t{params.sample_id}\" > {output.namemap}; -bcftools reheader --threads {threads} \ +bcftools reheader --threads {resources.threads} \ -s {output.namemap} {input.vcf} \ -o {output.vcf_dnascope}; diff --git a/BALSAMIC/snakemake_rules/cache/cadd.rule b/BALSAMIC/snakemake_rules/cache/cadd.rule index 3b3404a17..45a310ff2 100644 --- a/BALSAMIC/snakemake_rules/cache/cadd.rule +++ b/BALSAMIC/snakemake_rules/cache/cadd.rule @@ -10,7 +10,6 @@ rule index_cadd: cadd_snv_tbi=f"{cache_config.references.cadd_snv.file_path}.{FileType.TBI}" singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['tabix']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="index_cadd") message: "Indexing CADD file {input.cadd_snv}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/delly.rule b/BALSAMIC/snakemake_rules/cache/delly.rule index da16fffc4..1133a7db6 100644 --- a/BALSAMIC/snakemake_rules/cache/delly.rule +++ b/BALSAMIC/snakemake_rules/cache/delly.rule @@ -7,7 +7,6 @@ rule convert_delly_exclusion_file: exclusion_file=cache_config.references.delly_exclusion.file_path, output: exclusion_converted_file=cache_config.references.get_delly_exclusion_converted_file_path(), - threads: get_threads(cluster_config=cluster_config, rule_name="convert_delly_exclusion_file") message: "Converting delly exclusion file {input.exclusion_file}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/reference_download.rule b/BALSAMIC/snakemake_rules/cache/reference_download.rule index 043c39de9..9a75d01d8 100644 --- a/BALSAMIC/snakemake_rules/cache/reference_download.rule +++ b/BALSAMIC/snakemake_rules/cache/reference_download.rule @@ -11,7 +11,6 @@ rule download_references: reference=lambda wildcards: cache_config.get_reference_by_path( Path(config["references_dir"], wildcards.reference_path).as_posix() ), - threads: get_threads(cluster_config=cluster_config, rule_name="download_references") message: "Downloading reference file 
{output.reference_path}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/reference_genome_index.rule b/BALSAMIC/snakemake_rules/cache/reference_genome_index.rule index 27ae9caef..6077f2656 100644 --- a/BALSAMIC/snakemake_rules/cache/reference_genome_index.rule +++ b/BALSAMIC/snakemake_rules/cache/reference_genome_index.rule @@ -12,7 +12,6 @@ rule picard_dict_reference_genome: ), singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['picard']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="picard_dict_reference_genome") message: "Creating a sequence dictionary for a reference file {input.reference_genome}" benchmark: @@ -35,7 +34,7 @@ rule fasta_index_reference_genome: indexed_reference_genome=f"{cache_config.references.reference_genome.file_path}.{FileType.FAI}", singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['samtools']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="fasta_index_reference_genome") + message: "FASTA format indexing of the reference genome file {input.reference_genome}" benchmark: @@ -57,7 +56,6 @@ rule bwa_index_reference_genome: indexed_reference_genome=cache_config.references.get_reference_genome_bwa_index_file_paths(), singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['bwa']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="bwa_index_reference_genome") message: "BWA indexing of the reference genome file {input.reference_genome}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/reference_vcf.rule b/BALSAMIC/snakemake_rules/cache/reference_vcf.rule index 3ac6083b0..ede374213 100644 --- a/BALSAMIC/snakemake_rules/cache/reference_vcf.rule +++ b/BALSAMIC/snakemake_rules/cache/reference_vcf.rule @@ -14,7 +14,6 @@ rule compress_vcfs: vcf_gz=f"{{vcf}}.{FileType.GZ}", singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['bgzip']}.{FileType.SIF}" - threads: 
get_threads(cluster_config=cluster_config, rule_name="compress_vcfs") message: "Compressing VCF variant file {input.vcf}" benchmark: @@ -36,7 +35,6 @@ rule index_vcfs: vcf_gz_tbi=f"{{vcf}}.{FileType.GZ}.{FileType.TBI}", singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['tabix']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="index_vcfs") message: "Indexing VCF variant file {input.vcf_gz}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/refseq.rule b/BALSAMIC/snakemake_rules/cache/refseq.rule index d1c623146..ada314fa4 100644 --- a/BALSAMIC/snakemake_rules/cache/refseq.rule +++ b/BALSAMIC/snakemake_rules/cache/refseq.rule @@ -15,7 +15,6 @@ rule preprocess_refseq: refseq_script_path=REFSEQ_SCRIPT_PATH.as_posix(), singularity: f"{config['containers_dir']}/{config['bioinfo_tools']['bedtools']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="preprocess_refseq") message: "Preprocessing RefSeq's gene files {input.refgene_sql} and {input.refgene_txt}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/refseq_canfam.rule b/BALSAMIC/snakemake_rules/cache/refseq_canfam.rule deleted file mode 100644 index 2d97fb93a..000000000 --- a/BALSAMIC/snakemake_rules/cache/refseq_canfam.rule +++ /dev/null @@ -1,43 +0,0 @@ -"""Rules to process canine RefSeq's gene files.""" - - -rule preprocess_refseq_canfam: - """Preprocess RefSeq's gene files.""" - input: - singularity_image=f"{config['containers_dir']}/{config['bioinfo_tools']['bedtools']}.{FileType.SIF}", - refgene_sql=cache_config.references.refgene_sql.file_path, - refgene_txt=cache_config.references.refgene_txt.file_path, - output: - refgene_bed=cache_config.references.get_refgene_bed_file_path(), - refgene_flat=cache_config.references.get_refgene_flat_file_path(), - params: - refseq_script_path=REFSEQ_SCRIPT_PATH.as_posix(), - singularity: - f"{config['containers_dir']}/{config['bioinfo_tools']['bedtools']}.{FileType.SIF}" - threads: 
get_threads(cluster_config=cluster_config, rule_name="preprocess_refseq_canfam") - message: - "Preprocessing RefSeq's gene files {input.refgene_sql} and {input.refgene_txt}" - benchmark: - f"{cache_config.references.get_refgene_bed_file_path()}.benchmark.{FileType.TSV}" - log: - refgene_bed=f"{cache_config.references.get_refgene_bed_file_path()}.{FileType.LOG}", - refgene_flat=f"{cache_config.references.get_refgene_flat_file_path()}.{FileType.LOG}", - shell: - """ - # Generate RefSeq's BED file - ( - header=$(awk -f "{params.refseq_script_path}" "{input.refgene_sql}") - (echo \"$header\"; cat "{input.refgene_txt}") |\ - csvcut -t -c chrom,exonStarts,exonEnds,name,score,strand,exonCount,txStart,txEnd,name2 |\ - csvformat -T |\ - bedtools expand -c 2,3 |\ - awk '$1~/chr[1-9]/ && $1!~/[_]/' |\ - sort -k1,1 -k2,2n > "{output.refgene_bed}" - ) &> "{log.refgene_bed}" - - # Generate RefSeq's flat file - ( - awk -v OFS=\"\\t\" '$3!~/_/ {{ gsub(\"chr\",\"chr\",$3); $1=$13; print }}' "{input.refgene_txt}" |\ - cut -f 1-11 > "{output.refgene_flat}" - ) &> "{log.refgene_flat}" - """ diff --git a/BALSAMIC/snakemake_rules/cache/singularity_containers.rule b/BALSAMIC/snakemake_rules/cache/singularity_containers.rule index 5333c6f60..336c65ae9 100644 --- a/BALSAMIC/snakemake_rules/cache/singularity_containers.rule +++ b/BALSAMIC/snakemake_rules/cache/singularity_containers.rule @@ -13,7 +13,6 @@ rule download_containers: dockerhub_image=lambda wildcards: config["containers"][ wildcards.singularity_image ], - threads: get_threads(cluster_config=cluster_config, rule_name="download_containers") message: "Downloading singularity image {output.container_path}" benchmark: diff --git a/BALSAMIC/snakemake_rules/cache/vep.rule b/BALSAMIC/snakemake_rules/cache/vep.rule index ac06283a1..56fa16caf 100644 --- a/BALSAMIC/snakemake_rules/cache/vep.rule +++ b/BALSAMIC/snakemake_rules/cache/vep.rule @@ -13,7 +13,6 @@ rule download_vep: plugins=VEP_PLUGINS, singularity: 
f"{config['containers_dir']}/{config['bioinfo_tools']['ensembl-vep']}.{FileType.SIF}" - threads: get_threads(cluster_config=cluster_config, rule_name="download_vep") message: "Downloading and installing VEP package in {output.vep_dir}" benchmark: diff --git a/BALSAMIC/snakemake_rules/concatenation/concatenation.rule b/BALSAMIC/snakemake_rules/concatenation/concatenation.rule index 20a38a0ab..f98682f2d 100644 --- a/BALSAMIC/snakemake_rules/concatenation/concatenation.rule +++ b/BALSAMIC/snakemake_rules/concatenation/concatenation.rule @@ -3,7 +3,6 @@ rule concatenate_fastqs: input: - wake_up = result_dir + "start_analysis", fastqs_fwd=lambda wildcards: config_model.get_all_fastqs_for_sample( sample_name=wildcards.sample, fastq_types=[FastqName.FWD] ), @@ -18,8 +17,6 @@ rule concatenate_fastqs: params: fastq_dir=config["analysis"]["fastq_path"], sample="{sample}", - threads: - get_threads(cluster_config, "concatenate") message: "Sample {params.sample} FASTQ concatenation per read-direction, before UMI extraction" shell: diff --git a/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule b/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule index d02027ff6..80373162d 100644 --- a/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule +++ b/BALSAMIC/snakemake_rules/dragen_suite/dragen_dna.rule @@ -6,8 +6,8 @@ rule dragen_align_call_tumor_only: input: reference = config["reference"]["reference_genome"], - fastq_r1 = Path(fastq_dir, "{sample}_concat_R_1.fp.fastq.gz".format(sample=tumor_sample)).as_posix(), - fastq_r2 = Path(fastq_dir, "{sample}_concat_R_2.fp.fastq.gz".format(sample=tumor_sample)).as_posix() + fastq_r1 = fastq_dir + "concat.tumor.{sample}_1.fastq.gz", + fastq_r2 = fastq_dir + "concat.tumor.{sample}_2.fastq.gz" output: bam = Path(result_dir, "dragen", "SNV.somatic." + config["analysis"]["case_id"] + ".dragen_tumor.bam").as_posix(), vcf = Path(result_dir, "dragen", "SNV.somatic." 
+ config["analysis"]["case_id"] + ".dragen.vcf.gz").as_posix() @@ -18,8 +18,6 @@ rule dragen_align_call_tumor_only: tmp_reference_dir = Path(result_dir, "dragen", "reference").as_posix(), sample_name = tumor_sample, output_file_prefix = "SNV.somatic." + config["analysis"]["case_id"] + ".dragen" - threads: - get_threads(cluster_config, "dragen_align_call_tumor_only") message: "DRAGEN align and variant call for {params.sample_name}" shell: diff --git a/BALSAMIC/snakemake_rules/misc/sleep.rule b/BALSAMIC/snakemake_rules/misc/sleep.rule deleted file mode 100644 index ddd79784e..000000000 --- a/BALSAMIC/snakemake_rules/misc/sleep.rule +++ /dev/null @@ -1,15 +0,0 @@ - -rule sleep_before_start: - """Wait the specified number of seconds before starting any processing to avoid key_error issue.""" - output: - wake_up = result_dir + "start_analysis" - params: - sleep_seconds = SLEEP_BEFORE_START - threads: get_threads(cluster_config, "sleep_before_start") - message: - "Sleeping for {params.sleep_seconds} seconds before starting analysis." - shell: - """ -sleep {params.sleep_seconds} -echo "Waited: {params.sleep_seconds} seconds. Now starting analysis." >> {output.wake_up} - """ diff --git a/BALSAMIC/snakemake_rules/pon/gens_create_pon.rule b/BALSAMIC/snakemake_rules/pon/gens_create_pon.rule index 5800216b9..95f24824d 100644 --- a/BALSAMIC/snakemake_rules/pon/gens_create_pon.rule +++ b/BALSAMIC/snakemake_rules/pon/gens_create_pon.rule @@ -13,8 +13,6 @@ rule gatk_create_readcount_pon: Path(benchmark_dir, "gatk_create_readcount_pon.{gender}.{version}.tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("gatk") + ".sif").as_posix() - threads: - get_threads(cluster_config, "gatk_create_readcount_pon") message: "Running GATK CreateReadCountPanelOfNormals for {params.gender} PON for GENS." 
shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/GATK.rule b/BALSAMIC/snakemake_rules/quality_control/GATK.rule deleted file mode 100644 index 5a11e6151..000000000 --- a/BALSAMIC/snakemake_rules/quality_control/GATK.rule +++ /dev/null @@ -1,40 +0,0 @@ -# vim: syntax=python tabstop=4 expandtab -# coding: utf-8 - - - -rule PreparePopVCF: - input: - bam = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample), - ref1kg = config["reference"]["vcf_1kg"], - output: - popvcf = result_dir + "popvcf.vcf" - benchmark: - Path(benchmark_dir, "PreparePopVCF_" + "tumor.tsv").as_posix() - singularity: - Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - params: - anno_str1 = "FORMAT/GT,FORMAT/GL,FORMAT/DS,^INFO/AC,^INFO/AF,^INFO/AN,^INFO/", - popcode = "EUR" - message: - "Generate intermediate pop vcf file for gatk analysis" - shell: - """ -readlink -f {input.bam}; - -bcftools annotate \ --x {params.anno_str1}{params.popcode}_AF \ -{input.ref1kg} \ -| bcftools annotate \ --i INFO/{params.popcode}_AF!=0.0 \ -| awk -v OFS=\"\\t\" '$1~/^#/ {{ print; }} $1!~/^#/ {{ split($8,INFO,\";\"); newINFO=\"\";" - -for (i in INFO) {{ \ -if (INFO[i]~\"{params.popcode}_AF\") {{ \ -split(INFO[i],AF,\"=\"); P=substr(AF[1], 1, length(AF[1])-3); \ -INFO[i]=P\"={{\"$4\"*=\"AF[2]\",\"$5\"=\"1-AF[2]\"}}\"; \ -INFO[i]=INFO[i]\";set=\"P; }} \ -newINFO=INFO[i] \";\" newINFO; }} \ -$8=sustr(newINFO, 1, length(newINFO)-1); print; }}' \ - > {output.popvcf}; - """ diff --git a/BALSAMIC/snakemake_rules/quality_control/d4tools.rule b/BALSAMIC/snakemake_rules/quality_control/d4tools.rule new file mode 100644 index 000000000..6268146ce --- /dev/null +++ b/BALSAMIC/snakemake_rules/quality_control/d4tools.rule @@ -0,0 +1,19 @@ + +rule create_d4file: + input: + bam = lambda wildcards: config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = wildcards.sample), + output: + d4file = Path(qc_dir,"{sample_type}.{sample}.d4").as_posix(), + 
benchmark: + Path(benchmark_dir, "create_d4file.{sample_type}.{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("d4tools") + ".sif").as_posix() + params: + housekeeper_id = {"id": "{sample}", "tags": "qc-metrics"}, + sample = "{sample}" + message: + "Creating d4 coverage file for sample {params.sample}" + shell: + """ + d4tools create -Az {input.bam} {output.d4file} + """ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule index 6d187a847..1757df7dd 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_tga.rule @@ -18,8 +18,6 @@ rule fastp_adapter_trim_tga: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), adapter_trim = " ".join(fastp_parameters["fastp_trim_adapter"]), sample = "{sample}" - threads: - get_threads(cluster_config, 'fastp_quality_trim') message: "Adapter trimming for fastqs for sample: {params.sample}" shell: @@ -27,7 +25,7 @@ rule fastp_adapter_trim_tga: export TMPDIR={params.tmpdir}; fastp \ ---thread {threads} \ +--thread {resources.threads} \ --in1 {input.fastq_r1} \ --in2 {input.fastq_r2} \ --out1 {output.fastq_r1} \ @@ -57,8 +55,6 @@ rule sentieon_umiextract_tga: sentieon_lic = config_model.sentieon.sentieon_license, ds_params = params.umiextract.read_structure, sample = "{sample}" - threads: - get_threads(cluster_config, "sentieon_umiextract") message: "Extracing UMI tags using sentieon for {params.sample}" shell: @@ -93,8 +89,6 @@ rule fastp_quality_trim_tga: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), quality_trim = " ".join(fastp_parameters["fastp_trim_qual"]), sample = "{sample}" - threads: - get_threads(cluster_config, 'fastp_quality_trim') message: "Quality for fastqs for sample: {params.sample}" shell: @@ -102,7 +96,7 @@ rule fastp_quality_trim_tga: export TMPDIR={params.tmpdir}; fastp \ ---thread {threads} \ +--thread {resources.threads} \ --in1 
{input.interleaved_fastq} \ --interleaved_in \ --out1 {output.fastq_r1} \ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule index 278b38974..25619911b 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastp_wgs.rule @@ -4,7 +4,6 @@ rule fastp_quality_and_adapter_trim_wgs: """Fastq data pre-processing for WGS.""" input: - wake_up = result_dir + "start_analysis", fastq_r1 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.FWD), fastq_r2 = lambda wildcards: config_model.get_fastq_by_fastq_pattern(wildcards.fastq_pattern, FastqName.REV) output: @@ -21,8 +20,6 @@ rule fastp_quality_and_adapter_trim_wgs: quality_trim = " ".join(fastp_parameters["fastp_trim_qual"]), adapter_trim = " ".join(fastp_parameters["fastp_trim_adapter"]), fastq_pattern = "{fastq_pattern}" - threads: - get_threads(cluster_config, 'fastp') message: "Quality control and trimming of UMI-removed fastqs for fastq pattern: {params.fastq_pattern}" shell: @@ -30,7 +27,7 @@ rule fastp_quality_and_adapter_trim_wgs: export TMPDIR={params.tmpdir}; fastp \ ---thread {threads} \ +--thread {resources.threads} \ --in1 {input.fastq_r1} \ --in2 {input.fastq_r2} \ --out1 {output.fastq_r1} \ diff --git a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule index 5b2ff77e7..90a4a8032 100644 --- a/BALSAMIC/snakemake_rules/quality_control/fastqc.rule +++ b/BALSAMIC/snakemake_rules/quality_control/fastqc.rule @@ -4,7 +4,6 @@ rule fastqc: """Perform quality control checks on raw sequence data.""" input: - wake_up = result_dir + "start_analysis", fastq = input_fastq_dir + "{fastq_file_names}.fastq.gz" output: fastqc_zip = fastqc_dir + "{fastq_file_names}_fastqc.zip" @@ -16,14 +15,13 @@ rule fastqc: fastqc_dir = fastqc_dir, fastq_file_name = "{fastq_file_names}", tmpdir = 
tempfile.mkdtemp(prefix=tmp_dir) - threads: get_threads(cluster_config, "fastqc") message: "Running FastQC on {params.fastq_file_name}" shell: """ export TMPDIR={params.tmpdir}; -fastqc --threads {threads} {input.fastq} \ +fastqc --threads {resources.threads} {input.fastq} \ --dir {params.tmpdir} \ --outdir {params.fastqc_dir}; diff --git a/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule b/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule index 96d9f3afa..7bcfd26c4 100644 --- a/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/mosdepth.rule @@ -24,8 +24,6 @@ rule mosdepth_coverage: quantize = params.mosdepth.quantize, sample_name = '{sample}', output_dir = qc_dir, - threads: - get_threads(cluster_config, "mosdepth_coverage") message: "Calculate coverage using mosdepth for sample {params.sample_name}" shell: @@ -41,7 +39,7 @@ mosdepth \ --mapq {params.mapq} \ --flag {params.samflag} \ --quantize {params.quantize} \ ---threads {threads} \ +--threads {resources.threads} \ {params.output_dir}{wildcards.sample_type}.{wildcards.sample} \ {input.bam}; """ diff --git a/BALSAMIC/snakemake_rules/quality_control/picard.rule b/BALSAMIC/snakemake_rules/quality_control/picard.rule index b290c31f5..ae0e06c43 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard.rule @@ -23,8 +23,6 @@ rule picard_CollectHsMetrics: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), baitsetname = Path(config["panel"]["capture_kit"]).name, sample = "{sample}" - threads: - get_threads(cluster_config, "picard_qc") message: "Calculating picard HsMetrics for sample {params.sample}" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/picard_common.rule b/BALSAMIC/snakemake_rules/quality_control/picard_common.rule index aec655194..95258e152 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard_common.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard_common.rule 
@@ -21,8 +21,6 @@ rule picard_CollectAlignmentSummaryMetrics: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), adapter = config["QC"]["adapter"], sample = "{sample}" - threads: - get_threads(cluster_config, "picard_qc") message: "Calculating picard alignment summary metrics for sample {params.sample}" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule index 52b26b7a5..b33c4991b 100644 --- a/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/picard_wgs.rule @@ -18,8 +18,6 @@ rule picard_CollectHsMetrics_WGS: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), bed_name = Path(config_model.reference["refgene_bed"]).name, sample = "{sample}" - threads: - get_threads(cluster_config, "picard_CollectHsMetrics") message: "Calculating picard HsMetrics for sample {params.sample}" shell: @@ -61,8 +59,6 @@ rule picard_CollectWgsMetrics: mem = "16g", tmpdir = tempfile.mkdtemp(prefix=tmp_dir), sample = "{sample}" - threads: - get_threads(cluster_config,"picard_CollectWgsMetrics") message: "Collecting various picard quality metrics for wgs sample {params.sample}" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule b/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule index 267568f36..389f9dbf4 100644 --- a/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule +++ b/BALSAMIC/snakemake_rules/quality_control/qc_metrics.rule @@ -27,8 +27,6 @@ if config["analysis"]["analysis_workflow"] != "balsamic-qc": config_path = f"{analysis_dir_home}/{case_id}/{case_id}.json", collect_qc_metrics_script = get_script_path("collect_qc_metrics.py"), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "qc-metrics"} - threads: - get_threads(cluster_config, "collect_custom_qc_metrics") message: "Extract the manually specified QC metric for validation and delivery" shell: @@ -46,8 +44,6 @@ else: config_path = 
f"{analysis_dir_home}/{case_id}/{case_id}.json", collect_qc_metrics_script = get_script_path("collect_qc_metrics.py"), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "qc-metrics"} - threads: - get_threads(cluster_config, "collect_custom_qc_metrics") message: "Extract the manually specified QC metric for validation and delivery" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/report.rule b/BALSAMIC/snakemake_rules/quality_control/report.rule index 073b5a7f6..c5ea5921a 100644 --- a/BALSAMIC/snakemake_rules/quality_control/report.rule +++ b/BALSAMIC/snakemake_rules/quality_control/report.rule @@ -9,8 +9,6 @@ rule cnv_report: params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "clinical"}, cnv_report_script= get_script_path("generate_cnv_report.py"), - threads: - get_threads(cluster_config, "cnv_report") message: "Generating CNV report PDF" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule index db45349d5..0993fb271 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sambamba_depth.rule @@ -20,8 +20,6 @@ rule sambamba_panel_depth: cov_step = 50, filter_string="'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", sample = '{sample}' - threads: - get_threads(cluster_config, 'sambamba_panel_depth') message: "Calculate depth statistics using sambamba for sample {params.sample}" shell: @@ -32,7 +30,7 @@ sambamba depth region \ --regions {input.bed} \ --min-base-quality={params.base_qual} \ --filter {params.filter_string} \ ---nthreads {threads} \ +--nthreads {resources.threads} \ `echo $covStr` {input.bam} > {output}; """ @@ -56,8 +54,6 @@ rule sambamba_exon_depth: cov_5 = "250", filter_string = "'not (unmapped or mate_is_unmapped) and not duplicate and not failed_quality_control and mapping_quality > 10'", 
sample = '{sample}' - threads: - get_threads(cluster_config,'sambamba_exon_depth') message: "Calculate exon depth stastics using sambamba for sample {params.sample}" shell: @@ -69,5 +65,5 @@ sambamba depth region \ --cov-threshold {params.cov_1} --cov-threshold {params.cov_2} \ --cov-threshold {params.cov_3} --cov-threshold {params.cov_4} \ --cov-threshold {params.cov_5} \ ---nthreads {threads} {input.bam} > {output}; +--nthreads {resources.threads} {input.bam} > {output}; """ diff --git a/BALSAMIC/snakemake_rules/quality_control/samtools_qc_tga.rule b/BALSAMIC/snakemake_rules/quality_control/samtools_qc_tga.rule index 27984d506..d5e20f978 100644 --- a/BALSAMIC/snakemake_rules/quality_control/samtools_qc_tga.rule +++ b/BALSAMIC/snakemake_rules/quality_control/samtools_qc_tga.rule @@ -2,7 +2,7 @@ # coding: utf-8 -rule samtools_qc: +rule samtools_qc_tga: input: bam = lambda wildcards: config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = wildcards.sample, specified_suffix="align_sort") output: @@ -15,15 +15,13 @@ rule samtools_qc: Path(singularity_image,config["bioinfo_tools"].get("samtools") + ".sif").as_posix() params: sample_id = "{sample}" - threads: - get_threads(cluster_config, "samtools_qc") message: "Calculating alignment stats for sample: {params.sample_id}" shell: """ -samtools flagstats --threads {threads} {input.bam} > {output.flagstats}; -samtools stats --threads {threads} {input.bam} > {output.stats}; -samtools idxstats --threads {threads} {input.bam} > {output.idxstats}; +samtools flagstats --threads {resources.threads} {input.bam} > {output.flagstats}; +samtools stats --threads {resources.threads} {input.bam} > {output.stats}; +samtools idxstats --threads {resources.threads} {input.bam} > {output.idxstats}; """ rule samtools_qc_umi_collapsed: @@ -39,13 +37,11 @@ rule samtools_qc_umi_collapsed: Path(singularity_image,config["bioinfo_tools"].get("samtools") + ".sif").as_posix() params: sample_id = "{sample}" - threads: - 
get_threads(cluster_config, "samtools_qc") message: "Calculating alignment stats for sample after UMI collapse: {params.sample_id}" shell: """ -samtools flagstats --threads {threads} {input.bam} > {output.flagstats}; -samtools stats --threads {threads} {input.bam} > {output.stats}; -samtools idxstats --threads {threads} {input.bam} > {output.idxstats}; +samtools flagstats --threads {resources.threads} {input.bam} > {output.flagstats}; +samtools stats --threads {resources.threads} {input.bam} > {output.stats}; +samtools idxstats --threads {resources.threads} {input.bam} > {output.idxstats}; """ diff --git a/BALSAMIC/snakemake_rules/quality_control/samtools_qc_wgs.rule b/BALSAMIC/snakemake_rules/quality_control/samtools_qc_wgs.rule index 6ebf351a0..ee0f0b351 100644 --- a/BALSAMIC/snakemake_rules/quality_control/samtools_qc_wgs.rule +++ b/BALSAMIC/snakemake_rules/quality_control/samtools_qc_wgs.rule @@ -1,7 +1,7 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -rule samtools_qc: +rule samtools_qc_wgs: input: bam = lambda wildcards: config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = wildcards.sample) output: @@ -14,13 +14,11 @@ rule samtools_qc: Path(singularity_image,config["bioinfo_tools"].get("samtools") + ".sif").as_posix() params: sample_id = "{sample}" - threads: - get_threads(cluster_config, "samtools_qc") message: "Calculating alignment stats for sample: {params.sample_id}" shell: """ -samtools flagstats --threads {threads} {input.bam} > {output.flagstats}; -samtools stats --threads {threads} {input.bam} > {output.stats}; -samtools idxstats --threads {threads} {input.bam} > {output.idxstats}; +samtools flagstats --threads {resources.threads} {input.bam} > {output.flagstats}; +samtools stats --threads {resources.threads} {input.bam} > {output.stats}; +samtools idxstats --threads {resources.threads} {input.bam} > {output.idxstats}; """ diff --git a/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule 
b/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule index a591ad0d5..f9fdc4d10 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sentieon_qc_metrics.rule @@ -20,8 +20,6 @@ if config["analysis"]["sequencing_type"] == 'wgs': sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, sample = '{sample}' - threads: - get_threads(cluster_config, 'sentieon_wgs_metrics') message: "Calculate coverage metrics for wgs cases using sentieon tools for sample {params.sample}" shell: @@ -70,8 +68,6 @@ if config["analysis"]["sequencing_type"] == 'wgs': sample_id = "{sample}", min_read_ratio=params.insert_size_metrics.min_read_ratio, adapter = config["QC"]["adapter"] - threads: - get_threads(cluster_config, 'sentieon_qc_metrics') message: ("Creates multiple different alignment QC metrics based on Picard tools" "Current sample: {params.sample_id}") @@ -84,7 +80,7 @@ if config["analysis"]["sequencing_type"] == 'wgs': shell_bam_files=$(echo {input.bam_files} | sed 's/ / -i /g') ; {params.sentieon_exec} driver \ - -t {threads} \ + -t {resources.threads} \ -r {input.ref} \ -i $shell_bam_files \ --algo InsertSizeMetricAlgo --min_read_ratio {params.min_read_ratio} {output.insert_size_metrics} \ @@ -114,8 +110,6 @@ else: sample_id="{sample}", adapter=config["QC"]["adapter"], min_read_ratio=params.insert_size_metrics.min_read_ratio, - threads: - get_threads(cluster_config,'sentieon_qc_metrics') message: ("Creates multiple different alignment QC metrics based on Picard tools" "Current sample: {params.sample_id}") @@ -126,7 +120,7 @@ else: export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ - -t {threads} \ + -t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ --algo InsertSizeMetricAlgo --min_read_ratio {params.min_read_ratio} {output.insert_size_metrics} \ @@ -155,8 +149,6 @@ if config["analysis"]["sequencing_type"] == 
'wgs': sample_id = "{sample}", sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, - threads: - get_threads(cluster_config,'sentieon_plot_qc_metrics') message: ("Creates plots from various QC metrics by Sentieon mimicking Picard tools" "Current sample: {params.sample_id}") @@ -183,8 +175,6 @@ else: sample_id = "{sample}", sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, - threads: - get_threads(cluster_config,'sentieon_plot_qc_metrics') message: ("Creates plots from various QC metrics by Sentieon mimicking Picard tools" "Current sample: {params.sample_id}") diff --git a/BALSAMIC/snakemake_rules/quality_control/sex_check.rule b/BALSAMIC/snakemake_rules/quality_control/sex_check.rule index a33fafd1d..02a67a0fb 100644 --- a/BALSAMIC/snakemake_rules/quality_control/sex_check.rule +++ b/BALSAMIC/snakemake_rules/quality_control/sex_check.rule @@ -16,8 +16,6 @@ if config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["analys params: sex_check_script = get_script_path("sex_prediction_wgs.py"), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "sex_prediction") message: "Running sex_prediction on case {params.case_name}" shell: @@ -41,8 +39,6 @@ elif config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["anal params: sex_check_script = get_script_path("sex_prediction_wgs.py"), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "sex_prediction") message: "Running sex_prediction on case {params.case_name}" shell: @@ -66,8 +62,6 @@ elif config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"][ params: sex_check_script = get_script_path("sex_prediction_tga.py"), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "sex_prediction") message: "Running sex_prediction on tumor and normal sample of {params.case_name}" shell: 
@@ -91,8 +85,6 @@ elif config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"][ params: sex_check_script = get_script_path("sex_prediction_tga.py"), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "sex_prediction") message: "Running sex_prediction on tumor sample of {params.case_name}" shell: diff --git a/BALSAMIC/snakemake_rules/quality_control/somalier.rule b/BALSAMIC/snakemake_rules/quality_control/somalier.rule index 530ea1d25..351deb225 100644 --- a/BALSAMIC/snakemake_rules/quality_control/somalier.rule +++ b/BALSAMIC/snakemake_rules/quality_control/somalier.rule @@ -16,8 +16,6 @@ rule somalier_extract_normal: params: outdir = Path(qc_dir,'somalier').as_posix(), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config,"somalier_extract_normal") message: "Running somalier extract for normal sample of {params.case_name}" shell: @@ -40,8 +38,6 @@ rule somalier_extract_tumor: params: outdir = Path(qc_dir,'somalier').as_posix(), case_name=config["analysis"]["case_id"], - threads: - get_threads(cluster_config,"somalier_extract_tumor") message: "Running somalier extract for tumor sample of {params.case_name}" shell: @@ -65,8 +61,6 @@ rule somalier_relate: params: outprefix = Path(qc_dir,'somalier', 'somalier').as_posix(), case_name=config["analysis"]["case_id"], - threads: - get_threads(cluster_config,"somalier_relate") message: "Running somalier relate for {params.case_name}" shell: diff --git a/BALSAMIC/snakemake_rules/report/generate_pdf.rule b/BALSAMIC/snakemake_rules/report/generate_pdf.rule index 77981a0ef..22c5f7e41 100644 --- a/BALSAMIC/snakemake_rules/report/generate_pdf.rule +++ b/BALSAMIC/snakemake_rules/report/generate_pdf.rule @@ -11,8 +11,6 @@ rule csv_to_pdf: csv_to_pdf_script=get_script_path("csv_to_pdf.py"), loh_regions=f"{cnv_dir}CNV.somatic.{config['analysis']['case_id']}.purecn.LOHregions.csv", 
loh_genes=f"{cnv_dir}CNV.somatic.{config['analysis']['case_id']}.purecn.LOHgenes.csv", - threads: - get_threads(cluster_config=cluster_config, rule_name="csv_to_pdf") message: "Converting CSV files to PDF" shell: @@ -32,8 +30,6 @@ rule txt_to_pdf: sample_statistics_pdf=f"{vcf_dir}CNV.somatic.{config['analysis']['case_id']}.ascat.samplestatistics.txt.pdf", params: csv_to_pdf_script=get_script_path("csv_to_pdf.py"), - threads: - get_threads(cluster_config=cluster_config, rule_name="txt_to_pdf") message: "Converting AscatNgs statistics TXT file to PDF" shell: @@ -50,8 +46,6 @@ rule msi_to_pdf: msi_result_pdf= f"{vcf_dir}MSI.somatic.{config['analysis']['case_id']}.msisensorpro.msi.pdf", params: csv_to_pdf_script=get_script_path("csv_to_pdf.py"), - threads: - get_threads(cluster_config=cluster_config, rule_name="txt_to_pdf") message: "Converting MSI TXT file to PDF" shell: @@ -69,8 +63,6 @@ rule image_to_pdf: plot="|".join(config_model.get_cnv_report_plots()), params: image_to_pdf_script=get_script_path("image_to_pdf.py"), - threads: - get_threads(cluster_config=cluster_config, rule_name="image_to_pdf") message: "Converting CNV PNG files to PDF" shell: diff --git a/BALSAMIC/snakemake_rules/report/merge_pdfs.rule b/BALSAMIC/snakemake_rules/report/merge_pdfs.rule index dd384dd20..e0b3fbdb6 100644 --- a/BALSAMIC/snakemake_rules/report/merge_pdfs.rule +++ b/BALSAMIC/snakemake_rules/report/merge_pdfs.rule @@ -12,8 +12,6 @@ rule merge_cnv_pdf_reports: merge_pdfs_script=get_script_path("merge_pdfs.py"), loh_regions_pdf=f"{cnv_dir}CNV.somatic.{config['analysis']['case_id']}.purecn.LOHregions.csv.pdf", loh_genes_pdf=f"{cnv_dir}CNV.somatic.{config['analysis']['case_id']}.purecn.LOHgenes.csv.pdf", - threads: - get_threads(cluster_config=cluster_config, rule_name="merge_cnv_pdf_reports") message: "Merging CNV PDF reports {output.cnv_report_pdf}" benchmark: diff --git a/BALSAMIC/snakemake_rules/umi/d4tools_umi.rule b/BALSAMIC/snakemake_rules/umi/d4tools_umi.rule new file mode 100644 
index 000000000..069cb45a3 --- /dev/null +++ b/BALSAMIC/snakemake_rules/umi/d4tools_umi.rule @@ -0,0 +1,19 @@ + +rule create_d4file_umi: + input: + bam = umi_dir + "{sample_type}.{sample}_consensusfiltered_umi.bam", + output: + d4file_umi = Path(qc_dir,"{sample_type}.{sample}.umi.d4").as_posix(), + benchmark: + Path(benchmark_dir, "create_d4file_umi.{sample_type}.{sample}.tsv").as_posix() + singularity: + Path(singularity_image, config["bioinfo_tools"].get("d4tools") + ".sif").as_posix() + params: + housekeeper_id = {"id": "{sample}", "tags": "qc-metrics"}, + sample = "{sample}" + message: + "Creating d4 coverage file for sample {params.sample}" + shell: + """ + d4tools create -Az {input.bam} {output.d4file_umi} + """ diff --git a/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule b/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule index 4faf00b11..52dfb9cc3 100644 --- a/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule +++ b/BALSAMIC/snakemake_rules/umi/generate_AF_tables.rule @@ -16,8 +16,6 @@ rule bcftools_query_generatebackgroundaf_umitable: params: background_variant_region = background_variant_file, case_name = "{case_name}" - threads: - get_threads(cluster_config, "bcftools_query_generatebackgroundaf_umitable") message: "Creating Allelic frequency table from VCF file for {params.case_name}" shell: diff --git a/BALSAMIC/snakemake_rules/umi/modify_tnscope_infofield_umi.rule b/BALSAMIC/snakemake_rules/umi/modify_tnscope_infofield_umi.rule index 8f6727347..29df5dc8d 100644 --- a/BALSAMIC/snakemake_rules/umi/modify_tnscope_infofield_umi.rule +++ b/BALSAMIC/snakemake_rules/umi/modify_tnscope_infofield_umi.rule @@ -12,8 +12,6 @@ rule modify_tnscope_infofield_umi: modify_tnscope_infofield = get_script_path("modify_tnscope_infofield.py"), tmpdir = tempfile.mkdtemp(prefix=tmp_dir), case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, 'modify_tnscope_infofield') message: "Add DP and AF tumor sample info to INFO field for case: 
{params.case_name}" shell: diff --git a/BALSAMIC/snakemake_rules/umi/qc_umi.rule b/BALSAMIC/snakemake_rules/umi/qc_umi.rule index cba0e7dcb..3b046a432 100644 --- a/BALSAMIC/snakemake_rules/umi/qc_umi.rule +++ b/BALSAMIC/snakemake_rules/umi/qc_umi.rule @@ -17,8 +17,6 @@ rule picard_umiaware: params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), sample_id = "{sample}" - threads: - get_threads(cluster_config, "picard_umiaware") message: "Marking duplicates using Picardtools with UmiAware for {params.sample_id}" shell: @@ -50,8 +48,6 @@ rule picard_collecthsmetrics_umi: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), baitsetname = os.path.basename(config["panel"]["capture_kit"]), sample_id = "{sample}" - threads: - get_threads(cluster_config, "CollectHsMetrics") message: "Collecting HSmetrics using Picardtools for {params.sample_id}" shell: @@ -89,15 +85,13 @@ rule samtools_view_calculatemeanfamilydepth_umi: params: sample_id = "{sample}", tmpdir= tempfile.mkdtemp(prefix=tmp_dir), - threads: - get_threads(cluster_config, "samtools_view_calculatemeanfamilydepth_umi") message: "Calculating mean family depth using samtools and awk for {params.sample_id}" shell: """ export TMPDIR={params.tmpdir}; -samtools view -@ {threads} {input.bam} | \ +samtools view -@ {resources.threads} {input.bam} | \ grep 'RX:Z:' | \ sed 's/.*RX:Z:\\([ACGT-].*\\).*/\\1/' | \ cut -f1 | \ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule b/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule index 5fec3004e..bacd2dffb 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_consensuscall.rule @@ -17,8 +17,6 @@ rule sentieon_consensuscall_umi: tag = params.umiconsensuscall.tag, ip_format = params.umiconsensuscall.align_format, sample_id = '{sample}' - threads: - get_threads(cluster_config, "sentieon_consensuscall_umi") message: "Calling consensus molecules using sentieon for {params.sample_id}" shell: @@ -31,7 +29,7 @@ export 
SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} umi consensus \ --t {threads} \ +-t {resources.threads} \ -i {input.bam} \ -o {output.fastq_consensus} \ --input_format {params.ip_format} \ @@ -58,8 +56,6 @@ rule sentieon_bwa_umiconsensus: ip_bases = params.umicommon.align_intbases, sample_id = "{sample}", sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True), - threads: - get_threads(cluster_config, "sentieon_bwa_umiconsensus") message: "Mapping consensus reads and sorting using sentieon bwa-mem for {params.sample_id}" shell: @@ -73,7 +69,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} bwa mem \ -R '@RG\\tID:{params.sample_id}\\tSM:{params.sample_type}\\tLB:TargetPanel\\tPL:ILLUMINA' \ --t {threads} \ +-t {resources.threads} \ -K {params.ip_bases} \ -p -C {input.ref_fa} \ {input.fq_consensus} | \ @@ -102,8 +98,6 @@ rule sentieon_consensusfilter_umi: consensusfilter_script = get_script_path("FilterDuplexUMIconsensus.awk"), minreads = params.umiconsensuscall.filter_minreads, sample_id = '{sample}', - threads: - get_threads(cluster_config, "sentieon_consensusfilter_umi") message: "Filtering consensus reads based on XZ tag for {params.sample_id}" shell: @@ -130,13 +124,11 @@ rule bam_compress_tumor_umi: params: sample_id = "{sample}", housekeeper_id= {"id": tumor_sample, "tags": "umi_tumor"} - threads: - get_threads(cluster_config, "bam_compress") message: "Compressing UMI bam to cram for {params.sample_id}" shell: """ -samtools view -h -T {input.fasta} --threads {threads} -C -o {output.cram} {input.bam}; +samtools view -h -T {input.fasta} --threads {resources.threads} -C -o {output.cram} {input.bam}; samtools index {output.cram}; """ @@ -154,12 +146,10 @@ if config['analysis']['analysis_type'] == "paired": params: sample_id = "{sample}", housekeeper_id= {"id": normal_sample, "tags": "umi_normal"} - threads: - get_threads(cluster_config, 
"bam_compress") message: "Compressing UMI bam to cram for {params.sample_id}" shell: """ - samtools view -h -T {input.fasta} --threads {threads} -C -o {output.cram} {input.bam}; + samtools view -h -T {input.fasta} --threads {resources.threads} -C -o {output.cram} {input.bam}; samtools index {output.cram}; """ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule index 4fd414c0b..4d37490ee 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope.rule @@ -31,8 +31,6 @@ rule sentieon_tnscope_umi: padding = params.tnscope_umi.padding, tumor = "TUMOR", pcr_model = params.tnscope_umi.pcr_model - threads: - get_threads(cluster_config, "sentieon_tnscope_umi") message: "Calling single nucleotide variants using TNscope for {params.tumor}" shell: @@ -42,7 +40,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref_fa} \ -i {input.bam} \ --interval {input.bed} \ diff --git a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule index e400c73f2..c764f9e69 100644 --- a/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule +++ b/BALSAMIC/snakemake_rules/umi/sentieon_varcall_tnscope_tn.rule @@ -33,8 +33,6 @@ rule sentieon_tnscope_umi_tn: tumor = "TUMOR", normal = "NORMAL", case_name= config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "sentieon_tnscope_umi") message: "Calling single nucleotide variants using TNscope for {params.case_name}" shell: @@ -44,7 +42,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref_fa} \ -i {input.bamT} \ -i {input.bamN} \ diff --git 
a/BALSAMIC/snakemake_rules/umi/umi_sentieon_alignment.rule b/BALSAMIC/snakemake_rules/umi/umi_sentieon_alignment.rule index f07efd0ca..d58109d54 100644 --- a/BALSAMIC/snakemake_rules/umi/umi_sentieon_alignment.rule +++ b/BALSAMIC/snakemake_rules/umi/umi_sentieon_alignment.rule @@ -18,8 +18,6 @@ rule sentieon_align_sort_umireads_umi: sample_id = "{sample}", sample_type = lambda wildcards: config_model.get_sample_type_by_name(wildcards.sample, uppercase=True), ip_bases = params.umicommon.align_intbases, - threads: - get_threads(cluster_config, 'sentieon_align_sort') message: ("Align fastq reads using sentieon bwa-mem and sort reads using samtools for sample type: " "{params.sample_type} : {params.sample_id}") @@ -32,7 +30,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} bwa mem \ -R '@RG\\tID:{params.sample_id}\\tSM:{params.sample_type}\\tPL:ILLUMINA' \ -K {params.ip_bases} \ --p -t {threads} -C \ +-p -t {resources.threads} -C \ {input.ref} {input.interleaved_fastq} | \ {params.sentieon_exec} util sort \ -r {input.ref} \ diff --git a/BALSAMIC/snakemake_rules/variant_calling/cnvkit_preprocess.rule b/BALSAMIC/snakemake_rules/variant_calling/cnvkit_preprocess.rule index 55859b5f8..d6aa84a59 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/cnvkit_preprocess.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/cnvkit_preprocess.rule @@ -3,14 +3,11 @@ rule create_target: bed_expanded_merged = Path(cnv_dir + "capture_kit_expanded_merged.bed").as_posix(), refgene_flat = config_model.reference["refgene_flat"], access_bed = config_model.reference["access_regions"], - wake_up = result_dir + "start_analysis", output: targets = cnv_dir + "targets.bed", antitargets = cnv_dir + "antitarget.bed", singularity: Path(singularity_image, "cnvkit.sif").as_posix() - threads: - get_threads(cluster_config, "cnvkit_create_targets") benchmark: Path(benchmark_dir, "cnvkit.targets.tsv").as_posix() shell: @@ -35,16 +32,14 @@ rule create_coverage: min_mapq = 
params.common.min_mapq, singularity: Path(singularity_image, "cnvkit.sif").as_posix() - threads: - get_threads(cluster_config, "cnvkit_create_coverage") message: "Segmenting genomic regions using CNVkit for {params.case_name}" shell: """ export TMPDIR={params.tmpdir} ; -cnvkit.py coverage {input.bam} {input.target_bed} -o {output.target_cnn} --min-mapq {params.min_mapq} --processes {threads} ; -cnvkit.py coverage {input.bam} {input.antitarget_bed} -o {output.antitarget_cnn} --min-mapq {params.min_mapq} --processes {threads} ; +cnvkit.py coverage {input.bam} {input.target_bed} -o {output.target_cnn} --min-mapq {params.min_mapq} --processes {resources.threads} ; +cnvkit.py coverage {input.bam} {input.antitarget_bed} -o {output.antitarget_cnn} --min-mapq {params.min_mapq} --processes {resources.threads} ; rm -rf {params.tmpdir} """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/extend_bed.rule b/BALSAMIC/snakemake_rules/variant_calling/extend_bed.rule index 453730107..46c2c3687 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/extend_bed.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/extend_bed.rule @@ -2,7 +2,6 @@ rule extend_short_bedregions: input: baits_bed = config_model.panel.capture_kit, - wake_up= result_dir + "start_analysis", output: baits_bed_expanded=Path(cnv_dir + "capture_kit_expanded.bed").as_posix(), benchmark: @@ -12,8 +11,6 @@ rule extend_short_bedregions: params: bedfile_extend_script = get_script_path("extend_bedfile.py"), minimum_region_size = params.bed_pre_processing.minimum_region_size - threads: - get_threads(cluster_config, "extend_short_bedregions") message: "Extending regions in bedfile to a minimum size of {params.minimum_region_size}." 
shell: @@ -31,8 +28,6 @@ rule bedtools_sort_and_merge: Path(benchmark_dir, 'bedtools_merge_expanded_bedfile.tsv').as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("bedtools") + ".sif").as_posix() - threads: - get_threads(cluster_config, "bedtools_merge") message: "Running bedtools sort and merge to merge potentially overlapping regions." shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/gatk_read_counts.rule b/BALSAMIC/snakemake_rules/variant_calling/gatk_read_counts.rule index 63f32ec82..396eedc51 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/gatk_read_counts.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/gatk_read_counts.rule @@ -14,8 +14,6 @@ rule gatk_collectreadcounts: Path(benchmark_dir, "gatk_collectreadcounts_{sample}.tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("gatk") + ".sif").as_posix() - threads: - get_threads(cluster_config, "gatk_collectreadcounts") message: "Running GATK CollectReadCounts on {params.sample} for GENS." 
shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/gens_preprocessing.rule b/BALSAMIC/snakemake_rules/variant_calling/gens_preprocessing.rule index d5ee5dc80..fc40442d2 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/gens_preprocessing.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/gens_preprocessing.rule @@ -18,8 +18,6 @@ if config["analysis"]["sequencing_type"] == SequencingType.WGS: sample = "{sample}" benchmark: Path(benchmark_dir, "sentieon_DNAscope_gnomad_{sample}.tsv").as_posix() - threads: - get_threads(cluster_config, "sentieon_DNAscope_gnomad") message: "Calling germline variants on positions in Gnomad AF > 0.05 using Sentieon DNAscope for {params.sample}" shell: @@ -30,7 +28,7 @@ if config["analysis"]["sequencing_type"] == SequencingType.WGS: export SENTIEON_DNASCOPE={params.sentieon_ml_dnascope}; {params.sentieon_exec} driver \ - -t {threads} \ + -t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ --algo DNAscope \ @@ -54,8 +52,6 @@ if config["analysis"]["sequencing_type"] == SequencingType.WGS: Path(benchmark_dir,"gatk_denoise_read_counts_{sample}.tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("gatk") + ".sif").as_posix() - threads: - get_threads(cluster_config,"gatk_denoise_read_counts") message: "Running GATK DenoiseReadCounts on {params.sample} for GENS." shell: @@ -85,8 +81,6 @@ if config["analysis"]["sequencing_type"] == SequencingType.WGS: sample="{sample}" benchmark: Path(benchmark_dir, "gens_preprocess_wgs_{sample}.tsv").as_posix() - threads: - get_threads(cluster_config, "gens_preprocess") message: "Formatting output for GENS for sample: {params.sample}." 
shell: @@ -111,8 +105,6 @@ else: sample="{sample}" benchmark: Path(benchmark_dir,"sentieon_DNAscope_gnomad_{sample}.tsv").as_posix() - threads: - get_threads(cluster_config,"sentieon_DNAscope_gnomad_tga") message: "Calling germline variants on positions in Gnomad AF > 0.05 using Sentieon DNAscope for {params.sample}" shell: @@ -122,7 +114,7 @@ else: export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ - -t {threads} \ + -t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ --interval {input.bed} \ @@ -149,8 +141,6 @@ else: sample="{sample}" benchmark: Path(benchmark_dir, "gens_preprocess_tga_{sample}.tsv").as_posix() - threads: - get_threads(cluster_config, "gens_preprocess") message: "Formatting output for GENS for sample: {params.sample}." shell: @@ -172,8 +162,6 @@ rule finalize_gens_outputfiles: Path(benchmark_dir, "finalize_gens_outputfiles_{sample}_{gens_input}.tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("bgzip") + ".sif").as_posix() - threads: - get_threads(cluster_config, "finalize_gens_outputfiles") message: "Bgzip and index GENS output: {params.gens_input} for sample: {params.sample_id}." 
shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/germline_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/germline_tga.rule index 9136a07b7..245776fc5 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/germline_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/germline_tga.rule @@ -1,7 +1,7 @@ # vim: syntax=python tabstop=4 expandtab # coding: utf-8 -rule sentieon_DNAscope: +rule sentieon_DNAscope_tga: input: bam = lambda wildcards: config_model.get_final_bam_name(bam_dir = bam_dir, sample_type = wildcards.sample_type), ref = config["reference"]["reference_genome"], @@ -16,8 +16,6 @@ rule sentieon_DNAscope: sentieon_exec = config_model.sentieon.sentieon_exec, sentieon_lic = config_model.sentieon.sentieon_license, sample = '{sample_type}' - threads: - get_threads(cluster_config, 'sentieon_DNAscope') message: "Calling germline variants using Sentieon DNAscope for {params.sample}" shell: @@ -27,7 +25,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ --interval {input.interval} \ @@ -38,7 +36,7 @@ rm -rf {params.tmpdir}; """ -rule manta_germline: +rule manta_germline_tga: input: fa = config["reference"]["reference_genome"], bam = lambda wildcards: config_model.get_final_bam_name(bam_dir = bam_dir, sample_type = wildcards.sample_type, specified_suffix="dedup.fixmate.qualcapped"), @@ -52,8 +50,6 @@ rule manta_germline: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), runmode = "local", sample = "{sample_type}" - threads: - get_threads(cluster_config,"manta_germline") message: "Calling germline variants using manta for {params.sample}" shell: @@ -65,7 +61,7 @@ configManta.py \ --referenceFasta={input.fa} \ --runDir={params.tmpdir}; -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {resources.threads}; cp 
{params.tmpdir}/results/variants/diploidSV.vcf.gz {output.final}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/germline_wgs.rule b/BALSAMIC/snakemake_rules/variant_calling/germline_wgs.rule index a9af0eabb..4150800b3 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/germline_wgs.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/germline_wgs.rule @@ -2,7 +2,7 @@ # coding: utf-8 -rule sentieon_DNAscope: +rule sentieon_DNAscope_wgs: input: ref = config["reference"]["reference_genome"], dbsnp = config["reference"]["dbsnp"], @@ -10,6 +10,8 @@ rule sentieon_DNAscope: recal_table = bam_dir + "{sample_type}.recal_data.table" output: vcf = vcf_dir + "SNV.germline.{sample_type}.dnascope.vcf.gz", + resources: + mem_mb = lambda wc, input: min(max(1.1 * input.size_mb, 10000), 500000) # Cap at 500GB params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), pcr_model = params.common.pcr_model, @@ -19,8 +21,6 @@ rule sentieon_DNAscope: sample = "{sample_type}" benchmark: Path(benchmark_dir, 'sentieon_DNAscope_' + "{sample_type}.tsv").as_posix() - threads: - get_threads(cluster_config, 'sentieon_DNAscope') message: "Calling germline variants using Sentieon DNAscope for {params.sample}" shell: @@ -31,7 +31,7 @@ export SENTIEON_LICENSE={params.sentieon_lic}; export SENTIEON_DNASCOPE={params.sentieon_ml_dnascope}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ -q {input.recal_table} \ @@ -43,7 +43,7 @@ rm -rf {params.tmpdir}; """ -rule manta_germline: +rule manta_germline_wgs: input: fa = config["reference"]["reference_genome"], bam = lambda wildcards: config_model.get_final_bam_name(bam_dir = bam_dir, sample_type = wildcards.sample_type), @@ -57,8 +57,6 @@ rule manta_germline: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), runmode = "local", sample = "{sample_type}" - threads: - get_threads(cluster_config,"manta_germline") message: "Calling germline variants using manta for {params.sample}" shell: @@ -70,7 +68,7 @@ 
configManta.py \ --referenceFasta={input.fa} \ --runDir={params.tmpdir}; -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {resources.threads}; cp {params.tmpdir}/results/variants/diploidSV.vcf.gz {output.final}; diff --git a/BALSAMIC/snakemake_rules/variant_calling/merge_snv_vcfs.rule b/BALSAMIC/snakemake_rules/variant_calling/merge_snv_vcfs.rule index e0474c391..db8041ae5 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/merge_snv_vcfs.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/merge_snv_vcfs.rule @@ -12,8 +12,6 @@ rule bcftools_normalise_vcfs: params: case_name = config["analysis"]["case_id"], variant_caller = "{caller}", - threads: - get_threads(cluster_config,'bcftools_normalise_vcfs') message: "Normalising variants for {params.variant_caller} {params.case_name}" shell: @@ -36,8 +34,6 @@ rule merge_snv_vcfs: params: modify_tnscope_infofield = get_script_path("merge_snv_variantcallers.py"), tmpdir = tempfile.mkdtemp(prefix=tmp_dir), - threads: - get_threads(cluster_config, "bcftools_concatenate_vcfs") message: "Merging VCFs with custom python script using vcfpy" shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall_wgs.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall_wgs.rule index 4f2484e78..a5bb5f4d3 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall_wgs.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_t_varcall_wgs.rule @@ -26,8 +26,6 @@ rule sentieon_tnscope_wgs_tumor_only: disable_detect = params.tnscope_wgs.disable_detect, case_name = config["analysis"]["case_id"], housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "research"} - threads: - get_threads(cluster_config, 'sentieon_TNscope_tumor_only') message: "Calling SNVs using sentieon TNscope for {params.case_name}" shell: @@ -38,7 +36,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export 
SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref} \ -i {input.bam} \ -q {input.recal} \ diff --git a/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall_wgs.rule b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall_wgs.rule index ff80d06dd..566ffee39 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall_wgs.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/sentieon_tn_varcall_wgs.rule @@ -31,8 +31,6 @@ rule sentieon_tnscope_wgs_tumor_normal: disable_detect = params.tnscope_wgs.disable_detect, case_name = config["analysis"]["case_id"], housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "research"} - threads: - get_threads(cluster_config, 'sentieon_tnscope_wgs_tumor_normal') message: ("Calling SNVs using Sentieon TNscope for sample {params.case_name}") shell: @@ -43,7 +41,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref} \ -i {input.bamT} \ -q {input.recalT} \ diff --git a/BALSAMIC/snakemake_rules/variant_calling/snv_quality_filter.rule b/BALSAMIC/snakemake_rules/variant_calling/snv_quality_filter.rule index f17525adb..cf3f8dacc 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/snv_quality_filter.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/snv_quality_filter.rule @@ -21,8 +21,6 @@ if config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["analys sor = get_tag_and_filtername(snv_quality_filters, "balsamic_high_strand_oddsratio"), hard_filters = WgsSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.TNSCOPE, soft_filter_normals=soft_filter_normal), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, 'bcftools_quality_filter_tnscope_tumor_only') message: "Quality filtering WGS tumor-only 
tnscope variants using bcftools for {params.case_name}" shell: @@ -31,13 +29,13 @@ export TMPDIR={params.tmpdir}; grep -v '^@' {input.wgs_calling_file} > {input.wgs_calling_file}.bed -bcftools view --threads {threads} --regions-file {input.wgs_calling_file}.bed {input.vcf_snv} \ -| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] > {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include 'FORMAT/AF > {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ +bcftools view --threads {resources.threads} --regions-file {input.wgs_calling_file}.bed {input.vcf_snv} \ +| bcftools filter --threads {resources.threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include 'FORMAT/AD[0:1] > {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include 'FORMAT/AF > {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ +| bcftools filter --threads 
{resources.threads} --include 'FORMAT/ALT_F1R2 > {params.strand_reads[0]} && (FORMAT/ALT_F1R2 > 0 && FORMAT/ALT_F2R1 > {params.strand_reads[0]} && FORMAT/REF_F1R2 > {params.strand_reads[0]} && FORMAT/REF_F2R1 > {params.strand_reads[0]})' --soft-filter '{params.strand_reads[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ | bcftools view --exclude '{params.hard_filters}' -o {output.vcf_filtered}; rm -rf {params.tmpdir}; @@ -62,19 +60,17 @@ elif config["analysis"]["sequencing_type"] == 'wgs' and config["analysis"]["anal in_normal = get_tag_and_filtername(snv_quality_filters, "in_normal"), hard_filters = WgsSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.TNSCOPE, soft_filter_normals=soft_filter_normal), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, 'bcftools_quality_filter_tnscope_tumor_normal') message: "Quality filtering WGS tumor-normal tnscope variants using bcftools for {params.case_name}" shell: """ bcftools view {input.vcf_snv} \ -| bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ -| bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ +| bcftools filter --threads 
{resources.threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ | bcftools annotate -x FILTER/alt_allele_in_normal \ -| bcftools filter --threads {threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' \ +| bcftools filter --threads {resources.threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' \ | bcftools view --exclude '{params.hard_filters}' -o {output.vcf_filtered}; """ @@ -99,8 +95,6 @@ if config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"]["a rpa=get_tag_and_filtername(snv_quality_filters,"balsamic_high_tnscope_rpa"), hard_filters=TgaSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.TNSCOPE, soft_filter_normals=soft_filter_normal, exome=exome), case_name=config["analysis"]["case_id"], - threads: - get_threads(cluster_config,'bcftools_quality_filter_vardict_tumor_only') message: "Quality filtering vardict tumor-only annotated variants using bcftools for {params.case_name}" shell: @@ -108,9 +102,9 @@ if config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"]["a bcftools view {input.vcf} \ | bcftools filter --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ | bcftools filter --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ - | bcftools filter --threads {threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ - | bcftools filter --threads 
{threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ - | bcftools filter --threads {threads} --exclude "INFO/RPA > {params.rpa[0]}" --soft-filter '{params.rpa[1]}' --mode + \ + | bcftools filter --threads {resources.threads} --include 'SUM(FORMAT/QSS)/SUM(FORMAT/AD) >= {params.qss[0]}' --soft-filter '{params.qss[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --exclude "INFO/RPA > {params.rpa[0]}" --soft-filter '{params.rpa[1]}' --mode + \ | bcftools filter --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ | bcftools view --exclude '{params.hard_filters}' -o {output.vcf_filtered} -O z; @@ -133,8 +127,6 @@ if config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"]["a AF_min=get_tag_and_filtername(snv_quality_filters, "balsamic_low_af"), hard_filters=TgaSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.VARDICT, soft_filter_normals=soft_filter_normal, exome=exome), case_name=config["analysis"]["case_id"], - threads: - get_threads(cluster_config,'bcftools_quality_filter_vardict_tumor_only') message: "Quality filtering vardict tumor-only annotated variants using bcftools for {params.case_name}" shell: @@ -170,20 +162,18 @@ elif config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"][ in_normal = get_tag_and_filtername(snv_quality_filters, "in_normal"), hard_filters = TgaSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.TNSCOPE, soft_filter_normals=soft_filter_normal, exome=exome), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config,'bcftools_quality_filter_vardict_tumor_normal') message: "Quality filtering vardict tumor-normal variants 
using bcftools for {params.case_name} " shell: """ bcftools view {input.vcf} \ | bcftools annotate -x FILTER/alt_allele_in_normal \ - | bcftools filter --threads {threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' \ - | bcftools filter --threads {threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ - | bcftools filter --threads {threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ - | bcftools filter --threads {threads} --exclude "INFO/RPA > {params.rpa[0]}" --soft-filter '{params.rpa[1]}' --mode + \ - | bcftools filter --threads {threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ - | bcftools filter --threads {threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --include 'SUM(FORMAT/AD[0:0]+FORMAT/AD[0:1]) >= {params.DP[0]} || SUM(FORMAT/AD[1:0]+FORMAT/AD[1:1]) >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --include "INFO/SOR < {params.sor[0]}" --soft-filter '{params.sor[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --exclude "INFO/RPA > {params.rpa[0]}" --soft-filter '{params.rpa[1]}' --mode + \ + | bcftools filter --threads {resources.threads} --include 'FORMAT/AD[0:1] >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --include 'FORMAT/AF[0] >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' \ | bcftools view --exclude '{params.hard_filters}' -o 
{output.vcf_filtered} -O z; tabix -p vcf -f {output.vcf_filtered}; @@ -206,8 +196,6 @@ elif config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"][ in_normal=get_tag_and_filtername(snv_quality_filters,"in_normal"), hard_filters=TgaSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.VARDICT, soft_filter_normals=soft_filter_normal, exome=exome), case_name=config["analysis"]["case_id"], - threads: - get_threads(cluster_config,'bcftools_quality_filter_vardict_tumor_normal') message: "Quality filtering vardict tumor-normal variants using bcftools for {params.case_name} " shell: @@ -215,7 +203,7 @@ elif config["analysis"]["sequencing_type"] == 'targeted' and config["analysis"][ bcftools view {input.vcf} | \ bcftools annotate -x FILTER/MAF0.05 | \ bcftools filter --include 'SMPL_MIN(FMT/MQ) >= {params.MQ[0]}' --soft-filter '{params.MQ[1]}' --mode + | \ - bcftools filter --threads {threads} --exclude 'sum(FORMAT/AF[1:0])/sum(FORMAT/AF[0:0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' | \ + bcftools filter --threads {resources.threads} --exclude 'sum(FORMAT/AF[1:0])/sum(FORMAT/AF[0:0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' | \ bcftools filter --include 'INFO/DP >= {params.DP[0]}' --soft-filter '{params.DP[1]}' --mode '+' | \ bcftools filter --include 'INFO/VD >= {params.AD[0]}' --soft-filter '{params.AD[1]}' --mode '+' | \ bcftools filter --include 'INFO/AF >= {params.AF_min[0]}' --soft-filter '{params.AF_min[1]}' --mode '+' | \ @@ -239,15 +227,13 @@ if config_model.analysis.analysis_workflow == AnalysisWorkflow.BALSAMIC_UMI and case_name = config["analysis"]["case_id"], in_normal=get_tag_and_filtername(umi_snv_quality_filters,"in_normal"), hard_filters=TgaUmiSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.TNSCOPE, soft_filter_normals=soft_filter_normal), - threads: 
- get_threads(cluster_config,'bcftools_quality_filter_TNscope_umi_tumor_normal') message: "Quality filtering TNscope_umi tumor-normal annotated variants using bcftools for {params.case_name} " shell: """ bcftools view {input.vcf} \ | bcftools annotate -x FILTER/alt_allele_in_normal \ - | bcftools filter --threads {threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' \ + | bcftools filter --threads {resources.threads} --exclude 'sum(FORMAT/AF[1])/sum(FORMAT/AF[0])>{params.in_normal[0]}' --soft-filter '{params.in_normal[1]}' --mode '+' \ | bcftools view --exclude '{params.hard_filters}' -o {output.vcf_filtered} -O z; tabix -p vcf -f {output.vcf_filtered}; @@ -267,8 +253,6 @@ elif config_model.analysis.analysis_workflow == AnalysisWorkflow.BALSAMIC_UMI an params: hard_filters=TgaUmiSNVFilters.get_bcftools_filter_string(category="quality", analysis_type=analysis_type, variant_caller=BioinfoTools.TNSCOPE, soft_filter_normals=soft_filter_normal), case_name=config["analysis"]["case_id"], - threads: - get_threads(cluster_config,'bcftools_quality_filter_tnscope_umi_tumor_only') message: "Quality filtering tnscope_umi tumor-only variants using bcftools for {params.case_name}" shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/snv_t_varcall_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/snv_t_varcall_tga.rule index afb4e2bdb..a6cb81e94 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/snv_t_varcall_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/snv_t_varcall_tga.rule @@ -31,8 +31,6 @@ rule sentieon_tnscope_tga_tumor_only: tumor_af = params.tnscope_tga.filter_tumor_af, tumor_lod = params.tnscope_tga.min_tumorLOD, housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "research"} - threads: - get_threads(cluster_config, "sentieon_tnscope_tga_t_only") message: "Calling single nucleotide variants using TNscope for {params.tumor}" shell: @@ -43,7 +41,7 @@ export 
SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref_fa} \ -i {input.bam} \ --interval {input.bed} \ @@ -85,8 +83,6 @@ rule vardict_tumor_only: max_mm = params.vardict.max_mm, col_info = params.vardict.column_info, case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "vardict_tumor_only") message: "Calling single nucleotide variants using vardict for {params.case_name}" shell: @@ -101,7 +97,7 @@ vardict-java -I 600 \ -G {input.fa} \ -f {params.af} \ -N {params.case_name} \ --th {threads} \ +-th {resources.threads} \ -b {input.bamT} \ {params.col_info} {input.bed} \ | teststrandbias.R \ @@ -126,8 +122,6 @@ rule post_process_vardict: Path(benchmark_dir,'vardict_merge_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - threads: - get_threads(cluster_config,"post_process_vardict") message: ("Bgzip, index and reheader merged VarDict vcf and add FOUND_IN for case: {params.case_name}") shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/snv_tn_varcall_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/snv_tn_varcall_tga.rule index 4ea566a04..7447bc927 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/snv_tn_varcall_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/snv_tn_varcall_tga.rule @@ -33,8 +33,6 @@ rule sentieon_tnscope_tga_tumor_normal: sentieon_exec=config_model.sentieon.sentieon_exec, sentieon_lic=config_model.sentieon.sentieon_license, housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "research"} - threads: - get_threads(cluster_config, "sentieon_tnscope_tga_tumor_normal") message: "Calling single nucleotide variants using TNscope for {params.case_name}" shell: @@ -45,7 +43,7 @@ export SENTIEON_TMPDIR={params.tmpdir}; export SENTIEON_LICENSE={params.sentieon_lic}; {params.sentieon_exec} 
driver \ --t {threads} \ +-t {resources.threads} \ -r {input.ref_fa} \ -i {input.bamT} \ -i {input.bamN} \ @@ -88,8 +86,6 @@ rule vardict_tumor_normal: max_mm = params.vardict.max_mm, col_info = params.vardict.column_info, case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "vardict_tumor_normal") message: "Calling variants using vardict for {params.case_name}" shell: @@ -100,7 +96,7 @@ export VAR_DICT_OPTS='\"-Xms10G\" \"-Xmx25G\" \"-XX:+UseG1GC\" \"-XX:MaxHeapFree vardict-java -I 600 -G {input.fa} -f {params.af} -N {params.case_name} \ -b \"{input.bamT}|{input.bamN}\" \ --th {threads} \ +-th {resources.threads} \ {params.col_info} {input.bed} \ | testsomatic.R \ | var2vcf_paired.pl -P {params.max_pval} \ @@ -124,8 +120,6 @@ rule post_process_vardict: Path(benchmark_dir,'post_process_vardict_' + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - threads: - get_threads(cluster_config,"post_process_vardict") message: ("Bgzip, index and reheader merged VarDict vcf for case: {params.case_name}") shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_normal_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_normal_tga.rule index 18f817213..dca6a7a0a 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_normal_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_normal_tga.rule @@ -12,8 +12,6 @@ rule bcftools_merge_germlineSNV_research: Path(f"{benchmark_dir}/bcftools_merge_germlineSNV_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - threads: - get_threads(cluster_config, "bcftools_merge_germlineSNV_research") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), case_name = config["analysis"]["case_id"], @@ -23,7 +21,7 @@ rule bcftools_merge_germlineSNV_research: 
""" # merge the tumor and normal VCF bcftools merge \ ---threads {threads} \ +--threads {resources.threads} \ -O z -o {params.tmpdir}/SNV.merged.vcf.gz \ {input.snv_vcf_tumor} {input.snv_vcf_normal}; @@ -57,8 +55,6 @@ rule cnvkit_segment_CNV_research: Path(f"{benchmark_dir}/cnvkit_segment_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("cnvkit") + ".sif").as_posix() - threads: - get_threads(cluster_config, "cnvkit_segment_CNV_research") params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "cnv"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), @@ -109,7 +105,7 @@ cnvkit.py segment {output.cnr} \ --output {output.cns_initial} \ --method cbs \ --drop-low-coverage \ ---processes {threads}; +--processes {resources.threads}; # Convert copy number segments (initial.cns) to standard SEG format to be used in PureCN cnvkit.py export seg {output.cns_initial} \ @@ -131,8 +127,6 @@ rule purecn_call_CNV_research: Path(f"{benchmark_dir}/purecn_call_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("purecn") + ".sif").as_posix() - threads: - get_threads(cluster_config, "purecn_call_CNV_research") params: cnv_dir = cnv_dir, name = config["analysis"]["case_id"], @@ -223,8 +217,6 @@ rule cnvkit_call_CNV_research: Path(benchmark_dir + "cnvkit_call_CNV_research" + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("cnvkit") + ".sif").as_posix() - threads: - get_threads(cluster_config, "cnvkit_call_CNV_research") params: housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "cnv"}, tmpdir = tempfile.mkdtemp(prefix=tmp_dir), @@ -307,8 +299,6 @@ rule delly_cnv_tumor_normal: normal = "NORMAL", tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_normal") message: ("Calling copy number variants using delly for 
{params.case_name}") shell: @@ -346,8 +336,6 @@ rule bcftools_sort_cnvkitCNV_research: Path(f"{benchmark_dir}/bcftools_sort_cnvkitCNV_research_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - threads: - get_threads(cluster_config, "bcftools_sort_cnvkitCNV_research") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), cnv_dir = cnv_dir, diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_only_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_only_tga.rule index 865ee2eb7..eefbeeb9b 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_only_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_cnv_tumor_only_tga.rule @@ -16,8 +16,6 @@ rule cnvkit_segment_CNV_research: Path(f"{benchmark_dir}/cnvkit_segment_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("cnvkit") + ".sif").as_posix() - threads: - get_threads(cluster_config,"cnvkit_segment_CNV_research") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "cnv"}, @@ -62,7 +60,7 @@ cnvkit.py segment {output.cnr} \ --output {output.cns_initial} \ --method cbs \ --drop-low-coverage \ ---processes {threads}; +--processes {resources.threads}; # Convert copy number segments (initial.cns) to standard SEG format to be used for PureCN cnvkit.py export seg {output.cns_initial} --output {output.segment}; @@ -83,8 +81,6 @@ rule purecn_call_CNV_research: Path(f"{benchmark_dir}/purecn_call_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("purecn") + ".sif").as_posix() - threads: - get_threads(cluster_config, "purecn_call_CNV_research") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), cnv_dir = cnv_dir, @@ -175,8 +171,6 @@ rule cnvkit_call_CNV_research: 
Path(f"{benchmark_dir}/cnvkit_call_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("cnvkit") + ".sif").as_posix() - threads: - get_threads(cluster_config, "cnvkit_call_CNV_research") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), housekeeper_id = {"id": config["analysis"]["case_id"], "tags": "cnv"}, @@ -254,8 +248,6 @@ rule delly_cnv_tumor_only: housekeeper_id={"id": config["analysis"]["case_id"], "tags": "clinical"}, tumor="TUMOR", case_name=config["analysis"]["case_id"] - threads: - get_threads(cluster_config,"delly_tumor_only") message: ("Calling copy number variants using delly for {params.case_name}") shell: @@ -281,8 +273,6 @@ rule bcftools_sort_cnvkitCNV_research: Path(f"{benchmark_dir}/bcftools_sort_cnvkitCNV_research_{config['analysis']['case_id']}.tsv").as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() - threads: - get_threads(cluster_config, "bcftools_sort_cnvkitCNV_research") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), cnv_dir = cnv_dir, diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_normal.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_normal.rule index 5ec493d49..1cad72a4f 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_normal.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_normal.rule @@ -16,15 +16,13 @@ if config["analysis"]["sequencing_type"] == 'wgs': params: case_name=config["analysis"]["case_id"], process_cnv=get_script_path("process_CNV.py"), - threads: - get_threads(cluster_config,"bcftools_process_SV_CNV") message: ("Processing SVs and CNVs for {params.case_name}") shell: """ - bcftools view --threads {threads} -f PASS -O z -o {output.delly_sv} {input.delly_sv}; + bcftools view --threads {resources.threads} -f PASS -O z -o 
{output.delly_sv} {input.delly_sv}; - bcftools view --threads {threads} -f PASS -O z -o {output.tmp_delly_cnv} {input.delly_cnv}; + bcftools view --threads {resources.threads} -f PASS -O z -o {output.tmp_delly_cnv} {input.delly_cnv}; python {params.process_cnv} -f {output.tmp_delly_cnv} -c delly | bgzip -l 9 -c > {output.delly_cnv}; @@ -54,15 +52,13 @@ else: params: case_name = config["analysis"]["case_id"], process_cnv = get_script_path("process_CNV.py"), - threads: - get_threads(cluster_config, "bcftools_process_SV_CNV") message: ("Processing SVs and CNVs for {params.case_name}") shell: """ - bcftools view --threads {threads} -f PASS -O z -o {output.delly_sv} {input.delly_sv}; + bcftools view --threads {resources.threads} -f PASS -O z -o {output.delly_sv} {input.delly_sv}; - bcftools view --threads {threads} -f PASS -O z -o {output.tmp_delly_cnv} {input.delly_cnv}; + bcftools view --threads {resources.threads} -f PASS -O z -o {output.tmp_delly_cnv} {input.delly_cnv}; python {params.process_cnv} -f {output.tmp_delly_cnv} -c delly | bgzip -l 9 -c > {output.delly_cnv}; @@ -93,8 +89,6 @@ rule svdb_merge_tumor_normal: case_name = config["analysis"]["case_id"], vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))], svdb_priority= ",".join(svdb_callers_prio) - threads: - get_threads(cluster_config, "svdb_merge_tumor_normal") message: "Merging structural and copy number variants using SVDB for {params.case_name}" shell: @@ -119,13 +113,11 @@ rule bcftools_quality_filter_svdb: Path(singularity_image,config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "bcftools_quality_filter_svdb") message: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ - bcftools view --threads {threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; + bcftools 
view --threads {resources.threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; tabix -p vcf -f {output.vcf_pass_svdb_research}; """ diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_only.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_only.rule index 0ebf3026d..9207b4638 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_only.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_postprocess_and_filter_tumor_only.rule @@ -14,15 +14,13 @@ rule bcftools_process_SV_CNV: params: case_name = config["analysis"]["case_id"], process_cnv = get_script_path("process_CNV.py"), - threads: - get_threads(cluster_config, "bcftools_process_SV_CNV") message: ("Converting BCF from delly to VCF for {params.case_name}") shell: """ - bcftools view --threads {threads} -f PASS -O z -o {output.delly_sv} {input.delly_sv}; + bcftools view --threads {resources.threads} -f PASS -O z -o {output.delly_sv} {input.delly_sv}; - bcftools view --threads {threads} -f PASS -O z -o {output.cnv} {input.delly_cnv}; + bcftools view --threads {resources.threads} -f PASS -O z -o {output.cnv} {input.delly_cnv}; python {params.process_cnv} -f {output.cnv} -c delly | bgzip -l 9 -c > {output.delly_cnv}; @@ -52,8 +50,6 @@ rule svdb_merge_tumor_only: case_name = config["analysis"]["case_id"], vcf= lambda wildcards, input:[input[index] + ":" + svdb_callers_prio[index] for index in range(0,len(input))], svdb_priority= ",".join(svdb_callers_prio) - threads: - get_threads(cluster_config, "svdb_merge_tumor_only") message: "Merging structural and copy number variants using SVDB for {params.case_name}" shell: @@ -78,13 +74,11 @@ rule bcftools_quality_filter_svdb: Path(singularity_image,config["bioinfo_tools"].get("bcftools") + ".sif").as_posix() params: case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, 
"bcftools_quality_filter_svdb") message: "Filtering merged research structural and copy number variants using bcftools for {params.case_name}" shell: """ - bcftools view --threads {threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; + bcftools view --threads {resources.threads} -f .,PASS -o {output.vcf_pass_svdb_research} -O z {input.vcf_svdb}; tabix -p vcf -f {output.vcf_pass_svdb_research}; """ \ No newline at end of file diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_tga.rule index 9119d0348..451b8741c 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_tga.rule @@ -2,7 +2,7 @@ # coding: utf-8 -rule manta_tumor_normal: +rule manta_tumor_normal_tga: input: fa = config["reference"]["reference_genome"], bamN = config_model.get_final_bam_name(bam_dir=bam_dir, sample_name=normal_sample, specified_suffix="dedup.fixmate.qualcapped"), @@ -23,8 +23,6 @@ rule manta_tumor_normal: case_name = case_id, low_pr_sr_count_value = MANTA_FILTERS.low_pr_sr_count.tag_value, low_pr_sr_count_filter_name = MANTA_FILTERS.low_pr_sr_count.filter_name, - threads: - get_threads(cluster_config, "manta_tumor_normal") message: ("Calling structural variants using manta for {params.case_name} and " "index the compressed vcf file") @@ -40,7 +38,7 @@ configManta.py \ --referenceFasta={input.fa} \ --runDir={params.tmpdir}; -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {resources.threads}; convertInversion.py \ $samtools_path \ @@ -49,7 +47,7 @@ convertInversion.py \ bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[1:1]+FORMAT/SR[1:1]) < {params.low_pr_sr_count_value}' --soft-filter 
'{params.low_pr_sr_count_filter_name}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz +bcftools filter --threads {resources.threads} --exclude 'SUM(FORMAT/PR[1:1]+FORMAT/SR[1:1]) < {params.low_pr_sr_count_value}' --soft-filter '{params.low_pr_sr_count_filter_name}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz tabix -p vcf -f {output.final}; @@ -58,7 +56,7 @@ echo -e \"{params.normal}\\tNORMAL\\n{params.tumor}\\tTUMOR\" > {output.namemap} rm -rf {params.tmpdir}; """ -rule delly_sv_tumor_normal: +rule delly_sv_tumor_normal_tga: input: fa = config["reference"]["reference_genome"], bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), @@ -76,8 +74,6 @@ rule delly_sv_tumor_normal: tumor = "TUMOR", normal = "NORMAL", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_normal") message: ("Calling structural variants using delly for {params.case_name}," "filter somatic variants and finally convert from bcf to compressed vcf file") diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_wgs.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_wgs.rule index d6af35990..93fb9008b 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_wgs.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_normal_wgs.rule @@ -2,7 +2,7 @@ # coding: utf-8 -rule manta_tumor_normal: +rule manta_tumor_normal_wgs: input: fa = config["reference"]["reference_genome"], bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), @@ -23,8 +23,6 @@ rule manta_tumor_normal: case_name = case_id, low_pr_sr_count_value = MANTA_FILTERS.low_pr_sr_count.tag_value, low_pr_sr_count_filter_name = MANTA_FILTERS.low_pr_sr_count.filter_name, - threads: - get_threads(cluster_config, "manta_tumor_normal") message: ("Calling structural 
variants using manta for {params.case_name} and " "index the compressed vcf file") @@ -40,7 +38,7 @@ configManta.py \ --referenceFasta={input.fa} \ --runDir={params.tmpdir}; -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {resources.threads}; convertInversion.py \ $samtools_path \ @@ -49,7 +47,7 @@ convertInversion.py \ bgzip -l 9 {params.tmpdir}/results/variants/somaticSV_converted.vcf ; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[1:1]+FORMAT/SR[1:1]) < {params.low_pr_sr_count_value}' --soft-filter '{params.low_pr_sr_count_filter_name}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz +bcftools filter --threads {resources.threads} --exclude 'SUM(FORMAT/PR[1:1]+FORMAT/SR[1:1]) < {params.low_pr_sr_count_value}' --soft-filter '{params.low_pr_sr_count_filter_name}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/somaticSV_converted.vcf.gz tabix -p vcf -f {output.final}; @@ -58,7 +56,7 @@ echo -e \"{params.normal}\\tNORMAL\\n{params.tumor}\\tTUMOR\" > {output.namemap} rm -rf {params.tmpdir}; """ -rule delly_sv_tumor_normal: +rule delly_sv_tumor_normal_wgs: input: fa = config["reference"]["reference_genome"], bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), @@ -76,8 +74,6 @@ rule delly_sv_tumor_normal: tumor = "TUMOR", normal = "NORMAL", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_normal") message: ("Calling structural variants using delly for {params.case_name}," "filter somatic variants and finally convert from bcf to compressed vcf file") @@ -97,7 +93,7 @@ rm -rf {params.tmpdir}; """ -rule delly_cnv_tumor_normal: +rule delly_cnv_tumor_normal_wgs: input: fa = config["reference"]["reference_genome"], bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), @@ -117,8 +113,6 @@ rule 
delly_cnv_tumor_normal: normal = "NORMAL", tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_normal") message: ("Calling copy number variants using delly for {params.case_name}") shell: @@ -146,7 +140,7 @@ echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap} rm -rf {params.tmpdir}; """ -rule ascat_tumor_normal: +rule ascat_tumor_normal_wgs: input: fa = config["reference"]["reference_genome"] , bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), @@ -175,8 +169,6 @@ rule ascat_tumor_normal: genome = config["reference"]["genome_version"], case_name = config["analysis"]["case_id"], gender = config["analysis"]["gender"] - threads: - get_threads(cluster_config, "ascat_tumor_normal") message: ("Calling copy number variants using ascatNGS for {params.case_name}") shell: @@ -191,7 +183,7 @@ ascat.pl \ -species human \ -genderChr Y \ -assembly {params.genome} \ - -cpus {threads} \ + -cpus {resources.threads} \ -reference {input.fa} \ -snp_gc {input.gccorrection} \ -outdir {params.tmpdir} \ @@ -224,7 +216,7 @@ echo -e \"{params.tumor}\\tTUMOR\\n{params.normal}\\tNORMAL\" > {output.namemap} rm -rf {params.tmpdir}; """ -rule tiddit_sv_tumor_normal: +rule tiddit_sv_tumor_normal_wgs: input: fa = config["reference"]["reference_genome"], bamN = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = normal_sample), @@ -246,8 +238,6 @@ rule tiddit_sv_tumor_normal: normal = "NORMAL", case_name = config["analysis"]["case_id"], filter_svs = get_script_path("filter_SVs.py") - threads: - get_threads(cluster_config, "tiddit_sv_tumor_normal") message: ("Calling structural variants using tiddit for {params.case_name}") shell: @@ -288,7 +278,7 @@ rm -rf {params.tmpdir}; """ -rule igh_dux4_detection_tumor_normal: +rule igh_dux4_detection_tumor_normal_wgs: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = 
bam_dir, sample_name = tumor_sample), @@ -303,8 +293,6 @@ rule igh_dux4_detection_tumor_normal: genome_version = config["reference"]["genome_version"], custom_sv_detection_script = get_script_path("igh_dux4_detection.sh"), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "igh_dux4_detection") message: "Detecting IGH::DUX4 rearrangement for {params.case_name} using samtools." shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_tga.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_tga.rule index a5dc50451..8c654f5ba 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_tga.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_tga.rule @@ -2,7 +2,7 @@ # coding: utf-8 -rule manta_tumor_only: +rule manta_tumor_only_tga: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir=bam_dir, sample_name=tumor_sample, specified_suffix="dedup.fixmate.qualcapped"), @@ -20,8 +20,6 @@ rule manta_tumor_only: tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), case_name = config["analysis"]["case_id"], low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], - threads: - get_threads(cluster_config, "manta_tumor_only") message: ("Calling structural variants using manta for {params.case_name} and" "index the compressed vcf file") @@ -36,7 +34,7 @@ configManta.py \ --referenceFasta={input.fa} \ --runDir={params.tmpdir}; -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {resources.threads}; convertInversion.py \ $samtools_path \ @@ -45,7 +43,7 @@ convertInversion.py \ bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter 
'{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz +bcftools filter --threads {resources.threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz tabix -p vcf -f {output.final}; @@ -54,7 +52,7 @@ echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; rm -rf {params.tmpdir}; """ -rule delly_sv_tumor_only: +rule delly_sv_tumor_only_tga: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample), @@ -70,8 +68,6 @@ rule delly_sv_tumor_only: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_only") message: ("Calling structural variants using delly for {params.case_name}") shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_wgs.rule b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_wgs.rule index 260f50aa3..33e4da8ad 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_wgs.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/somatic_sv_tumor_only_wgs.rule @@ -2,7 +2,7 @@ # coding: utf-8 -rule manta_tumor_only: +rule manta_tumor_only_wgs: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) @@ -20,8 +20,6 @@ rule manta_tumor_only: tumor = config_model.get_sample_name_by_type(SampleType.TUMOR), case_name = config["analysis"]["case_id"], low_pr_sr_count = [MANTA_FILTERS.low_pr_sr_count.tag_value,MANTA_FILTERS.low_pr_sr_count.filter_name], - threads: - get_threads(cluster_config, "manta_tumor_only") message: ("Calling structural variants using manta for {params.case_name} and" "index the compressed vcf file") 
@@ -36,7 +34,7 @@ configManta.py \ --referenceFasta={input.fa} \ --runDir={params.tmpdir}; -python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {threads}; +python {params.tmpdir}/runWorkflow.py -m {params.runmode} -j {resources.threads}; convertInversion.py \ $samtools_path \ @@ -45,7 +43,7 @@ convertInversion.py \ bgzip -l 9 {params.tmpdir}/results/variants/tumorSV_converted.vcf; -bcftools filter --threads {threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz +bcftools filter --threads {resources.threads} --exclude 'SUM(FORMAT/PR[0:1]+FORMAT/SR[0:1]) < {params.low_pr_sr_count[0]}' --soft-filter '{params.low_pr_sr_count[1]}' --mode '+' -o {output.final} -O z {params.tmpdir}/results/variants/tumorSV_converted.vcf.gz tabix -p vcf -f {output.final}; @@ -55,7 +53,7 @@ rm -rf {params.tmpdir}; """ -rule delly_sv_tumor_only: +rule delly_sv_tumor_only_wgs: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample), @@ -71,8 +69,6 @@ rule delly_sv_tumor_only: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_only") message: ("Calling structural variants using delly for {params.case_name}") shell: @@ -87,7 +83,7 @@ rm -rf {params.tmpdir}; """ -rule delly_cnv_tumor_only: +rule delly_cnv_tumor_only_wgs: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample), @@ -106,8 +102,6 @@ rule delly_cnv_tumor_only: housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "clinical"}, tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "delly_tumor_only") message: ("Calling copy number variants 
using delly for {params.case_name}") shell: @@ -121,7 +115,7 @@ echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; rm -rf {params.tmpdir}; """ -rule tiddit_sv_tumor_only: +rule tiddit_sv_tumor_only_wgs: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) @@ -138,8 +132,6 @@ rule tiddit_sv_tumor_only: housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "clinical"}, tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "tiddit_sv_tumor_only") message: ("Calling structural variants using tiddit for {params.case_name}") shell: @@ -161,7 +153,7 @@ echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; rm -rf {params.tmpdir}; """ -rule cnvpytor_tumor_only: +rule cnvpytor_tumor_only_wgs: input: bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample), vcfT = vcf_dir + "SNV.somatic." + config["analysis"]["case_id"] + ".tnscope.vcf.gz", @@ -179,8 +171,6 @@ rule cnvpytor_tumor_only: housekeeper_id= {"id": config["analysis"]["case_id"],"tags": "clinical"}, tumor = "TUMOR", case_name = config["analysis"]["case_id"] - threads: - get_threads(cluster_config, "cnvpytor_tumor_only") message: ("Calling copy number variants using cnvpytor for {params.case_name}") shell: @@ -190,19 +180,19 @@ export tumor={params.tumor}; export tumor_file={params.tmpdir}/$tumor -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -rd {input.bamT}; +cnvpytor --max_cores {resources.threads} -root {params.tmpdir}/{params.tumor}.pytor -rd {input.bamT}; -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -his 1000 10000 100000; +cnvpytor --max_cores {resources.threads} -root {params.tmpdir}/{params.tumor}.pytor -his 1000 10000 100000; -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -partition 1000 10000 100000; +cnvpytor --max_cores {resources.threads} -root 
{params.tmpdir}/{params.tumor}.pytor -partition 1000 10000 100000; -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -snp {input.vcfT} -sample {params.tumor}; +cnvpytor --max_cores {resources.threads} -root {params.tmpdir}/{params.tumor}.pytor -snp {input.vcfT} -sample {params.tumor}; -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -mask_snps; +cnvpytor --max_cores {resources.threads} -root {params.tmpdir}/{params.tumor}.pytor -mask_snps; -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -baf 10000 100000; +cnvpytor --max_cores {resources.threads} -root {params.tmpdir}/{params.tumor}.pytor -baf 10000 100000; -cnvpytor --max_cores {threads} -root {params.tmpdir}/{params.tumor}.pytor -call 1000 10000 100000 1> {params.tmpdir}/{params.tumor}.call +cnvpytor --max_cores {resources.threads} -root {params.tmpdir}/{params.tumor}.pytor -call 1000 10000 100000 1> {params.tmpdir}/{params.tumor}.call echo -e \"set print_filename $tumor_file.vcf\\n\ set Q0_range -1 0.5\\nset p_range 0 0.0001\\n\ @@ -228,7 +218,7 @@ echo -e \"{params.tumor}\\tTUMOR\" > {output.namemap}; rm -rf {params.tmpdir}; """ -rule igh_dux4_detection_tumor_only: +rule igh_dux4_detection_tumor_only_wgs: input: fa = config["reference"]["reference_genome"], bamT = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) @@ -242,8 +232,6 @@ rule igh_dux4_detection_tumor_only: genome_version = config["reference"]["genome_version"], custom_sv_detection_script = get_script_path("igh_dux4_detection.sh"), case_name = config["analysis"]["case_id"], - threads: - get_threads(cluster_config, "igh_dux4_detection") message: "Detecting IGH::DUX4 rearrangement for {params.case_name} using samtools." 
shell: diff --git a/BALSAMIC/snakemake_rules/variant_calling/tnscope_post_process.rule b/BALSAMIC/snakemake_rules/variant_calling/tnscope_post_process.rule index 962ff503e..9693a35c6 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/tnscope_post_process.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/tnscope_post_process.rule @@ -21,8 +21,6 @@ if config["analysis"]["sequencing_type"] == 'targeted': matched_normal_filternames=",".join(BaseSNVFilters.MATCHED_NORMAL_FILTER_NAMES), sentieon_exec=config_model.sentieon.sentieon_exec, sentieon_lic=config_model.sentieon.sentieon_license, - threads: - get_threads(cluster_config,'post_process_tnscope') message: "Merge TNscope SNVs with same phaseID to MNVs." "Add DP and AF tumor sample info and FOUND_IN to INFO field: {params.case_name}" @@ -46,8 +44,6 @@ if config["analysis"]["sequencing_type"] == 'targeted': case_name=config["analysis"]["case_id"], benchmark: Path(benchmark_dir,'tnscope_sort_' + config["analysis"]["case_id"] + ".tsv").as_posix() - threads: - get_threads(cluster_config,"tnscope_sort") message: ("Sorting TNscope MNV post-processed files with awk {params.case_name}") shell: @@ -75,8 +71,6 @@ if config["analysis"]["sequencing_type"] == 'targeted': tmpdir=tempfile.mkdtemp(prefix=tmp_dir), case_name=config["analysis"]["case_id"], variant_caller="tnscope" - threads: - get_threads(cluster_config,'post_process_tnscope') message: "Add DP and AF tumor sample info and FOUND_IN to INFO field and remove MERGED variants: {params.case_name}" shell: @@ -106,8 +100,6 @@ else: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), case_name = config["analysis"]["case_id"], variant_caller= "tnscope" - threads: - get_threads(cluster_config, 'post_process_tnscope') message: "Merge TNscope SNVs with same phaseID to MNVs." 
"Add DP and AF tumor sample info and FOUND_IN to INFO field: {params.case_name}" diff --git a/BALSAMIC/snakemake_rules/variant_calling/vardict_pre_and_postprocessing.rule b/BALSAMIC/snakemake_rules/variant_calling/vardict_pre_and_postprocessing.rule index fb157a20d..6e59989a4 100644 --- a/BALSAMIC/snakemake_rules/variant_calling/vardict_pre_and_postprocessing.rule +++ b/BALSAMIC/snakemake_rules/variant_calling/vardict_pre_and_postprocessing.rule @@ -3,16 +3,13 @@ rule bedtools_pad_bedfile: input: bed = config["panel"]["capture_kit"], - chrom = config["reference"]["genome_chrom_size"], - bam = config_model.get_final_bam_name(bam_dir = bam_dir, sample_name = tumor_sample) + chrom = config["reference"]["genome_chrom_size"] output: bed = vcf_dir + "pad_bedfile/" + "100bp_padding_" + capture_kit benchmark: Path(benchmark_dir, 'bedtools_pad_bedfile.tsv').as_posix() singularity: Path(singularity_image, config["bioinfo_tools"].get("bedtools") + ".sif").as_posix() - threads: - get_threads(cluster_config, "pad_bedfile") params: tmpdir = tempfile.mkdtemp(prefix=tmp_dir), pad_bed_dir = vcf_dir + "pad_bedfile/", @@ -30,7 +27,6 @@ bedtools slop -b 100 -i {input.bed} -g {params.pad_bed_dir}hg19.chrom.sizes \ | sort -k1,1 -k2,2n \ | bedtools merge > {params.pad_bed_dir}100bp_padding_{params.origin_bed} ; -readlink -f {input.bam}; """ rule vardict_sort: @@ -44,8 +40,6 @@ rule vardict_sort: case_name=config["analysis"]["case_id"], benchmark: Path(benchmark_dir, 'vardict_sort_' + config["analysis"]["case_id"] + ".tsv").as_posix() - threads: - get_threads(cluster_config,"vardict_sort") message: ("Sorting VarDict VCF with awk {params.case_name}") shell: @@ -71,8 +65,6 @@ rule gatk_update_vcf_sequence_dictionary: Path(benchmark_dir,"gatk_update_vcf_sequence_dictionary" + config["analysis"]["case_id"] + ".tsv").as_posix() singularity: Path(singularity_image,config["bioinfo_tools"].get("gatk") + ".sif").as_posix() - threads: - get_threads(cluster_config,"gatk_collectreadcounts") message: 
"Running GATK UpdateVCFSequenceDictionary on VarDict VCF." shell: diff --git a/BALSAMIC/utils/cli.py b/BALSAMIC/utils/cli.py index a146fa4d7..2af8584c9 100644 --- a/BALSAMIC/utils/cli.py +++ b/BALSAMIC/utils/cli.py @@ -17,9 +17,7 @@ from BALSAMIC import __version__ as balsamic_version from BALSAMIC.constants.analysis import FASTQ_SUFFIXES, FastqName, PonParams, SampleType from BALSAMIC.constants.cache import CacheVersion -from BALSAMIC.constants.cluster import ClusterConfigType from BALSAMIC.constants.constants import FileType -from BALSAMIC.constants.paths import CONSTANTS_DIR from BALSAMIC.models.config import FastqInfoModel, SampleInstanceModel from BALSAMIC.utils.exc import BalsamicError @@ -96,11 +94,6 @@ def get_snakefile(analysis_type, analysis_workflow="balsamic") -> str: return str(snakefile) -def get_config_path(config_type: ClusterConfigType) -> Path: - """Return a config path given its type.""" - return Path(CONSTANTS_DIR, f"{config_type}.{FileType.JSON}") - - def find_file_index(file_path): indexible_files = { ".bam": [".bam.bai", ".bai"], diff --git a/BALSAMIC/utils/logging.py b/BALSAMIC/utils/logging.py new file mode 100644 index 000000000..66b271192 --- /dev/null +++ b/BALSAMIC/utils/logging.py @@ -0,0 +1,31 @@ +import logging + + +def add_file_logging(log_file: str, logger_name: str = None): + """Adds a file handler to the specified logger without modifying its format. + + Args: + log_file (str): Path to the log file. + logger_name (str, optional): Name of the logger to configure. If None, uses the root logger. 
+ """ + logger = logging.getLogger(logger_name) if logger_name else logging.getLogger() + + # Avoid adding duplicate file handlers + if any(isinstance(h, logging.FileHandler) for h in logger.handlers): + return + + file_handler = logging.FileHandler(log_file) + file_handler.setLevel(logging.INFO) + + # Try to copy the formatter from an existing handler + for handler in logger.handlers: + if handler.formatter: + file_handler.setFormatter(handler.formatter) + break + else: + # Fallback to a default formatter if no other handler has a formatter + file_handler.setFormatter( + logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") + ) + + logger.addHandler(file_handler) diff --git a/BALSAMIC/utils/workflowscripts.py b/BALSAMIC/utils/workflowscripts.py deleted file mode 100644 index 6fae1634f..000000000 --- a/BALSAMIC/utils/workflowscripts.py +++ /dev/null @@ -1,143 +0,0 @@ -import json -from pathlib import Path -import pandas as pd -import matplotlib.pyplot as plt -import numpy as np -import h5py -import typing - -from BALSAMIC.constants.cluster import ClusterConfigType -from BALSAMIC.utils.rule import get_threads -from BALSAMIC.utils.cli import get_config_path - - -def plot_analysis( - log_file: Path, h5_file: Path, fig_name: Path -) -> typing.Union[None, Path]: - """ - plots analysis job. - """ - - cluster_config = get_config_path(ClusterConfigType.ANALYSIS) - with open(cluster_config, "r") as f: - cluster_config = json.load(f) - - log_file_list = Path(log_file).name.split(".") - - job_name = ".".join(log_file_list[0:4]) - rule_name = log_file_list[2] - mem_per_core = 5222 - requested_cores = get_threads(cluster_config, rule_name) - case_name = log_file_list[1] - job_id = log_file_list[4].split("_")[1] - - # This is lazy and memory inefficient, but it gets the job done. 
- df_array = h5py.File(h5_file, "r") - node_name = list(df_array["Steps"]["batch"]["Nodes"].keys())[0] - - if not "Tasks" in list(df_array["Steps"]["batch"]["Nodes"][node_name]): - return None - - df = pd.DataFrame( - np.array(df_array["Steps"]["batch"]["Nodes"][node_name]["Tasks"]["0"]) - ) - - # Convert kilohurtz to gigahurtz - df["CPUFrequency"] = df["CPUFrequency"] / 1e6 - - # Convert kb to gb - df["RSS"] = df["RSS"] / 1e6 - df["VMSize"] = df["VMSize"] / 1e6 - - figure_title = "Case name: {}\nRule: {}\nRun time: {} seconds\nJob name: {}\nJob ID: {}".format( - case_name, rule_name, df["ElapsedTime"].iloc[-1], job_name, job_id - ) - - plt.rcParams["figure.figsize"] = [10, 10] - - fig, (cpu_ax, mem_ax, io_ax) = plt.subplots(nrows=3) - fig.suptitle(figure_title, fontsize=12, horizontalalignment="center") - - cpu_ax_color = "b" - df.plot( - y="CPUUtilization", x="ElapsedTime", ax=cpu_ax, color=cpu_ax_color, style="--" - ) - cpu_ax.set_title("CPU statistics") - cpu_ax.set_xlabel("Wall seconds") - cpu_ax.set_ylabel("Core usage (max {}%)".format(requested_cores * 100)) - cpu_ax.yaxis.label.set_color(cpu_ax_color) - cpu_ax.yaxis.label.set_color(cpu_ax_color) - cpu_ax.tick_params(axis="y", colors=cpu_ax_color) - cpu_ax.legend(loc="best", frameon=False) - cpu_ax.spines["top"].set_visible(False) - cpu_ax.spines["right"].set_visible(False) - max_cpu_line = cpu_ax.axhline(requested_cores * 100, color=cpu_ax_color, ls="-") - max_cpu_line.set_label("Max available") - - mem_ax_color = "g" - df.plot(y="VMSize", x="ElapsedTime", ax=mem_ax, color=mem_ax_color, style="--") - mem_ax.set_title("Memory statistics") - mem_ax.set_xlabel("Wall seconds") - mem_ax.set_ylabel( - "Memory usage GB (max {}GB)".format( - round(mem_per_core * requested_cores / 1024) - ) - ) - mem_ax.yaxis.label.set_color(mem_ax_color) - mem_ax.yaxis.label.set_color(mem_ax_color) - mem_ax.tick_params(axis="y", colors=mem_ax_color) - mem_ax.legend(loc="best", frameon=False) - 
mem_ax.spines["top"].set_visible(False) - mem_ax.spines["right"].set_visible(False) - max_cpu_line = mem_ax.axhline( - round(mem_per_core * requested_cores / 1024), color=mem_ax_color, ls="-" - ) - max_cpu_line.set_label("Max available mem") - - read_io_ax_color = "m" - read_io_ax = df.plot( - y="ReadMB", - x="ElapsedTime", - color=read_io_ax_color, - style="--", - ax=io_ax, - legend=False, - ) - read_io_ax.set_xlabel("Wall seconds") - read_io_ax.set_ylabel("Disk read (MANIFEST.inB)") - read_io_ax.yaxis.label.set_color(read_io_ax_color) - read_io_ax.yaxis.label.set_color(read_io_ax_color) - read_io_ax.tick_params(axis="y", colors=read_io_ax_color) - read_io_ax.spines["top"].set_visible(False) - - write_io_ax = read_io_ax.twinx() - write_io_ax_color = "olive" - write_io_ax = df.plot( - y="WriteMB", - x="ElapsedTime", - ax=write_io_ax, - color=write_io_ax_color, - style="--", - legend=False, - ) - write_io_ax.set_title("Disk I/O statistics") - write_io_ax.set_xlabel("Wall seconds") - write_io_ax.set_ylabel("Disk write (MB)") - write_io_ax.yaxis.label.set_color(write_io_ax_color) - write_io_ax.yaxis.label.set_color(write_io_ax_color) - write_io_ax.tick_params(axis="y", colors=write_io_ax_color) - write_io_ax.yaxis.tick_right() - write_io_ax.spines["top"].set_visible(False) - - handles, labels = [], [] - for ax in [write_io_ax, read_io_ax]: - for h, l in zip(*ax.get_legend_handles_labels()): - handles.append(h) - labels.append(l) - - plt.legend(handles, labels, loc="best", ncol=len(handles), frameon=False) - - plt.tight_layout() - plt.savefig(fig_name, dpi=300) - plt.close() - return fig_name diff --git a/BALSAMIC/workflows/PON.smk b/BALSAMIC/workflows/PON.smk index 9b5971f7f..d91f4a95e 100644 --- a/BALSAMIC/workflows/PON.smk +++ b/BALSAMIC/workflows/PON.smk @@ -10,7 +10,7 @@ from typing import Dict, List from BALSAMIC.constants.analysis import FastqName, Gender, PONWorkflow, SampleType, SequencingType from BALSAMIC.constants.paths import BALSAMIC_DIR -from 
BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.params import BalsamicWorkflowConfig from BALSAMIC.utils.exc import BalsamicError @@ -65,7 +65,6 @@ sequencing_type = config_model.analysis.sequencing_type rules_to_include = [] -rules_to_include.append("snakemake_rules/misc/sleep.rule") if sequencing_type == SequencingType.TARGETED: rules_to_include.append("snakemake_rules/concatenation/concatenation.rule") rules_to_include.append("snakemake_rules/quality_control/fastp_tga.rule") diff --git a/BALSAMIC/workflows/QC.smk b/BALSAMIC/workflows/QC.smk index 912ef9da7..3fefc0e05 100644 --- a/BALSAMIC/workflows/QC.smk +++ b/BALSAMIC/workflows/QC.smk @@ -9,7 +9,7 @@ from typing import Dict, List from BALSAMIC.constants.analysis import AnalysisType, FastqName, SampleType from BALSAMIC.constants.paths import BALSAMIC_DIR from BALSAMIC.constants.rules import SNAKEMAKE_RULES -from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS, SLEEP_BEFORE_START +from BALSAMIC.constants.workflow_params import WORKFLOW_PARAMS from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.params import BalsamicWorkflowConfig from BALSAMIC.utils.cli import check_executable, generate_h5 diff --git a/BALSAMIC/workflows/balsamic.smk b/BALSAMIC/workflows/balsamic.smk index 6b26b4fb8..1e8f28748 100644 --- a/BALSAMIC/workflows/balsamic.smk +++ b/BALSAMIC/workflows/balsamic.smk @@ -30,7 +30,6 @@ from BALSAMIC.constants.variant_filters import ( ) from BALSAMIC.constants.workflow_params import ( WORKFLOW_PARAMS, - SLEEP_BEFORE_START, ) from BALSAMIC.models.config import ConfigModel from BALSAMIC.models.params import BalsamicWorkflowConfig, StructuralVariantFilters @@ -57,8 +56,6 @@ from BALSAMIC.utils.rule import ( get_variant_callers, get_vcf, ) -from BALSAMIC.utils.workflowscripts import plot_analysis -from pypdf import PdfWriter 
from snakemake.exceptions import RuleException, WorkflowError from yapf.yapflib.yapf_api import FormatFile @@ -334,10 +331,6 @@ if "swegen_sv_frequency" in config["reference"]: if config["analysis"]["sequencing_type"] != "wgs": capture_kit = os.path.split(config["panel"]["capture_kit"])[1] -# explicitly check if cluster_config dict has zero keys. -if len(cluster_config.keys()) == 0: - cluster_config = config - if "hg38" in config["reference"]["reference_genome"]: config["reference"]["genome_version"] = "hg38" elif "canfam3" in config["reference"]["reference_genome"]: @@ -538,6 +531,14 @@ quality_control_results = [ Path(qc_dir, "multiqc_report.html").as_posix(), Path(qc_dir, "multiqc_data/multiqc_data.json").as_posix(), ] +quality_control_results.append(expand(qc_dir + "{sample_type}.{sample}.d4", sample_type="tumor", sample=tumor_sample)) +if config["analysis"]["analysis_type"] == "paired": + quality_control_results.append(expand(qc_dir + "{sample_type}.{sample}.d4", sample_type="normal", sample=normal_sample)) + +if config["analysis"]["analysis_workflow"] == "balsamic-umi": + quality_control_results.append(expand(qc_dir + "{sample_type}.{sample}.umi.d4",sample_type="tumor",sample=tumor_sample)) + if config["analysis"]["analysis_type"] == "paired": + quality_control_results.append(expand(qc_dir + "{sample_type}.{sample}.umi.d4",sample_type="normal",sample=normal_sample)) # Analysis results analysis_specific_results = [] @@ -715,40 +716,6 @@ if ( LOG.info(f"Following outputs will be delivered {analysis_specific_results}") -if "benchmark_plots" in config: - log_dir = config["analysis"]["log"] - if not check_executable("sh5util"): - LOG.warning("sh5util executable does not exist. 
Won't be able to plot analysis") - else: - # Make individual plot per job - for log_file in Path(log_dir).glob("*.err"): - log_file_list = log_file.name.split(".") - job_name = ".".join(log_file_list[0:4]) - job_id = log_file_list[4].split("_")[1] - h5_file = generate_h5(job_name, job_id, log_file.parent) - benchmark_plot = Path(benchmark_dir, job_name + ".pdf") - - log_file_plot = plot_analysis(log_file, h5_file, benchmark_plot) - logging.debug( - "Plot file for {} available at: {}".format( - log_file.as_posix(), log_file_plot - ) - ) - - # Merge plots into one based on rule name - for my_rule in vars(rules).keys(): - my_rule_pdf = PdfWriter() - my_rule_plots = list() - for plots in Path(benchmark_dir).glob(f"BALSAMIC*.{my_rule}.*.pdf"): - my_rule_pdf.append(plots.as_posix()) - my_rule_plots.append(plots) - my_rule_pdf.write(Path(benchmark_dir, my_rule + ".pdf").as_posix()) - my_rule_pdf.close() - - # Delete previous plots after merging - for plots in my_rule_plots: - plots.unlink() - if "delivery" in config: wildcard_dict = { "sample": sample_names, @@ -809,26 +776,42 @@ rule all: params: tmp_dir=tmp_dir, case_name=config["analysis"]["case_id"], + status_file=Path(get_result_dir(config), "analysis_status.txt").as_posix(), message: "Finalizing analysis for {params.case_name}" run: import datetime import shutil - from BALSAMIC.utils.metrics import validate_qc_metrics - # Perform validation of extracted QC metrics + status = "SUCCESS" + + error_message = "" try: validate_qc_metrics(read_yaml(input[0])) except ValueError as val_exc: LOG.error(val_exc) - raise BalsamicError - - # Remove temporary directory tree + error_message = str(val_exc) + status = "QC_VALIDATION_FAILED" + except Exception as exc: + LOG.error(exc) + error_message = str(exc) + status = "UNKNOWN_ERROR" + + # Clean up tmp try: shutil.rmtree(params.tmp_dir) except OSError as e: print("Error: %s - %s." 
% (e.filename, e.strerror)) - # Finish timestamp file + # Write status to file + with open(params.status_file,"w") as status_fh: + status_fh.write(status + "\n") + status_fh.write(error_message + "\n") + + # Always write finish file if we've reached here write_finish_file(file_path=output.finish_file) + + # Raise to trigger rule failure if needed + if status != "SUCCESS": + raise ValueError(f"Final rule failed with status: {status}") \ No newline at end of file diff --git a/CHANGELOG.rst b/CHANGELOG.rst index 580213b99..654a8ebd2 100644 --- a/CHANGELOG.rst +++ b/CHANGELOG.rst @@ -4,19 +4,27 @@ Added: ^^^^^^ +* sbatch script for snakemake sequential job submission https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 +* logfile for balsamic wrapper https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 +* analysis status text file for easier prodbioinfo handling https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 +* requested memory to each rule https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 +* added d4file to all workflows for use in chanjo2 https://github.com/Clinical-Genomics/BALSAMIC/pull/1592 Changed: ^^^^^^^^ - +* changed snakemake from running immediate submit to submitting snakemake job to cluster https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 * Updated ensembl vep to version 113.4 https://github.com/Clinical-Genomics/BALSAMIC/pull/1567 * Updated rank model to use GNOMADAF_popmax instead of deprecated gnomAD_AF https://github.com/Clinical-Genomics/BALSAMIC/pull/1567 Removed: ^^^^^^^^ +* removed immediate submit functionality https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 +* removed unused code for benchmark plotting https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 +* removed functionality to disable variant callers https://github.com/Clinical-Genomics/BALSAMIC/pull/1558 + Fixed: ^^^^^^ - * Fixed varcall_py27 container for Manta https://github.com/Clinical-Genomics/BALSAMIC/pull/1567 diff --git 
a/container_tests/d4tools/d4tools.sh b/container_tests/d4tools/d4tools.sh new file mode 100644 index 000000000..9ad2519b7 --- /dev/null +++ b/container_tests/d4tools/d4tools.sh @@ -0,0 +1,15 @@ +#!/bin/bash +# Test if commands exist + +valid_commands=( "d4tools") + +for valid_command in "${valid_commands[@]}" +do + if ! command -v "${valid_command}" &> /dev/null + then + echo "${valid_command} could not be found" + exit 1 + else + echo "${valid_command} command is found and valid" + fi +done \ No newline at end of file diff --git a/tests/commands/init/test_init.py b/tests/commands/init/test_init.py index 2403a9f55..03931c82e 100644 --- a/tests/commands/init/test_init.py +++ b/tests/commands/init/test_init.py @@ -46,36 +46,6 @@ def test_init_hg( assert result.exit_code == EXIT_SUCCESS -def test_init_canfam( - invoke_cli: partial, - tmp_path: Path, - cosmic_key: str, - config_json: str, - reference_graph: str, -): - """Test Balsamic canine workflow init command.""" - - # GIVEN a temporary output directory and a COSMIC key - - # WHEN invoking the init command - result: Result = invoke_cli( - [ - "init", - "--out-dir", - tmp_path.as_posix(), - "--genome-version", - GenomeVersion.CanFam3, - ] - ) - - # THEN the canine reference generation workflow should have successfully started - assert Path(tmp_path, balsamic_version, GenomeVersion.CanFam3, config_json).exists() - assert Path( - tmp_path, balsamic_version, GenomeVersion.CanFam3, reference_graph - ).exists() - assert result.exit_code == EXIT_SUCCESS - - def test_init_hg_no_cosmic_key(invoke_cli: partial, tmp_path: Path, cosmic_key: str): """Test Balsamic init command when a COSMIC key is not provided.""" diff --git a/tests/commands/run/test_run_analysis.py b/tests/commands/run/test_run_analysis.py index e3a594102..27efa5032 100644 --- a/tests/commands/run/test_run_analysis.py +++ b/tests/commands/run/test_run_analysis.py @@ -3,54 +3,76 @@ import json from unittest import mock from pathlib import Path +from 
unittest.mock import patch, MagicMock def test_run_analysis_dragen(invoke_cli, tumor_only_wgs_config): - # GIVEN a WGS config file + # GIVEN a Mock subprocess.run to simulate successful sbatch submission + mock_result = MagicMock() + mock_result.stdout = "Submitted batch job 12345" + mock_result.returncode = 0 + + # GIVEN a tumor-only config file # WHEN running analysis - result = invoke_cli(["run", "analysis", "-s", tumor_only_wgs_config, "--dragen"]) - # THEN it should run without any error + with patch( + "BALSAMIC.models.sbatchsubmitter.subprocess.run", return_value=mock_result + ) as mock_run: + result = invoke_cli( + ["run", "analysis", "-s", tumor_only_wgs_config, "--dragen"] + ) + mock_run.assert_called_once() + assert "sbatch" in mock_run.call_args[0][0] assert result.exit_code == 0 -def test_run_analysis_disable_variant_caller(invoke_cli, tumor_only_config): - # GIVEN a tumor-only config file and variant caller to disable - disabled_varcaller = "mutect" +def test_run_analysis_tumor_normal_dry_run(invoke_cli, tumor_normal_config): + # GIVEN a Mock subprocess.run to simulate successful sbatch submission + mock_result = MagicMock() + mock_result.stdout = "Submitted batch job 12345" + mock_result.returncode = 0 + # GIVEN a tumor-normal config file # WHEN running analysis - result = invoke_cli( - [ - "run", - "analysis", - "-s", - tumor_only_config, - "--disable-variant-caller", - disabled_varcaller, - ] - ) + with patch( + "BALSAMIC.models.sbatchsubmitter.subprocess.run", return_value=mock_result + ) as mock_run: + result = invoke_cli(["run", "analysis", "-s", tumor_normal_config]) + mock_run.assert_called_once() + assert "sbatch" in mock_run.call_args[0][0] # THEN it should run without any error assert result.exit_code == 0 - assert disabled_varcaller not in result.output -def test_run_analysis_tumor_normal_dry_run(invoke_cli, tumor_normal_config): +def test_run_analysis_tumor_normal_run_interactively(invoke_cli, tumor_normal_config): # GIVEN a tumor-normal 
config file # WHEN running analysis - result = invoke_cli(["run", "analysis", "-s", tumor_normal_config]) + result = invoke_cli( + ["run", "analysis", "-s", tumor_normal_config, "--run-interactively"] + ) - # THEN it should run without any error + # THEN it should run without error assert result.exit_code == 0 + # THEN the start interactive string should be printed + assert "Starting balsamic workflow interactively" in result.output def test_run_analysis_tumor_only_dry_run(invoke_cli, tumor_only_config): + # GIVEN a Mock subprocess.run to simulate successful sbatch submission + mock_result = MagicMock() + mock_result.stdout = "Submitted batch job 12345" + mock_result.returncode = 0 + # GIVEN a tumor-only config file # WHEN running analysis - result = invoke_cli( - ["run", "analysis", "-s", tumor_only_config, "--mail-type", "FAIL"] - ) + with patch( + "BALSAMIC.models.sbatchsubmitter.subprocess.run", return_value=mock_result + ) as mock_run: + result = invoke_cli(["run", "analysis", "-s", tumor_only_config]) + mock_run.assert_called_once() + assert "sbatch" in mock_run.call_args[0][0] # THEN it should run without any error assert result.exit_code == 0 @@ -83,7 +105,6 @@ def test_run_analysis_create_dir(invoke_cli, tumor_only_config): "-s", tumor_only_config, "-r", - "--benchmark", "--account", "development", ] diff --git a/tests/commands/test_cli.py b/tests/commands/test_cli.py index 7925dd47e..6c2ca07c9 100644 --- a/tests/commands/test_cli.py +++ b/tests/commands/test_cli.py @@ -71,7 +71,7 @@ def test_run_analysis(invoke_cli): assert "--snakefile" in result.output assert "--sample-config" in result.output assert "--run-mode" in result.output - assert "--cluster-config" in result.output + assert "--workflow-profile" in result.output assert "--run-analysis" in result.output diff --git a/tests/conftest.py b/tests/conftest.py index 22bda6cda..eb3862db6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -5,15 +5,15 @@ from functools import partial from pathlib 
import Path from typing import Any, Dict, List -from unittest import mock import pytest from _pytest.tmpdir import TempPathFactory from click.testing import CliRunner from pydantic_core import Url +from unittest.mock import MagicMock +from snakemake.resources import DefaultResources from BALSAMIC import __version__ as balsamic_version -from BALSAMIC.assets.scripts.sex_prediction_tga import predict_sex_main from BALSAMIC.assets.scripts.preprocess_gens import cli as gens_preprocessing_cli from BALSAMIC.commands.base import cli from BALSAMIC.constants.analysis import ( @@ -23,16 +23,9 @@ RunMode, ) from BALSAMIC.constants.cache import REFERENCE_FILES, DockerContainers, GenomeVersion -from BALSAMIC.constants.cluster import ( - QOS, - ClusterAccount, - ClusterConfigType, - ClusterProfile, - ClusterMailType, -) +from BALSAMIC.constants.cluster import QOS, ClusterAccount from BALSAMIC.constants.constants import FileType from BALSAMIC.constants.paths import ( - CONSTANTS_DIR, FASTQ_TEST_INFO, TEST_DATA_DIR, SENTIEON_TNSCOPE_MODEL, @@ -46,8 +39,8 @@ References, ReferencesHg, ) +from BALSAMIC.models.sbatchsubmitter import SbatchSubmitter from BALSAMIC.models.config import ConfigModel -from BALSAMIC.models.scheduler import Scheduler from BALSAMIC.models.snakemake import SingularityBindPath, SnakemakeExecutable from BALSAMIC.utils.io import read_json, read_yaml, write_json from .helpers import ConfigHelper, Map @@ -328,12 +321,28 @@ def environ(): return "os.environ" +@pytest.fixture +def submitter(tmp_path): + return SbatchSubmitter( + case_id="test_case", + script_path=tmp_path, + result_path=tmp_path, + scan_finished_jobid_status=tmp_path, + log_path=tmp_path, + account="dummy_account", + qos="low", + max_run_hours=2, + snakemake_executable=MagicMock( + get_command=lambda: "snakemake --snakefile Snakefile" + ), + logger=MagicMock(), + ) + + @pytest.fixture(scope="session") -def cluster_analysis_config_path() -> str: - """Return cluster analysis configuration file.""" - 
return Path( - CONSTANTS_DIR, f"{ClusterConfigType.ANALYSIS}.{FileType.JSON}" - ).as_posix() +def default_snakemake_resources() -> DefaultResources: + """Return snakemake default resource.""" + return DefaultResources(["threads=1", "mem_mb=4000", "runtime=60"]) @pytest.fixture(scope="session") @@ -2181,6 +2190,7 @@ def fixture_develop_containers() -> Dict[str, str]: DockerContainers.CNVKIT: "docker://clinicalgenomics/balsamic:develop-cnvkit", DockerContainers.COVERAGE_QC: "docker://clinicalgenomics/balsamic:develop-coverage_qc", DockerContainers.DELLY: "docker://clinicalgenomics/balsamic:develop-delly", + DockerContainers.D4TOOLS: "docker://clinicalgenomics/balsamic:develop-d4tools", DockerContainers.CADD: "docker://clinicalgenomics/balsamic:develop-cadd", DockerContainers.HTSLIB: "docker://clinicalgenomics/balsamic:develop-htslib", DockerContainers.PURECN: "docker://clinicalgenomics/balsamic:develop-purecn", @@ -2496,7 +2506,7 @@ def fixture_singularity_bind_path( @pytest.fixture(scope="session", name="snakemake_options_command") def fixture_snakemake_options_command() -> List[str]: - """Return mocked singularity bind path model.""" + """Return mocked snakemake options.""" return ["--cores", "36"] @@ -2519,13 +2529,13 @@ def fixture_snakemake_executable_data( return { "account": ClusterAccount.DEVELOPMENT.value, "case_id": case_id_tumor_only, - "cluster_config_path": reference_file, "config_path": reference_file, - "disable_variant_caller": "tnscope", "log_dir": session_tmp_path, - "mail_user": mail_user_option, - "profile": ClusterProfile.SLURM, + "cluster_profile": reference_file, + "cluster_job_status_script": reference_file, + "workflow_profile": reference_file, "qos": QOS.HIGH, + "dragen": True, "quiet": True, "run_analysis": True, "run_mode": RunMode.CLUSTER, @@ -2557,17 +2567,14 @@ def fixture_snakemake_executable_validated_data( """Return snakemake model expected data.""" return { "account": ClusterAccount.DEVELOPMENT.value, - "benchmark": False, 
"case_id": case_id_tumor_only, - "cluster_config_path": reference_file, "config_path": reference_file, - "disable_variant_caller": "disable_variant_caller=tnscope", - "dragen": False, + "dragen": True, "force": False, "log_dir": session_tmp_path, - "mail_type": None, - "mail_user": mail_user_option, - "profile": ClusterProfile.SLURM, + "cluster_profile": reference_file, + "cluster_job_status_script": reference_file, + "workflow_profile": reference_file, "qos": QOS.HIGH, "quiet": True, "run_analysis": True, @@ -2584,72 +2591,3 @@ def fixture_snakemake_executable_validated_data( def job_id() -> str: """Return cluster job identifier.""" return "12345" - - -@pytest.fixture(scope="session") -def job_properties() -> Dict[str, Any]: - """Cluster job properties.""" - return { - "cluster": { - "partition": "core", - "n": "1", - "time": "10:00:00", - "mem": "1000", - "mail_type": ClusterMailType.ALL.value, - } - } - - -@pytest.fixture(scope="session") -def scheduler_data( - case_id_tumor_only: str, - job_properties: Dict[str, Any], - empty_file: Path, - empty_dir: Path, - mail_user_option: str, -) -> Dict[str, Any]: - """Return raw scheduler model data.""" - return { - "account": ClusterAccount.DEVELOPMENT.value, - "case_id": case_id_tumor_only, - "dependencies": ["1", "2", "3"], - "job_properties": job_properties, - "job_script": empty_file.as_posix(), - "log_dir": empty_dir.as_posix(), - "mail_user": mail_user_option, - "mail_type": ClusterMailType.FAIL.value, - "profile": ClusterProfile.SLURM.value, - "qos": QOS.HIGH, - } - - -@pytest.fixture(scope="session") -def scheduler_validated_data( - case_id_tumor_only: str, - job_properties: Dict[str, Any], - empty_file: Path, - empty_dir: Path, - mail_user_option: str, -) -> Dict[str, Any]: - """Return scheduler model validated data.""" - return { - "account": f"--account {ClusterAccount.DEVELOPMENT}", - "benchmark": False, - "case_id": case_id_tumor_only, - "dependencies": ["1", "2", "3"], - "job_properties": job_properties, - 
"job_script": empty_file, - "log_dir": empty_dir, - "mail_type": "--mail-type FAIL", - "mail_user": f"--mail-user {mail_user_option}", - "profile": ClusterProfile.SLURM, - "profiling_interval": 10, - "profiling_type": "task", - "qos": "--qos high", - } - - -@pytest.fixture(scope="session") -def scheduler_model(scheduler_data: Dict[str, Any]) -> Scheduler: - """Return scheduler pydantic model.""" - return Scheduler(**scheduler_data) diff --git a/tests/models/test_sbatchsubmitter.py b/tests/models/test_sbatchsubmitter.py new file mode 100644 index 000000000..7b637d1fc --- /dev/null +++ b/tests/models/test_sbatchsubmitter.py @@ -0,0 +1,73 @@ +"""Test Sbatch submitter class""" + +from unittest.mock import patch, mock_open, MagicMock +from pathlib import Path +import subprocess + + +def test_create_sbatch_script(submitter): + """Test that `create_sbatch_script` writes an sbatch script file. + + Ensures that the method attempts to open the target script file for writing + and that content is written to it. + """ + with patch("builtins.open", mock_open()) as mock_file: + submitter.create_sbatch_script() + # Check that the file was attempted to be written + mock_file.assert_called_once_with(submitter.sbatch_script_path, "w") + handle = mock_file() + handle.write.assert_called() # Basic check that something was written + + +def test_submit_job_success(submitter): + """Test `submit_job` returns the job ID on successful sbatch submission. + + Simulates a valid SLURM response and verifies that the job ID is correctly parsed + and logged. 
+ """ + fake_output = "Submitted batch job 12345" + with patch("subprocess.run") as mock_run: + mock_run.return_value.returncode = 0 + mock_run.return_value.stdout = fake_output + job_id = submitter.submit_job() + assert job_id == "12345" + submitter.log.info.assert_any_call( + "Job submitted successfully with Job ID: 12345" + ) + + +def test_submit_job_failure(submitter): + """Test `submit_job` handles a failed sbatch call.""" + error = subprocess.CalledProcessError( + returncode=1, cmd="sbatch", stderr="Error submitting job" + ) + + with patch("BALSAMIC.models.sbatchsubmitter.subprocess.run", side_effect=error): + job_id = submitter.submit_job() + assert job_id is None + submitter.log.error.assert_called_once() + + +def test_submit_job_no_job_id(submitter): + """Test `submit_job` handles a successful sbatch call with unexpected output. + + Verifies that if sbatch output does not include a recognizable job ID, + a warning is logged and None is returned. + """ + with patch("subprocess.run") as mock_run: + mock_run.return_value.returncode = 0 + mock_run.return_value.stdout = "Some unrelated output" + job_id = submitter.submit_job() + assert job_id is None + submitter.log.warning.assert_called_once() + + +def test_write_job_id_yaml(submitter): + """Test that `write_job_id_yaml` writes the job ID to a YAML file. + + Confirms that after calling the method, the expected YAML file exists + in the result path. 
+ """ + job_id = "12345" + submitter.write_job_id_yaml(job_id) + assert Path(submitter.result_path, "slurm_jobids.yaml").is_file() diff --git a/tests/models/test_scheduler_models.py b/tests/models/test_scheduler_models.py deleted file mode 100644 index f1744d6dd..000000000 --- a/tests/models/test_scheduler_models.py +++ /dev/null @@ -1,273 +0,0 @@ -"""Tests for the Scheduler model related methods.""" -import logging -import subprocess -from pathlib import Path -from typing import Any, Dict -from unittest import mock - -import pytest -from _pytest.logging import LogCaptureFixture -from pydantic import ValidationError - -from BALSAMIC.constants.constants import EXIT_SUCCESS -from BALSAMIC.models.scheduler import Scheduler - - -def test_scheduler_model( - scheduler_data: Dict[str, Any], scheduler_validated_data: Dict[str, Any] -): - """Test immediate submit scheduler model initialisation.""" - - # GIVEN a scheduler dictionary data - - # WHEN initialising the model - scheduler_model: Scheduler = Scheduler(**scheduler_data) - - # THEN the model should have been correctly built - assert scheduler_model.model_dump() == scheduler_validated_data - - -def test_scheduler_model_empty(): - """Test scheduler empty model initialisation.""" - - # GIVEN no input for the scheduler model - - # WHEN initialising the model - with pytest.raises(ValidationError): - # THEN an empty model should raise a ValidationError - Scheduler() - - -def test_get_dependency_option(scheduler_model: Scheduler): - """Test get dependency option for cluster submission.""" - - # GIVEN a scheduler model - - # WHEN retrieving the formatted option - dependency_option: str = scheduler_model.get_dependency_option() - - # THEN the error option should be appended to the command - assert "--dependency afterok:1,2,3" in dependency_option - - -def test_get_error_option(scheduler_model: Scheduler): - """Test get error option for cluster submission.""" - - # GIVEN a scheduler model - - # WHEN retrieving the formatted 
option - error_option: str = scheduler_model.get_error_option() - - # THEN the error option should be appended to the command - assert "--error" in error_option - - -def test_get_output_option(scheduler_model: Scheduler): - """Test parsing of the stdout option.""" - - # GIVEN a scheduler model - - # WHEN getting the output option from the scheduler - output_option: str = scheduler_model.get_output_option() - - # THEN the output option should be appended to the command - assert "--output" in output_option - - -def test_get_profile_option(scheduler_model: Scheduler): - """Test get scheduler profile option.""" - - # GIVEN a scheduler model with a benchmarking option - scheduler_model.benchmark = True - - # WHEN retrieving the profile option - profile_option: str = scheduler_model.get_profile_option() - - # THEN the profile option should be appended to the command - assert "--profile" in profile_option - - -def test_get_acctg_freq_option(scheduler_model: Scheduler): - """Test get profiling sampling intervals.""" - - # GIVEN a scheduler model with a benchmarking option - scheduler_model.benchmark = True - - # WHEN getting the profiling option - freq_option: str = scheduler_model.get_acctg_freq_option() - - # THEN the profiling option should be appended to the command - assert "--acctg-freq" in freq_option - - -def test_get_ntasks_option(scheduler_model: Scheduler): - """Test number of tasks for cluster resources allocation.""" - - # GIVEN a scheduler model - - # WHEN getting the ntasks cluster resource option - ntasks_option: str = scheduler_model.get_ntasks_option() - - # THEN the number of tasks option should be returned - assert "--ntasks" in ntasks_option - - -def test_get_time_option(scheduler_model: Scheduler): - """Test get time resource from job properties.""" - - # GIVEN a scheduler model - - # WHEN getting the time cluster resource option - time_option: str = scheduler_model.get_time_option() - - # THEN the number of tasks option should be returned - assert 
"--time" in time_option - - -def test_get_memory_option(scheduler_model: Scheduler): - """Test get memory resource from job properties.""" - - # GIVEN a scheduler model - - # WHEN getting the memory cluster resource option - mem_option: str = scheduler_model.get_memory_option() - - # THEN the mem be returned - assert "--mem" in mem_option - - -def test_get_partition_option(scheduler_model: Scheduler): - """Test get partition from cluster properties.""" - - # GIVEN a scheduler model - - # WHEN getting the partition cluster resource option - partition_option: str = scheduler_model.get_partition_option() - - # THEN the partition option should be returned - assert "--partition" in partition_option - - -def test_get_empty_options(scheduler_data: Dict[str, Any]): - """Test parsing of empty options.""" - - # GIVEN scheduler data with empty options - scheduler_data["qos"] = None - scheduler_data["job_properties"]["cluster"]["n"] = None - scheduler_data["job_properties"]["cluster"]["time"] = None - scheduler_data["job_properties"]["cluster"]["partition"] = None - scheduler_data["dependencies"] = None - - # WHEN initialising the model - scheduler_model: Scheduler = Scheduler(**scheduler_data) - - # THEN the empty options should be returned as empty string - assert scheduler_model.qos == "" - assert scheduler_model.get_ntasks_option() == "" - assert scheduler_model.get_time_option() == "" - assert scheduler_model.get_partition_option() == "" - assert scheduler_model.get_dependency_option() == "" - - -def test_get_job_id_from_stdout(job_id: str, scheduler_model: Scheduler): - """Test get job identifier from the scheduler standard output.""" - - # GIVEN a scheduler model and a stdout with a job identifier - stdout: str = f"Submitted batch job {job_id}" - - # WHEN retrieving the job id - retrieved_job_id: str = scheduler_model.get_job_id_from_stdout(stdout) - - # THEN the expected job identifier should be returned - assert retrieved_job_id == job_id - - -def 
test_get_job_id_from_stdout_error( - job_id: str, scheduler_model: Scheduler, caplog: LogCaptureFixture -): - """Test get job identifier from the scheduler incorrect standard output.""" - - # GIVEN a scheduler model and an incorrect stdout with a job identifier - stdout: str = "Submitted batch job no ID" - - # WHEN retrieving the job id - with pytest.raises(ValueError): - scheduler_model.get_job_id_from_stdout(stdout) - - # THEN the expected error should be catched - assert "Failed to extract job ID from the submission result" in caplog.text - - -def test_write_job_log_data(job_id: str, scheduler_model: Scheduler): - """Test writing job log data.""" - - # GIVEN a scheduler model, a case id, a job id, and a scheduler command - command: str = "sbatch --acount development --dependency 'afterok:00001'" - - # WHEN writing to the log file - scheduler_model.write_job_log_data(job_id=job_id, command=command) - - # THEN the log file should have been created - log_file: Path = Path(scheduler_model.log_dir, f"{scheduler_model.case_id}.sacct") - assert log_file.is_file() - with open(log_file, "r") as file: - assert f"{job_id},{command}" in file.read() - - -def test_get_command( - scheduler_model: Scheduler, scheduler_validated_data: Dict[str, Any] -): - """Test scheduler command build.""" - - # GIVEN a scheduler model and a validated data dictionary - - # WHEN getting the command to be submitted - command: str = scheduler_model.get_command() - - # THEN the mandatory options should be appended to the command - for option, value in scheduler_validated_data.items(): - if option not in [ - "benchmark", - "case_id", - "dependencies", - "job_properties", - "profile", - ]: - assert str(value) in command - - -def test_submit_job(job_id: str, scheduler_model: Scheduler, caplog: LogCaptureFixture): - """Test job submission to the cluster.""" - caplog.set_level(logging.INFO) - - # GIVEN a scheduler model and the expected standard output command - stdout: str = f"Submitted batch job 
{job_id}" - - # WHEN submitting a cluster job - with mock.patch.object( - subprocess, - "run", - return_value=subprocess.CompletedProcess( - args="", returncode=EXIT_SUCCESS, stdout=stdout, stderr="" - ), - ): - scheduler_model.submit_job() - - # THEN the cluster job should be submitted and the log file created - assert f"Submitted job with ID: {job_id}" in caplog.text - assert Path(scheduler_model.log_dir, f"{scheduler_model.case_id}.sacct").is_file() - - -def test_submit_job_error( - job_id: str, scheduler_model: Scheduler, caplog: LogCaptureFixture -): - """Test job submission to the cluster when an exception is raised.""" - - # GIVEN a scheduler model - - # WHEN submitting a cluster job that has not been mocked - with pytest.raises(Exception): - scheduler_model.submit_job() - - # THEN the cluster job should fail to be submitted - assert f"Failed to submit: {scheduler_model.get_command()}" in caplog.text diff --git a/tests/models/test_snakemake_models.py b/tests/models/test_snakemake_models.py index e85dd6699..3f323fe7a 100644 --- a/tests/models/test_snakemake_models.py +++ b/tests/models/test_snakemake_models.py @@ -5,8 +5,7 @@ from typing import Any, Dict import pytest -from BALSAMIC.constants.cluster import MAX_JOBS, QOS, ClusterAccount, ClusterProfile -from BALSAMIC.constants.paths import IMMEDIATE_SUBMIT_PATH +from BALSAMIC.constants.cluster import MAX_JOBS, QOS, ClusterAccount from BALSAMIC.models.snakemake import SingularityBindPath, SnakemakeExecutable from pydantic import ValidationError @@ -61,25 +60,22 @@ def test_get_config_files_option( # GIVEN a snakemake executable model with a mocked config file # WHEN calling the method - config_files_option: str = snakemake_executable.get_config_files_option() + config_files_option: str = snakemake_executable.get_config_file_option() # THEN the expected format should be returned - assert ( - config_files_option - == f"--configfiles {reference_file.as_posix()} {reference_file.as_posix()}" - ) + assert 
config_files_option == f"--configfile {reference_file.as_posix()}" def test_get_config_options(snakemake_executable: SnakemakeExecutable): """Test formatting of the snakemake config options.""" - # GIVEN a snakemake executable model disabling some variant callers + # GIVEN a snakemake executable model with a dragen flag # WHEN calling the method snakemake_config_options: str = snakemake_executable.get_config_options() # THEN the expected format should be returned - assert snakemake_config_options == "--config disable_variant_caller=tnscope" + assert snakemake_config_options == "--config dragen=True" def test_get_dragen_flag(snakemake_executable: SnakemakeExecutable): @@ -176,32 +172,8 @@ def test_get_snakemake_options_command(snakemake_executable: SnakemakeExecutable assert snakemake_options_command == "--cores 36" -def test_get_cluster_submit_command( - case_id_tumor_only: str, - mail_user_option: str, - session_tmp_path: Path, - snakemake_executable: SnakemakeExecutable, -): - """Test formatting of the cluster submit command.""" - - # GIVEN a snakemake executable model with working environment paths - - # WHEN calling the method - snakemake_cluster_submit_command: str = ( - snakemake_executable.get_cluster_submit_command() - ) - - # THEN the expected format should be returned - assert snakemake_cluster_submit_command == ( - f"'{sys.executable} {IMMEDIATE_SUBMIT_PATH.as_posix()} --account {ClusterAccount.DEVELOPMENT} " - f"--log-dir {session_tmp_path} --mail-user {mail_user_option} --profile {ClusterProfile.SLURM} " - f"--qos {QOS.HIGH} --script-dir {session_tmp_path} {case_id_tumor_only} {{dependencies}}'" - ) - - def test_get_snakemake_cluster_options( case_id_tumor_only: str, - mail_user_option: str, reference_file: Path, session_tmp_path: Path, snakemake_executable: SnakemakeExecutable, @@ -218,17 +190,13 @@ def test_get_snakemake_cluster_options( # THEN the expected format should be returned assert ( snakemake_cluster_options - == f"--immediate-submit -j 
{MAX_JOBS} --jobname BALSAMIC.{case_id_tumor_only}.{{rulename}}.{{jobid}}.sh " - f"--cluster-config {reference_file.as_posix()} --cluster '{sys.executable} {IMMEDIATE_SUBMIT_PATH.as_posix()} " - f"--account {ClusterAccount.DEVELOPMENT} --log-dir {session_tmp_path} --mail-user {mail_user_option} " - f"--profile {ClusterProfile.SLURM} --qos {QOS.HIGH} --script-dir {session_tmp_path} {case_id_tumor_only} " - "{dependencies}'" + == f"-j {MAX_JOBS} --jobname BALSAMIC.{case_id_tumor_only}.{{rulename}}.{{jobid}}.sh " + f"--profile {reference_file.as_posix()} --workflow-profile {reference_file.as_posix()}" ) def test_get_snakemake_command( case_id_tumor_only: str, - mail_user_option: str, reference_file: Path, session_tmp_path: Path, snakemake_executable: SnakemakeExecutable, @@ -245,11 +213,8 @@ def test_get_snakemake_command( snakemake_command == f"snakemake --notemp -p --rerun-trigger mtime --directory {session_tmp_path.as_posix()} " f"--snakefile {reference_file.as_posix()} " - f"--configfiles {reference_file.as_posix()} {reference_file.as_posix()} " + f"--configfile {reference_file.as_posix()} --config dragen=True " f"--use-singularity --singularity-args '--cleanenv --bind {session_tmp_path.as_posix()}:/' --quiet " - f"--immediate-submit -j {MAX_JOBS} --jobname BALSAMIC.{case_id_tumor_only}.{{rulename}}.{{jobid}}.sh " - f"--cluster-config {reference_file.as_posix()} --cluster '{sys.executable} {IMMEDIATE_SUBMIT_PATH.as_posix()} " - f"--account {ClusterAccount.DEVELOPMENT} --log-dir {session_tmp_path} --mail-user {mail_user_option} " - f"--profile {ClusterProfile.SLURM} --qos {QOS.HIGH} --script-dir {session_tmp_path} {case_id_tumor_only} " - "{dependencies}' --config disable_variant_caller=tnscope --cores 36" + f"-j {MAX_JOBS} --jobname BALSAMIC.{case_id_tumor_only}.{{rulename}}.{{jobid}}.sh " + f'--profile {reference_file.as_posix()} --workflow-profile {reference_file.as_posix()} --cluster-status "python {reference_file.as_posix()}" --cores 36' ) diff --git 
a/tests/scripts/test_immediate_submit.py b/tests/scripts/test_immediate_submit.py deleted file mode 100644 index 4c6dafbe5..000000000 --- a/tests/scripts/test_immediate_submit.py +++ /dev/null @@ -1,66 +0,0 @@ -"""Test immediate submit script.""" -import logging -import subprocess -from pathlib import Path -from typing import Any, Dict -from unittest import mock - -from _pytest.logging import LogCaptureFixture -from click.testing import CliRunner, Result -from snakemake import utils - -from BALSAMIC.assets.scripts.immediate_submit import immediate_submit -from BALSAMIC.constants.constants import EXIT_SUCCESS - - -def test_immediate_submit( - job_id: str, - job_properties: Dict[str, Any], - scheduler_data: Dict[str, Any], - session_tmp_path: Path, - caplog: LogCaptureFixture, - cli_runner: CliRunner, -): - """Test immediate submit script execution.""" - caplog.set_level(logging.INFO) - - # GIVEN some scheduler data and a CLI runner with the expected standard output - stdout: str = f"Submitted batch job {job_id}" - - # WHEN calling the immediate submit script - with mock.patch.object( - utils, "read_job_properties", return_value=job_properties - ), mock.patch.object( - subprocess, - "run", - return_value=subprocess.CompletedProcess( - args="", returncode=EXIT_SUCCESS, stdout=stdout, stderr="" - ), - ): - result: Result = cli_runner.invoke( - immediate_submit, - [ - scheduler_data["case_id"], - "dependencies", - scheduler_data["job_script"], - "--account", - "development", - "--log-dir", - scheduler_data["log_dir"], - "--mail-user", - "balsamic@scilifelab.se", - "--profile", - "slurm", - "--qos", - "high", - "--script-dir", - session_tmp_path.as_posix(), - ], - ) - - # THEN the command should succeed and the log file created - assert result.exit_code == 0 - assert f"Submitted job with ID: {job_id}" in caplog.text - assert Path( - scheduler_data["log_dir"], f"{scheduler_data['case_id']}.sacct" - ).is_file() diff --git a/tests/test_workflow.py b/tests/test_workflow.py 
index ebf5fef36..006039013 100644 --- a/tests/test_workflow.py +++ b/tests/test_workflow.py @@ -12,6 +12,7 @@ def test_workflow_tumor_only_tga_hg19( tumor_only_config, sentieon_install_dir, sentieon_license, + default_snakemake_resources, caplog, ): # GIVEN a sample config dict and a snakefile @@ -30,7 +31,12 @@ def test_workflow_tumor_only_tga_hg19( "SENTIEON_INSTALL_DIR": sentieon_install_dir, }, ): - assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True) + assert snakemake.snakemake( + snakefile, + configfiles=[config_json], + dryrun=True, + default_resources=default_snakemake_resources, + ) # THEN the following rules should not be included assert "igh_dux4_detection_tumor_only" not in caplog.text @@ -40,6 +46,7 @@ def test_workflow_tumor_normal_tga_hg19( tumor_normal_config, sentieon_install_dir, sentieon_license, + default_snakemake_resources, caplog, ): # GIVEN a sample config dict and a snakefile @@ -58,7 +65,12 @@ def test_workflow_tumor_normal_tga_hg19( "SENTIEON_INSTALL_DIR": sentieon_install_dir, }, ): - assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True) + assert snakemake.snakemake( + snakefile, + configfiles=[config_json], + dryrun=True, + default_resources=default_snakemake_resources, + ) # THEN the following rules should not be included assert "igh_dux4_detection_tumor_normal" not in caplog.text @@ -68,6 +80,7 @@ def test_workflow_tumor_only_wgs_hg19( tumor_only_wgs_config, sentieon_install_dir, sentieon_license, + default_snakemake_resources, caplog, ): # GIVEN a sample config dict and a snakefile @@ -86,7 +99,12 @@ def test_workflow_tumor_only_wgs_hg19( "SENTIEON_INSTALL_DIR": sentieon_install_dir, }, ): - assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True) + assert snakemake.snakemake( + snakefile, + configfiles=[config_json], + dryrun=True, + default_resources=default_snakemake_resources, + ) # THEN the following rules should be included assert 
"igh_dux4_detection_tumor_only" in caplog.text @@ -96,6 +114,7 @@ def test_workflow_tumor_normal_wgs_hg19( tumor_normal_wgs_config, sentieon_install_dir, sentieon_license, + default_snakemake_resources, caplog, ): # GIVEN a sample config dict and a snakefile @@ -114,14 +133,22 @@ def test_workflow_tumor_normal_wgs_hg19( "SENTIEON_INSTALL_DIR": sentieon_install_dir, }, ): - assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True) + assert snakemake.snakemake( + snakefile, + configfiles=[config_json], + dryrun=True, + default_resources=default_snakemake_resources, + ) # THEN the following rules should be included assert "igh_dux4_detection_tumor_normal" in caplog.text def test_workflow_qc_tumor_only_hg19( - tumor_only_config_qc, sentieon_install_dir, sentieon_license + tumor_only_config_qc, + sentieon_install_dir, + sentieon_license, + default_snakemake_resources, ): # GIVEN a sample config dict and a snakefile workflow = "single" @@ -137,11 +164,19 @@ def test_workflow_qc_tumor_only_hg19( "SENTIEON_INSTALL_DIR": sentieon_install_dir, }, ): - assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True) + assert snakemake.snakemake( + snakefile, + configfiles=[config_json], + dryrun=True, + default_resources=default_snakemake_resources, + ) def test_workflow_qc_tumor_normal_hg19( - tumor_normal_config_qc, sentieon_install_dir, sentieon_license + tumor_normal_config_qc, + sentieon_install_dir, + sentieon_license, + default_snakemake_resources, ): # GIVEN a sample config dict and a snakefile workflow = "paired" @@ -157,4 +192,9 @@ def test_workflow_qc_tumor_normal_hg19( "SENTIEON_INSTALL_DIR": sentieon_install_dir, }, ): - assert snakemake.snakemake(snakefile, configfiles=[config_json], dryrun=True) + assert snakemake.snakemake( + snakefile, + configfiles=[config_json], + dryrun=True, + default_resources=default_snakemake_resources, + ) diff --git a/tests/utils/test_logging.py b/tests/utils/test_logging.py new file mode 100644 index 
000000000..0bdcfd730 --- /dev/null +++ b/tests/utils/test_logging.py @@ -0,0 +1,106 @@ +"""Test Balsamic logging utility methods.""" +import logging +import coloredlogs + +from pathlib import Path + +import pytest + +from BALSAMIC.utils.logging import add_file_logging +from BALSAMIC.constants.constants import LogLevel + + +def test_writes_messages_to_file(tmp_path: Path, caplog: pytest.LogCaptureFixture): + """The helper should attach a FileHandler that receives log records.""" + + # GIVEN logging instance + LOG = logging.getLogger(__name__) + + coloredlogs.DEFAULT_FIELD_STYLES = { + "asctime": {"color": "green"}, + "hostname": {"color": "magenta"}, + "levelname": {"color": "yellow", "bold": True}, + "programname": {"color": "cyan"}, + "name": {"color": "blue"}, + } + coloredlogs.install( + level=LogLevel.INFO, + fmt="%(programname)s %(hostname)s %(asctime)s %(name)s pid:%(process)d [%(levelname)s] %(message)s", + ) + + log_file = tmp_path / "run.log" + + # GIVEN logfile path to store log messages + add_file_logging(str(log_file), logger_name=__name__) + + # WHEN printing log information + LOG.info("hello world") + + # THEN logfile should be created + assert log_file.exists() + + # THEN message should exist inside the logfile + assert "hello world" in log_file.read_text() + + # WHEN printing log debug info + LOG.debug("invisible") + + # THEN message should not be stored in logfile + assert "invisible" not in log_file.read_text() + + +def test_add_file_logging_does_not_duplicate_handlers(tmp_path: Path): + """Test that logger function correctly only adds one filehandler in case its accidentally added twice.""" + + # GIVEN logging instance + log_file = tmp_path / "run.log" + + logger = logging.getLogger("test_logger") + logger.setLevel(logging.INFO) + logger.handlers.clear() + + # WHEN adding filehandler + add_file_logging(log_file, logger_name="test_logger") + + # THEN 1 file handler should exist + assert sum(isinstance(h, logging.FileHandler) for h in 
logger.handlers) == 1 + + # WHEN adding a second filderhandler + add_file_logging(log_file, logger_name="test_logger") + + # THEN 1 file handler should exist + assert sum(isinstance(h, logging.FileHandler) for h in logger.handlers) == 1 + + +def test_add_file_logging_uses_existing_formatter(tmp_path: Path): + """Test that add_file_logging copies the formatter from an existing handler.""" + + # GIVEN logging instance + log_file = tmp_path / "run.log" + logger = logging.getLogger("formatted_logger") + logger.setLevel(logging.INFO) + logger.handlers.clear() + + # GIVEN a StreamHandler with a custom formatter + stream_handler = logging.StreamHandler() + custom_formatter = logging.Formatter("CUSTOM FORMAT: %(message)s") + stream_handler.setFormatter(custom_formatter) + logger.addHandler(stream_handler) + + # WHEN adding file logging + add_file_logging(str(log_file), logger_name="formatted_logger") + + # THEN the FileHandler that was added should be found + file_handlers = [h for h in logger.handlers if isinstance(h, logging.FileHandler)] + assert len(file_handlers) == 1 + + file_handler = file_handlers[0] + + # THEN the formatter should be correctly copied to the file logger + assert isinstance(file_handler.formatter, logging.Formatter) + assert file_handler.formatter._fmt == "CUSTOM FORMAT: %(message)s" + + # THEN the messages written to logger ends up in file with the right format + logger.info("formatter test") + contents = log_file.read_text() + assert "CUSTOM FORMAT: formatter test" in contents diff --git a/tests/utils/test_utils.py b/tests/utils/test_utils.py index 72d252c52..01971ae83 100644 --- a/tests/utils/test_utils.py +++ b/tests/utils/test_utils.py @@ -17,7 +17,6 @@ from BALSAMIC.commands.config.case import case_config from BALSAMIC.constants.analysis import BIOINFO_TOOL_ENV, SampleType, SequencingType from BALSAMIC.constants.cache import CacheVersion -from BALSAMIC.constants.cluster import ClusterConfigType from BALSAMIC.constants.constants import FileType 
from BALSAMIC.constants.paths import CONTAINERS_DIR from BALSAMIC.models.config import ConfigModel, FastqInfoModel, SampleInstanceModel @@ -30,7 +29,6 @@ generate_h5, get_analysis_fastq_files_directory, get_bioinfo_tools_version, - get_config_path, get_fastq_info, get_file_extension, get_file_status_string, @@ -62,7 +60,6 @@ get_rule_output, get_sample_type_from_sample_name, get_script_path, - get_threads, get_variant_callers, get_vcf, ) @@ -425,19 +422,6 @@ def test_capturestdout(): assert "".join(captured_stdout_message) == test_stdout_message -def test_get_config_path(cluster_analysis_config_path: str): - """Test return of a config path given its type.""" - - # GIVEN an analysis config path - - # WHEN retrieving the cluster analysis configuration - cluster_analysis: Path = get_config_path(ClusterConfigType.ANALYSIS) - - # THEN an analysis cluster json should be returned - assert cluster_analysis.exists() - assert cluster_analysis.as_posix() == cluster_analysis_config_path - - def test_write_json(tmp_path, reference): # GIVEN a dict from sample json file tmp = tmp_path / "tmp" @@ -619,16 +603,6 @@ def test_write_yaml(metrics_yaml_path: str, tmp_path: Path): assert written_metrics_data == metrics_data -def test_get_threads(cluster_analysis_config_path: str): - # GIVEN cluster config file and rule name - cluster_config = json.load(open(cluster_analysis_config_path, "r")) - rule_name = "sentieon_align_sort" - - # WHEN passing cluster_config and rule_name - # THEN It should return threads value '12' - assert get_threads(cluster_config, rule_name) - - def test_get_file_status_string_file_exists(tmpdir): # GIVEN an existing file and condition_str False file_exist = tmpdir.mkdir("temporary_path").join("file_exists") diff --git a/tests/utils/test_workflowscripts.py b/tests/utils/test_workflowscripts.py deleted file mode 100644 index 03ff138d9..000000000 --- a/tests/utils/test_workflowscripts.py +++ /dev/null @@ -1,37 +0,0 @@ -from unittest import mock -from pathlib 
import Path -import pytest - -from BALSAMIC.utils.workflowscripts import plot_analysis - - -def test_plot_analysis(tmp_path_factory): - # GIVEN a dummy log file - dummy_log_file = Path( - "tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err" - ) - dummy_h5 = "tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.h5" - dummy_path = tmp_path_factory.mktemp("dummy_pdf_path") - dummy_pdf_name = dummy_path / "BALSAMIC.T_panel.bwa_mem.123.pdf" - dummy_pdf_name.touch() - - # WHEN calling plot_analysis - actual_pdf_file = plot_analysis(dummy_log_file, dummy_h5, dummy_pdf_name) - - assert Path(actual_pdf_file).exists() - - -def test_plot_analysis_bad_h5(tmp_path_factory): - # GIVEN a dummy log file - dummy_log_file = Path( - "tests/test_data/dummy_run_logs/BALSAMIC.T_panel.bwa_mem.123.sh_31415926535.err" - ) - dummy_h5 = "tests/test_data/dummy_run_logs/bad_format.h5" - dummy_path = tmp_path_factory.mktemp("dummy_pdf_path") - dummy_pdf_name = dummy_path / "plot_file.pdf" - dummy_pdf_name.touch() - - # WHEN calling plot_analysis - actual_pdf_file = plot_analysis(dummy_log_file, dummy_h5, dummy_pdf_name) - - assert actual_pdf_file is None