Skip to content

Commit

Permalink
Update Moka pipeline IDs, add new oncodeep run type, formatting, move…
Browse files Browse the repository at this point in the history
… samplesheet copying to demultiplexing, move auth string to top of commands files, move get_pipeline and get_samplename_dict functions to toolbox, simplify upload runfolder class
  • Loading branch information
RachelDuffin committed Jun 19, 2024
1 parent 500d1b8 commit f2490f9
Show file tree
Hide file tree
Showing 17 changed files with 310 additions and 231 deletions.
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,6 @@ The above image describes the possible associations in the Class Diagram. In the
| Setoff workflows output | Catches any traceback from errors when running the cron job that are not caught by exception handling within the script | `TIMESTAMP.txt` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/Upload_agent_stdout` |
| sw (script_logger) | Records script-level logs for the setoff workflows script | `TIMESTAMP_setoff_workflow.log` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/sw_script_logfiles/` |
| sw (rf_loggers["sw"]) | Records runfolder-level logs for the setoff workflows script | `RUNFOLDERNAME_setoff_workflow.log` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/sw_script_logfiles/` |
| upload_agent | Records upload agent logs (stdout and stderr of the upload agent) | `DNANexus_upload_started.txt` | Within the runfolder |
| dx_run_script | Records the dx run commands for processing the run. N.B. this is not written to by logging | `RUNFOLDERNAME_dx_run_commands.sh` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/dx_run_commands` |
| decision_support_upload_cmds | Records the dx run commands to set off the congenica upload apps. N.B. this is not written to by logging | `RUNFOLDERNAME_decision_support.sh` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/dx_run_commands` |
| proj_creation_script | Records the commands for creating the DNAnexus project. N.B. this is not written to by logging | `RUNFOLDERNAME_create_nexus_project.sh` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/dx_run_commands` |
Expand Down
2 changes: 1 addition & 1 deletion ad_email/ad_email.py
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ class AdEmail(AdEmailConfig):
def __init__(self, logger: logging.Logger):
"""
Constructor for the AdEmail class
:param logger: Logger object
:param logger (logging.Logger): Logger
"""
self.logger = logger
self.sender = AdEmailConfig.MAIL_SETTINGS["alerts_email"]
Expand Down
7 changes: 2 additions & 5 deletions ad_logger/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,9 @@ self.script_logger.info(

logfiles_config = {
"sw": sw_runfolder_logfile,
"demultiplex": demultiplex_runfolder_logfile,
"upload_agent": upload_flagfile,
"demux": demultiplex_runfolder_logfile,
"backup": upload_runfolder_logfile,
"project": proj_creation_script,
"dx_run": runfolder_dx_run_script,
"post_run_cmds": post_run_dx_run_script,
"bcl2fastq2": bcl2fastqlog_file,
"ss_validator": samplesheet_validator_logfile,
}

Expand Down
26 changes: 19 additions & 7 deletions config/ad_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,7 @@
"congenica_upload": f"{TOOLS_PROJECT}:applet-G8QGBK80jy1zJK6g9yVP7P8V", # congenica_upload_v1.3.2"
"congenica_sftp": f"{TOOLS_PROJECT}:applet-GFfJpj80jy1x1Bz1P1Bk3vQf", # wes_congenica_sftp_upload_v1.0
"qiagen_upload": f"{TOOLS_PROJECT}:applet-Gb6G4k00v09KXfq8f6BP7f23", # qiagen_upload_v1.0.0
"oncodeep_upload": f"{TOOLS_PROJECT}:", #TODO finish this
"upload_multiqc": f"{TOOLS_PROJECT}:applet-G2XY8QQ0p7kzvPZBJGFygP6f", # upload_multiqc_v1.4.0
"multiqc": f"{TOOLS_PROJECT}:applet-GXqBzg00jy1pXkQVkY027QqV", # multiqc_v1.18.0
"sompy": f"{TOOLS_PROJECT}:applet-G9yPb780jy1p660k6yBvQg07", # sompy_v1.2
Expand Down Expand Up @@ -154,6 +155,8 @@
},
}
NEXUS_IDS["WORKFLOWS"]["archerdx"] = NEXUS_IDS["APPS"]["fastqc"]
NEXUS_IDS["WORKFLOWS"]["oncodeep"] = NEXUS_IDS["APPS"]["fastqc"]


APP_INPUTS = { # Inputs for apps run outside of DNAnexus workflows
"tso500": {
Expand Down Expand Up @@ -225,6 +228,10 @@
"sample_name": "-isample_name=",
"sample_zip_folder": "-isample_zip_folder=${PROJECT_ID}:/results/",
},
"oncodeep_upload": {
"run_identifier": "-irun_identifier=",
"file_to_upload": "-ifile_to_upload=${PROJECT_ID}:",
},
"duty_csv": {
"project_name": "-iproject_name=${PROJECT_NAME}",
"tso_pannumbers": "-itso_pannumbers=",
Expand All @@ -235,7 +242,7 @@
UPLOAD_ARGS = {
"dest": "--dest=${PROJECT_ID}",
"proj": "--project=${PROJECT_NAME}",
"token": "--brief --auth %s)",
"token": "--brief --auth ${AUTH})",
"depends": "${DEPENDS_LIST}",
"depends_gatk": "${DEPENDS_LIST_GATK}",
# Arguments to capture jobids. Job IDS are built into a string that can be passed to
Expand All @@ -251,7 +258,7 @@
}

DX_CMDS = {
"create_proj": 'PROJECT_ID="$(dx new project --bill-to %s "%s" --brief --auth %s)"',
"create_proj": 'PROJECT_ID="$(dx new project --bill-to %s "%s" --brief --auth ${AUTH})"',
"find_proj_name": (
f"{SDK_SOURCE}; dx find projects --name *%s* " "--auth %s | awk '{print $3}'"
),
Expand All @@ -265,7 +272,7 @@
f"{SDK_SOURCE}; dx find data --project=%s --tag as_upload --auth %s | "
"grep -v 'automated_scripts_logfiles' | wc -l"
),
"invite_user": "USER_INVITE_OUT=$(dx invite %s ${PROJECT_ID} %s --no-email --auth %s)",
"invite_user": "USER_INVITE_OUT=$(dx invite %s ${PROJECT_ID} %s --no-email --auth ${AUTH})",
"file_upload_cmd": (
f"{UPLOAD_AGENT_EXE} --auth %s --project %s --folder '%s' --do-not-compress "
"--upload-threads 10 %s --tag as_upload"
Expand Down Expand Up @@ -305,7 +312,11 @@
),
# Sleep command ensures the number of concurrent jobs does not surpass the QCII limit of 10
"qiagen_upload": (
f"sleep 1.5m; JOB_ID=$(dx run {NEXUS_IDS['APPS']['qiagen_upload']} --priority high -y {JOB_NAME_STR}"
f"sleep 1.2m; JOB_ID=$(dx run {NEXUS_IDS['APPS']['qiagen_upload']} --priority high -y {JOB_NAME_STR}"
),
"oncodeep_upload": (
f"JOB_ID=$(dx run {NEXUS_IDS['APPS']['oncodeep_upload']} --priority high -y "
f"{JOB_NAME_STR} -iaccount_type=Production"
),
"sompy": f"JOB_ID=$(dx run {NEXUS_IDS['APPS']['sompy']} --priority high -y {JOB_NAME_STR}",
"sambamba": f"JOB_ID=$(dx run {NEXUS_IDS['APPS']['sambamba']} --priority high -y {JOB_NAME_STR}",
Expand Down Expand Up @@ -463,11 +474,12 @@ class SWConfig(PanelConfig):
SQL_IDS = {
# Moka IDs for generating SQLs to update the Moka database (audit trail)
"WORKFLOWS": {
"pipe": 5229,
"pipe": 5302,
"wes": 5078,
"archerdx": 5238,
"archerdx": 5300,
"snp": 5091,
"tso500": 5288,
"tso500": 5301,
"oncodeep": 5299,
},
"WES_TEST_STATUS": {
"nextseq_sequencing": 1202218804, # Test Status = NextSEQ sequencing
Expand Down
5 changes: 3 additions & 2 deletions config/log_msgs_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -142,8 +142,9 @@
"running_cmds": "Running dx run commands using dx run bash script",
"dx_run_err": "Error when setting off dx run command. Command: %s. Stdout: %s. Stderr: %s",
"dx_run_success": "dx run commands issued successfully",
"ss_copy_success": "SampleSheet copied to runfolder: %s",
"ss_copy_fail": "SampleSheet not copied to runfolder",
"samplesheet_nonexistent": "Samplesheet does not exist at location: %s",
"file_copy_success": "File successfully copied from %s to %s",
"file_copy_fail": "Could not copy file - file does not exist: %s",
"uploading_rf": (
"Uploading rest of run folder to DNAnexus using upload_runfolder, "
"ignoring: %s. Stdout stored in logfile: %s"
Expand Down
13 changes: 13 additions & 0 deletions config/panel_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -237,6 +237,15 @@ class PanelConfig:
"coverage_min_basecall_qual": 25,
"coverage_min_mapping_qual": 30,
},
"oncodeep": {
**DEFAULT_DICT,
"panel_name": "oncodeep",
"pipeline": "oncodeep",
"sample_prefix": "OKD",
"runtype": "OKD",
"capture_type": "Hybridisation",
"multiqc_coverage_level": 30, # We don't align for OncoDEEP
},
}
PIPELINES = list(set([v["pipeline"] for k, v in CAPTURE_PANEL_DICT.items()]))
PANEL_DICT = {
Expand Down Expand Up @@ -268,6 +277,9 @@ class PanelConfig:
**CAPTURE_PANEL_DICT["archerdx"],
},
),
"Pan5226": { # OncoDEEP
**CAPTURE_PANEL_DICT["oncodeep"],
},
"Pan5085": { # TSO500 High throughput Synnovis. no UTRs. TERT promoter
**CAPTURE_PANEL_DICT["tso500"],
},
Expand Down Expand Up @@ -821,6 +833,7 @@ class PanelConfig:
WES_PANELS = [k for k, v in PANEL_DICT.items() if v["pipeline"] == "wes"]
SNP_PANELS = [k for k, v in PANEL_DICT.items() if v["pipeline"] == "snp"]
ARCHER_PANELS = [k for k, v in PANEL_DICT.items() if v["pipeline"] == "archerdx"]
ONCODEEP_PANELS = [k for k, v in PANEL_DICT.items() if v["pipeline"] == "oncodeep"]
LRPCR_PANELS = [k for k, v in PANEL_DICT.items() if v["panel_name"] == "lrpcr"]
DEVELOPMENT_PANEL = "".join(
[k for k, v in PANEL_DICT.items() if v["development_run"]]
Expand Down
38 changes: 37 additions & 1 deletion demultiplex/demultiplex.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
import sys
import os
import re
from shutil import copyfile
from typing import Optional, Tuple
import samplesheet_validator.samplesheet_validator as samplesheet_validator
from config.ad_config import DemultiplexConfig
Expand Down Expand Up @@ -260,6 +261,8 @@ class DemultiplexRunfolder(DemultiplexConfig):
Validate the created fastqs in the BaseCalls directory and log success
or failure error message accordingly. If any failure, remove bcl2fastq log
file to trigger re-demultiplex on next script run
copy_file()
Copy file from source path to dest path
"""

def __init__(
Expand Down Expand Up @@ -326,6 +329,18 @@ def setoff_workflow(self) -> Optional[bool]:
elif self.run_demultiplexing(): # All other runs require demultiplexing
self.run_processed = True
self.validate_fastqs()
self.copy_file(
self.rf_obj.samplesheet_path,
self.rf_obj.runfolder_samplesheet_path
)
samplename_dict = self.rf_obj.get_samplename_dict(
self.demux_rf_logger,
)
if self.rf_obj.get_pipeline(self.demux_rf_logger, samplename_dict) == "oncodeep":
self.copy_file(
self.rf_obj.masterfile_path,
self.rf_obj.runfolder_masterfile_path
)
return True

def demultiplexing_required(self) -> Optional[bool]:
Expand Down Expand Up @@ -465,7 +480,7 @@ def checksumfile_exists(self) -> Optional[bool]:
)
return True
else:
self.demux_rf_logger.error(
self.demux_rf_logger.info(
self.demux_rf_logger.log_msgs["checksumfile_absent"],
self.rf_obj.checksumfile_path,
)
Expand Down Expand Up @@ -782,3 +797,24 @@ def validate_fastqs(self) -> None:
self.rf_obj.bcl2fastqlog_file
) # Bcl2fastq log file removed to trigger re-demultiplex
self.demux_rf_logger.error(self.demux_rf_logger.log_msgs["re_demultiplex"])

def copy_file(self, source_path: str, dest_path: str) -> None:
    """
    Copy file from source path to dest path, logging the outcome.

    Used during demultiplexing to copy run-level input files (e.g. the
    SampleSheet, and the OncoDEEP masterfile for oncodeep runs) into the
    runfolder. If the source file does not exist, the error is logged and
    the script exits, since downstream processing depends on the file.

    :param source_path (str): Path of file to copy
    :param dest_path (str): Path to copy to
    :return None:
    """
    if os.path.exists(source_path):
        copyfile(source_path, dest_path)
        # Log via the demultiplex runfolder logger, consistent with the
        # rest of this class (the previous "sw" logger belonged to the
        # setoff_workflows script this logic was moved from)
        self.demux_rf_logger.info(
            self.demux_rf_logger.log_msgs["file_copy_success"],
            source_path,
            dest_path,
        )
    else:
        self.demux_rf_logger.error(
            self.demux_rf_logger.log_msgs["file_copy_fail"],
            source_path,
        )
        sys.exit(1)  # Missing input file is fatal for this run's processing
1 change: 0 additions & 1 deletion setoff_workflows/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,6 @@ Logging is performed using [ad_logger](../ad_logger/ad_logger.py).
| Setoff workflows output | Catches any traceback from errors when running the cron job that are not caught by exception handling within the script | `TIMESTAMP.txt` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/Upload_agent_stdout` |
| sw (script_loggers) | Records script-level logs for the setoff workflows script | `TIMESTAMP_setoff_workflow.log` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/sw_script_logfiles/` |
| sw (rf_loggers["sw"]) | Records runfolder-level logs for the setoff workflows script | `RUNFOLDERNAME_setoff_workflow.log` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/sw_script_logfiles/` |
| upload_agent | Records upload agent logs (stdout and stderr of the upload agent) | `DNANexus_upload_started.txt` | Within the runfolder |
| dx_run_script | Records the dx run commands for processing the run. N.B. this is not written to by logging | `RUNFOLDERNAME_dx_run_commands.sh` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/dx_run_commands` |
| decision_support_upload_cmds | Records the dx run commands to set off the Congenica upload apps. N.B. this is not written to by logging | `RUNFOLDERNAME_decision_support.sh` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/dx_run_commands` |
| proj_creation_script | Records the commands for creating the DNAnexus project. N.B. this is not written to by logging | `RUNFOLDERNAME_create_nexus_project.sh` | `/usr/local/src/mokaguys/automate_demultiplexing_logfiles/dx_run_commands` |
Expand Down
1 change: 0 additions & 1 deletion setoff_workflows/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,6 @@
from setoff_workflows.setoff_workflows import SequencingRuns
from ad_logger.ad_logger import set_root_logger


set_root_logger()

sequencing_runs = SequencingRuns()
Expand Down
Loading

0 comments on commit f2490f9

Please sign in to comment.