From a02eeeaf8510c77629e0a7b1dd94aa1b8cb862e5 Mon Sep 17 00:00:00 2001 From: Junjun Zhang Date: Fri, 16 Apr 2021 09:48:30 -0400 Subject: [PATCH 1/5] [wfpm v0.7.7] started a new version sanger-wxs-variant-calling@3.1.6-3.4.0 from sanger-wxs-variant-calling@3.1.6-3.3.0 which was released --- sanger-wxs-variant-calling/main.nf | 2 +- sanger-wxs-variant-calling/pkg.json | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/sanger-wxs-variant-calling/main.nf b/sanger-wxs-variant-calling/main.nf index b55e112..9047250 100755 --- a/sanger-wxs-variant-calling/main.nf +++ b/sanger-wxs-variant-calling/main.nf @@ -2,7 +2,7 @@ nextflow.enable.dsl = 2 name = 'sanger-wxs-variant-calling' short_name = 'sanger-wxs' -version = '3.1.6-3.3.0' +version = '3.1.6-3.4.0' /* diff --git a/sanger-wxs-variant-calling/pkg.json b/sanger-wxs-variant-calling/pkg.json index aea4f62..112b69a 100644 --- a/sanger-wxs-variant-calling/pkg.json +++ b/sanger-wxs-variant-calling/pkg.json @@ -1,6 +1,6 @@ { "name": "sanger-wxs-variant-calling", - "version": "3.1.6-3.3.0", + "version": "3.1.6-3.4.0", "description": "ICGC ARGO Sanger WXS Variant Calling Workflow", "main": "main.nf", "deprecated": false, From 2a5fb8fbdc0ff7cbb4f4557700046637b7a43d9f Mon Sep 17 00:00:00 2001 From: Junjun Zhang Date: Fri, 16 Apr 2021 10:53:32 -0400 Subject: [PATCH 2/5] updated prep-sanger-qc to 0.1.3.0, payload-gen-variant-calling to 0.4.0 --- sanger-wxs-variant-calling/main.nf | 4 +- sanger-wxs-variant-calling/pkg.json | 1 + .../.dockerignore | 5 + .../Dockerfile | 18 + .../payload-gen-variant-calling@0.4.0/main.nf | 98 +++++ .../payload-gen-variant-calling@0.4.0/main.py | 338 ++++++++++++++++++ .../nextflow.config | 4 + .../pkg.json | 36 ++ .../wfpr_modules | 1 + 9 files changed, 503 insertions(+), 2 deletions(-) create mode 100644 wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/.dockerignore create mode 100644 
wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/Dockerfile create mode 100755 wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.nf create mode 100755 wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.py create mode 100644 wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/nextflow.config create mode 100644 wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/pkg.json create mode 120000 wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/wfpr_modules diff --git a/sanger-wxs-variant-calling/main.nf b/sanger-wxs-variant-calling/main.nf index 9047250..39888e0 100755 --- a/sanger-wxs-variant-calling/main.nf +++ b/sanger-wxs-variant-calling/main.nf @@ -224,9 +224,9 @@ include { sangerWxsVariantCaller as sangerWxs } from './modules/raw.githubuserco include { repackSangerResults as repack } from './modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/repack-sanger-results.0.2.0.0/tools/repack-sanger-results/repack-sanger-results' params(repackSangerResults_params) include { cavemanVcfFix as cavemanFix } from './modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/caveman-vcf-fix.0.1.0.0/tools/caveman-vcf-fix/caveman-vcf-fix' params(cavemanVcfFix_params) include { prepSangerSupplement as prepSupp } from './modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-supplement.0.1.2.0/tools/prep-sanger-supplement/prep-sanger-supplement' params(prepSangerSupplement_params) -include { prepSangerQc as prepQc } from './modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.2.0/tools/prep-sanger-qc/prep-sanger-qc' params(prepSangerQc_params) +include { prepSangerQc as prepQc } from 
'./modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.3.0/tools/prep-sanger-qc/prep-sanger-qc' params(prepSangerQc_params) include { extractFilesFromTarball as extractVarSnv; extractFilesFromTarball as extractVarIndel; extractFilesFromTarball as extractQC } from './modules/raw.githubusercontent.com/icgc-argo/data-processing-utility-tools/extract-files-from-tarball.0.2.0.0/tools/extract-files-from-tarball/extract-files-from-tarball' params(extractSangerCall_params) -include { payloadGenVariantCalling as pGenVarSnv; payloadGenVariantCalling as pGenVarIndel; payloadGenVariantCalling as pGenVarSupp; payloadGenVariantCalling as pGenQc } from "./modules/raw.githubusercontent.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling.0.3.6.0/tools/payload-gen-variant-calling/payload-gen-variant-calling" params(payloadGenVariantCall_params) +include { payloadGenVariantCalling as pGenVarSnv; payloadGenVariantCalling as pGenVarIndel; payloadGenVariantCalling as pGenVarSupp; payloadGenVariantCalling as pGenQc } from "./wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main" params(payloadGenVariantCall_params) include { SongScoreUpload as upSnv; SongScoreUpload as upIndel; SongScoreUpload as upQc; SongScoreUpload as upSupp} from './wfpr_modules/github.com/icgc-argo/nextflow-data-processing-utility-tools/song-score-upload@2.6.1/main.nf' params(upload_params) include { cleanupWorkdir as cleanup } from './wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/cleanup-workdir@1.0.0/main' include { getSecondaryFiles } from './wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.0/main' diff --git a/sanger-wxs-variant-calling/pkg.json b/sanger-wxs-variant-calling/pkg.json index 112b69a..16df042 100644 --- a/sanger-wxs-variant-calling/pkg.json +++ b/sanger-wxs-variant-calling/pkg.json @@ -14,6 +14,7 @@ "url": 
"https://github.com/icgc-argo/sanger-wxs-variant-calling.git" }, "dependencies": [ + "github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0", "github.com/icgc-argo/data-processing-utility-tools/payload-add-uniform-ids@0.1.1", "github.com/icgc-argo/data-processing-utility-tools/helper-functions@1.0.0", "github.com/icgc-argo/data-processing-utility-tools/cleanup-workdir@1.0.0", diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/.dockerignore b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/.dockerignore new file mode 100644 index 0000000..71266ec --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/.dockerignore @@ -0,0 +1,5 @@ +.gitignore +.nextflow* +tests +work +outdir diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/Dockerfile b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/Dockerfile new file mode 100644 index 0000000..0864377 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/Dockerfile @@ -0,0 +1,18 @@ +FROM python:3.7.9 + +LABEL org.opencontainers.image.source https://github.com/icgc-argo/data-processing-utility-tools + +RUN groupadd -g 1000 ubuntu && \ + useradd -l -u 1000 -g ubuntu ubuntu && \ + install -d -m 0755 -o ubuntu -g ubuntu /home/ubuntu + +ENV PATH="/tools:${PATH}" + +COPY *.py /tools/ + +WORKDIR /tools + +USER ubuntu + +ENTRYPOINT ["/usr/bin/env"] +CMD ["/bin/bash"] diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.nf b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.nf new file mode 100755 index 0000000..9c2f2a0 --- /dev/null +++ 
b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.nf
@@ -0,0 +1,98 @@
+#!/usr/bin/env nextflow
+
+/*
+ * Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR).
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published
+ * by the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Author Junjun Zhang
+ */
+
+/********************************************************************/
+/* this block is auto-generated based on info from pkg.json where   */
+/* changes can be made if needed, do NOT modify this block manually */
+nextflow.enable.dsl = 2
+version = '0.4.0'  // package version
+
+container = [
+    'ghcr.io': 'ghcr.io/icgc-argo/data-processing-utility-tools.payload-gen-variant-calling'
+]
+default_container_registry = 'ghcr.io'
+/********************************************************************/
+
+
+// universal params go here
+params.container_registry = ""
+params.container_version = ""
+params.container = ""
+
+params.cpus = 1
+params.mem = 1  // GB
+params.publish_dir = ""  // set to empty string will disable publishDir
+
+// tool specific params go here, add / change as needed
+params.normal_analysis = ""
+params.tumour_analysis = ""
+params.files_to_upload = []
+params.wf_name = ""
+params.wf_short_name = ""
+params.wf_version = ""
+
+
+process payloadGenVariantCalling {
+  container "${params.container ?: container[params.container_registry ?: default_container_registry]}:${params.container_version ?: version}"
+  cpus params.cpus
+  memory "${params.mem} GB"
+  publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: "${params.publish_dir ? true : ''}"
+
+  input:
+    path normal_analysis
+    path tumour_analysis
+    path files_to_upload
+    val wf_name
+    val wf_short_name
+    val wf_version
+
+  output:
+    path "*.payload.json", emit: payload
+    path "out/*{.tgz,.vcf.gz,.vcf.gz.tbi}", emit: files_to_upload
+
+  script:
+    args_tumour_analysis = !tumour_analysis.empty() ? "-t ${tumour_analysis}" : ""
+    """
+    main.py \
+      -f ${files_to_upload} \
+      -n ${normal_analysis} \
+      -r ${workflow.runName} \
+      -j ${workflow.sessionId} \
+      -w ${wf_name} \
+      -s ${wf_short_name} \
+      -v ${wf_version} ${args_tumour_analysis}
+    """
+}
+
+// this provides an entry point for this main script, so it can be run directly without cloning the repo
+// using this command: nextflow run <git_acc>/<repo>/<pkg_name>/<main_script>.nf -r <pkg_name>.v<pkg_version> --params-file xxx
+workflow {
+  payloadGenVariantCalling(
+    file(params.normal_analysis),
+    file(params.tumour_analysis),
+    Channel.fromPath(params.files_to_upload).collect(),
+    params.wf_name,
+    params.wf_short_name,
+    params.wf_version
+  )
+}
diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.py b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.py
new file mode 100755
index 0000000..8c93d95
--- /dev/null
+++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/main.py
@@ -0,0 +1,355 @@
+#!/usr/bin/env python3
+
+"""
+ Copyright (c) 2019-2020, Ontario Institute for Cancer Research (OICR).
+
+ This program is free software: you can redistribute it and/or modify
+ it under the terms of the GNU Affero General Public License as published
+ by the Free Software Foundation, either version 3 of the License, or
+ (at your option) any later version.
+
+ This program is distributed in the hope that it will be useful,
+ but WITHOUT ANY WARRANTY; without even the implied warranty of
+ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ GNU Affero General Public License for more details.
+
+ You should have received a copy of the GNU Affero General Public License
+ along with this program. If not, see <https://www.gnu.org/licenses/>.
+
+ Authors:
+   Junjun Zhang
+ """
+
+import os
+import sys
+import uuid
+import json
+import hashlib
+import copy
+import re
+import tarfile
+from datetime import date
+from argparse import ArgumentParser
+
+
+# Maps a variant/metrics type to:
+#   [data_category, [data_type, data_subtype], sanger_analysis_tools, gatk_analysis_tools]
+# (types that the gatk-mutect2 branch never produces omit the last element)
+variant_type_to_data_type_etc = {
+    'snv': ['Simple Nucleotide Variation', ['Raw SNV Calls', None], ['CaVEMan'], ['GATK:Mutect2']],
+    'indel': ['Simple Nucleotide Variation', ['Raw InDel Calls', None], ['Pindel'], ['GATK:Mutect2']],
+    'cnv': ['Copy Number Variation', ['Raw CNV Calls', None], ['ASCAT']],
+    'sv': ['Structural Variation', ['Raw SV Calls', None], ['BRASS']],
+    'caveman-supplement': ['Simple Nucleotide Variation', ['SNV Supplement', None], ['CaVEMan']],
+    'pindel-supplement': ['Simple Nucleotide Variation', ['InDel Supplement', None], ['Pindel']],
+    'ascat-supplement': ['Copy Number Variation', ['CNV Supplement', None], ['ASCAT']],
+    'brass-supplement': ['Structural Variation', ['SV Supplement', None], ['BRASS']],
+    'timings-supplement': ['Quality Control Metrics', ['Analysis QC', 'Runtime Stats'], None, None],
+    'bas_metrics': ['Quality Control Metrics', ['Aligned Reads QC', 'Alignment Metrics'], ['Sanger:bam_stats']],
+    'contamination_metrics': ['Quality Control Metrics', ['Analysis QC', 'Cross Sample Contamination'], ['Sanger:verifyBamHomChk'], ['GATK:CalculateContamination']],
+    'ascat_metrics': ['Quality Control Metrics', ['Analysis QC', 'Ploidy and Purity Estimation'], ['ASCAT']],
+    'genotyped_gender_metrics': ['Quality Control Metrics', ['Analysis QC', 'Genotyping Stats'], ['Sanger:compareBamGenotypes']],
+    'mutect_filtering_metrics': ['Quality Control Metrics', ['Analysis QC', 'Variant Filtering Stats'], [], ['GATK:FilterMutectCalls']],
+    'mutect_callable_metrics': ['Quality Control Metrics', ['Analysis QC', 'Variant Callable Stats'], [], ['GATK:Mutect2']],
+}
+
+workflow_full_name = {
+    'sanger-wgs-variant-calling': 'Sanger WGS Variant Calling',
+    'sanger-wxs-variant-calling': 'Sanger WXS Variant Calling',
+    'gatk-mutect2-variant-calling': 'GATK Mutect2 Variant Calling'
+}
+
+
+def calculate_size(file_path):
+    """Return the size of the file in bytes."""
+    return os.stat(file_path).st_size
+
+
+def calculate_md5(file_path):
+    """Return the hex MD5 digest of the file, read in 1 MiB chunks."""
+    md5 = hashlib.md5()
+    with open(file_path, 'rb') as f:
+        for chunk in iter(lambda: f.read(1024 * 1024), b''):
+            md5.update(chunk)
+    return md5.hexdigest()
+
+
+def get_files_info(file_to_upload, wf_short_name, wf_version, somatic_or_germline, normal_analysis, tumour_analysis, date_str):
+    """Build the SONG file entry for one output file and symlink the file into ./out under its new name.
+
+    Exits the program when the workflow, the file type or the variant class is not recognised.
+    """
+    file_info = {
+        'fileType': 'VCF' if file_to_upload.endswith('.vcf.gz') else file_to_upload.split(".")[-1].upper(),
+        'fileSize': calculate_size(file_to_upload),
+        'fileMd5sum': calculate_md5(file_to_upload),
+        'fileAccess': 'controlled',
+        'info': {}
+    }
+
+    if somatic_or_germline == 'Somatic':
+        metadata = tumour_analysis
+    elif somatic_or_germline == 'Germline':
+        metadata = normal_analysis
+    else:
+        sys.exit('Error: unknown variant class "%s"' % somatic_or_germline)
+
+    experimental_strategy = metadata['experiment']['experimental_strategy'].lower() if 'experimental_strategy' in metadata['experiment'] else metadata['experiment']['library_strategy'].lower()
+
+    variant_type = ''
+    if wf_short_name in (['sanger-wgs', 'sanger-wxs']):
+        fname_sample_part = metadata['samples'][0]['sampleId']
+        if file_to_upload.endswith('.flagged.muts.vcf.gz') or file_to_upload.endswith('.flagged.muts.vcf.gz.tbi'):
+            variant_type = 'snv'
+        elif file_to_upload.endswith('.flagged.vcf.gz') or file_to_upload.endswith('.flagged.vcf.gz.tbi'):
+            variant_type = 'indel'
+        elif file_to_upload.endswith('.copynumber.caveman.vcf.gz') or file_to_upload.endswith('.copynumber.caveman.vcf.gz.tbi'):
+            variant_type = 'cnv'
+        elif file_to_upload.endswith('.annot.vcf.gz') or file_to_upload.endswith('.annot.vcf.gz.tbi'):
+            variant_type = 'sv'
+        elif file_to_upload.endswith('-supplement.tgz'):
+            variant_type = file_to_upload.split(".")[-2]
+        elif file_to_upload.endswith('_metrics.tgz'):
+            variant_type = file_to_upload.split(".")[-2]
+            if re.match(r'.+?\.normal\.contamination_metrics\.tgz', file_to_upload) \
+                    or (re.match(r'.+?\.normal\.bas_metrics\.tgz', file_to_upload)):
+                fname_sample_part = normal_analysis['samples'][0]['sampleId']
+        else:
+            sys.exit('Error: unknown file type "%s"' % file_to_upload)
+
+    elif wf_short_name in (['gatk-mutect2']):
+        fname_sample_part = metadata['samples'][0]['sampleId']
+        if file_to_upload.endswith('mutect2-snv.vcf.gz') or file_to_upload.endswith('mutect2-snv.vcf.gz.tbi'):
+            variant_type = 'snv'
+        elif file_to_upload.endswith('mutect2-indel.vcf.gz') or file_to_upload.endswith('mutect2-indel.vcf.gz.tbi'):
+            variant_type = 'indel'
+        elif file_to_upload.endswith('_metrics.tgz'):
+            if file_to_upload.endswith('.contamination_metrics.tgz'):
+                variant_type = file_to_upload.split(".")[-2]
+                if re.match(r'.+?\.normal\.contamination_metrics\.tgz', file_to_upload):
+                    fname_sample_part = normal_analysis['samples'][0]['sampleId']
+            elif file_to_upload.endswith('.filtering_metrics.tgz'):
+                variant_type = 'mutect_filtering_metrics'
+            elif file_to_upload.endswith('.callable_metrics.tgz'):
+                variant_type = 'mutect_callable_metrics'
+        else:
+            sys.exit('Error: unknown file type "%s"' % file_to_upload)
+
+    elif wf_short_name in (['HaplotypeCaller']):
+        sys.exit('Error: not implemented yet for "%s"' % wf_short_name)
+
+    else:
+        sys.exit('Error: unknown variant calling workflow "%s"' % wf_short_name)
+
+    # fail with a clear message here instead of a KeyError in the table lookups below
+    if variant_type not in variant_type_to_data_type_etc:
+        sys.exit('Error: unknown file type "%s"' % file_to_upload)
+
+    # file naming patterns:
+    #   pattern: <study_id>.<donor_id>.<sample_id>.[wgs|wxs].<date>.<wf_short_name>.[somatic|germline].[snv|indel|cnv|sv].vcf.gz
+    #   example: TEST-PR.DO250183.SA610229.wxs.20200319.sanger-wxs.somatic.snv.vcf.gz
+    new_fname = '.'.join([
+        metadata['studyId'],
+        metadata['samples'][0]['donor']['donorId'],
+        fname_sample_part,
+        experimental_strategy,
+        date_str,
+        wf_short_name,
+        somatic_or_germline.lower(),
+        variant_type,
+        'vcf.gz' if variant_type in ['snv', 'indel', 'cnv', 'sv'] else 'tgz'
+    ] + (['tbi'] if file_to_upload.endswith('.tbi') else []))
+
+    file_info['fileName'] = new_fname
+
+    file_info['info'] = {
+        'data_category': variant_type_to_data_type_etc[variant_type][0],
+        'data_subtype': None
+    }
+
+    extra_info = {}
+    if new_fname.endswith('.vcf.gz'):
+        file_info['dataType'] = variant_type_to_data_type_etc[variant_type][1][0]
+        file_info['info']['data_subtype'] = variant_type_to_data_type_etc[variant_type][1][1]
+    elif new_fname.endswith('.vcf.gz.tbi'):
+        file_info['dataType'] = 'VCF Index'
+    elif new_fname.endswith('.tgz'):
+        if new_fname.endswith('-supplement.tgz'):
+            file_info['dataType'] = variant_type_to_data_type_etc[variant_type][1][0]
+            file_info['info']['data_subtype'] = variant_type_to_data_type_etc[variant_type][1][1]
+        elif new_fname.endswith('_metrics.tgz'):
+            file_info['dataType'] = variant_type_to_data_type_etc[variant_type][1][0]
+            file_info['info']['data_subtype'] = variant_type_to_data_type_etc[variant_type][1][1]
+        else:
+            sys.exit('Error: unknown file type "%s"' % file_to_upload)
+
+        with tarfile.open(file_to_upload) as tar:
+            for member in tar.getmembers():
+                if member.name.endswith('.extra_info.json'):
+                    f = tar.extractfile(member)
+                    extra_info = json.load(f)
+                    break
+    else:
+        sys.exit('Error: unknown file type "%s"' % file_to_upload)
+
+    if wf_short_name in (['sanger-wgs', 'sanger-wxs']):
+        file_info['info']['analysis_tools'] = variant_type_to_data_type_etc[variant_type][2]
+    elif wf_short_name in (['gatk-mutect2']):
+        file_info['info']['analysis_tools'] = variant_type_to_data_type_etc[variant_type][3]
+
+    if extra_info:
+        file_info['info'].update(extra_info)
+
+    new_dir = 'out'
+    os.makedirs(new_dir, exist_ok=True)
+
+    dst = os.path.join(os.getcwd(), new_dir, new_fname)
+    os.symlink(os.path.abspath(file_to_upload), dst)
+
+    return file_info
+
+
+def get_sample_info(sample_list):
+    """Return a deep copy of the samples with the ID/info keys removed from each sample, its specimen and its donor."""
+    samples = copy.deepcopy(sample_list)
+    for sample in samples:
+        for item in ['info', 'sampleId', 'specimenId', 'donorId', 'studyId']:
+            sample.pop(item, None)
+            sample['specimen'].pop(item, None)
+            sample['donor'].pop(item, None)
+
+    return samples
+
+
+def main(args):
+    """Assemble the SONG payload from the input analyses and write it to <uuid>.<analysis_type>.payload.json."""
+    normal_analysis = {}
+    normal_origin = None
+    with open(args.normal_analysis, 'r') as f:
+        normal_analysis = json.load(f)
+        if 'info' in normal_analysis and 'origin' in normal_analysis['info']:
+            normal_origin = normal_analysis['info']['origin']
+
+    tumour_analysis = {}
+    tumour_origin = None
+    if args.tumour_analysis:
+        with open(args.tumour_analysis, 'r') as f:
+            tumour_analysis = json.load(f)
+            if 'info' in tumour_analysis and 'origin' in tumour_analysis['info']:
+                tumour_origin = tumour_analysis['info']['origin']
+
+    if normal_origin is not None and tumour_origin is not None:
+        if normal_origin == tumour_origin:
+            analysis_origin = normal_origin
+        else:
+            sys.exit(f'Origins of normal and tumour analyses differ, normal: {normal_origin}, tumour: {tumour_origin}')
+    elif normal_origin is not None:
+        analysis_origin = normal_origin
+    elif tumour_origin is not None:
+        analysis_origin = tumour_origin
+    else:
+        analysis_origin = None
+
+    somatic_or_germline = 'Somatic'  # default
+    if args.wf_short_name in ['sanger-wgs', 'sanger-wxs', 'gatk-mutect2']:
+        if not tumour_analysis:
+            sys.exit('Error: metadata for tumour is missing!')
+    elif args.wf_short_name in ['HaplotypeCaller']:
+        somatic_or_germline = 'Germline'
+    else:
+        sys.exit("Unsupported variant caller: %s" % args.wf_short_name)
+
+    if args.wf_name not in workflow_full_name:
+        sys.exit("Unsupported workflow name: %s" % args.wf_name)
+
+    payload = {
+        'analysisType': {
+            'name': None
+        },
+        'info': {
+            'origin': analysis_origin  # adding this here, so the info field will maintain its position
+        },
+        'studyId': normal_analysis.get('studyId'),  # normal/tumour analysis should always be from the same study
+        'experiment': {},
+        'samples': [],
+        'files': [],
+        'workflow': {
+            'workflow_name': workflow_full_name[args.wf_name],
+            'workflow_short_name': args.wf_short_name,
+            'workflow_version': args.wf_version,
+            'run_id': args.wf_run,
+            'session_id': args.wf_session,
+            'inputs': [],
+            'genome_build': 'GRCh38_hla_decoy_ebv'
+        }
+    }
+
+    if analysis_origin is None:  # if origin is None, remove the info field
+        payload.pop('info')
+
+    # get sample of the payload
+    if somatic_or_germline == 'Somatic':  # somatic variants
+        payload['samples'] = get_sample_info(tumour_analysis.get('samples'))
+        payload['workflow']['inputs'] = [
+            {
+                "tumour_analysis_id": tumour_analysis.get("analysisId"),
+                "analysis_type": "sequencing_alignment"
+            },
+            {
+                "normal_analysis_id": normal_analysis.get("analysisId"),
+                "analysis_type": "sequencing_alignment"
+            }
+        ]
+        payload['experiment'] = {
+            'experimental_strategy': tumour_analysis['experiment']['experimental_strategy'] if 'experimental_strategy' in tumour_analysis['experiment'] else tumour_analysis['experiment']['library_strategy'],
+            'platform': tumour_analysis['experiment']['platform']
+        }
+    else:  # germline variants
+        payload['samples'] = get_sample_info(normal_analysis.get('samples'))
+        payload['workflow']['inputs'] = [
+            {
+                "normal_analysis_id": normal_analysis.get("analysisId"),
+                "analysis_type": "sequencing_alignment"
+            }
+        ]
+        payload['experiment'] = {
+            'experimental_strategy': normal_analysis['experiment']['experimental_strategy'] if 'experimental_strategy' in normal_analysis['experiment'] else normal_analysis['experiment']['library_strategy'],
+            'platform': normal_analysis['experiment']['platform']
+        }
+
+    analysis_type = 'variant_calling'
+    date_str = date.today().strftime("%Y%m%d")
+    for f in args.files_to_upload:
+        if f.endswith('-supplement.tgz'):
+            analysis_type = 'variant_calling_supplement'
+        if f.endswith('_metrics.tgz'):
+            analysis_type = 'qc_metrics'
+
+        file_info = get_files_info(f, args.wf_short_name, args.wf_version, somatic_or_germline, normal_analysis, tumour_analysis, date_str)
+
+        payload['files'].append(file_info)
+
+    payload['analysisType']['name'] = analysis_type
+
+    if not analysis_type == "qc_metrics":
+        payload['variant_class'] = somatic_or_germline
+
+    with open("%s.%s.payload.json" % (str(uuid.uuid4()), analysis_type), 'w') as f:
+        json.dump(payload, f, indent=2)
+
+
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("-n", dest="normal_analysis", required=True,
+                        help="json file containing sequencing_alignment SONG analysis for normal sample")
+    parser.add_argument("-t", dest="tumour_analysis", required=False,
+                        help="json file containing sequencing_alignment SONG analysis for tumour sample")
+    parser.add_argument("-f", dest="files_to_upload", type=str, nargs="+", help="Files to be uploaded", required=True)
+    parser.add_argument("-w", dest="wf_name", type=str, help="workflow full name", required=True)
+    parser.add_argument("-s", dest="wf_short_name", type=str, help="workflow short name", required=True)
+    parser.add_argument("-v", dest="wf_version", type=str, required=True, help="workflow version")
+    parser.add_argument("-r", dest="wf_run", type=str, required=True, help="workflow run ID")
+    parser.add_argument("-j", dest="wf_session", type=str, required=True, help="workflow session ID")
+    args = parser.parse_args()
+
+    main(args)
diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/nextflow.config b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/nextflow.config
new file mode 100644
index 0000000..f2cd1e3
--- /dev/null
+++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/nextflow.config
@@ -0,0 +1,4 @@
+docker {
+    enabled = true
+    runOptions = '-u \$(id -u):\$(id -g)'
+}
diff --git
a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/pkg.json b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/pkg.json new file mode 100644 index 0000000..560dcd4 --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/pkg.json @@ -0,0 +1,36 @@ +{ + "name": "payload-gen-variant-calling", + "version": "0.4.0", + "description": "A tool to generate SONG payloads for variant calling workflows", + "main": "main.nf", + "deprecated": false, + "keywords": [ + "bioinformatics", + "metadata", + "payload" + ], + "repository": { + "type": "git", + "url": "https://github.com/icgc-argo/data-processing-utility-tools.git" + }, + "container": { + "registries": [ + { + "registry": "ghcr.io", + "type": "docker", + "org": "icgc-argo", + "default": true + } + ] + }, + "dependencies": [], + "devDependencies": [], + "contributors": [ + { + "name": "Junjun Zhang" + } + ], + "license": "GNU Affero General Public License v3", + "bugReport": "https://github.com/icgc-argo/data-processing-utility-tools/issues", + "homepage": "https://github.com/icgc-argo/data-processing-utility-tools#readme" +} diff --git a/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/wfpr_modules b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/wfpr_modules new file mode 120000 index 0000000..1cc74ba --- /dev/null +++ b/wfpr_modules/github.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling@0.4.0/wfpr_modules @@ -0,0 +1 @@ +../../../../../wfpr_modules \ No newline at end of file From 6ede3241f222c0a9cfdb94f1f0739c5598462e23 Mon Sep 17 00:00:00 2001 From: Junjun Zhang Date: Fri, 16 Apr 2021 11:06:34 -0400 Subject: [PATCH 3/5] ran installation script for the old modules --- .../payload-gen-variant-calling.nf | 68 ------------------- 
.../tools/prep-sanger-qc/prep-sanger-qc.nf | 6 +- 2 files changed, 5 insertions(+), 69 deletions(-) delete mode 100644 modules/raw.githubusercontent.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling.0.3.6.0/tools/payload-gen-variant-calling/payload-gen-variant-calling.nf rename modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/{prep-sanger-qc.0.1.2.0 => prep-sanger-qc.0.1.3.0}/tools/prep-sanger-qc/prep-sanger-qc.nf (89%) diff --git a/modules/raw.githubusercontent.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling.0.3.6.0/tools/payload-gen-variant-calling/payload-gen-variant-calling.nf b/modules/raw.githubusercontent.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling.0.3.6.0/tools/payload-gen-variant-calling/payload-gen-variant-calling.nf deleted file mode 100644 index 63cf934..0000000 --- a/modules/raw.githubusercontent.com/icgc-argo/data-processing-utility-tools/payload-gen-variant-calling.0.3.6.0/tools/payload-gen-variant-calling/payload-gen-variant-calling.nf +++ /dev/null @@ -1,68 +0,0 @@ -#!/usr/bin/env nextflow - -/* - * Copyright (c) 2019-2021, Ontario Institute for Cancer Research (OICR). - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published - * by the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . 
- */ - -/* - * Author Junjun Zhang - */ - -nextflow.enable.dsl = 2 -version = '0.3.6.0' - -params.normal_analysis = "" -params.tumour_analysis = "" -params.files_to_upload = [] -params.wf_name = "" -params.wf_short_name = "" -params.wf_version = "" -params.container_version = '' -params.cpus = 1 -params.mem = 1 // GB -params.publish_dir = "" - -process payloadGenVariantCalling { - container "quay.io/icgc-argo/payload-gen-variant-calling:payload-gen-variant-calling.${params.container_version ?: version}" - cpus params.cpus - memory "${params.mem} GB" - publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: "${params.publish_dir ? true : ''}" - - input: - path normal_analysis - path tumour_analysis - path files_to_upload - val wf_name - val wf_short_name - val wf_version - - output: - path "*.payload.json", emit: payload - path "out/*{.tgz,.vcf.gz,.vcf.gz.tbi}", emit: files_to_upload - - script: - args_tumour_analysis = !tumour_analysis.empty() ? "-t ${tumour_analysis}" : "" - """ - payload-gen-variant-calling.py \ - -f ${files_to_upload} \ - -n ${normal_analysis} \ - -r ${workflow.runName} \ - -j ${workflow.sessionId} \ - -w ${wf_name} \ - -s ${wf_short_name} \ - -v ${wf_version} ${args_tumour_analysis} - """ -} diff --git a/modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.2.0/tools/prep-sanger-qc/prep-sanger-qc.nf b/modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.3.0/tools/prep-sanger-qc/prep-sanger-qc.nf similarity index 89% rename from modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.2.0/tools/prep-sanger-qc/prep-sanger-qc.nf rename to modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.3.0/tools/prep-sanger-qc/prep-sanger-qc.nf index f8d1352..778bda3 100644 --- 
a/modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.2.0/tools/prep-sanger-qc/prep-sanger-qc.nf +++ b/modules/raw.githubusercontent.com/icgc-argo/variant-calling-tools/prep-sanger-qc.0.1.3.0/tools/prep-sanger-qc/prep-sanger-qc.nf @@ -23,9 +23,11 @@ */ nextflow.preview.dsl = 2 -version = '0.1.2.0' +version = '0.1.3.0' params.qc_files = "" +params.publish_dir = "" + params.container_version = "" params.cpus = 1 params.mem = 2 // in GB @@ -33,6 +35,8 @@ params.mem = 2 // in GB process prepSangerQc { container "quay.io/icgc-argo/prep-sanger-qc:prep-sanger-qc.${params.container_version ?: version}" + publishDir "${params.publish_dir}/${task.process.replaceAll(':', '_')}", mode: "copy", enabled: params.publish_dir + cpus params.cpus memory "${params.mem} GB" From b0621f177eff69e9fce02287023daa4459fbf1e6 Mon Sep 17 00:00:00 2001 From: Linda Xiang Date: Fri, 16 Apr 2021 13:53:31 -0400 Subject: [PATCH 4/5] add test job in qa --- .../tests/local-test-qa.nf.json | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 sanger-wxs-variant-calling/tests/local-test-qa.nf.json diff --git a/sanger-wxs-variant-calling/tests/local-test-qa.nf.json b/sanger-wxs-variant-calling/tests/local-test-qa.nf.json new file mode 100644 index 0000000..0a31b5a --- /dev/null +++ b/sanger-wxs-variant-calling/tests/local-test-qa.nf.json @@ -0,0 +1,59 @@ +{ + "study_id": "TEST-PR", + "tumour_aln_analysis_id": "03905962-f1e1-41e8-9059-62f1e1c1e8fc", + "normal_aln_analysis_id": "9940db0f-c100-496a-80db-0fc100d96ac1", + "max_retries": 3, + "first_retry_wait_time": 5, + "cleanup": false, + "song_url": "https://song.rdpc-qa.cancercollaboratory.org", + "score_url": "https://score.rdpc-qa.cancercollaboratory.org", + "download": { + "song_url": "https://song.rdpc-qa.cancercollaboratory.org", + "song_cpus": 2, + "song_mem": 2, + "score_url": "https://score.rdpc-qa.cancercollaboratory.org", + "score_cpus": 3, + "score_mem": 8 + }, + "sangerWxsVariantCaller": 
{ + "cpus": 4, + "mem": 10, + "exclude": "chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr22,chrX,chrY,chrUn%,HLA%,%_alt,%_random,chrM,chrEBV", + "vagrent_annot": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/VAGrENT_ref_GRCh38_hla_decoy_ebv_ensembl_91.tar.gz", + "ref_genome_tar": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/core_ref_GRCh38_hla_decoy_ebv.tar.gz", + "ref_snv_indel_tar": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/SNV_INDEL_ref_GRCh38_hla_decoy_ebv-fragment.tar.gz" + }, + "generateBas": { + "cpus":2, + "mem":8, + "ref_genome_fa": "/nfs-dev-1-vol-qa-1/reference/GRCh38_hla_decoy_ebv/GRCh38_hla_decoy_ebv.fa" + }, + "repackSangerResults": { + "cpus": 2, + "mem": 4 + }, + "cavemanVcfFix": { + "cpus": 2, + "mem": 4 + }, + "prepSangerSupplement": { + "cpus": 2, + "mem": 4 + }, + "prepSangerQc": { + "cpus": 2, + "mem": 4 + }, + "extractSangerCall": { + "cpus": 2, + "mem": 4 + }, + "payloadGenVariantCall": { + "cpus": 2, + "mem": 4 + }, + "uploadVariant": { + "cpus": 2, + "mem": 4 + } +} From 1e7c75ccbca36b8074db4b815ed8ea03547f1fd9 Mon Sep 17 00:00:00 2001 From: Linda Xiang Date: Fri, 16 Apr 2021 14:07:00 -0400 Subject: [PATCH 5/5] add example-params --- example-params.json | 59 ++++++++++++++++++ .../tests/local-test-qa.nf.json | 60 +------------------ 2 files changed, 60 insertions(+), 59 deletions(-) create mode 100644 example-params.json mode change 100644 => 120000 sanger-wxs-variant-calling/tests/local-test-qa.nf.json diff --git a/example-params.json b/example-params.json new file mode 100644 index 0000000..05e5116 --- /dev/null +++ b/example-params.json @@ -0,0 +1,59 @@ +{ + "study_id": "TEST-PR", + "tumour_aln_analysis_id": "03905962-f1e1-41e8-9059-62f1e1c1e8fc", + "normal_aln_analysis_id": "9940db0f-c100-496a-80db-0fc100d96ac1", + "max_retries": 3, + "first_retry_wait_time": 5, + "cleanup": true, + "song_url": "https://song.rdpc-qa.cancercollaboratory.org", 
+ "score_url": "https://score.rdpc-qa.cancercollaboratory.org", + "download": { + "song_url": "https://song.rdpc-qa.cancercollaboratory.org", + "song_cpus": 2, + "song_mem": 2, + "score_url": "https://score.rdpc-qa.cancercollaboratory.org", + "score_cpus": 3, + "score_mem": 8 + }, + "sangerWxsVariantCaller": { + "cpus": 4, + "mem": 10, + "exclude": "chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr22,chrX,chrY,chrUn%,HLA%,%_alt,%_random,chrM,chrEBV", + "vagrent_annot": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/VAGrENT_ref_GRCh38_hla_decoy_ebv_ensembl_91.tar.gz", + "ref_genome_tar": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/core_ref_GRCh38_hla_decoy_ebv.tar.gz", + "ref_snv_indel_tar": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/SNV_INDEL_ref_GRCh38_hla_decoy_ebv-fragment.tar.gz" + }, + "generateBas": { + "cpus":2, + "mem":8, + "ref_genome_fa": "/nfs-dev-1-vol-qa-1/reference/GRCh38_hla_decoy_ebv/GRCh38_hla_decoy_ebv.fa" + }, + "repackSangerResults": { + "cpus": 2, + "mem": 4 + }, + "cavemanVcfFix": { + "cpus": 2, + "mem": 4 + }, + "prepSangerSupplement": { + "cpus": 2, + "mem": 4 + }, + "prepSangerQc": { + "cpus": 2, + "mem": 4 + }, + "extractSangerCall": { + "cpus": 2, + "mem": 4 + }, + "payloadGenVariantCall": { + "cpus": 2, + "mem": 4 + }, + "uploadVariant": { + "cpus": 2, + "mem": 4 + } +} diff --git a/sanger-wxs-variant-calling/tests/local-test-qa.nf.json b/sanger-wxs-variant-calling/tests/local-test-qa.nf.json deleted file mode 100644 index 0a31b5a..0000000 --- a/sanger-wxs-variant-calling/tests/local-test-qa.nf.json +++ /dev/null @@ -1,59 +0,0 @@ -{ - "study_id": "TEST-PR", - "tumour_aln_analysis_id": "03905962-f1e1-41e8-9059-62f1e1c1e8fc", - "normal_aln_analysis_id": "9940db0f-c100-496a-80db-0fc100d96ac1", - "max_retries": 3, - "first_retry_wait_time": 5, - "cleanup": false, - "song_url": "https://song.rdpc-qa.cancercollaboratory.org", - "score_url": 
"https://score.rdpc-qa.cancercollaboratory.org", - "download": { - "song_url": "https://song.rdpc-qa.cancercollaboratory.org", - "song_cpus": 2, - "song_mem": 2, - "score_url": "https://score.rdpc-qa.cancercollaboratory.org", - "score_cpus": 3, - "score_mem": 8 - }, - "sangerWxsVariantCaller": { - "cpus": 4, - "mem": 10, - "exclude": "chr1,chr2,chr3,chr4,chr5,chr6,chr7,chr8,chr9,chr10,chr11,chr12,chr13,chr14,chr15,chr16,chr17,chr18,chr19,chr20,chr22,chrX,chrY,chrUn%,HLA%,%_alt,%_random,chrM,chrEBV", - "vagrent_annot": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/VAGrENT_ref_GRCh38_hla_decoy_ebv_ensembl_91.tar.gz", - "ref_genome_tar": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/core_ref_GRCh38_hla_decoy_ebv.tar.gz", - "ref_snv_indel_tar": "/nfs-dev-1-vol-qa-1/reference/sanger-variant-calling/SNV_INDEL_ref_GRCh38_hla_decoy_ebv-fragment.tar.gz" - }, - "generateBas": { - "cpus":2, - "mem":8, - "ref_genome_fa": "/nfs-dev-1-vol-qa-1/reference/GRCh38_hla_decoy_ebv/GRCh38_hla_decoy_ebv.fa" - }, - "repackSangerResults": { - "cpus": 2, - "mem": 4 - }, - "cavemanVcfFix": { - "cpus": 2, - "mem": 4 - }, - "prepSangerSupplement": { - "cpus": 2, - "mem": 4 - }, - "prepSangerQc": { - "cpus": 2, - "mem": 4 - }, - "extractSangerCall": { - "cpus": 2, - "mem": 4 - }, - "payloadGenVariantCall": { - "cpus": 2, - "mem": 4 - }, - "uploadVariant": { - "cpus": 2, - "mem": 4 - } -} diff --git a/sanger-wxs-variant-calling/tests/local-test-qa.nf.json b/sanger-wxs-variant-calling/tests/local-test-qa.nf.json new file mode 120000 index 0000000..4d32cdc --- /dev/null +++ b/sanger-wxs-variant-calling/tests/local-test-qa.nf.json @@ -0,0 +1 @@ +../../example-params.json \ No newline at end of file