" }.join("\n") %>
diff --git a/assets/email_template.txt b/assets/email_template.txt
index 9b78bb18..9cf77902 100644
--- a/assets/email_template.txt
+++ b/assets/email_template.txt
@@ -6,7 +6,6 @@
`._,._,'
nf-core/viralrecon v${version}
----------------------------------------------------
-
Run Name: $runName
<% if (!success){
diff --git a/assets/methods_description_template.yml b/assets/methods_description_template.yml
new file mode 100644
index 00000000..056f5bfb
--- /dev/null
+++ b/assets/methods_description_template.yml
@@ -0,0 +1,25 @@
+id: "nf-core-viralrecon-methods-description"
+description: "Suggested text and references to use when describing pipeline usage within the methods section of a publication."
+section_name: "nf-core/viralrecon Methods Description"
+section_href: "https://github.com/nf-core/viralrecon"
+plot_type: "html"
+## TODO nf-core: Update the HTML below to your preferred methods description, e.g. add publication citation for this pipeline
+## You can inject any metadata in the Nextflow '${workflow}' object
+data: |
+  <h4>Methods</h4>
+  <p>Data was processed using nf-core/viralrecon v${workflow.manifest.version} ${doi_text} of the nf-core collection of workflows (<a href="https://doi.org/10.1038/s41587-020-0439-x">Ewels <em>et al.</em>, 2020</a>).</p>
+  <p>The pipeline was executed with Nextflow v${workflow.nextflow.version} (<a href="https://doi.org/10.1038/nbt.3820">Di Tommaso <em>et al.</em>, 2017</a>) with the following command:</p>
+  <pre><code>${workflow.commandLine}</code></pre>
+  <h4>References</h4>
+  <ul>
+    <li>Di Tommaso, P., Chatzou, M., Floden, E. W., Barja, P. P., Palumbo, E., & Notredame, C. (2017). Nextflow enables reproducible computational workflows. Nature Biotechnology, 35(4), 316-319. https://doi.org/10.1038/nbt.3820</li>
+    <li>Ewels, P. A., Peltzer, A., Fillinger, S., Patel, H., Alneberg, J., Wilm, A., Garcia, M. U., Di Tommaso, P., & Nahnsen, S. (2020). The nf-core framework for community-curated bioinformatics pipelines. Nature Biotechnology, 38(3), 276-278. https://doi.org/10.1038/s41587-020-0439-x</li>
+  </ul>
+  <div class="alert alert-info">
+    <h5>Notes:</h5>
+    <ul>
+      ${nodoi_text}
+      <li>The command above does not include parameters contained in any configs or profiles that may have been used. Ensure the config file is also uploaded with your publication!</li>
+      <li>You should also cite all software used within this run. Check the "Software Versions" of this report to get version information.</li>
+    </ul>
+  </div>
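
A note on how this template is consumed: the `${...}` placeholders in the `data` block are filled in at runtime from the Nextflow `workflow` object before the HTML is handed to MultiQC. Below is a minimal Python sketch of that substitution with made-up values; the real rendering is done by the pipeline's Groovy code, not this script.

    import re

    # Illustrative values only; at runtime these come from the Nextflow 'workflow' object.
    context = {
        "workflow.manifest.version": "2.5",
        "workflow.nextflow.version": "22.10.1",
        "workflow.commandLine": "nextflow run nf-core/viralrecon -profile test,docker",
        "doi_text": "",
        "nodoi_text": "",
    }

    with open("assets/methods_description_template.yml") as fh:
        template = fh.read()

    # Replace ${key} placeholders; unknown keys are left untouched.
    rendered = re.sub(r"\$\{([^}]+)\}", lambda m: context.get(m.group(1), m.group(0)), template)
    print(rendered)
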
diff --git a/assets/multiqc_config_illumina.yml b/assets/multiqc_config_illumina.yml
index 1979a953..2f7b84c2 100644
--- a/assets/multiqc_config_illumina.yml
+++ b/assets/multiqc_config_illumina.yml
@@ -101,6 +101,10 @@ module_order:
- "./assembly_minia/*.tsv"
report_section_order:
+ fail_mapped_reads:
+ after: summary_variants_metrics
+ fail_mapped_samples:
+ after: summary_variants_metrics
summary_assembly_metrics:
before: summary_variants_metrics
amplicon_heatmap:
diff --git a/assets/multiqc_config_nanopore.yml b/assets/multiqc_config_nanopore.yml
index 5468d7fd..f4851ee2 100644
--- a/assets/multiqc_config_nanopore.yml
+++ b/assets/multiqc_config_nanopore.yml
@@ -37,6 +37,14 @@ module_order:
- "./quast/*.tsv"
report_section_order:
+ fail_barcodes_no_sample:
+ after: summary_variants_metrics
+ fail_no_barcode_samples:
+ after: summary_variants_metrics
+ fail_barcode_count_samples:
+ after: summary_variants_metrics
+ fail_guppyplex_count_samples:
+ after: summary_variants_metrics
amplicon_heatmap:
before: summary_variants_metrics
software_versions:
diff --git a/assets/slackreport.json b/assets/slackreport.json
new file mode 100644
index 00000000..043d02f2
--- /dev/null
+++ b/assets/slackreport.json
@@ -0,0 +1,34 @@
+{
+ "attachments": [
+ {
+ "fallback": "Plain-text summary of the attachment.",
+ "color": "<% if (success) { %>good<% } else { %>danger<%} %>",
+ "author_name": "sanger-tol/readmapping v${version} - ${runName}",
+ "author_icon": "https://www.nextflow.io/docs/latest/_static/favicon.ico",
+ "text": "<% if (success) { %>Pipeline completed successfully!<% } else { %>Pipeline completed with errors<% } %>",
+ "fields": [
+ {
+ "title": "Command used to launch the workflow",
+ "value": "```${commandLine}```",
+ "short": false
+ }
+ <%
+ if (!success) { %>
+ ,
+ {
+ "title": "Full error message",
+ "value": "```${errorReport}```",
+ "short": false
+ },
+ {
+ "title": "Pipeline configuration",
+ "value": "<% out << summary.collect{ k,v -> k == "hook_url" ? "_${k}_: (_hidden_)" : ( ( v.class.toString().contains('Path') || ( v.class.toString().contains('String') && v.contains('/') ) ) ? "_${k}_: `${v}`" : (v.class.toString().contains('DateTime') ? ("_${k}_: " + v.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM))) : "_${k}_: ${v}") ) }.join(",\n") %>",
+ "short": false
+ }
+ <% }
+ %>
+ ],
+ "footer": "Completed at <% out << dateComplete.format(java.time.format.DateTimeFormatter.ofLocalizedDateTime(java.time.format.FormatStyle.MEDIUM)) %> (duration: ${duration})"
+ }
+ ]
+}
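
For context, this file is a Groovy-templated Slack message payload; once the pipeline fills in the `<% %>` blocks and `${...}` variables, the result is ordinary JSON. A hedged sketch of how such a rendered payload could be posted to a Slack incoming webhook (the file name and webhook URL are hypothetical placeholders):

    import json
    import requests

    with open("slackreport.rendered.json") as fh:  # assumed already-rendered copy
        payload = json.load(fh)

    # Hypothetical webhook URL; Slack issues the real one per workspace/channel.
    resp = requests.post("https://hooks.slack.com/services/T000/B000/XXXX", json=payload)
    resp.raise_for_status()
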
diff --git a/bin/check_samplesheet.py b/bin/check_samplesheet.py
index 261ddb95..f866fd86 100755
--- a/bin/check_samplesheet.py
+++ b/bin/check_samplesheet.py
@@ -58,17 +58,12 @@ def check_illumina_samplesheet(file_in, file_out):
sample_mapping_dict = {}
with open(file_in, "r") as fin:
-
## Check header
MIN_COLS = 2
HEADER = ["sample", "fastq_1", "fastq_2"]
header = [x.strip('"') for x in fin.readline().strip().split(",")]
if header[: len(HEADER)] != HEADER:
- print(
- "ERROR: Please check samplesheet header -> {} != {}".format(
- ",".join(header), ",".join(HEADER)
- )
- )
+ print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
sys.exit(1)
## Check sample entries
@@ -85,9 +80,7 @@ def check_illumina_samplesheet(file_in, file_out):
num_cols = len([x for x in lspl if x])
if num_cols < MIN_COLS:
print_error(
- "Invalid number of populated columns (minimum = {})!".format(
- MIN_COLS
- ),
+ "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
"Line",
line,
)
@@ -95,9 +88,7 @@ def check_illumina_samplesheet(file_in, file_out):
## Check sample name entries
sample, fastq_1, fastq_2 = lspl[: len(HEADER)]
if sample.find(" ") != -1:
- print(
- f"WARNING: Spaces have been replaced by underscores for sample: {sample}"
- )
+ print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}")
sample = sample.replace(" ", "_")
if not sample:
print_error("Sample entry has not been specified!", "Line", line)
@@ -139,21 +130,15 @@ def check_illumina_samplesheet(file_in, file_out):
with open(file_out, "w") as fout:
fout.write(",".join(["sample", "single_end", "fastq_1", "fastq_2"]) + "\n")
for sample in sorted(sample_mapping_dict.keys()):
-
## Check that multiple runs of the same sample are of the same datatype
- if not all(
- x[0] == sample_mapping_dict[sample][0][0]
- for x in sample_mapping_dict[sample]
- ):
+ if not all(x[0] == sample_mapping_dict[sample][0][0] for x in sample_mapping_dict[sample]):
print_error(
"Multiple runs of a sample must be of the same datatype!",
"Sample: {}".format(sample),
)
for idx, val in enumerate(sample_mapping_dict[sample]):
- fout.write(
- ",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n"
- )
+ fout.write(",".join(["{}_T{}".format(sample, idx + 1)] + val) + "\n")
else:
print_error("No entries to process!", "Samplesheet: {}".format(file_in))
@@ -173,17 +158,12 @@ def check_nanopore_samplesheet(file_in, file_out):
sample_mapping_dict = {}
with open(file_in, "r") as fin:
-
## Check header
MIN_COLS = 2
HEADER = ["sample", "barcode"]
header = [x.strip('"') for x in fin.readline().strip().split(",")]
if header[: len(HEADER)] != HEADER:
- print(
- "ERROR: Please check samplesheet header -> {} != {}".format(
- ",".join(header), ",".join(HEADER)
- )
- )
+ print("ERROR: Please check samplesheet header -> {} != {}".format(",".join(header), ",".join(HEADER)))
sys.exit(1)
## Check sample entries
@@ -200,9 +180,7 @@ def check_nanopore_samplesheet(file_in, file_out):
num_cols = len([x for x in lspl if x])
if num_cols < MIN_COLS:
print_error(
- "Invalid number of populated columns (minimum = {})!".format(
- MIN_COLS
- ),
+ "Invalid number of populated columns (minimum = {})!".format(MIN_COLS),
"Line",
line,
)
@@ -210,14 +188,10 @@ def check_nanopore_samplesheet(file_in, file_out):
## Check sample entry
sample, barcode = lspl[: len(HEADER)]
if sample.find(" ") != -1:
- print(
- f"WARNING: Spaces have been replaced by underscores for sample: {sample}"
- )
+ print(f"WARNING: Spaces have been replaced by underscores for sample: {sample}")
sample = sample.replace(" ", "_")
if sample.find("-") != -1:
- print(
- f"WARNING: Dashes have been replaced by underscores for sample: {sample}"
- )
+ print(f"WARNING: Dashes have been replaced by underscores for sample: {sample}")
sample = sample.replace("-", "_")
if not sample:
print_error("Sample entry has not been specified!", "Line", line)
@@ -274,4 +248,4 @@ def main(args=None):
if __name__ == "__main__":
- sys.exit(main())
\ No newline at end of file
+ sys.exit(main())
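
One behaviour worth spelling out from the reformatted block above: multiple runs of the same sample are written out with numbered `_T1`, `_T2`, ... suffixes. A toy illustration with invented file names:

    sample_mapping_dict = {
        "SAMPLE_A": [
            ["0", "a_R1.fastq.gz", "a_R2.fastq.gz"],
            ["0", "b_R1.fastq.gz", "b_R2.fastq.gz"],
        ],
    }
    for sample in sorted(sample_mapping_dict):
        for idx, val in enumerate(sample_mapping_dict[sample]):
            print(",".join(["{}_T{}".format(sample, idx + 1)] + val))
    # SAMPLE_A_T1,0,a_R1.fastq.gz,a_R2.fastq.gz
    # SAMPLE_A_T2,0,b_R1.fastq.gz,b_R2.fastq.gz
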
diff --git a/bin/collapse_primer_bed.py b/bin/collapse_primer_bed.py
index 7a439f36..d04d7744 100755
--- a/bin/collapse_primer_bed.py
+++ b/bin/collapse_primer_bed.py
@@ -56,9 +56,7 @@ def collapse_primer_bed(file_in, file_out, left_primer_suffix, right_primer_suff
line = fin.readline()
if line:
chrom, start, end, name, score, strand = line.strip().split("\t")
- primer = re.sub(
- r"(?:{}|{}).*".format(left_primer_suffix, right_primer_suffix), "", name
- )
+ primer = re.sub(r"(?:{}|{}).*".format(left_primer_suffix, right_primer_suffix), "", name)
if primer not in interval_dict:
interval_dict[primer] = []
interval_dict[primer].append((chrom, int(start), int(end), score))
@@ -81,9 +79,7 @@ def collapse_primer_bed(file_in, file_out, left_primer_suffix, right_primer_suff
def main(args=None):
args = parse_args(args)
- collapse_primer_bed(
- args.FILE_IN, args.FILE_OUT, args.LEFT_PRIMER_SUFFIX, args.RIGHT_PRIMER_SUFFIX
- )
+ collapse_primer_bed(args.FILE_IN, args.FILE_OUT, args.LEFT_PRIMER_SUFFIX, args.RIGHT_PRIMER_SUFFIX)
if __name__ == "__main__":
diff --git a/bin/fastq_dir_to_samplesheet.py b/bin/fastq_dir_to_samplesheet.py
index f56d5a00..b2e08eed 100755
--- a/bin/fastq_dir_to_samplesheet.py
+++ b/bin/fastq_dir_to_samplesheet.py
@@ -7,9 +7,7 @@
def parse_args(args=None):
- Description = (
- "Generate nf-core/viralrecon samplesheet from a directory of FastQ files."
- )
+ Description = "Generate nf-core/viralrecon samplesheet from a directory of FastQ files."
Epilog = "Example usage: python fastq_dir_to_samplesheet.py "
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
@@ -79,9 +77,7 @@ def sanitize_sample(path, extension):
sample = os.path.basename(path).replace(extension, "")
if sanitise_name:
sample = sanitise_name_delimiter.join(
- os.path.basename(path).split(sanitise_name_delimiter)[
- :sanitise_name_index
- ]
+ os.path.basename(path).split(sanitise_name_delimiter)[:sanitise_name_index]
)
return sample
@@ -92,9 +88,7 @@ def get_fastqs(extension):
sorted results.
See also https://stackoverflow.com/questions/6773584/how-is-pythons-glob-glob-ordered
"""
- return sorted(
- glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False)
- )
+ return sorted(glob.glob(os.path.join(fastq_dir, f"*{extension}"), recursive=False))
read_dict = {}
@@ -128,9 +122,7 @@ def get_fastqs(extension):
sample_info = ",".join([sample, read_1, read_2])
fout.write(f"{sample_info}\n")
else:
- error_str = (
- "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n"
- )
+ error_str = "\nWARNING: No FastQ files found so samplesheet has not been created!\n\n"
error_str += "Please check the values provided for the:\n"
error_str += " - Path to the directory containing the FastQ files\n"
error_str += " - '--read1_extension' parameter\n"
diff --git a/bin/fetch_sra_runinfo.py b/bin/fetch_sra_runinfo.py
deleted file mode 100755
index 378c1745..00000000
--- a/bin/fetch_sra_runinfo.py
+++ /dev/null
@@ -1,259 +0,0 @@
-#!/usr/bin/env python
-
-import os
-import re
-import sys
-import csv
-import errno
-import requests
-import argparse
-
-
-## Example ids supported by this script
-SRA_IDS = [
- "PRJNA63463",
- "SAMN00765663",
- "SRA023522",
- "SRP003255",
- "SRR390278",
- "SRS282569",
- "SRX111814",
-]
-ENA_IDS = [
- "ERA2421642",
- "ERP120836",
- "ERR674736",
- "ERS4399631",
- "ERX629702",
- "PRJEB7743",
- "SAMEA3121481",
-]
-GEO_IDS = ["GSE18729", "GSM465244"]
-ID_REGEX = r"^[A-Z]+"
-PREFIX_LIST = sorted(
- list(set([re.search(ID_REGEX, x).group() for x in SRA_IDS + ENA_IDS + GEO_IDS]))
-)
-
-
-def parse_args(args=None):
- Description = "Download and create a run information metadata file from SRA/ENA/GEO identifiers."
- Epilog = """Example usage: python fetch_sra_runinfo.py """
-
- parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
- parser.add_argument(
- "FILE_IN", help="File containing database identifiers, one per line."
- )
- parser.add_argument("FILE_OUT", help="Output file in tab-delimited format.")
- parser.add_argument(
- "-pl",
- "--platform",
- type=str,
- dest="PLATFORM",
- default="",
- help="Comma-separated list of platforms to use for filtering. Accepted values = 'ILLUMINA', 'OXFORD_NANOPORE' (default: '').",
- )
- parser.add_argument(
- "-ll",
- "--library_layout",
- type=str,
- dest="LIBRARY_LAYOUT",
- default="",
- help="Comma-separated list of library layouts to use for filtering. Accepted values = 'SINGLE', 'PAIRED' (default: '').",
- )
- return parser.parse_args(args)
-
-
-def validate_csv_param(param, validVals, param_desc):
- validList = []
- if param:
- userVals = param.split(",")
- intersect = list(set(userVals) & set(validVals))
- if len(intersect) == len(userVals):
- validList = intersect
- else:
- print(
- "ERROR: Please provide a valid {} parameter!\nProvided values = {}\nAccepted values = {}".format(
- param_desc, param, ",".join(validVals)
- )
- )
- sys.exit(1)
- return validList
-
-
-def make_dir(path):
- if not len(path) == 0:
- try:
- os.makedirs(path)
- except OSError as exception:
- if exception.errno != errno.EEXIST:
- raise
-
-
-def fetch_url(url, encoding="utf-8"):
- try:
- r = requests.get(url)
- except requests.exceptions.RequestException as e:
- raise SystemExit(e)
- if r.status_code != 200:
- print("ERROR: Connection failed\nError code '{}'".format(r.status_code))
- sys.exit(1)
- return r.content.decode(encoding).splitlines()
-
-
-def id_to_srx(db_id):
- ids = []
- url = "https://trace.ncbi.nlm.nih.gov/Traces/sra/sra.cgi?save=efetch&db=sra&rettype=runinfo&term={}".format(
- db_id
- )
- for row in csv.DictReader(fetch_url(url), delimiter=","):
- ids.append(row["Experiment"])
- return ids
-
-
-def id_to_erx(db_id):
- ids = []
- fields = ["run_accession", "experiment_accession"]
- url = "http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession={}&result=read_run&fields={}".format(
- db_id, ",".join(fields)
- )
- for row in csv.DictReader(fetch_url(url), delimiter="\t"):
- ids.append(row["experiment_accession"])
- return ids
-
-
-def gse_to_srx(db_id):
- ids = []
- url = "https://www.ncbi.nlm.nih.gov/geo/query/acc.cgi?acc={}&targ=gsm&view=data&form=text".format(
- db_id
- )
- gsm_ids = [x.split("=")[1].strip() for x in fetch_url(url) if x.find("GSM") != -1]
- for gsm_id in gsm_ids:
- ids += id_to_srx(gsm_id)
- return ids
-
-
-def get_ena_fields():
- fields = []
- url = "https://www.ebi.ac.uk/ena/portal/api/returnFields?dataPortal=ena&format=tsv&result=read_run"
- for row in csv.DictReader(fetch_url(url), delimiter="\t"):
- fields.append(row["columnId"])
- return fields
-
-
-def fetch_sra_runinfo(FileIn, FileOut, platformList=[], libraryLayoutList=[]):
- total_out = 0
- seen_ids = []
- run_ids = []
- header = []
- make_dir(os.path.dirname(FileOut))
- ena_fields = get_ena_fields()
- fin = open(FileIn, "r")
- fout = open(FileOut, "w")
- while True:
- line = fin.readline()
- if line:
- db_id = line.strip()
- match = re.search(ID_REGEX, db_id)
- if match:
- prefix = match.group()
- if prefix in PREFIX_LIST:
- if not db_id in seen_ids:
-
- ids = [db_id]
- ## Resolve/expand these ids against GEO URL
- if prefix in ["GSE"]:
- ids = gse_to_srx(db_id)
-
- ## Resolve/expand these ids against SRA URL
- elif prefix in ["GSM", "PRJNA", "SAMN", "SRR"]:
- ids = id_to_srx(db_id)
-
- ## Resolve/expand these ids against ENA URL
- elif prefix in ["ERR"]:
- ids = id_to_erx(db_id)
-
- ## Resolve/expand to get run identifier from ENA and write to file
- for id in ids:
- url = "http://www.ebi.ac.uk/ena/data/warehouse/filereport?accession={}&result=read_run&fields={}".format(
- id, ",".join(ena_fields)
- )
- csv_dict = csv.DictReader(fetch_url(url), delimiter="\t")
- for row in csv_dict:
- run_id = row["run_accession"]
- if not run_id in run_ids:
-
- writeID = True
- if platformList:
- if (
- row["instrument_platform"]
- not in platformList
- ):
- writeID = False
- if libraryLayoutList:
- if (
- row["library_layout"]
- not in libraryLayoutList
- ):
- writeID = False
-
- if writeID:
- if total_out == 0:
- header = sorted(row.keys())
- fout.write(
- "{}\n".format("\t".join(sorted(header)))
- )
- else:
- if header != sorted(row.keys()):
- print(
- "ERROR: Metadata columns do not match for id {}!\nLine: '{}'".format(
- run_id, line.strip()
- )
- )
- sys.exit(1)
- fout.write(
- "{}\n".format(
- "\t".join([row[x] for x in header])
- )
- )
- total_out += 1
- run_ids.append(run_id)
- seen_ids.append(db_id)
- else:
- id_str = ", ".join([x + "*" for x in PREFIX_LIST])
- print(
- "ERROR: Please provide a valid database id starting with {}!\nLine: '{}'".format(
- id_str, line.strip()
- )
- )
- sys.exit(1)
- else:
- id_str = ", ".join([x + "*" for x in PREFIX_LIST])
- print(
- "ERROR: Please provide a valid database id starting with {}!\nLine: '{}'".format(
- id_str, line.strip()
- )
- )
- sys.exit(1)
- else:
- break
- fin.close()
- fout.close()
-
-
-def main(args=None):
- args = parse_args(args)
- platformList = validate_csv_param(
- args.PLATFORM,
- validVals=["ILLUMINA", "OXFORD_NANOPORE"],
- param_desc="--platform",
- )
- libraryLayoutList = validate_csv_param(
- args.LIBRARY_LAYOUT,
- validVals=["SINGLE", "PAIRED"],
- param_desc="--library_layout",
- )
- fetch_sra_runinfo(args.FILE_IN, args.FILE_OUT, platformList, libraryLayoutList)
-
-
-if __name__ == "__main__":
- sys.exit(main())
diff --git a/bin/ivar_variants_to_vcf.py b/bin/ivar_variants_to_vcf.py
index a900f502..d00b3988 100755
--- a/bin/ivar_variants_to_vcf.py
+++ b/bin/ivar_variants_to_vcf.py
@@ -1,5 +1,6 @@
#!/usr/bin/env python
+from email.charset import QP
import os
import sys
import re
@@ -82,8 +83,8 @@ def parse_ivar_line(line):
return:
CHROM, POS, ID, REF, ALT, QUAL, INFO, FORMAT, REF_CODON, ALT_CODON, pass_test, var_type
"""
- line = re.split("\t", line)
+ line = line.strip("\n").split("\t")
## Assign initial fields to variables
CHROM = line[0]
POS = line[1]
@@ -92,7 +93,12 @@ def parse_ivar_line(line):
ALT = line[3]
## REF/ALF depths and quals
- REF_DP = int(line[4])
+ try:
+ REF_DP = int(line[4])
+ except ValueError:
+ print(line)
+ print(line[4])
+ exit(-1)
REF_RV = int(line[5])
REF_FW = REF_DP - REF_RV
REF_QUAL = int(line[6])
@@ -120,7 +126,7 @@ def parse_ivar_line(line):
QUAL = "."
## Determine FILTER field
- INFO = f"DP={line[11]}"
+ INFO = f"DP={int(float(line[11]))}"
pass_test = line[13]
return (
@@ -172,9 +178,7 @@ def strand_bias_filter(format):
# table:
## REF_FW REF_RV
## ALT_FW ALT_RV
- table = np.array(
- [[format[0] - format[1], format[1]], [format[3] - format[4], format[4]]]
- )
+ table = np.array([[format[0] - format[1], format[1]], [format[3] - format[4], format[4]]])
oddsr, pvalue = fisher_exact(table, alternative="greater")
# h0: both strands are equally represented.
@@ -202,9 +206,7 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename):
if ref:
header_contig = []
for record in SeqIO.parse(ref, "fasta"):
- header_contig += [
- "##contig=<ID=" + record.id + ",length=" + str(len(record.seq)) + ">"
- ]
+ header_contig += ["##contig=<ID=" + record.id + ",length=" + str(len(record.seq)) + ">"]
header_source += header_contig
@@ -225,9 +227,7 @@ def write_vcf_header(ref, ignore_strand_bias, file_out, filename):
]
header_cols = [f"#CHROM\tPOS\tID\tREF\tALT\tQUAL\tFILTER\tINFO\tFORMAT\t{filename}"]
if not ignore_strand_bias:
- header_filter += [
- '##FILTER=<ID=sb,Description="Strand bias filter not passed">'
- ]
+ header_filter += ['##FILTER=<ID=sb,Description="Strand bias filter not passed">']
header = header_source + header_info + header_filter + header_format + header_cols
fout = open(file_out, "w")
@@ -289,7 +289,8 @@ def check_consecutive(mylist):
return:
Number of items consecutive in the list - [False, 2, 3,..]
"""
- my_list = list(map(int, mylist))
+ # use the first element (the position) of each tuple for the consecutive check
+ my_list = list(map(int, [i[0] for i in mylist]))
## Check if the list contains consecutive numbers
if len(my_list) == 1:
return False
@@ -316,8 +317,9 @@ def get_diff_position(seq1, seq2):
Returns:
Returns position where seq1 != seq2
"""
+ # If the codon is NA, treat it as not being the same codon
if seq1 == "NA":
- return False
+ return 2
ind_diff = [i for i in range(len(seq1)) if seq1[i] != seq2[i]]
if len(ind_diff) > 1:
@@ -380,9 +382,7 @@ def process_variants(variants, num_collapse):
"""
# Collapsed variant parameters equal to first variant
key_list = ["chrom", "pos", "id", "qual", "filter", "info", "format"]
- chrom, pos, id, qual, filter, info, format = [
- variants[next(iter(variants))][key] for key in key_list
- ]
+ chrom, pos, id, qual, filter, info, format = [variants[next(iter(variants))][key] for key in key_list]
# If no consecutive, process one variant line
# If two consecutive, process two variant lines into one
@@ -390,7 +390,7 @@ def process_variants(variants, num_collapse):
ref = ""
alt = ""
iter_variants = iter(variants)
- for i in range(num_collapse):
+ for _ in range(num_collapse): # fixed notation
var = next(iter_variants)
ref += variants[var]["ref"]
alt += variants[var]["alt"]
@@ -409,6 +409,7 @@ def main(args=None):
var_count_dict = {"SNP": 0, "INS": 0, "DEL": 0} # variant counts
variants = OrderedDict() # variant dict (merge codon)
q_pos = deque([], maxlen=3) # pos fifo queue (merge codon)
+ last_pos = ""
# Create output directory
make_dir(out_dir)
@@ -423,8 +424,7 @@ def main(args=None):
#################################
with open(args.file_in, "r") as fin:
for line in fin:
- if not re.match("REGION", line):
-
+ if "REGION" not in line:
################
## Parse line ##
################
@@ -445,6 +445,12 @@ def main(args=None):
pass_test,
var_type,
) = parse_ivar_line(line)
+
+ ## If pos is duplicated due to annotation skip lines
+ if pos == last_pos:
+ continue
+
+ last_pos = pos
#####################
## Process filters ##
#####################
@@ -469,10 +475,7 @@ def main(args=None):
if args.pass_only and filter != "PASS":
write_line = False
### AF filtering. ALT_DP/(ALT_DP+REF_DP)
- if (
- float(format[3] / (format[0] + format[3]))
- < args.allele_freq_threshold
- ):
+ if float(format[3] / (format[0] + format[3])) < args.allele_freq_threshold:
write_line = False
### Duplication filter
if (chrom, pos, ref, alt) in var_list:
@@ -486,7 +489,7 @@ def main(args=None):
############################################################
if not args.ignore_merge_codons and var_type == "SNP":
## re-fill queue and dict accordingly
- q_pos.append(pos)
+ q_pos.append((pos, var_type)) # adding type information
variants[(chrom, pos, ref, alt)] = {
"chrom": chrom,
"pos": pos,
@@ -504,9 +507,7 @@ def main(args=None):
if len(q_pos) == q_pos.maxlen:
fe_codon_ref = variants[next(iter(variants))]["ref_codon"]
fe_codon_alt = variants[next(iter(variants))]["alt_codon"]
- num_collapse = check_merge_codons(
- q_pos, fe_codon_ref, fe_codon_alt
- )
+ num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt)
(
chrom,
pos,
@@ -520,7 +521,7 @@ def main(args=None):
) = process_variants(variants, num_collapse)
## Empty variants dict and queue accordingly
- for i in range(num_collapse):
+ for _ in range(num_collapse):
variants.popitem(last=False)
q_pos.popleft()
else:
@@ -549,28 +550,44 @@ def main(args=None):
## handle last lines ##
#######################
while len(q_pos) > 0:
- fe_codon_ref = variants[next(iter(variants))]["ref_codon"]
- fe_codon_alt = variants[next(iter(variants))]["alt_codon"]
- num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt)
- (chrom, pos, id, ref, alt, qual, filter, info, format) = process_variants(
- variants, num_collapse
- )
-
- var_count_dict[var_type] += 1
- write_vcf_line(
- chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out
- )
- ## Empty variants dict and queue accordingly
- for i in range(num_collapse):
- variants.popitem(last=False)
- q_pos.popleft()
+ try:
+ fe_codon_ref = variants[next(iter(variants))]["ref_codon"]
+ fe_codon_alt = variants[next(iter(variants))]["alt_codon"]
+ except StopIteration:
+ break
+ else:
+ num_collapse = check_merge_codons(q_pos, fe_codon_ref, fe_codon_alt)
+ (chrom, pos, id, ref, alt, qual, filter, info, format) = process_variants(variants, num_collapse)
+
+ var_count_dict[q_pos[0][1]] += 1
+ write_vcf_line(chrom, pos, id, ref, alt, filter, qual, info, format, args.file_out)
+ ## Empty variants dict and queue accordingly
+ for _ in range(num_collapse):
+ variants.popitem(last=False)
+ q_pos.popleft()
#############################################
## variant counts to pass to MultiQC ##
#############################################
var_count_list = [(k, str(v)) for k, v in sorted(var_count_dict.items())]
- print("\t".join(["sample"] + [x[0] for x in var_count_list]))
- print("\t".join([filename] + [x[1] for x in var_count_list]))
+
+ # format output table a little more cleanly
+ # row_spacing = len(filename)
+
+ row = create_f_string(30, "<") # an arbitraily long value to fit most sample names
+ row += create_f_string(10) * len(var_count_list) # A spacing of ten looks pretty
+
+ headers = ["sample"]
+ headers.extend([x[0] for x in var_count_list])
+ data = [filename]
+ data.extend([x[1] for x in var_count_list])
+ print(row.format(*headers))
+ print(row.format(*data))
+
+
+def create_f_string(str_size, placement="^"):
+ row_size = "{: " + placement + str(str_size) + "}"
+ return row_size
if __name__ == "__main__":
diff --git a/bin/make_bed_mask.py b/bin/make_bed_mask.py
index 46e06bed..29b07a27 100755
--- a/bin/make_bed_mask.py
+++ b/bin/make_bed_mask.py
@@ -49,10 +49,10 @@ def make_bed_mask(bed_in, bed_out, indels_pos_len):
for position in indels_positions:
indel_init_pos = position
indel_whole_length = indels_pos_len[position]
- indel_end_pos = int(indel_init_pos) + int(indel_whole_length)-1
- if int(init_pos) in range(
+ indel_end_pos = int(indel_init_pos) + int(indel_whole_length) - 1
+ if int(init_pos) in range(int(indel_init_pos), int(indel_end_pos)) or int(end_pos) in range(
int(indel_init_pos), int(indel_end_pos)
- ) or int(end_pos) in range(int(indel_init_pos), int(indel_end_pos)):
+ ):
test = False
break
else:
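
A note on the containment test above: Python's `range(a, b)` includes `a` but excludes `b`, so with `indel_end_pos = indel_init_pos + indel_whole_length - 1` the end position itself is not matched. A small demo:

    indel_init_pos, indel_whole_length = 100, 5
    indel_end_pos = indel_init_pos + indel_whole_length - 1  # 104
    for pos in (99, 100, 103, 104):
        print(pos, pos in range(indel_init_pos, indel_end_pos))
    # 99 False, 100 True, 103 True, 104 False (the end position is excluded)
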
diff --git a/bin/make_variants_long_table.py b/bin/make_variants_long_table.py
index a19c495f..f0bad221 100755
--- a/bin/make_variants_long_table.py
+++ b/bin/make_variants_long_table.py
@@ -14,22 +14,66 @@
logger = logging.getLogger()
-pd.set_option('display.max_columns', None)
-pd.set_option('display.max_rows', None)
+pd.set_option("display.max_columns", None)
+pd.set_option("display.max_rows", None)
def parser_args(args=None):
- Description = 'Create long/wide tables containing variant information.'
+ Description = "Create long/wide tables containing variant information."
Epilog = """Example usage: python make_variants_long_table.py --bcftools_query_dir ./bcftools_query/ --snpsift_dir ./snpsift/ --pangolin_dir ./pangolin/"""
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
- parser.add_argument("-bd", "--bcftools_query_dir" , type=str, default="./bcftools_query" , help="Directory containing output of BCFTools query for each sample (default: './bcftools_query').")
- parser.add_argument("-sd", "--snpsift_dir" , type=str, default="./snpsift" , help="Directory containing output of SnpSift for each sample (default: './snpsift').")
- parser.add_argument("-pd", "--pangolin_dir" , type=str, default="./pangolin" , help="Directory containing output of Pangolin for each sample (default: './pangolin').")
- parser.add_argument("-bs", "--bcftools_file_suffix", type=str, default=".bcftools_query.txt" , help="Suffix to trim off BCFTools query file name to obtain sample name (default: '.bcftools_query.txt').")
- parser.add_argument("-ss", "--snpsift_file_suffix" , type=str, default=".snpsift.txt" , help="Suffix to trim off SnpSift file name to obtain sample name (default: '.snpsift.txt').")
- parser.add_argument("-ps", "--pangolin_file_suffix", type=str, default=".pangolin.csv" , help="Suffix to trim off Pangolin file name to obtain sample name (default: '.pangolin.csv').")
- parser.add_argument("-of", "--output_file" , type=str, default="variants_long_table.csv", help="Full path to output file (default: 'variants_long_table.csv').")
- parser.add_argument("-vc", "--variant_caller" , type=str, default="ivar" , help="Tool used to call the variants (default: 'ivar').")
+ parser.add_argument(
+ "-bd",
+ "--bcftools_query_dir",
+ type=str,
+ default="./bcftools_query",
+ help="Directory containing output of BCFTools query for each sample (default: './bcftools_query').",
+ )
+ parser.add_argument(
+ "-sd",
+ "--snpsift_dir",
+ type=str,
+ default="./snpsift",
+ help="Directory containing output of SnpSift for each sample (default: './snpsift').",
+ )
+ parser.add_argument(
+ "-pd",
+ "--pangolin_dir",
+ type=str,
+ default="./pangolin",
+ help="Directory containing output of Pangolin for each sample (default: './pangolin').",
+ )
+ parser.add_argument(
+ "-bs",
+ "--bcftools_file_suffix",
+ type=str,
+ default=".bcftools_query.txt",
+ help="Suffix to trim off BCFTools query file name to obtain sample name (default: '.bcftools_query.txt').",
+ )
+ parser.add_argument(
+ "-ss",
+ "--snpsift_file_suffix",
+ type=str,
+ default=".snpsift.txt",
+ help="Suffix to trim off SnpSift file name to obtain sample name (default: '.snpsift.txt').",
+ )
+ parser.add_argument(
+ "-ps",
+ "--pangolin_file_suffix",
+ type=str,
+ default=".pangolin.csv",
+ help="Suffix to trim off Pangolin file name to obtain sample name (default: '.pangolin.csv').",
+ )
+ parser.add_argument(
+ "-of",
+ "--output_file",
+ type=str,
+ default="variants_long_table.csv",
+ help="Full path to output file (default: 'variants_long_table.csv').",
+ )
+ parser.add_argument(
+ "-vc", "--variant_caller", type=str, default="ivar", help="Tool used to call the variants (default: 'ivar')."
+ )
return parser.parse_args(args)
@@ -43,113 +87,138 @@ def make_dir(path):
def get_file_dict(file_dir, file_suffix):
- files = glob.glob(os.path.join(file_dir, f'*{file_suffix}'))
- samples = [os.path.basename(x).removesuffix(f'{file_suffix}') for x in files]
+ files = glob.glob(os.path.join(file_dir, f"*{file_suffix}"))
+ samples = [os.path.basename(x).removesuffix(f"{file_suffix}") for x in files]
return dict(zip(samples, files))
def three_letter_aa_to_one(hgvs_three):
- aa_dict= {
- 'Ala': 'A', 'Arg': 'R', 'Asn': 'N', 'Asp': 'D', 'Cys': 'C',
- 'Gln': 'Q', 'Glu': 'E', 'Gly': 'G', 'His': 'H', 'Ile': 'I',
- 'Leu': 'L', 'Lys': 'K', 'Met': 'M', 'Phe': 'F', 'Pro': 'P',
- 'Pyl': 'O', 'Ser': 'S', 'Sec': 'U', 'Thr': 'T', 'Trp': 'W',
- 'Tyr': 'Y', 'Val': 'V', 'Asx': 'B', 'Glx': 'Z', 'Xaa': 'X',
- 'Xle': 'J', 'Ter': '*'
+ aa_dict = {
+ "Ala": "A",
+ "Arg": "R",
+ "Asn": "N",
+ "Asp": "D",
+ "Cys": "C",
+ "Gln": "Q",
+ "Glu": "E",
+ "Gly": "G",
+ "His": "H",
+ "Ile": "I",
+ "Leu": "L",
+ "Lys": "K",
+ "Met": "M",
+ "Phe": "F",
+ "Pro": "P",
+ "Pyl": "O",
+ "Ser": "S",
+ "Sec": "U",
+ "Thr": "T",
+ "Trp": "W",
+ "Tyr": "Y",
+ "Val": "V",
+ "Asx": "B",
+ "Glx": "Z",
+ "Xaa": "X",
+ "Xle": "J",
+ "Ter": "*",
}
hgvs_one = hgvs_three
for key in aa_dict:
if key in hgvs_one:
- hgvs_one = hgvs_one.replace(str(key),str(aa_dict[key]))
+ hgvs_one = hgvs_one.replace(str(key), str(aa_dict[key]))
return hgvs_one
## Returns a pandas dataframe in the format:
- # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
- # 0 MN908947.3 241 C T PASS 642 375 266 0.41
- # 1 MN908947.3 1875 C T PASS 99 63 34 0.34
+# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
+# 0 MN908947.3 241 C T PASS 642 375 266 0.41
+# 1 MN908947.3 1875 C T PASS 99 63 34 0.34
def ivar_bcftools_query_to_table(bcftools_query_file):
- table = pd.read_table(bcftools_query_file, header='infer')
- table = table.dropna(how='all', axis=1)
+ table = pd.read_table(bcftools_query_file, header="infer")
+ table = table.dropna(how="all", axis=1)
old_colnames = list(table.columns)
- new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames]
+ new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames]
table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
if not table.empty:
table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric)
- table['AF'] = table['ALT_DP'] / table['DP']
- table['AF'] = table['AF'].round(2)
+ table["AF"] = table["ALT_DP"] / table["DP"]
+ table["AF"] = table["AF"].round(2)
return table
## Returns a pandas dataframe in the format:
- # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
- # 0 MN908947.3 241 C T . 24 8 16 0.67
- # 1 MN908947.3 3037 C T . 17 5 12 0.71
+# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
+# 0 MN908947.3 241 C T . 24 8 16 0.67
+# 1 MN908947.3 3037 C T . 17 5 12 0.71
def bcftools_bcftools_query_to_table(bcftools_query_file):
- table = pd.read_table(bcftools_query_file, header='infer')
- table = table.dropna(how='all', axis=1)
+ table = pd.read_table(bcftools_query_file, header="infer")
+ table = table.dropna(how="all", axis=1)
old_colnames = list(table.columns)
- new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames]
+ new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames]
table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
if not table.empty:
- table[['REF_DP','ALT_DP']] = table['AD'].str.split(',', expand=True)
+ table[["REF_DP", "ALT_DP"]] = table["AD"].str.split(",", expand=True)
table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric)
- table['AF'] = table['ALT_DP'] / table['DP']
- table['AF'] = table['AF'].round(2)
- table.drop('AD', axis=1, inplace=True)
+ table["AF"] = table["ALT_DP"] / table["DP"]
+ table["AF"] = table["AF"].round(2)
+ table.drop("AD", axis=1, inplace=True)
return table
## Returns a pandas dataframe in the format:
- # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
- # 0 MN908947.3 241 C T PASS 30 1 29 0.97
- # 1 MN908947.3 1163 A T PASS 28 0 28 1.00
+# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
+# 0 MN908947.3 241 C T PASS 30 1 29 0.97
+# 1 MN908947.3 1163 A T PASS 28 0 28 1.00
def nanopolish_bcftools_query_to_table(bcftools_query_file):
- table = pd.read_table(bcftools_query_file, header='infer')
- table = table.dropna(how='all', axis=1)
+ table = pd.read_table(bcftools_query_file, header="infer")
+ table = table.dropna(how="all", axis=1)
old_colnames = list(table.columns)
- new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames]
+ new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames]
table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
## Split out ref/alt depths from StrandSupport column
if not table.empty:
table_cp = table.copy()
- table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']] = table_cp['StrandSupport'].str.split(',', expand=True)
- table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']] = table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']].apply(pd.to_numeric)
-
- table['DP'] = table_cp[['FORW_REF_DP','REV_REF_DP', 'FORW_ALT_DP','REV_ALT_DP']].sum(axis=1)
- table['REF_DP'] = table_cp[['FORW_REF_DP','REV_REF_DP']].sum(axis=1)
- table['ALT_DP'] = table_cp[['FORW_ALT_DP','REV_ALT_DP']].sum(axis=1)
- table['AF'] = table['ALT_DP'] / table['DP']
- table['AF'] = table['AF'].round(2)
- table.drop('StrandSupport', axis=1, inplace=True)
+ table_cp[["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]] = table_cp["StrandSupport"].str.split(
+ ",", expand=True
+ )
+ table_cp[["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]] = table_cp[
+ ["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]
+ ].apply(pd.to_numeric)
+
+ table["DP"] = table_cp[["FORW_REF_DP", "REV_REF_DP", "FORW_ALT_DP", "REV_ALT_DP"]].sum(axis=1)
+ table["REF_DP"] = table_cp[["FORW_REF_DP", "REV_REF_DP"]].sum(axis=1)
+ table["ALT_DP"] = table_cp[["FORW_ALT_DP", "REV_ALT_DP"]].sum(axis=1)
+ table["AF"] = table["ALT_DP"] / table["DP"]
+ table["AF"] = table["AF"].round(2)
+ table.drop("StrandSupport", axis=1, inplace=True)
return table
## Returns a pandas dataframe in the format:
- # CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
- # 0 MN908947.3 241 C T PASS 21 0 21 1.00
- # 1 MN908947.3 3037 C T PASS 28 0 25 0.89
+# CHROM POS REF ALT FILTER DP REF_DP ALT_DP AF
+# 0 MN908947.3 241 C T PASS 21 0 21 1.00
+# 1 MN908947.3 3037 C T PASS 28 0 25 0.89
def medaka_bcftools_query_to_table(bcftools_query_file):
- table = pd.read_table(bcftools_query_file, header='infer')
- table = table.dropna(how='all', axis=1)
+ table = pd.read_table(bcftools_query_file, header="infer")
+ table = table.dropna(how="all", axis=1)
old_colnames = list(table.columns)
- new_colnames = [x.split(']')[-1].split(':')[-1] for x in old_colnames]
+ new_colnames = [x.split("]")[-1].split(":")[-1] for x in old_colnames]
table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
if not table.empty:
- table[['REF_DP','ALT_DP']] = table['AC'].str.split(',', expand=True)
+ table[["REF_DP", "ALT_DP"]] = table["AC"].str.split(",", expand=True)
table[["ALT_DP", "DP"]] = table[["ALT_DP", "DP"]].apply(pd.to_numeric)
- table['AF'] = table['ALT_DP'] / table['DP']
- table['AF'] = table['AF'].round(2)
- table.drop('AC', axis=1, inplace=True)
+ table["AF"] = table["ALT_DP"] / table["DP"]
+ table["AF"] = table["AF"].round(2)
+ table.drop("AC", axis=1, inplace=True)
return table
@@ -157,25 +226,25 @@ def medaka_bcftools_query_to_table(bcftools_query_file):
def get_pangolin_lineage(pangolin_file):
table = pd.read_csv(pangolin_file, sep=",", header="infer")
- return table['lineage'][0]
+ return table["lineage"][0]
def snpsift_to_table(snpsift_file):
- table = pd.read_table(snpsift_file, sep="\t", header='infer')
- table = table.loc[:, ~table.columns.str.contains('^Unnamed')]
+ table = pd.read_table(snpsift_file, sep="\t", header="infer")
+ table = table.loc[:, ~table.columns.str.contains("^Unnamed")]
old_colnames = list(table.columns)
- new_colnames = [x.replace('ANN[*].', '') for x in old_colnames]
+ new_colnames = [x.replace("ANN[*].", "") for x in old_colnames]
table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
- table = table.loc[:, ['CHROM', 'POS', 'REF', 'ALT', 'GENE', 'EFFECT', 'HGVS_C', 'HGVS_P']]
+ table = table.loc[:, ["CHROM", "POS", "REF", "ALT", "GENE", "EFFECT", "HGVS_C", "HGVS_P"]]
## Split by comma and get first value in cols = ['ALT','GENE','EFFECT','HGVS_C','HGVS_P']
for i in range(len(table)):
- for j in range(3,8):
- table.iloc[i,j] = str(table.iloc[i,j]).split(",")[0]
+ for j in range(3, 8):
+ table.iloc[i, j] = str(table.iloc[i, j]).split(",")[0]
## Amino acid substitution
aa = []
- for index,item in table["HGVS_P"].iteritems():
+ for index, item in table["HGVS_P"].iteritems():
hgvs_p = three_letter_aa_to_one(str(item))
aa.append(hgvs_p)
table["HGVS_P_1LETTER"] = pd.Series(aa)
@@ -191,9 +260,11 @@ def main(args=None):
make_dir(out_dir)
## Check correct variant caller has been provided
- variant_callers = ['ivar', 'bcftools', 'nanopolish', 'medaka']
+ variant_callers = ["ivar", "bcftools", "nanopolish", "medaka"]
if args.variant_caller not in variant_callers:
- logger.error(f"Invalid option '--variant caller {args.variant_caller}'. Valid options: " + ', '.join(variant_callers))
+ logger.error(
+ f"Invalid option '--variant caller {args.variant_caller}'. Valid options: " + ", ".join(variant_callers)
+ )
sys.exit(1)
## Find files and create a dictionary {'sample': '/path/to/file'}
@@ -203,50 +274,52 @@ def main(args=None):
## Check all files are provided for each sample
if set(bcftools_files) != set(snpsift_files):
- logger.error(f"Number of BCFTools ({len(bcftools_files)}) and SnpSift ({len(snpsift_files)}) files do not match!")
+ logger.error(
+ f"Number of BCFTools ({len(bcftools_files)}) and SnpSift ({len(snpsift_files)}) files do not match!"
+ )
sys.exit(1)
else:
if pangolin_files:
if set(bcftools_files) != set(pangolin_files):
- logger.error(f"Number of BCFTools ({len(bcftools_files)}) and Pangolin ({len(pangolin_files)}) files do not match!")
+ logger.error(
+ f"Number of BCFTools ({len(bcftools_files)}) and Pangolin ({len(pangolin_files)}) files do not match!"
+ )
sys.exit(1)
## Create per-sample table and write to file
sample_tables = []
for sample in sorted(bcftools_files):
-
## Read in BCFTools query file
bcftools_table = None
- if args.variant_caller == 'ivar':
+ if args.variant_caller == "ivar":
bcftools_table = ivar_bcftools_query_to_table(bcftools_files[sample])
- elif args.variant_caller == 'bcftools':
+ elif args.variant_caller == "bcftools":
bcftools_table = bcftools_bcftools_query_to_table(bcftools_files[sample])
- elif args.variant_caller == 'nanopolish':
+ elif args.variant_caller == "nanopolish":
bcftools_table = nanopolish_bcftools_query_to_table(bcftools_files[sample])
- elif args.variant_caller == 'medaka':
+ elif args.variant_caller == "medaka":
bcftools_table = medaka_bcftools_query_to_table(bcftools_files[sample])
if not bcftools_table.empty:
-
## Read in SnpSift file
snpsift_table = snpsift_to_table(snpsift_files[sample])
- merged_table = pd.DataFrame(data = bcftools_table)
- merged_table.insert(0,'SAMPLE', sample)
- merged_table = pd.merge(merged_table, snpsift_table, how='outer')
- merged_table['CALLER'] = args.variant_caller
+ merged_table = pd.DataFrame(data=bcftools_table)
+ merged_table.insert(0, "SAMPLE", sample)
+ merged_table = pd.merge(merged_table, snpsift_table, how="outer")
+ merged_table["CALLER"] = args.variant_caller
## Read in Pangolin lineage file
if pangolin_files:
- merged_table['LINEAGE'] = get_pangolin_lineage(pangolin_files[sample])
+ merged_table["LINEAGE"] = get_pangolin_lineage(pangolin_files[sample])
sample_tables.append(merged_table)
## Merge table across samples
if sample_tables:
merged_tables = pd.concat(sample_tables)
- merged_tables.to_csv(args.output_file, index=False, encoding='utf-8-sig')
+ merged_tables.to_csv(args.output_file, index=False, encoding="utf-8-sig")
-if __name__ == '__main__':
+if __name__ == "__main__":
sys.exit(main())
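
The allele-frequency arithmetic shared by the `*_bcftools_query_to_table` helpers above is simply `AF = ALT_DP / DP`, rounded to two decimals; the toy values below reproduce the bcftools docstring example:

    import pandas as pd

    table = pd.DataFrame({"POS": [241, 3037], "ALT_DP": [16, 12], "DP": [24, 17]})
    table["AF"] = (table["ALT_DP"] / table["DP"]).round(2)
    print(table)
    #     POS  ALT_DP  DP    AF
    # 0   241      16  24  0.67
    # 1  3037      12  17  0.71
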
diff --git a/bin/multiqc_to_custom_csv.py b/bin/multiqc_to_custom_csv.py
index 90a4c21a..5ce8c36d 100755
--- a/bin/multiqc_to_custom_csv.py
+++ b/bin/multiqc_to_custom_csv.py
@@ -8,7 +8,9 @@
def parse_args(args=None):
- Description = "Create custom spreadsheet for pertinent MultiQC metrics generated by the nf-core/viralrecon pipeline."
+ Description = (
+ "Create custom spreadsheet for pertinent MultiQC metrics generated by the nf-core/viralrecon pipeline."
+ )
Epilog = "Example usage: python multiqc_to_custom_tsv.py"
parser = argparse.ArgumentParser(description=Description, epilog=Epilog)
parser.add_argument(
@@ -58,9 +60,7 @@ def find_tag(d, tag):
yield i
-def yaml_fields_to_dict(
- yaml_file, append_dict={}, field_mapping_list=[], valid_sample_list=[]
-):
+def yaml_fields_to_dict(yaml_file, append_dict={}, field_mapping_list=[], valid_sample_list=[]):
integer_fields = [
"mapped_passed",
"number_of_SNPs",
@@ -94,9 +94,7 @@ def yaml_fields_to_dict(
val = list(find_tag(yaml_dict[k], j[0]))
## Fix for Cutadapt reporting reads/pairs as separate values
if j[0] == "r_written" and len(val) == 0:
- val = [
- list(find_tag(yaml_dict[k], "pairs_written"))[0] * 2
- ]
+ val = [list(find_tag(yaml_dict[k], "pairs_written"))[0] * 2]
if len(val) != 0:
val = val[0]
if len(j) == 2:
@@ -134,9 +132,7 @@ def yaml_fields_to_dict(
return append_dict
-def metrics_dict_to_file(
- file_field_list, multiqc_data_dir, out_file, valid_sample_list=[]
-):
+def metrics_dict_to_file(file_field_list, multiqc_data_dir, out_file, valid_sample_list=[]):
metrics_dict = {}
field_list = []
for yaml_file, mapping_list in file_field_list:
@@ -159,7 +155,7 @@ def metrics_dict_to_file(
for field in field_list:
if field in metrics_dict[k]:
if metrics_dict[k][field]:
- row_list.append(str(metrics_dict[k][field]).replace(',', ';'))
+ row_list.append(str(metrics_dict[k][field]).replace(",", ";"))
else:
row_list.append("NA")
else:
@@ -186,9 +182,7 @@ def main(args=None):
[
(
"% Non-host reads (Kraken 2)",
- [
- "PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"
- ],
+ ["PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"],
)
],
),
@@ -206,9 +200,7 @@ def main(args=None):
[
(
"Coverage median",
- [
- "VARIANTS: mosdepth_mqc-generalstats-variants_mosdepth-median_coverage"
- ],
+ ["VARIANTS: mosdepth_mqc-generalstats-variants_mosdepth-median_coverage"],
),
(
"% Coverage > 1x",
@@ -253,9 +245,7 @@ def main(args=None):
[
(
"% Non-host reads (Kraken 2)",
- [
- "PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"
- ],
+ ["PREPROCESS: Kraken 2_mqc-generalstats-preprocess_kraken_2-Unclassified"],
)
],
),
@@ -347,9 +337,7 @@ def main(args=None):
elif args.PLATFORM == "nanopore":
## List of real samples to output in report
sample_list = []
- yaml_file = os.path.join(
- args.MULTIQC_DATA_DIR, "multiqc_samtools_flagstat.yaml"
- )
+ yaml_file = os.path.join(args.MULTIQC_DATA_DIR, "multiqc_samtools_flagstat.yaml")
if os.path.exists(yaml_file):
metrics_dict = yaml_fields_to_dict(
yaml_file=yaml_file,
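
The Cutadapt fix visible above handles reports that only contain `pairs_written`: the read count is recovered by doubling the pair count. A one-line toy check with an invented value:

    yaml_metrics = {"pairs_written": 50000}  # toy stand-in for the parsed Cutadapt YAML
    val = [yaml_metrics["pairs_written"] * 2]
    print(val[0])  # 100000 reads written
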
diff --git a/conf/base.config b/conf/base.config
index 5ecbe848..443a49a1 100644
--- a/conf/base.config
+++ b/conf/base.config
@@ -19,6 +19,16 @@ process {
maxErrors = '-1'
// Process-specific resource requirements
+ // NOTE - Please try and re-use the labels below as much as possible.
+ // These labels are used and recognised by default in DSL2 files hosted on nf-core/modules.
+ // If possible, it would be nice to keep the same label naming convention when
+ // adding in your local modules too.
+ // See https://www.nextflow.io/docs/latest/config.html#config-process-selectors
+ withLabel:process_single {
+ cpus = { check_max( 1 , 'cpus' ) }
+ memory = { check_max( 6.GB * task.attempt, 'memory' ) }
+ time = { check_max( 4.h * task.attempt, 'time' ) }
+ }
withLabel:process_low {
cpus = { check_max( 2 * task.attempt, 'cpus' ) }
memory = { check_max( 12.GB * task.attempt, 'memory' ) }
diff --git a/conf/modules_illumina.config b/conf/modules_illumina.config
index 5cd076ae..05424060 100644
--- a/conf/modules_illumina.config
+++ b/conf/modules_illumina.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
@@ -39,6 +39,15 @@ process {
]
}
+ withName: 'CUSTOM_GETCHROMSIZES' {
+ publishDir = [
+ path: { "${params.outdir}/genome" },
+ mode: params.publish_dir_mode,
+ saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
+ enabled: params.save_reference
+ ]
+ }
+
withName: 'CAT_FASTQ' {
publishDir = [
path: { "${params.outdir}/fastq" },
@@ -49,7 +58,7 @@ process {
if (!params.skip_fastqc) {
process {
- withName: '.*:.*:FASTQC_FASTP:FASTQC_RAW' {
+ withName: '.*:.*:FASTQ_TRIM_FASTP_FASTQC:FASTQC_RAW' {
ext.args = '--quiet'
publishDir = [
path: { "${params.outdir}/fastqc/raw" },
@@ -83,18 +92,11 @@ if (!params.skip_fastp) {
]
]
}
-
- withName: 'MULTIQC_TSV_FAIL_READS' {
- publishDir = [
- path: { "${params.outdir}/multiqc" },
- enabled: false
- ]
- }
}
if (!params.skip_fastqc) {
process {
- withName: '.*:.*:FASTQC_FASTP:FASTQC_TRIM' {
+ withName: '.*:.*:FASTQ_TRIM_FASTP_FASTQC:FASTQC_TRIM' {
ext.args = '--quiet'
publishDir = [
path: { "${params.outdir}/fastqc/trim" },
@@ -162,7 +164,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' {
+ withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.sorted" }
publishDir = [
path: { "${params.outdir}/variants/bowtie2" },
@@ -171,7 +173,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:.*:ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' {
+ withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:SAMTOOLS_INDEX' {
publishDir = [
path: { "${params.outdir}/variants/bowtie2" },
mode: params.publish_dir_mode,
@@ -179,7 +181,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:.*:ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' {
+ withName: '.*:.*:FASTQ_ALIGN_BOWTIE2:.*:BAM_STATS_SAMTOOLS:.*' {
ext.prefix = { "${meta.id}.sorted.bam" }
publishDir = [
path: { "${params.outdir}/variants/bowtie2/samtools_stats" },
@@ -187,26 +189,6 @@ if (!params.skip_variants) {
pattern: "*.{stats,flagstat,idxstats}"
]
}
-
- withName: 'MULTIQC_TSV_FAIL_MAPPED' {
- publishDir = [
- path: { "${params.outdir}/multiqc" },
- enabled: false
- ]
- }
- }
-
- if (params.protocol == 'amplicon' || !params.skip_asciigenome) {
- process {
- withName: 'CUSTOM_GETCHROMSIZES' {
- publishDir = [
- path: { "${params.outdir}/genome" },
- mode: params.publish_dir_mode,
- saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
- enabled: params.save_reference
- ]
- }
- }
}
if (!params.skip_ivar_trim && params.protocol == 'amplicon') {
@@ -225,7 +207,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_SORT' {
+ withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:SAMTOOLS_SORT' {
ext.prefix = { "${meta.id}.ivar_trim.sorted" }
publishDir = [
path: { "${params.outdir}/variants/bowtie2" },
@@ -235,7 +217,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:.*:PRIMER_TRIM_IVAR:.*:SAMTOOLS_INDEX' {
+ withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:SAMTOOLS_INDEX' {
publishDir = [
path: { "${params.outdir}/variants/bowtie2" },
mode: params.publish_dir_mode,
@@ -244,7 +226,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:.*:PRIMER_TRIM_IVAR:.*:BAM_STATS_SAMTOOLS:.*' {
+ withName: '.*:.*:BAM_TRIM_PRIMERS_IVAR:.*:BAM_STATS_SAMTOOLS:.*' {
ext.prefix = { "${meta.id}.ivar_trim.sorted.bam" }
publishDir = [
path: { "${params.outdir}/variants/bowtie2/samtools_stats" },
@@ -277,7 +259,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:MARK_DUPLICATES_PICARD:SAMTOOLS_INDEX' {
+ withName: '.*:BAM_MARKDUPLICATES_PICARD:SAMTOOLS_INDEX' {
publishDir = [
path: { "${params.outdir}/variants/bowtie2" },
mode: params.publish_dir_mode,
@@ -285,7 +267,7 @@ if (!params.skip_variants) {
]
}
- withName: '.*:MARK_DUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' {
+ withName: '.*:BAM_MARKDUPLICATES_PICARD:BAM_STATS_SAMTOOLS:.*' {
ext.prefix = { "${meta.id}.markduplicates.sorted.bam" }
publishDir = [
path: { "${params.outdir}/variants/bowtie2/samtools_stats" },
@@ -670,13 +652,6 @@ if (!params.skip_variants) {
saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null }
]
}
-
- withName: 'MULTIQC_TSV_NEXTCLADE' {
- publishDir = [
- path: { "${params.outdir}/multiqc" },
- enabled: false
- ]
- }
}
}
diff --git a/conf/modules_nanopore.config b/conf/modules_nanopore.config
index 98d06e91..5a4a277f 100644
--- a/conf/modules_nanopore.config
+++ b/conf/modules_nanopore.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Config file for defining DSL2 per module options and publishing paths
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Available keys to override module options:
ext.args = Additional arguments appended to command in module.
ext.args2 = Second set of arguments appended to command in module (multi-tool modules).
@@ -33,13 +33,6 @@ process {
]
}
- withName: 'MULTIQC_TSV_BARCODE_COUNT|MULTIQC_TSV_GUPPYPLEX_COUNT' {
- publishDir = [
- path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" },
- enabled: false
- ]
- }
-
withName: 'ARTIC_GUPPYPLEX' {
ext.args = params.primer_set_version == 1200 ? '--min-length 250 --max-length 1500' : '--min-length 400 --max-length 700'
publishDir = [
@@ -120,20 +113,10 @@ process {
// Optional configuration options
//
-if (params.input) {
- process {
- withName: 'MULTIQC_TSV_NO_.*' {
- publishDir = [
- path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" },
- enabled: false
- ]
- }
- }
-}
-
if (params.sequencing_summary && !params.skip_pycoqc) {
process {
withName: 'PYCOQC' {
+ ext.prefix = 'pycoqc'
publishDir = [
path: { "${params.outdir}/pycoqc" },
mode: params.publish_dir_mode,
@@ -243,13 +226,6 @@ if (!params.skip_nextclade) {
saveAs: { filename -> filename.endsWith(".csv") && !filename.endsWith("errors.csv") && !filename.endsWith("insertions.csv") ? filename : null }
]
}
-
- withName: 'MULTIQC_TSV_NEXTCLADE' {
- publishDir = [
- path: { "${params.outdir}/multiqc/${params.artic_minion_caller}" },
- enabled: false
- ]
- }
}
}
diff --git a/conf/test.config b/conf/test.config
index 45dad62c..15e93a4b 100644
--- a/conf/test.config
+++ b/conf/test.config
@@ -20,7 +20,7 @@ params {
max_time = '6.h'
// Input data to test amplicon analysis
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_amplicon.csv'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_amplicon_illumina.csv'
platform = 'illumina'
protocol = 'amplicon'
primer_set = 'artic'
@@ -35,5 +35,4 @@ params {
// Assembly options
assemblers = 'spades,unicycler,minia'
- skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors
}
diff --git a/conf/test_full.config b/conf/test_full.config
index 76c17fcb..3c9b7ec4 100644
--- a/conf/test_full.config
+++ b/conf/test_full.config
@@ -15,7 +15,7 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'
// Input data for full test of amplicon analysis
- input = 's3://nf-core-awsmegatests/viralrecon/input_data/210212_K00102_0557_AHKN3LBBXY/samplesheet.csv'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_amplicon_illumina.csv'
platform = 'illumina'
protocol = 'amplicon'
primer_set = 'artic'
@@ -29,7 +29,6 @@ params {
// Assembly options
assemblers = 'spades,unicycler,minia'
- skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors
}
process {
diff --git a/conf/test_full_nanopore.config b/conf/test_full_nanopore.config
index 856246ea..2536ea4b 100644
--- a/conf/test_full_nanopore.config
+++ b/conf/test_full_nanopore.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running full-size tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a full size pipeline test.
Use as follows:
@@ -16,15 +16,15 @@ params {
// Input data for full test of amplicon analysis
platform = 'nanopore'
- input = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/samplesheet.csv'
- fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/'
- fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/'
- sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_amplicon_nanopore.csv'
+ fastq_dir = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/fastq_pass/'
+ fast5_dir = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/fast5_pass/'
+ sequencing_summary = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/sequencing_summary.txt'
// Genome references
genome = 'MN908947.3'
primer_set_version = 3
// Other parameters
- artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5'
+ artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/20210205_1526_X4_FAP51364_21fa8135/r941_min_high_g360_model.hdf5'
}
diff --git a/conf/test_full_sispa.config b/conf/test_full_sispa.config
index 7c190905..90bc0842 100644
--- a/conf/test_full_sispa.config
+++ b/conf/test_full_sispa.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running full-size tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a full size pipeline test.
Use as follows:
@@ -15,7 +15,7 @@ params {
config_profile_description = 'Full test dataset to check pipeline function'
// Input data for full test of SISPA/metagenomics analysis
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_full_illumina_sispa.csv'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_full_metagenomic_illumina.csv'
platform = 'illumina'
protocol = 'metagenomic'
diff --git a/conf/test_nanopore.config b/conf/test_nanopore.config
index 853d4a05..406c8c3e 100644
--- a/conf/test_nanopore.config
+++ b/conf/test_nanopore.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
@@ -21,15 +21,15 @@ params {
// Input data to test nanopore analysis
platform = 'nanopore'
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_nanopore.csv'
- fastq_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fastq_pass/'
- fast5_dir = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/fast5_pass/'
- sequencing_summary = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/sequencing_summary.txt'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_amplicon_nanopore.csv'
+ fastq_dir = 's3://ngi-igenomes/test-data/viralrecon/minion_test/fastq_pass/'
+ fast5_dir = 's3://ngi-igenomes/test-data/viralrecon/minion_test/fast5_pass/'
+ sequencing_summary = 's3://ngi-igenomes/test-data/viralrecon/minion_test/sequencing_summary.txt'
// Genome references
genome = 'MN908947.3'
primer_set_version = 3
// Other parameters
- artic_minion_medaka_model = 's3://nf-core-awsmegatests/viralrecon/input_data/minion_test/r941_min_high_g360_model.hdf5'
+ artic_minion_medaka_model = 's3://ngi-igenomes/test-data/viralrecon/minion_test/r941_min_high_g360_model.hdf5'
}
diff --git a/conf/test_sispa.config b/conf/test_sispa.config
index c918e194..d3e39be8 100644
--- a/conf/test_sispa.config
+++ b/conf/test_sispa.config
@@ -1,7 +1,7 @@
/*
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Nextflow config file for running minimal tests
-========================================================================================
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Defines input files and everything required to run a fast and simple pipeline test.
Use as follows:
@@ -20,7 +20,7 @@ params {
max_time = '6.h'
// Input data to test SISPA/metagenomics analysis
- input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/samplesheet_test_illumina_sispa.csv'
+ input = 'https://raw.githubusercontent.com/nf-core/test-datasets/viralrecon/samplesheet/v2.6/samplesheet_test_metagenomic_illumina.csv'
platform = 'illumina'
protocol = 'metagenomic'
@@ -33,5 +33,4 @@ params {
// Assembly options
assemblers = 'spades,unicycler,minia'
- skip_plasmidid = true // Skip this by default to bypass Github Actions disk quota errors
}
diff --git a/docs/images/nf-core-viralrecon_metro_map.svg b/docs/images/nf-core-viralrecon_metro_map.svg
new file mode 100644
index 00000000..38e6792c
--- /dev/null
+++ b/docs/images/nf-core-viralrecon_metro_map.svg
@@ -0,0 +1,7257 @@
+ [7,257 lines of SVG markup omitted: pipeline metro map diagram]
diff --git a/docs/images/nf-core-viralrecon_metro_map_illumina.png b/docs/images/nf-core-viralrecon_metro_map_illumina.png
new file mode 100644
index 00000000..fd5f2928
Binary files /dev/null and b/docs/images/nf-core-viralrecon_metro_map_illumina.png differ
diff --git a/docs/images/nf-core-viralrecon_metro_map_nanopore.png b/docs/images/nf-core-viralrecon_metro_map_nanopore.png
new file mode 100644
index 00000000..5fe0cdf1
Binary files /dev/null and b/docs/images/nf-core-viralrecon_metro_map_nanopore.png differ
diff --git a/docs/usage.md b/docs/usage.md
index b8416d20..1f27af21 100644
--- a/docs/usage.md
+++ b/docs/usage.md
@@ -4,13 +4,17 @@
> _Documentation of pipeline parameters is generated automatically from the pipeline schema and can no longer be found in markdown files._
+## Pipeline parameters
+
+Please provide pipeline parameters via the CLI or Nextflow `-params-file` option. Custom config files including those provided by the `-c` Nextflow option can be used to provide any configuration except for parameters; see [docs](https://nf-co.re/usage/configuration#custom-configuration-files).
+
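+To illustrate the split: pipeline parameters (`params.*`) belong on the CLI or in a `-params-file`, while a `-c` custom config carries everything else. A hedged sketch of a legitimate `-c` config (the scheduler and queue names below are placeholder assumptions):
+
+```groovy
+// custom.config: configuration that IS appropriate for '-c'; pipeline
+// parameters (params.*) should be passed via the CLI or '-params-file' instead
+process {
+    executor = 'slurm'   // assumed scheduler for this sketch
+    queue    = 'short'   // assumed queue name
+}
+```
+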
## Samplesheet format
### Illumina
You will need to create a samplesheet with information about the samples you would like to analyse before running the pipeline. Use this parameter to specify its location. It has to be a comma-separated file with 3 columns, and a header row as shown in the examples below.
-```console
+```bash
--input '[path to samplesheet file]'
```
@@ -176,7 +180,7 @@ nextflow run nf-core/viralrecon \
The typical command for running the pipeline is as follows:
-```console
+```bash
nextflow run nf-core/viralrecon --input samplesheet.csv --outdir <OUTDIR> --genome 'MN908947.3' -profile docker
```
@@ -184,9 +188,9 @@ This will launch the pipeline with the `docker` configuration profile. See below
Note that the pipeline will create the following files in your working directory:
-```console
+```bash
work # Directory containing the nextflow working files
-<OUTIDR>        # Finished results in specified location (defined with --outdir)
+<OUTDIR>        # Finished results in specified location (defined with --outdir)
.nextflow_log # Log file from Nextflow
# Other nextflow hidden files, eg. history of pipeline runs and old logs.
```
@@ -195,7 +199,7 @@ work # Directory containing the nextflow working files
When you run the above command, Nextflow automatically pulls the pipeline code from GitHub and stores it as a cached version. When running the pipeline after this, it will always use the cached version if available - even if the pipeline has been updated since. To make sure that you're running the latest version of the pipeline, make sure that you regularly update the cached version of the pipeline:
-```console
+```bash
nextflow pull nf-core/viralrecon
```
@@ -203,9 +207,9 @@ nextflow pull nf-core/viralrecon
It is a good idea to specify a pipeline version when running the pipeline on your data. This ensures that a specific version of the pipeline code and software are used when you run your pipeline. If you keep using the same tag, you'll be running the same version of the pipeline, even if there have been changes to the code since.
-First, go to the [nf-core/viralrecon releases page](https://github.com/nf-core/viralrecon/releases) and find the latest version number - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`.
+First, go to the [nf-core/viralrecon releases page](https://github.com/nf-core/viralrecon/releases) and find the latest pipeline version - numeric only (eg. `1.3.1`). Then specify this when running the pipeline with `-r` (one hyphen) - eg. `-r 1.3.1`. Of course, you can switch to another version by changing the number after the `-r` flag.
-This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future.
+This version number will be logged in reports when you run the pipeline, so that you'll know what you used when you look back in the future, for example at the bottom of the MultiQC reports.
## Core Nextflow arguments
@@ -215,7 +219,7 @@ This version number will be logged in reports when you run the pipeline, so that
Use this parameter to choose a configuration profile. Profiles can give configuration presets for different compute environments.
-Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below. When using Biocontainers, most of these software packaging methods pull Docker containers from quay.io e.g [FastQC](https://quay.io/repository/biocontainers/fastqc) except for Singularity which directly downloads Singularity images via https hosted by the [Galaxy project](https://depot.galaxyproject.org/singularity/) and Conda which downloads and installs software locally from [Bioconda](https://bioconda.github.io/).
+Several generic profiles are bundled with the pipeline which instruct the pipeline to use software packaged using different methods (Docker, Singularity, Podman, Shifter, Charliecloud, Conda) - see below.
> We highly recommend the use of Docker or Singularity containers for full pipeline reproducibility, however when this is not possible, Conda is also supported.
@@ -224,8 +228,11 @@ The pipeline also dynamically loads configurations from [https://github.com/nf-c
Note that multiple profiles can be loaded, for example: `-profile test,docker` - the order of arguments is important!
They are loaded in sequence, so later profiles can overwrite earlier profiles.
-If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended.
+If `-profile` is not specified, the pipeline will run locally and expect all software to be installed and available on the `PATH`. This is _not_ recommended, since it can lead to different results on different machines depending on the compute environment.
+- `test`
+ - A profile with a complete configuration for automated testing
+ - Includes links to test data so needs no other parameters
- `docker`
- A generic configuration profile to be used with [Docker](https://docker.com/)
- `singularity`
@@ -238,9 +245,6 @@ If `-profile` is not specified, the pipeline will run locally and expect all sof
- A generic configuration profile to be used with [Charliecloud](https://hpc.github.io/charliecloud/)
- `conda`
- A generic configuration profile to be used with [Conda](https://conda.io/docs/). Please only use Conda as a last resort i.e. when it's not possible to run the pipeline with Docker, Singularity, Podman, Shifter or Charliecloud.
-- `test`
- - A profile with a complete configuration for automated testing
- - Includes links to test data so needs no other parameters
### `-resume`
@@ -289,8 +293,14 @@ Work dir:
Tip: you can replicate the issue by changing to the process work dir and entering the command `bash .command.run`
```
+#### For beginners
+
+As a first step to bypass this error, you can try to increase the overall resources available to the pipeline via the parameters `--max_cpus`, `--max_memory`, and `--max_time`. Based on the error above, you would need to increase the amount of memory. To find the default value for `--max_memory`, go to the [parameter documentation of rnaseq](https://nf-co.re/rnaseq/3.9/parameters) and scroll down to the `show hidden parameter` button; in this case the default is 128GB. You can then try to re-run your pipeline with `--max_memory 200GB -resume` to skip all processes that have already completed successfully. If you cannot increase the resources for the complete pipeline, you can instead adapt the resources for a single process as described below.
+
+#### Advanced option on process level
+
To bypass this error you would need to find exactly which resources are set by the `STAR_ALIGN` process. The quickest way is to search for `process STAR_ALIGN` in the [nf-core/rnaseq Github repo](https://github.com/nf-core/rnaseq/search?q=process+STAR_ALIGN).
-We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/software/star/align/main.nf`.
+We have standardised the structure of Nextflow DSL2 pipelines such that all module files will be present in the `modules/` directory and so, based on the search results, the file we want is `modules/nf-core/star/align/main.nf`.
If you click on the link to that file you will notice that there is a `label` directive at the top of the module that is set to [`label process_high`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/modules/nf-core/software/star/align/main.nf#L9).
The [Nextflow `label`](https://www.nextflow.io/docs/latest/process.html#label) directive allows us to organise workflow processes in separate groups which can be referenced in a configuration file to select and configure subset of processes having similar computing requirements.
The default values for the `process_high` label are set in the pipeline's [`base.config`](https://github.com/nf-core/rnaseq/blob/4c27ef5610c87db00c3c5a3eed10b1d161abf575/conf/base.config#L33-L37) which in this case is defined as 72GB.
@@ -309,9 +319,10 @@ process {
>
> If you get a warning suggesting that the process selector isn't recognised check that the process name has been specified correctly.
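As a concrete illustration of the `withName` mechanism described above, here is a minimal custom config sketch that you would pass to the pipeline via `-c custom.config` (assuming the simple `STAR_ALIGN` selector from the rnaseq example; real pipelines may need the fully qualified process name):

```groovy
// custom.config: a minimal sketch raising memory for a single process;
// only the 'memory' directive changes, CPUs and time keep their defaults
process {
    withName: 'STAR_ALIGN' {
        memory = 100.GB
    }
}
```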
-### Updating containers
+### Updating containers (advanced users)
The [Nextflow DSL2](https://www.nextflow.io/docs/latest/dsl2.html) implementation of this pipeline uses one container per process which makes it much easier to maintain and update software dependencies. If for some reason you need to use a different version of a particular tool with the pipeline then you just need to identify the `process` name and override the Nextflow `container` definition for that process using the `withName` declaration.
+For example, in the [nf-core/viralrecon](https://nf-co.re/viralrecon) pipeline a tool called [Pangolin](https://github.com/cov-lineages/pangolin) has been used during the COVID-19 pandemic to assign lineages to SARS-CoV-2 genome sequenced samples. Given that the lineage assignments change quite frequently, it doesn't make sense to re-release nf-core/viralrecon every time a new version of Pangolin is released. However, you can override the default container used by the pipeline by creating a custom config file and passing it as a command-line argument via `-c custom.config`.
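+A hedged sketch of such a container override (the process selector and container tag below are illustrative assumptions, not pinned values from this pipeline; the Pangolin-specific steps are covered in the section that follows):
+
+```groovy
+// custom.config: override the container used by one process; replace the tag
+// with the release you actually want (illustrative values only)
+process {
+    withName: 'PANGOLIN' {
+        container = 'quay.io/biocontainers/pangolin:4.2--pyhdfd78af_1'
+    }
+}
+```
+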
#### Pangolin
@@ -413,6 +424,14 @@ See the main [Nextflow documentation](https://www.nextflow.io/docs/latest/config
If you have any questions or issues please send us a message on [Slack](https://nf-co.re/join/slack) on the [`#configs` channel](https://nfcore.slack.com/channels/configs).
+## Azure Resource Requests
+
+To run the pipeline on Azure Batch, use the `azurebatch` profile by specifying `-profile azurebatch`.
+By default we recommend setting the compute VM type (`params.vm_type`) to `Standard_D16_v3`, but this can be changed if required.
+
+Note that the choice of VM size depends on your quota and the overall workload during the analysis.
+For a thorough list, please refer to the [Sizes for virtual machines in Azure](https://docs.microsoft.com/en-us/azure/virtual-machines/sizes).
+
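+A hedged sketch of the Azure Batch settings a custom Nextflow config might carry (account names, keys, and region are placeholders; see the Nextflow Azure documentation for the full option set):
+
+```groovy
+// custom.config: placeholder Azure Batch credentials and location
+azure {
+    storage {
+        accountName = '<YOUR_STORAGE_ACCOUNT>'
+        accountKey  = '<YOUR_STORAGE_KEY>'
+    }
+    batch {
+        accountName = '<YOUR_BATCH_ACCOUNT>'
+        accountKey  = '<YOUR_BATCH_KEY>'
+        location    = '<YOUR_REGION>'
+    }
+}
+```
+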
## Running in the background
Nextflow handles job submissions and supervises the running jobs. The Nextflow process must run until the pipeline is finished.
@@ -427,6 +446,6 @@ Some HPC setups also allow you to run nextflow within a cluster job submitted yo
In some cases, the Nextflow Java virtual machines can start to request a large amount of memory.
We recommend adding the following line to your environment to limit this (typically in `~/.bashrc` or `~./bash_profile`):
-```console
+```bash
NXF_OPTS='-Xms1g -Xmx4g'
```
diff --git a/lib/NfcoreSchema.groovy b/lib/NfcoreSchema.groovy
index b3d092f8..33cd4f6e 100755
--- a/lib/NfcoreSchema.groovy
+++ b/lib/NfcoreSchema.groovy
@@ -46,7 +46,6 @@ class NfcoreSchema {
'quiet',
'syslog',
'v',
- 'version',
// Options for `nextflow run` command
'ansi',
diff --git a/lib/NfcoreTemplate.groovy b/lib/NfcoreTemplate.groovy
index d244008f..08d2922c 100755
--- a/lib/NfcoreTemplate.groovy
+++ b/lib/NfcoreTemplate.groovy
@@ -32,6 +32,40 @@ class NfcoreTemplate {
}
}
+ //
+ // Warn if using custom configs to provide pipeline parameters
+ //
+ public static void warnParamsProvidedInConfig(workflow, log) {
+ if (workflow.configFiles.size() > 1) {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ " Multiple config files detected!\n" +
+ " Please provide pipeline parameters via the CLI or Nextflow '-params-file' option.\n" +
+ " Custom config files including those provided by the '-c' Nextflow option can be\n" +
+ " used to provide any configuration except for parameters.\n\n" +
+ " Docs: https://nf-co.re/usage/configuration#custom-configuration-files\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ }
+ }
+
+ //
+ // Generate version string
+ //
+ public static String version(workflow) {
+ String version_string = ""
+
+ if (workflow.manifest.version) {
+ def prefix_v = workflow.manifest.version[0] != 'v' ? 'v' : ''
+ version_string += "${prefix_v}${workflow.manifest.version}"
+ }
+
+ if (workflow.commitId) {
+ def git_shortsha = workflow.commitId.substring(0, 7)
+ version_string += "-g${git_shortsha}"
+ }
+
+ return version_string
+ }
+
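+ // For reference, a standalone sketch of what the version() helper above
+ // produces, using assumed example values:
+ //
+ //   String manifestVersion = '2.6'        // assumed: workflow.manifest.version
+ //   String commitId        = 'abc1234def' // assumed: workflow.commitId
+ //
+ //   String versionString = ''
+ //   if (manifestVersion) {
+ //       def prefixV = manifestVersion[0] != 'v' ? 'v' : ''
+ //       versionString += "${prefixV}${manifestVersion}"
+ //   }
+ //   if (commitId) {
+ //       versionString += "-g${commitId.substring(0, 7)}"
+ //   }
+ //   assert versionString == 'v2.6-gabc1234'
+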
//
// Construct and send completion email
//
@@ -64,7 +98,7 @@ class NfcoreTemplate {
misc_fields['Nextflow Compile Timestamp'] = workflow.nextflow.timestamp
def email_fields = [:]
- email_fields['version'] = workflow.manifest.version
+ email_fields['version'] = NfcoreTemplate.version(workflow)
email_fields['runName'] = workflow.runName
email_fields['success'] = workflow.success
email_fields['dateComplete'] = workflow.complete
@@ -150,6 +184,64 @@ class NfcoreTemplate {
output_tf.withWriter { w -> w << email_txt }
}
+ //
+ // Construct and send a notification to a web server as JSON
+ // e.g. Microsoft Teams and Slack
+ //
+ public static void IM_notification(workflow, params, summary_params, projectDir, log) {
+ def hook_url = params.hook_url
+
+ def summary = [:]
+ for (group in summary_params.keySet()) {
+ summary << summary_params[group]
+ }
+
+ def misc_fields = [:]
+ misc_fields['start'] = workflow.start
+ misc_fields['complete'] = workflow.complete
+ misc_fields['scriptfile'] = workflow.scriptFile
+ misc_fields['scriptid'] = workflow.scriptId
+ if (workflow.repository) misc_fields['repository'] = workflow.repository
+ if (workflow.commitId) misc_fields['commitid'] = workflow.commitId
+ if (workflow.revision) misc_fields['revision'] = workflow.revision
+ misc_fields['nxf_version'] = workflow.nextflow.version
+ misc_fields['nxf_build'] = workflow.nextflow.build
+ misc_fields['nxf_timestamp'] = workflow.nextflow.timestamp
+
+ def msg_fields = [:]
+ msg_fields['version'] = NfcoreTemplate.version(workflow)
+ msg_fields['runName'] = workflow.runName
+ msg_fields['success'] = workflow.success
+ msg_fields['dateComplete'] = workflow.complete
+ msg_fields['duration'] = workflow.duration
+ msg_fields['exitStatus'] = workflow.exitStatus
+ msg_fields['errorMessage'] = (workflow.errorMessage ?: 'None')
+ msg_fields['errorReport'] = (workflow.errorReport ?: 'None')
+ msg_fields['commandLine'] = workflow.commandLine.replaceFirst(/ +--hook_url +[^ ]+/, "")
+ msg_fields['projectDir'] = workflow.projectDir
+ msg_fields['summary'] = summary << misc_fields
+
+ // Render the JSON template
+ def engine = new groovy.text.GStringTemplateEngine()
+ // Different JSON depending on the service provider
+ // Defaults to "Adaptive Cards" (https://adaptivecards.io), except Slack which has its own format
+ def json_path = hook_url.contains("hooks.slack.com") ? "slackreport.json" : "adaptivecard.json"
+ def hf = new File("$projectDir/assets/${json_path}")
+ def json_template = engine.createTemplate(hf).make(msg_fields)
+ def json_message = json_template.toString()
+
+ // POST
+ def post = new URL(hook_url).openConnection();
+ post.setRequestMethod("POST")
+ post.setDoOutput(true)
+ post.setRequestProperty("Content-Type", "application/json")
+ post.getOutputStream().write(json_message.getBytes("UTF-8"));
+ def postRC = post.getResponseCode();
+ if (! postRC.equals(200)) {
+ log.warn(post.getErrorStream().getText());
+ }
+ }
+
//
// Print pipeline summary on completion
//
@@ -182,7 +274,7 @@ class NfcoreTemplate {
if (workflow.stats.ignoredCount == 0) {
log.info "-${colors.purple}[$workflow.manifest.name]${colors.green} Pipeline completed successfully${colors.reset}-"
} else {
- log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
+ log.info "-${colors.purple}[$workflow.manifest.name]${colors.yellow} Pipeline completed successfully, but with errored process(es) ${colors.reset}-"
}
} else {
log.info "-${colors.purple}[$workflow.manifest.name]${colors.red} Pipeline completed with errors${colors.reset}-"
@@ -270,6 +362,7 @@ class NfcoreTemplate {
//
public static String logo(workflow, monochrome_logs) {
Map colors = logColours(monochrome_logs)
+ String workflow_version = NfcoreTemplate.version(workflow)
String.format(
"""\n
${dashedLine(monochrome_logs)}
@@ -278,7 +371,7 @@ class NfcoreTemplate {
${colors.blue} |\\ | |__ __ / ` / \\ |__) |__ ${colors.yellow}} {${colors.reset}
${colors.blue} | \\| | \\__, \\__/ | \\ |___ ${colors.green}\\`-._,-`-,${colors.reset}
${colors.green}`._,._,\'${colors.reset}
- ${colors.purple} ${workflow.manifest.name} v${workflow.manifest.version}${colors.reset}
+ ${colors.purple} ${workflow.manifest.name} ${workflow_version}${colors.reset}
${dashedLine(monochrome_logs)}
""".stripIndent()
)
diff --git a/lib/Utils.groovy b/lib/Utils.groovy
old mode 100755
new mode 100644
index 28567bd7..fd6095c7
--- a/lib/Utils.groovy
+++ b/lib/Utils.groovy
@@ -21,20 +21,27 @@ class Utils {
}
// Check that all channels are present
- def required_channels = ['conda-forge', 'bioconda', 'defaults']
- def conda_check_failed = !required_channels.every { ch -> ch in channels }
+ // This channel list is ordered by required channel priority.
+ def required_channels_in_order = ['conda-forge', 'bioconda', 'defaults']
+ def channels_missing = ((required_channels_in_order as Set) - (channels as Set)) as Boolean
// Check that they are in the right order
- conda_check_failed |= !(channels.indexOf('conda-forge') < channels.indexOf('bioconda'))
- conda_check_failed |= !(channels.indexOf('bioconda') < channels.indexOf('defaults'))
+ def channel_priority_violation = false
+ def n = required_channels_in_order.size()
+ for (int i = 0; i < n - 1; i++) {
+ channel_priority_violation |= !(channels.indexOf(required_channels_in_order[i]) < channels.indexOf(required_channels_in_order[i+1]))
+ }
- if (conda_check_failed) {
- log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
+ if (channels_missing | channel_priority_violation) {
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" There is a problem with your Conda configuration!\n\n" +
" You will need to set-up the conda-forge and bioconda channels correctly.\n" +
- " Please refer to https://bioconda.github.io/user/install.html#set-up-channels\n" +
- " NB: The order of the channels matters!\n" +
- "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
+ " Please refer to https://bioconda.github.io/\n" +
+ " The observed channel order is \n" +
+ " ${channels}\n" +
+ " but the following channel order is required:\n" +
+ " ${required_channels_in_order}\n" +
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
}
}
}
diff --git a/lib/WorkflowCommons.groovy b/lib/WorkflowCommons.groovy
index 5672a1cf..a1ec9776 100755
--- a/lib/WorkflowCommons.groovy
+++ b/lib/WorkflowCommons.groovy
@@ -9,11 +9,11 @@ class WorkflowCommons {
//
private static void genomeExistsError(params, log) {
if (params.genomes && params.genome && !params.genomes.containsKey(params.genome)) {
- log.error "=============================================================================\n" +
+ log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Genome '${params.genome}' not found in any config files provided to the pipeline.\n" +
" Currently, the available genome keys are:\n" +
" ${params.genomes.keySet().join(", ")}\n" +
- "============================================================================="
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
}
}
@@ -45,6 +45,18 @@ class WorkflowCommons {
return yaml_file_text
}
+ //
+ // Create MultiQC tsv custom content from a list of values
+ //
+ public static String multiqcTsvFromList(tsv_data, header) {
+ def tsv_string = ""
+ if (tsv_data.size() > 0) {
+ tsv_string += "${header.join('\t')}\n"
+ tsv_string += tsv_data.join('\n')
+ }
+ return tsv_string
+ }
+
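+ // This helper replaces the deleted MULTIQC_TSV_FROM_LIST local module
+ // (removed further down in this diff). A quick usage sketch with assumed
+ // example rows, here given as pre-joined TSV strings:
+ //
+ //   def tsv_data = [ "sample1\t85.4", "sample2\t12.1" ]
+ //   def header   = [ 'Sample', 'Mapped reads (%)' ]
+ //   def tsv      = WorkflowCommons.multiqcTsvFromList(tsv_data, header)
+ //   assert tsv == "Sample\tMapped reads (%)\nsample1\t85.4\nsample2\t12.1"
+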
//
// Function to check whether primer BED file has the correct suffixes as provided to the pipeline
//
@@ -62,14 +74,14 @@ class WorkflowCommons {
)
}
if (total != (left + right)) {
- log.warn "=============================================================================\n" +
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Please check the name field (column 4) in the file supplied via --primer_bed.\n\n" +
" All of the values in that column do not end with those supplied by:\n" +
" --primer_left_suffix : $primer_left_suffix\n" +
" --primer_right_suffix: $primer_right_suffix\n\n" +
" This information is required to collapse the primer intervals into amplicons\n" +
" for the coverage plots generated by the pipeline.\n" +
- "==================================================================================="
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
}
}
@@ -109,13 +121,13 @@ class WorkflowCommons {
def intersect = bed_contigs.intersect(fai_contigs)
if (intersect.size() != bed_contigs.size()) {
def diff = bed_contigs.minus(intersect).sort()
- log.error "=============================================================================\n" +
+ log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Contigs in primer BED file do not match those in the reference genome:\n\n" +
" ${diff.join('\n ')}\n\n" +
" Please check:\n" +
" - Primer BED file supplied with --primer_bed\n" +
" - Genome FASTA file supplied with --fasta\n" +
- "============================================================================="
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
System.exit(1)
}
}
diff --git a/lib/WorkflowIllumina.groovy b/lib/WorkflowIllumina.groovy
index 56b20612..f151b2e1 100755
--- a/lib/WorkflowIllumina.groovy
+++ b/lib/WorkflowIllumina.groovy
@@ -75,11 +75,11 @@ class WorkflowIllumina {
if (line.contains('>')) {
count++
if (count > 1) {
- log.warn "=============================================================================\n" +
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" This pipeline does not officially support multi-fasta genome files!\n\n" +
" The parameters and processes are tailored for viral genome analysis.\n" +
" Please amend the '--fasta' parameter.\n" +
- "==================================================================================="
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
break
}
}
@@ -118,12 +118,12 @@ class WorkflowIllumina {
if (name.contains(name_prefix)) {
count++
if (count > 1) {
- log.warn "=============================================================================\n" +
+ log.warn "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Found '${name_prefix}' in the name field of the primer BED file!\n" +
" This suggests that you have used the SWIFT/SNAP protocol to prep your samples.\n" +
" If so, please set '--ivar_trim_offset 5' as suggested in the issue below:\n" +
" https://github.com/nf-core/viralrecon/issues/170\n" +
- "==================================================================================="
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
break
}
}
diff --git a/lib/WorkflowMain.groovy b/lib/WorkflowMain.groovy
index 9778e073..64147d3d 100755
--- a/lib/WorkflowMain.groovy
+++ b/lib/WorkflowMain.groovy
@@ -18,7 +18,7 @@ class WorkflowMain {
}
//
- // Print help to screen if required
+ // Generate help string
//
public static String help(workflow, params, log) {
def command = "nextflow run ${workflow.manifest.name} --input samplesheet.csv --outdir <OUTDIR> --genome 'MN908947.3' -profile docker"
@@ -31,7 +31,7 @@ class WorkflowMain {
}
//
- // Print parameter summary log to screen
+ // Generate parameter summary log string
//
public static String paramsSummaryLog(workflow, params, log) {
def summary_log = ''
@@ -52,19 +52,29 @@ class WorkflowMain {
System.exit(0)
}
- // Validate workflow parameters via the JSON schema
- if (params.validate_params) {
- NfcoreSchema.validateParameters(workflow, params, log)
+ // Print workflow version and exit on --version
+ if (params.version) {
+ String workflow_version = NfcoreTemplate.version(workflow)
+ log.info "${workflow.manifest.name} ${workflow_version}"
+ System.exit(0)
}
// Print parameter summary log to screen
log.info paramsSummaryLog(workflow, params, log)
+ // Warn about using custom configs to provide pipeline parameters
+ NfcoreTemplate.warnParamsProvidedInConfig(workflow, log)
+
+ // Validate workflow parameters via the JSON schema
+ if (params.validate_params) {
+ NfcoreSchema.validateParameters(workflow, params, log)
+ }
+
// Check that a -profile or Nextflow config has been provided to run the pipeline
NfcoreTemplate.checkConfigProvided(workflow, log)
// Check that conda channels are set-up correctly
- if (params.enable_conda) {
+ if (workflow.profile.tokenize(',').intersect(['conda', 'mamba']).size() >= 1) {
Utils.checkCondaChannels(log)
}
@@ -89,7 +99,6 @@ class WorkflowMain {
}
}
}
-
//
// Get attribute from genome config file e.g. fasta
//
@@ -99,7 +108,7 @@ class WorkflowMain {
" - https://github.com/nf-core/configs/blob/master/conf/pipeline/viralrecon/genomes.config\n\n" +
" If you would still like to blame us please come and find us on nf-core Slack:\n" +
" - https://nf-co.re/viralrecon#contributions-and-support\n" +
- "============================================================================="
+ "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~"
if (params.genomes && params.genome && params.genomes.containsKey(params.genome)) {
def genome_map = params.genomes[ params.genome ]
if (primer_set) {
@@ -111,7 +120,7 @@ class WorkflowMain {
if (genome_map.containsKey(primer_set_version)) {
genome_map = genome_map[ primer_set_version ]
} else {
- log.error "=============================================================================\n" +
+ log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" --primer_set_version '${primer_set_version}' not found!\n\n" +
" Currently, the available primer set version keys are: ${genome_map.keySet().join(", ")}\n\n" +
" Please check:\n" +
@@ -122,7 +131,7 @@ class WorkflowMain {
System.exit(1)
}
} else {
- log.error "=============================================================================\n" +
+ log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" --primer_set '${primer_set}' not found!\n\n" +
" Currently, the available primer set keys are: ${genome_map.keySet().join(", ")}\n\n" +
" Please check:\n" +
@@ -132,7 +141,7 @@ class WorkflowMain {
System.exit(1)
}
} else {
- log.error "=============================================================================\n" +
+ log.error "~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~\n" +
" Genome '${params.genome}' does not contain any primer sets!\n\n" +
" Please check:\n" +
" - The value provided to --genome (currently '${params.genome}')\n" +
diff --git a/main.nf b/main.nf
index 4d662339..c659b05c 100644
--- a/main.nf
+++ b/main.nf
@@ -4,6 +4,7 @@
nf-core/viralrecon
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Github : https://github.com/nf-core/viralrecon
+
Website: https://nf-co.re/viralrecon
Slack : https://nfcore.slack.com/channels/viralrecon
----------------------------------------------------------------------------------------
diff --git a/modules.json b/modules.json
index 97603779..4e7dec6f 100644
--- a/modules.json
+++ b/modules.json
@@ -2,159 +2,289 @@
"name": "nf-core/viralrecon",
"homePage": "https://github.com/nf-core/viralrecon",
"repos": {
- "nf-core/modules": {
- "abacas": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "artic/guppyplex": {
- "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde"
- },
- "artic/minion": {
- "git_sha": "589f39c39e05fdd9493e765b1d2b4385d3b68fde"
- },
- "bandage/image": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "bcftools/consensus": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bcftools/filter": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bcftools/mpileup": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bcftools/norm": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bcftools/query": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bcftools/sort": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bcftools/stats": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "bedtools/getfasta": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "bedtools/maskfasta": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "bedtools/merge": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "blast/blastn": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "blast/makeblastdb": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "bowtie2/align": {
- "git_sha": "848ee9a215d02d80be033bfa60881700f2bd914c"
- },
- "bowtie2/build": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "cat/fastq": {
- "git_sha": "9aadd9a6d3f5964476582319b3a1c54a3e3fe7c9"
- },
- "custom/dumpsoftwareversions": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "custom/getchromsizes": {
- "git_sha": "213403187932dbbdd936a04474cc8cd8abae7a08"
- },
- "fastp": {
- "git_sha": "9b51362a532a14665f513cf987531f9ea5046b74"
- },
- "fastqc": {
- "git_sha": "49b18b1639f4f7104187058866a8fab33332bdfe"
- },
- "gunzip": {
- "git_sha": "fa37e0662690c4ec4260dae282fbce08777503e6"
- },
- "ivar/consensus": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "ivar/trim": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "ivar/variants": {
- "git_sha": "cab399507bea60d90de6d7b296163210c371b693"
- },
- "kraken2/kraken2": {
- "git_sha": "abe025677cdd805cc93032341ab19885473c1a07"
- },
- "minia": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "mosdepth": {
- "git_sha": "72a31b76eb1b58879e0d91fb1d992e0118693098"
- },
- "nanoplot": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "nextclade/datasetget": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "nextclade/run": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "pangolin": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "picard/collectmultiplemetrics": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "picard/markduplicates": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "plasmidid": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "pycoqc": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "quast": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "samtools/flagstat": {
- "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f"
- },
- "samtools/idxstats": {
- "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f"
- },
- "samtools/index": {
- "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773"
- },
- "samtools/sort": {
- "git_sha": "897c33d5da084b61109500ee44c01da2d3e4e773"
- },
- "samtools/stats": {
- "git_sha": "ecece498f10b47b7c9d06f53a310cea5811b4c5f"
- },
- "samtools/view": {
- "git_sha": "6b64f9cb6c3dd3577931cc3cd032d6fb730000ce"
- },
- "spades": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
- },
- "tabix/bgzip": {
- "git_sha": "37bf3936f3665483d070a5e0e0b314311032af7c"
- },
- "tabix/tabix": {
- "git_sha": "b3e9b88e80880f450ad79a95b2b7aa05e1de5484"
- },
- "unicycler": {
- "git_sha": "e745e167c1020928ef20ea1397b6b4d230681b4d"
- },
- "untar": {
- "git_sha": "51be617b1ca9bff973655eb899d591ed6ab253b5"
- },
- "vcflib/vcfuniq": {
- "git_sha": "682f789f93070bd047868300dd018faf3d434e7c"
+ "https://github.com/nf-core/modules.git": {
+ "modules": {
+ "nf-core": {
+ "abacas": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "artic/guppyplex": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "artic/minion": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "bandage/image": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "bcftools/consensus": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bcftools/filter": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bcftools/mpileup": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bcftools/norm": {
+ "branch": "master",
+ "git_sha": "bcad95fb35e567ad25840d3297c3e17eff211a3a",
+ "installed_by": ["modules"]
+ },
+ "bcftools/query": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bcftools/sort": {
+ "branch": "master",
+ "git_sha": "4a21e4cca35e72ec059abd67f790e0b192ce5d81",
+ "installed_by": ["modules"]
+ },
+ "bcftools/stats": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bedtools/getfasta": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bedtools/maskfasta": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "bedtools/merge": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "blast/blastn": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "blast/makeblastdb": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "bowtie2/align": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules", "fastq_align_bowtie2"]
+ },
+ "bowtie2/build": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "cat/fastq": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "custom/dumpsoftwareversions": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "custom/getchromsizes": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "fastp": {
+ "branch": "master",
+ "git_sha": "20a508676f40d0fd3f911ac595af91ec845704c4",
+ "installed_by": ["modules"]
+ },
+ "fastqc": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "gunzip": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "ivar/consensus": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "ivar/trim": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "ivar/variants": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "kraken2/kraken2": {
+ "branch": "master",
+ "git_sha": "7c695e0147df1157413e06246d9b0094617d3e6b",
+ "installed_by": ["modules"]
+ },
+ "minia": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "mosdepth": {
+ "branch": "master",
+ "git_sha": "def5f182583df0c20f43ec3d4355e8ebd341aaa9",
+ "installed_by": ["modules"]
+ },
+ "nanoplot": {
+ "branch": "master",
+ "git_sha": "3822e04e49b6d89b7092feb3480d744cb5d9986b",
+ "installed_by": ["modules"]
+ },
+ "nextclade/datasetget": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "nextclade/run": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "pangolin": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "picard/collectmultiplemetrics": {
+ "branch": "master",
+ "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6",
+ "installed_by": ["modules"]
+ },
+ "picard/markduplicates": {
+ "branch": "master",
+ "git_sha": "75027bf77472b1f4fd2cdd7e46f83119dfb0f2c6",
+ "installed_by": ["modules", "bam_markduplicates_picard"]
+ },
+ "plasmidid": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "pycoqc": {
+ "branch": "master",
+ "git_sha": "cb8a5428685f490d0295563b1b0c3a239bbe1927",
+ "installed_by": ["modules"]
+ },
+ "quast": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "samtools/flagstat": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules", "bam_stats_samtools"]
+ },
+ "samtools/idxstats": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules", "bam_stats_samtools"]
+ },
+ "samtools/index": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["bam_markduplicates_picard", "modules", "bam_sort_stats_samtools"]
+ },
+ "samtools/sort": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules", "bam_sort_stats_samtools"]
+ },
+ "samtools/stats": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules", "bam_stats_samtools"]
+ },
+ "samtools/view": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ },
+ "spades": {
+ "branch": "master",
+ "git_sha": "b6d4d476aee074311c89d82a69c1921bd70c8180",
+ "installed_by": ["modules"]
+ },
+ "tabix/bgzip": {
+ "branch": "master",
+ "git_sha": "90294980a903ecebd99ac31d8b6c66af48fa8259",
+ "installed_by": ["modules"]
+ },
+ "tabix/tabix": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "unicycler": {
+ "branch": "master",
+ "git_sha": "c8e35eb2055c099720a75538d1b8adb3fb5a464c",
+ "installed_by": ["modules"]
+ },
+ "untar": {
+ "branch": "master",
+ "git_sha": "cc1f997fab6d8fde5dc0e6e2a310814df5b53ce7",
+ "installed_by": ["modules"]
+ },
+ "vcflib/vcfuniq": {
+ "branch": "master",
+ "git_sha": "0f8a77ff00e65eaeebc509b8156eaa983192474b",
+ "installed_by": ["modules"]
+ }
+ }
+ },
+ "subworkflows": {
+ "nf-core": {
+ "bam_markduplicates_picard": {
+ "branch": "master",
+ "git_sha": "6f1697c121719dedde9e0537b6ed6a9cb8c13583",
+ "installed_by": ["subworkflows"]
+ },
+ "bam_sort_stats_samtools": {
+ "branch": "master",
+ "git_sha": "3911652a6b24249358f79e8b8466338d63efb2a2",
+ "installed_by": ["fastq_align_bowtie2"]
+ },
+ "bam_stats_samtools": {
+ "branch": "master",
+ "git_sha": "b4b7f89e7fd6d2293f0c176213f710e0bcdaf19e",
+ "installed_by": ["bam_sort_stats_samtools", "bam_markduplicates_picard"]
+ },
+ "fastq_align_bowtie2": {
+ "branch": "master",
+ "git_sha": "ac75f79157ecc64283a2b3a559f1ba90bc0f2259",
+ "installed_by": ["subworkflows"]
+ }
+ }
}
}
}
diff --git a/modules/local/asciigenome.nf b/modules/local/asciigenome.nf
index 09ca6bb6..adf8cf14 100644
--- a/modules/local/asciigenome.nf
+++ b/modules/local/asciigenome.nf
@@ -2,7 +2,7 @@ process ASCIIGENOME {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::asciigenome=1.16.0 bioconda::bedtools=2.30.0" : null)
+ conda "bioconda::asciigenome=1.16.0 bioconda::bedtools=2.30.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-093691b47d719890dc19ac0c13c4528e9776897f:27211b8c38006480d69eb1be3ef09a7bf0a49d76-0' :
'quay.io/biocontainers/mulled-v2-093691b47d719890dc19ac0c13c4528e9776897f:27211b8c38006480d69eb1be3ef09a7bf0a49d76-0' }"
diff --git a/modules/local/collapse_primers.nf b/modules/local/collapse_primers.nf
index 316d1e2a..4219b6ae 100644
--- a/modules/local/collapse_primers.nf
+++ b/modules/local/collapse_primers.nf
@@ -2,7 +2,7 @@ process COLLAPSE_PRIMERS {
tag "$bed"
label 'process_medium'
- conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
+ conda "conda-forge::python=3.9.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9--1' :
'quay.io/biocontainers/python:3.9--1' }"
diff --git a/modules/local/cutadapt.nf b/modules/local/cutadapt.nf
index a96ea8bb..11c8f6a5 100644
--- a/modules/local/cutadapt.nf
+++ b/modules/local/cutadapt.nf
@@ -2,10 +2,10 @@ process CUTADAPT {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? 'bioconda::cutadapt=3.5' : null)
+ conda "bioconda::cutadapt=4.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/cutadapt:3.5--py39h38f01e4_0' :
- 'quay.io/biocontainers/cutadapt:3.5--py39h38f01e4_0' }"
+ 'https://depot.galaxyproject.org/singularity/cutadapt:4.2--py39hbf8eff0_0' :
+ 'quay.io/biocontainers/cutadapt:4.2--py39hbf8eff0_0' }"
input:
tuple val(meta), path(reads)
diff --git a/modules/local/filter_blastn.nf b/modules/local/filter_blastn.nf
index 5e5ed81b..0328a037 100644
--- a/modules/local/filter_blastn.nf
+++ b/modules/local/filter_blastn.nf
@@ -2,7 +2,7 @@ process FILTER_BLASTN {
tag "$meta.id"
label 'process_low'
- conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'ubuntu:20.04' }"
diff --git a/modules/local/ivar_variants_to_vcf.nf b/modules/local/ivar_variants_to_vcf.nf
index cd220b24..e6c88328 100644
--- a/modules/local/ivar_variants_to_vcf.nf
+++ b/modules/local/ivar_variants_to_vcf.nf
@@ -1,7 +1,7 @@
process IVAR_VARIANTS_TO_VCF {
tag "$meta.id"
- conda (params.enable_conda ? "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" : null)
+ conda "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3 conda-forge::biopython=1.79"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' :
'quay.io/biocontainers/mulled-v2-ff46c3f421ca930fcc54e67ab61c8e1bcbddfe22:1ad3da14f705eb0cdff6b5a44fea4909307524b4-0' }"
diff --git a/modules/local/kraken2_build.nf b/modules/local/kraken2_build.nf
index c5632aad..f1aa6865 100644
--- a/modules/local/kraken2_build.nf
+++ b/modules/local/kraken2_build.nf
@@ -2,7 +2,7 @@ process KRAKEN2_BUILD {
tag "$library"
label 'process_high'
- conda (params.enable_conda ? 'bioconda::kraken2=2.1.2 conda-forge::pigz=2.6' : null)
+ conda "bioconda::kraken2=2.1.2 conda-forge::pigz=2.6"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' :
'quay.io/biocontainers/mulled-v2-5799ab18b5fc681e75923b2450abaa969907ec98:87fc08d11968d081f3e8a37131c1f1f6715b6542-0' }"
diff --git a/modules/local/make_bed_mask.nf b/modules/local/make_bed_mask.nf
index 246a8ec5..c8c75bff 100644
--- a/modules/local/make_bed_mask.nf
+++ b/modules/local/make_bed_mask.nf
@@ -1,7 +1,7 @@
process MAKE_BED_MASK {
tag "$meta.id"
- conda (params.enable_conda ? "conda-forge::python=3.9.5 bioconda::samtools=1.14" : null)
+ conda "conda-forge::python=3.9.5 bioconda::samtools=1.14"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-1a35167f7a491c7086c13835aaa74b39f1f43979:6b5cffa1187cfccf2dc983ed3b5359d49b999eb0-0' :
'quay.io/biocontainers/mulled-v2-1a35167f7a491c7086c13835aaa74b39f1f43979:6b5cffa1187cfccf2dc983ed3b5359d49b999eb0-0' }"
diff --git a/modules/local/make_variants_long_table.nf b/modules/local/make_variants_long_table.nf
index d802ecc9..1d8b40fc 100644
--- a/modules/local/make_variants_long_table.nf
+++ b/modules/local/make_variants_long_table.nf
@@ -1,6 +1,6 @@
process MAKE_VARIANTS_LONG_TABLE {
- conda (params.enable_conda ? "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3" : null)
+ conda "conda-forge::python=3.9.5 conda-forge::matplotlib=3.5.1 conda-forge::pandas=1.3.5 conda-forge::r-sys=3.4 conda-forge::regex=2021.11.10 conda-forge::scipy=1.7.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' :
'quay.io/biocontainers/mulled-v2-77320db00eefbbf8c599692102c3d387a37ef02a:08144a66f00dc7684fad061f1466033c0176e7ad-0' }"
diff --git a/modules/local/multiqc_illumina.nf b/modules/local/multiqc_illumina.nf
index 59a031c2..bcfc37f1 100644
--- a/modules/local/multiqc_illumina.nf
+++ b/modules/local/multiqc_illumina.nf
@@ -1,10 +1,10 @@
process MULTIQC {
label 'process_medium'
- conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null)
+ conda "bioconda::multiqc=1.14"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' :
- 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
+ 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }"
input:
path 'multiqc_config.yaml'
diff --git a/modules/local/multiqc_nanopore.nf b/modules/local/multiqc_nanopore.nf
index e23db35c..c6ffbb4d 100644
--- a/modules/local/multiqc_nanopore.nf
+++ b/modules/local/multiqc_nanopore.nf
@@ -1,10 +1,10 @@
process MULTIQC {
label 'process_medium'
- conda (params.enable_conda ? "bioconda::multiqc=1.13a" : null)
+ conda "bioconda::multiqc=1.14"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.13a--pyhdfd78af_1' :
- 'quay.io/biocontainers/multiqc:1.13a--pyhdfd78af_1' }"
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
+ 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }"
input:
path 'multiqc_config.yaml'
diff --git a/modules/local/multiqc_tsv_from_list.nf b/modules/local/multiqc_tsv_from_list.nf
deleted file mode 100644
index a53c8f73..00000000
--- a/modules/local/multiqc_tsv_from_list.nf
+++ /dev/null
@@ -1,28 +0,0 @@
-process MULTIQC_TSV_FROM_LIST {
-
- executor 'local'
- memory 100.MB
-
- input:
- val tsv_data // [ ['foo', 1], ['bar', 1] ]
- val header // [ 'name', 'number' ]
- val out_prefix
-
- output:
- path "*.tsv"
-
- when:
- task.ext.when == null || task.ext.when
-
- exec:
- // Generate file contents
- def contents = ""
- if (tsv_data.size() > 0) {
- contents += "${header.join('\t')}\n"
- contents += tsv_data.join('\n')
- }
-
- // Write to file
- def mqc_file = task.workDir.resolve("${out_prefix}_mqc.tsv")
- mqc_file.text = contents
-}
diff --git a/modules/local/plot_base_density.nf b/modules/local/plot_base_density.nf
index 97e7e93e..1932350a 100644
--- a/modules/local/plot_base_density.nf
+++ b/modules/local/plot_base_density.nf
@@ -2,7 +2,7 @@ process PLOT_BASE_DENSITY {
tag "$fasta"
label 'process_medium'
- conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null)
+ conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' :
'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }"
diff --git a/modules/local/plot_mosdepth_regions.nf b/modules/local/plot_mosdepth_regions.nf
index 31cbeeb3..0195549b 100644
--- a/modules/local/plot_mosdepth_regions.nf
+++ b/modules/local/plot_mosdepth_regions.nf
@@ -1,7 +1,7 @@
process PLOT_MOSDEPTH_REGIONS {
label 'process_medium'
- conda (params.enable_conda ? "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2" : null)
+ conda "conda-forge::r-base=4.0.3 conda-forge::r-reshape2=1.4.4 conda-forge::r-optparse=1.6.6 conda-forge::r-ggplot2=3.3.3 conda-forge::r-scales=1.1.1 conda-forge::r-viridis=0.5.1 conda-forge::r-tidyverse=1.3.0 bioconda::bioconductor-biostrings=2.58.0 bioconda::bioconductor-complexheatmap=2.6.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' :
'quay.io/biocontainers/mulled-v2-ad9dd5f398966bf899ae05f8e7c54d0fb10cdfa7:05678da05b8e5a7a5130e90a9f9a6c585b965afa-0' }"
diff --git a/modules/local/rename_fasta_header.nf b/modules/local/rename_fasta_header.nf
index 36810983..d42e5f11 100644
--- a/modules/local/rename_fasta_header.nf
+++ b/modules/local/rename_fasta_header.nf
@@ -1,7 +1,7 @@
process RENAME_FASTA_HEADER {
tag "$meta.id"
- conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'ubuntu:20.04' }"
diff --git a/modules/local/samplesheet_check.nf b/modules/local/samplesheet_check.nf
index ffed8149..0da5f587 100644
--- a/modules/local/samplesheet_check.nf
+++ b/modules/local/samplesheet_check.nf
@@ -1,7 +1,8 @@
process SAMPLESHEET_CHECK {
tag "$samplesheet"
+ label 'process_single'
- conda (params.enable_conda ? "conda-forge::python=3.9.5" : null)
+ conda "conda-forge::python=3.9.5"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9--1' :
'quay.io/biocontainers/python:3.9--1' }"
diff --git a/modules/local/snpeff_ann.nf b/modules/local/snpeff_ann.nf
index 4ccf4db4..c834c1c5 100644
--- a/modules/local/snpeff_ann.nf
+++ b/modules/local/snpeff_ann.nf
@@ -2,7 +2,7 @@ process SNPEFF_ANN {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::snpeff=5.0" : null)
+ conda "bioconda::snpeff=5.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' :
'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }"
diff --git a/modules/local/snpeff_build.nf b/modules/local/snpeff_build.nf
index faaeba19..e1ab367f 100644
--- a/modules/local/snpeff_build.nf
+++ b/modules/local/snpeff_build.nf
@@ -2,7 +2,7 @@ process SNPEFF_BUILD {
tag "$fasta"
label 'process_low'
- conda (params.enable_conda ? "bioconda::snpeff=5.0" : null)
+ conda "bioconda::snpeff=5.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/snpeff:5.0--hdfd78af_1' :
'quay.io/biocontainers/snpeff:5.0--hdfd78af_1' }"
diff --git a/modules/local/snpsift_extractfields.nf b/modules/local/snpsift_extractfields.nf
index 573063e0..5654e97e 100644
--- a/modules/local/snpsift_extractfields.nf
+++ b/modules/local/snpsift_extractfields.nf
@@ -2,7 +2,7 @@ process SNPSIFT_EXTRACTFIELDS {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::snpsift=4.3.1t" : null)
+ conda "bioconda::snpsift=4.3.1t"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/snpsift:4.3.1t--hdfd78af_3' :
'quay.io/biocontainers/snpsift:4.3.1t--hdfd78af_3' }"
diff --git a/modules/nf-core/modules/abacas/main.nf b/modules/nf-core/abacas/main.nf
similarity index 94%
rename from modules/nf-core/modules/abacas/main.nf
rename to modules/nf-core/abacas/main.nf
index 00c9169f..beabc45b 100644
--- a/modules/nf-core/modules/abacas/main.nf
+++ b/modules/nf-core/abacas/main.nf
@@ -2,7 +2,7 @@ process ABACAS {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::abacas=1.3.1" : null)
+ conda "bioconda::abacas=1.3.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/abacas:1.3.1--pl526_0' :
'quay.io/biocontainers/abacas:1.3.1--pl526_0' }"
diff --git a/modules/nf-core/modules/abacas/meta.yml b/modules/nf-core/abacas/meta.yml
similarity index 98%
rename from modules/nf-core/modules/abacas/meta.yml
rename to modules/nf-core/abacas/meta.yml
index c685e650..3bab9b22 100644
--- a/modules/nf-core/modules/abacas/meta.yml
+++ b/modules/nf-core/abacas/meta.yml
@@ -12,7 +12,7 @@ tools:
contigs based on a reference sequence.
homepage: http://abacas.sourceforge.net/documentation.html
documentation: http://abacas.sourceforge.net/documentation.html
- tool_dev_url: None
+
doi: "10.1093/bioinformatics/btp347"
licence: ["GPL v2-or-later"]
diff --git a/modules/nf-core/modules/artic/guppyplex/main.nf b/modules/nf-core/artic/guppyplex/main.nf
similarity index 77%
rename from modules/nf-core/modules/artic/guppyplex/main.nf
rename to modules/nf-core/artic/guppyplex/main.nf
index 2fd518e0..9be33484 100644
--- a/modules/nf-core/modules/artic/guppyplex/main.nf
+++ b/modules/nf-core/artic/guppyplex/main.nf
@@ -2,10 +2,10 @@ process ARTIC_GUPPYPLEX {
tag "$meta.id"
label 'process_high'
- conda (params.enable_conda ? "bioconda::artic=1.2.2" : null)
+ conda "bioconda::artic=1.2.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' :
- 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/artic:1.2.3--pyhdfd78af_0' :
+ 'quay.io/biocontainers/artic:1.2.3--pyhdfd78af_0' }"
input:
tuple val(meta), path(fastq_dir)
@@ -20,7 +20,7 @@ process ARTIC_GUPPYPLEX {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
- def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions.
+ def VERSION = '1.2.3' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions.
"""
artic \\
guppyplex \\
diff --git a/modules/nf-core/modules/artic/guppyplex/meta.yml b/modules/nf-core/artic/guppyplex/meta.yml
similarity index 98%
rename from modules/nf-core/modules/artic/guppyplex/meta.yml
rename to modules/nf-core/artic/guppyplex/meta.yml
index fe288289..e8edc8f3 100644
--- a/modules/nf-core/modules/artic/guppyplex/meta.yml
+++ b/modules/nf-core/artic/guppyplex/meta.yml
@@ -10,7 +10,7 @@ tools:
homepage: https://artic.readthedocs.io/en/latest/
documentation: https://artic.readthedocs.io/en/latest/
tool_dev_url: https://github.com/artic-network/fieldbioinformatics
- doi: ""
+
licence: ["MIT"]
input:
diff --git a/modules/nf-core/modules/artic/minion/main.nf b/modules/nf-core/artic/minion/main.nf
similarity index 92%
rename from modules/nf-core/modules/artic/minion/main.nf
rename to modules/nf-core/artic/minion/main.nf
index 1629d433..429a107d 100644
--- a/modules/nf-core/modules/artic/minion/main.nf
+++ b/modules/nf-core/artic/minion/main.nf
@@ -2,10 +2,10 @@ process ARTIC_MINION {
tag "$meta.id"
label 'process_high'
- conda (params.enable_conda ? "bioconda::artic=1.2.2" : null)
+ conda "bioconda::artic=1.2.3"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/artic:1.2.2--pyhdfd78af_0' :
- 'quay.io/biocontainers/artic:1.2.2--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/artic:1.2.3--pyhdfd78af_0' :
+ 'quay.io/biocontainers/artic:1.2.3--pyhdfd78af_0' }"
input:
tuple val(meta), path(fastq)
@@ -48,7 +48,7 @@ process ARTIC_MINION {
model = medaka_model_file ? "--medaka-model ./$medaka_model_file" : "--medaka-model $medaka_model_string"
}
def hd5_plugin_path = task.ext.hd5_plugin_path ? "export HDF5_PLUGIN_PATH=" + task.ext.hd5_plugin_path : "export HDF5_PLUGIN_PATH=/usr/local/lib/python3.6/site-packages/ont_fast5_api/vbz_plugin"
- def VERSION = '1.2.2' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions.
+ def VERSION = '1.2.3' // WARN: Version information provided by tool on CLI is incorrect. Please update this string when bumping container versions.
"""
$hd5_plugin_path
diff --git a/modules/nf-core/modules/artic/minion/meta.yml b/modules/nf-core/artic/minion/meta.yml
similarity index 99%
rename from modules/nf-core/modules/artic/minion/meta.yml
rename to modules/nf-core/artic/minion/meta.yml
index c0f97a0c..8ccf8434 100644
--- a/modules/nf-core/modules/artic/minion/meta.yml
+++ b/modules/nf-core/artic/minion/meta.yml
@@ -11,7 +11,7 @@ tools:
homepage: https://artic.readthedocs.io/en/latest/
documentation: https://artic.readthedocs.io/en/latest/
tool_dev_url: https://github.com/artic-network/fieldbioinformatics
- doi: ""
+
licence: ["MIT"]
input:
- meta:
@@ -65,7 +65,7 @@ output:
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- results:
- type:
+ type: file
description: Aggregated FastQ files
pattern: "*.fastq.gz"
- bam:
diff --git a/modules/nf-core/modules/bandage/image/main.nf b/modules/nf-core/bandage/image/main.nf
similarity index 93%
rename from modules/nf-core/modules/bandage/image/main.nf
rename to modules/nf-core/bandage/image/main.nf
index ee504a12..e4da7336 100644
--- a/modules/nf-core/modules/bandage/image/main.nf
+++ b/modules/nf-core/bandage/image/main.nf
@@ -2,7 +2,7 @@ process BANDAGE_IMAGE {
tag "${meta.id}"
label 'process_low'
- conda (params.enable_conda ? 'bioconda::bandage=0.8.1' : null)
+ conda "bioconda::bandage=0.8.1"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bandage:0.8.1--hc9558a2_2' :
'quay.io/biocontainers/bandage:0.8.1--hc9558a2_2' }"
diff --git a/modules/nf-core/bandage/image/meta.yml b/modules/nf-core/bandage/image/meta.yml
new file mode 100644
index 00000000..e68d8c98
--- /dev/null
+++ b/modules/nf-core/bandage/image/meta.yml
@@ -0,0 +1,44 @@
+name: bandage_image
+description: Render an assembly graph in GFA 1.0 format to PNG and SVG image formats
+keywords:
+ - gfa
+ - graph
+ - assembly
+ - visualisation
+tools:
+ - bandage:
+ description: |
+ Bandage - a Bioinformatics Application for Navigating De novo Assembly Graphs Easily
+ homepage: https://github.com/rrwick/Bandage
+ documentation: https://github.com/rrwick/Bandage
+ licence: ["GPL-3.0-or-later"]
+input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - gfa:
+ type: file
+ description: Assembly graph in GFA 1.0 format
+ pattern: "*.gfa"
+output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing sample information
+ e.g. [ id:'test', single_end:false ]
+ - png:
+ type: file
+ description: Bandage image in PNG format
+ pattern: "*.png"
+ - svg:
+ type: file
+ description: Bandage image in SVG format
+ pattern: "*.svg"
+ - versions:
+ type: file
+ description: File containing software versions
+ pattern: "versions.yml"
+authors:
+ - "@heuermh"
diff --git a/modules/nf-core/modules/bcftools/consensus/main.nf b/modules/nf-core/bcftools/consensus/main.nf
similarity index 85%
rename from modules/nf-core/modules/bcftools/consensus/main.nf
rename to modules/nf-core/bcftools/consensus/main.nf
index e28dc7f4..a32d94b1 100644
--- a/modules/nf-core/modules/bcftools/consensus/main.nf
+++ b/modules/nf-core/bcftools/consensus/main.nf
@@ -2,10 +2,10 @@ process BCFTOOLS_CONSENSUS {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null)
+ conda "bioconda::bcftools=1.16"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0':
- 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }"
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
input:
tuple val(meta), path(vcf), path(tbi), path(fasta)
diff --git a/modules/nf-core/modules/bcftools/consensus/meta.yml b/modules/nf-core/bcftools/consensus/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bcftools/consensus/meta.yml
rename to modules/nf-core/bcftools/consensus/meta.yml
diff --git a/modules/nf-core/modules/bcftools/norm/main.nf b/modules/nf-core/bcftools/filter/main.nf
similarity index 68%
rename from modules/nf-core/modules/bcftools/norm/main.nf
rename to modules/nf-core/bcftools/filter/main.nf
index 96f306bc..4e02009d 100644
--- a/modules/nf-core/modules/bcftools/norm/main.nf
+++ b/modules/nf-core/bcftools/filter/main.nf
@@ -1,19 +1,18 @@
-process BCFTOOLS_NORM {
+process BCFTOOLS_FILTER {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null)
+ conda "bioconda::bcftools=1.16"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0':
- 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }"
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
input:
- tuple val(meta), path(vcf), path(tbi)
- path(fasta)
+ tuple val(meta), path(vcf)
output:
- tuple val(meta), path("*.gz") , emit: vcf
- path "versions.yml" , emit: versions
+ tuple val(meta), path("*.gz"), emit: vcf
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@@ -22,12 +21,10 @@ process BCFTOOLS_NORM {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- bcftools norm \\
- --fasta-ref ${fasta} \\
+ bcftools filter \\
--output ${prefix}.vcf.gz \\
$args \\
- --threads $task.cpus \\
- ${vcf}
+ $vcf
cat <<-END_VERSIONS > versions.yml
"${task.process}":
@@ -37,6 +34,7 @@ process BCFTOOLS_NORM {
stub:
def prefix = task.ext.prefix ?: "${meta.id}"
+
"""
touch ${prefix}.vcf.gz
diff --git a/modules/nf-core/modules/bcftools/filter/meta.yml b/modules/nf-core/bcftools/filter/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bcftools/filter/meta.yml
rename to modules/nf-core/bcftools/filter/meta.yml
diff --git a/modules/nf-core/modules/bcftools/mpileup/main.nf b/modules/nf-core/bcftools/mpileup/main.nf
similarity index 66%
rename from modules/nf-core/modules/bcftools/mpileup/main.nf
rename to modules/nf-core/bcftools/mpileup/main.nf
index b7795bfc..c9e42c4d 100644
--- a/modules/nf-core/modules/bcftools/mpileup/main.nf
+++ b/modules/nf-core/bcftools/mpileup/main.nf
@@ -2,22 +2,22 @@ process BCFTOOLS_MPILEUP {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null)
+ conda "bioconda::bcftools=1.16"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0':
- 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }"
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
input:
- tuple val(meta), path(bam)
+ tuple val(meta), path(bam), path(intervals)
path fasta
val save_mpileup
output:
- tuple val(meta), path("*.gz") , emit: vcf
- tuple val(meta), path("*.tbi") , emit: tbi
- tuple val(meta), path("*stats.txt"), emit: stats
- tuple val(meta), path("*.mpileup") , emit: mpileup, optional: true
- path "versions.yml" , emit: versions
+ tuple val(meta), path("*vcf.gz") , emit: vcf
+ tuple val(meta), path("*vcf.gz.tbi") , emit: tbi
+ tuple val(meta), path("*stats.txt") , emit: stats
+ tuple val(meta), path("*.mpileup.gz"), emit: mpileup, optional: true
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
@@ -28,6 +28,8 @@ process BCFTOOLS_MPILEUP {
def args3 = task.ext.args3 ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
def mpileup = save_mpileup ? "| tee ${prefix}.mpileup" : ""
+ def bgzip_mpileup = save_mpileup ? "bgzip ${prefix}.mpileup" : ""
+ def intervals = intervals ? "-T ${intervals}" : ""
"""
echo "${meta.id}" > sample_name.list
@@ -36,11 +38,14 @@ process BCFTOOLS_MPILEUP {
--fasta-ref $fasta \\
$args \\
$bam \\
+ $intervals \\
$mpileup \\
| bcftools call --output-type v $args2 \\
| bcftools reheader --samples sample_name.list \\
| bcftools view --output-file ${prefix}.vcf.gz --output-type z $args3
+ $bgzip_mpileup
+
tabix -p vcf -f ${prefix}.vcf.gz
bcftools stats ${prefix}.vcf.gz > ${prefix}.bcftools_stats.txt
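To see how the two new optional fragments compose, here is a hedged trace with a hypothetical prefix. When `save_mpileup` is true, `tee` copies the stream mid-pipe and `bgzip` compresses it afterwards, producing the file matched by the new optional `*.mpileup.gz` output glob:

```groovy
// Hypothetical trace of the save_mpileup interpolations above.
def prefix        = 'sample1'
def mpileup       = "| tee ${prefix}.mpileup"  // copies the mpileup stream mid-pipe
def bgzip_mpileup = "bgzip ${prefix}.mpileup"  // run after the pipe; yields sample1.mpileup.gz
// sample1.mpileup.gz is captured by the optional output glob "*.mpileup.gz"
```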
diff --git a/modules/nf-core/modules/bcftools/mpileup/meta.yml b/modules/nf-core/bcftools/mpileup/meta.yml
similarity index 83%
rename from modules/nf-core/modules/bcftools/mpileup/meta.yml
rename to modules/nf-core/bcftools/mpileup/meta.yml
index d10dac14..5619a6f5 100644
--- a/modules/nf-core/modules/bcftools/mpileup/meta.yml
+++ b/modules/nf-core/bcftools/mpileup/meta.yml
@@ -22,6 +22,9 @@ input:
type: file
description: Input BAM file
pattern: "*.{bam}"
+ - intervals:
+ type: file
+ description: Input intervals file. A file (commonly '.bed') containing regions to subset
- fasta:
type: file
description: FASTA reference file
@@ -29,7 +32,6 @@ input:
- save_mpileup:
type: boolean
description: Save mpileup file generated by bcftools mpileup
- patter: "*.mpileup"
output:
- meta:
type: map
@@ -43,11 +45,15 @@ output:
- tbi:
type: file
description: tabix index file
- pattern: "*.{tbi}"
+ pattern: "*.{vcf.gz.tbi}"
- stats:
type: file
description: Text output file containing stats
pattern: "*{stats.txt}"
+ - mpileup:
+ type: file
+ description: mpileup gzipped output for all positions
+ pattern: "{*.mpileup.gz}"
- versions:
type: file
description: File containing software versions
diff --git a/modules/nf-core/bcftools/norm/main.nf b/modules/nf-core/bcftools/norm/main.nf
new file mode 100644
index 00000000..90387d6c
--- /dev/null
+++ b/modules/nf-core/bcftools/norm/main.nf
@@ -0,0 +1,60 @@
+process BCFTOOLS_NORM {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "bioconda::bcftools=1.16"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
+
+ input:
+ tuple val(meta), path(vcf), path(tbi)
+ path(fasta)
+
+ output:
+ tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '--output-type z'
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+ "vcf.gz"
+
+ """
+ bcftools norm \\
+ --fasta-ref ${fasta} \\
+ --output ${prefix}.${extension} \\
+ $args \\
+ --threads $task.cpus \\
+ ${vcf}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '--output-type z'
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+ "vcf.gz"
+ """
+ touch ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
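The extension-detection ternary above keys entirely on the `--output-type`/`-O` flag passed through `task.ext.args`. A hypothetical `conf/modules.config` override, sketched with standard nf-core config mechanics (the `BCFTOOLS_NORM` selector and values are illustrative assumptions):

```groovy
// Hypothetical modules.config snippet; the flag in ext.args drives the extension.
process {
    withName: 'BCFTOOLS_NORM' {
        ext.args   = '--output-type b'       // module then emits ${prefix}.bcf.gz
        ext.prefix = { "${meta.id}.norm" }   // keeps output names distinct from inputs
    }
}
```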
diff --git a/modules/nf-core/modules/bcftools/norm/meta.yml b/modules/nf-core/bcftools/norm/meta.yml
similarity index 86%
rename from modules/nf-core/modules/bcftools/norm/meta.yml
rename to modules/nf-core/bcftools/norm/meta.yml
index 2b3c8eae..c3ea2c03 100644
--- a/modules/nf-core/modules/bcftools/norm/meta.yml
+++ b/modules/nf-core/bcftools/norm/meta.yml
@@ -42,8 +42,8 @@ output:
e.g. [ id:'test', single_end:false ]
- vcf:
type: file
- description: VCF normalized output file
- pattern: "*.vcf.gz"
+ description: One of uncompressed VCF (.vcf), compressed VCF (.vcf.gz), compressed BCF (.bcf.gz) or uncompressed BCF (.bcf) normalized output file
+ pattern: "*.{vcf,vcf.gz,bcf,bcf.gz}"
- versions:
type: file
description: File containing software versions
diff --git a/modules/nf-core/modules/bcftools/query/main.nf b/modules/nf-core/bcftools/query/main.nf
similarity index 88%
rename from modules/nf-core/modules/bcftools/query/main.nf
rename to modules/nf-core/bcftools/query/main.nf
index 5de34a9e..5a917b3e 100644
--- a/modules/nf-core/modules/bcftools/query/main.nf
+++ b/modules/nf-core/bcftools/query/main.nf
@@ -2,10 +2,10 @@ process BCFTOOLS_QUERY {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? "bioconda::bcftools=1.15.1" : null)
+ conda "bioconda::bcftools=1.16"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/bcftools:1.15.1--h0ea216a_0':
- 'quay.io/biocontainers/bcftools:1.15.1--h0ea216a_0' }"
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
input:
tuple val(meta), path(vcf), path(tbi)
diff --git a/modules/nf-core/modules/bcftools/query/meta.yml b/modules/nf-core/bcftools/query/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bcftools/query/meta.yml
rename to modules/nf-core/bcftools/query/meta.yml
diff --git a/modules/nf-core/bcftools/sort/main.nf b/modules/nf-core/bcftools/sort/main.nf
new file mode 100644
index 00000000..9ae3253b
--- /dev/null
+++ b/modules/nf-core/bcftools/sort/main.nf
@@ -0,0 +1,60 @@
+process BCFTOOLS_SORT {
+ tag "$meta.id"
+ label 'process_medium'
+
+ conda "bioconda::bcftools=1.16"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
+
+ input:
+ tuple val(meta), path(vcf)
+
+ output:
+ tuple val(meta), path("*.{vcf,vcf.gz,bcf,bcf.gz}") , emit: vcf
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: '--output-type z'
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+ "vcf"
+
+ """
+ bcftools \\
+ sort \\
+ --output ${prefix}.${extension} \\
+ $args \\
+ $vcf
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def args = task.ext.args ?: '--output-type z'
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
+ def extension = args.contains("--output-type b") || args.contains("-Ob") ? "bcf.gz" :
+ args.contains("--output-type u") || args.contains("-Ou") ? "bcf" :
+ args.contains("--output-type z") || args.contains("-Oz") ? "vcf.gz" :
+ args.contains("--output-type v") || args.contains("-Ov") ? "vcf" :
+ "vcf"
+
+ """
+ touch ${prefix}.${extension}
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
diff --git a/modules/nf-core/modules/bcftools/sort/meta.yml b/modules/nf-core/bcftools/sort/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bcftools/sort/meta.yml
rename to modules/nf-core/bcftools/sort/meta.yml
diff --git a/modules/nf-core/bcftools/stats/main.nf b/modules/nf-core/bcftools/stats/main.nf
new file mode 100644
index 00000000..51e9c91c
--- /dev/null
+++ b/modules/nf-core/bcftools/stats/main.nf
@@ -0,0 +1,54 @@
+process BCFTOOLS_STATS {
+ tag "$meta.id"
+ label 'process_single'
+
+ conda "bioconda::bcftools=1.16"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/bcftools:1.16--hfe4b78e_1':
+ 'quay.io/biocontainers/bcftools:1.16--hfe4b78e_1' }"
+
+ input:
+ tuple val(meta), path(vcf), path(tbi)
+ path regions
+ path targets
+ path samples
+
+ output:
+ tuple val(meta), path("*stats.txt"), emit: stats
+ path "versions.yml" , emit: versions
+
+ when:
+ task.ext.when == null || task.ext.when
+
+ script:
+ def args = task.ext.args ?: ''
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def regions_file = regions ? "--regions-file ${regions}" : ""
+ def targets_file = targets ? "--targets-file ${targets}" : ""
+ def samples_file = samples ? "--samples-file ${samples}" : ""
+ """
+ bcftools stats \\
+ $args \\
+ $regions_file \\
+ $targets_file \\
+ $samples_file \\
+ $vcf > ${prefix}.bcftools_stats.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+
+ """
+ touch ${prefix}.bcftools_stats.txt
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ bcftools: \$(bcftools --version 2>&1 | head -n1 | sed 's/^.*bcftools //; s/ .*\$//')
+ END_VERSIONS
+ """
+}
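Because `regions`, `targets`, and `samples` are interpolated only when truthy, callers can opt out of all subsetting. A hypothetical call site, using the common nf-core idiom of passing empty lists for unused optional `path` inputs (`ch_vcf` is an assumed channel name):

```groovy
// Hypothetical call site: empty lists stand in for unused optional inputs.
BCFTOOLS_STATS (
    ch_vcf.map { meta, vcf -> [ meta, vcf, [] ] }, // tbi only needed with --regions
    [], // regions
    [], // targets
    []  // samples
)
ch_stats = BCFTOOLS_STATS.out.stats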
diff --git a/modules/nf-core/modules/bcftools/stats/meta.yml b/modules/nf-core/bcftools/stats/meta.yml
similarity index 63%
rename from modules/nf-core/modules/bcftools/stats/meta.yml
rename to modules/nf-core/bcftools/stats/meta.yml
index 304b88ec..f7afcd50 100644
--- a/modules/nf-core/modules/bcftools/stats/meta.yml
+++ b/modules/nf-core/bcftools/stats/meta.yml
@@ -23,6 +23,24 @@ input:
type: file
description: VCF input file
pattern: "*.{vcf}"
+ - tbi:
+ type: file
+ description: |
+ The tabix index for the VCF file to be inspected. Optional: only required when the regions parameter is used.
+ pattern: "*.tbi"
+ - regions:
+ type: file
+ description: |
+ Optionally, restrict the operation to regions listed in this file. (VCF, BED or tab-delimited)
+ - targets:
+ type: file
+ description: |
+ Optionally, restrict the operation to regions listed in this file (doesn't rely upon tbi index files)
+ - samples:
+ type: file
+ description: |
+ Optional, file of sample names to be included or excluded.
+ e.g. 'file.tsv'
output:
- meta:
type: map
@@ -40,3 +58,4 @@ output:
authors:
- "@joseespinosa"
- "@drpatelh"
+ - "@SusiJo"
diff --git a/modules/nf-core/modules/bedtools/getfasta/main.nf b/modules/nf-core/bedtools/getfasta/main.nf
similarity index 90%
rename from modules/nf-core/modules/bedtools/getfasta/main.nf
rename to modules/nf-core/bedtools/getfasta/main.nf
index 4ce8c01e..57e7f0de 100644
--- a/modules/nf-core/modules/bedtools/getfasta/main.nf
+++ b/modules/nf-core/bedtools/getfasta/main.nf
@@ -1,8 +1,8 @@
process BEDTOOLS_GETFASTA {
tag "$bed"
- label 'process_medium'
+ label 'process_single'
- conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null)
+ conda "bioconda::bedtools=2.30.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' :
'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }"
diff --git a/modules/nf-core/modules/bedtools/getfasta/meta.yml b/modules/nf-core/bedtools/getfasta/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bedtools/getfasta/meta.yml
rename to modules/nf-core/bedtools/getfasta/meta.yml
diff --git a/modules/nf-core/modules/bedtools/maskfasta/main.nf b/modules/nf-core/bedtools/maskfasta/main.nf
similarity index 90%
rename from modules/nf-core/modules/bedtools/maskfasta/main.nf
rename to modules/nf-core/bedtools/maskfasta/main.nf
index 04ba116b..a84a23c1 100644
--- a/modules/nf-core/modules/bedtools/maskfasta/main.nf
+++ b/modules/nf-core/bedtools/maskfasta/main.nf
@@ -1,8 +1,8 @@
process BEDTOOLS_MASKFASTA {
tag "$meta.id"
- label 'process_medium'
+ label 'process_single'
- conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null)
+ conda "bioconda::bedtools=2.30.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' :
'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }"
diff --git a/modules/nf-core/modules/bedtools/maskfasta/meta.yml b/modules/nf-core/bedtools/maskfasta/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bedtools/maskfasta/meta.yml
rename to modules/nf-core/bedtools/maskfasta/meta.yml
diff --git a/modules/nf-core/modules/bedtools/merge/main.nf b/modules/nf-core/bedtools/merge/main.nf
similarity index 82%
rename from modules/nf-core/modules/bedtools/merge/main.nf
rename to modules/nf-core/bedtools/merge/main.nf
index 6d1daa03..21b2e645 100644
--- a/modules/nf-core/modules/bedtools/merge/main.nf
+++ b/modules/nf-core/bedtools/merge/main.nf
@@ -1,8 +1,8 @@
process BEDTOOLS_MERGE {
tag "$meta.id"
- label 'process_medium'
+ label 'process_single'
- conda (params.enable_conda ? "bioconda::bedtools=2.30.0" : null)
+ conda "bioconda::bedtools=2.30.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bedtools:2.30.0--hc088bd4_0' :
'quay.io/biocontainers/bedtools:2.30.0--hc088bd4_0' }"
@@ -20,6 +20,7 @@ process BEDTOOLS_MERGE {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
+ if ("$bed" == "${prefix}.bed") error "Input and output names are the same, use \"task.ext.prefix\" to disambiguate!"
"""
bedtools \\
merge \\
diff --git a/modules/nf-core/modules/bedtools/merge/meta.yml b/modules/nf-core/bedtools/merge/meta.yml
similarity index 100%
rename from modules/nf-core/modules/bedtools/merge/meta.yml
rename to modules/nf-core/bedtools/merge/meta.yml
diff --git a/modules/nf-core/modules/blast/blastn/main.nf b/modules/nf-core/blast/blastn/main.nf
similarity index 75%
rename from modules/nf-core/modules/blast/blastn/main.nf
rename to modules/nf-core/blast/blastn/main.nf
index b85f6c8e..5f35422a 100644
--- a/modules/nf-core/modules/blast/blastn/main.nf
+++ b/modules/nf-core/blast/blastn/main.nf
@@ -2,10 +2,10 @@ process BLAST_BLASTN {
tag "$meta.id"
label 'process_medium'
- conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null)
+ conda "bioconda::blast=2.13.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' :
- 'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }"
+ 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' :
+ 'quay.io/biocontainers/blast:2.13.0--hf3cf87c_0' }"
input:
tuple val(meta), path(fasta)
@@ -22,7 +22,7 @@ process BLAST_BLASTN {
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
"""
- DB=`find -L ./ -name "*.ndb" | sed 's/.ndb//'`
+ DB=`find -L ./ -name "*.ndb" | sed 's/\\.ndb\$//'`
blastn \\
-num_threads $task.cpus \\
-db \$DB \\
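The sed change here gains a `\.` escape and a `$` anchor so only the literal `.ndb` suffix is stripped. Mirrored with Groovy regexes on a hypothetical database name (sed without the `/g` flag behaves like `replaceFirst`):

```groovy
// Hypothetical name illustrating why the unescaped, unanchored pattern misfires.
def name = 'sars2ndb.ndb'
assert name.replaceFirst(/.ndb/, '')   == 'sars.ndb' // '.' matched '2'; clipped mid-name
assert name.replaceFirst(/\.ndb$/, '') == 'sars2ndb' // strips only the literal suffix
```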
diff --git a/modules/nf-core/modules/blast/blastn/meta.yml b/modules/nf-core/blast/blastn/meta.yml
similarity index 100%
rename from modules/nf-core/modules/blast/blastn/meta.yml
rename to modules/nf-core/blast/blastn/meta.yml
diff --git a/modules/nf-core/modules/blast/makeblastdb/main.nf b/modules/nf-core/blast/makeblastdb/main.nf
similarity index 75%
rename from modules/nf-core/modules/blast/makeblastdb/main.nf
rename to modules/nf-core/blast/makeblastdb/main.nf
index 12208ea8..62abd813 100644
--- a/modules/nf-core/modules/blast/makeblastdb/main.nf
+++ b/modules/nf-core/blast/makeblastdb/main.nf
@@ -2,10 +2,10 @@ process BLAST_MAKEBLASTDB {
tag "$fasta"
label 'process_medium'
- conda (params.enable_conda ? 'bioconda::blast=2.12.0' : null)
+ conda "bioconda::blast=2.13.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/blast:2.12.0--pl5262h3289130_0' :
- 'quay.io/biocontainers/blast:2.12.0--pl5262h3289130_0' }"
+ 'https://depot.galaxyproject.org/singularity/blast:2.13.0--hf3cf87c_0' :
+ 'quay.io/biocontainers/blast:2.13.0--hf3cf87c_0' }"
input:
path fasta
diff --git a/modules/nf-core/modules/blast/makeblastdb/meta.yml b/modules/nf-core/blast/makeblastdb/meta.yml
similarity index 100%
rename from modules/nf-core/modules/blast/makeblastdb/meta.yml
rename to modules/nf-core/blast/makeblastdb/meta.yml
diff --git a/modules/nf-core/modules/bowtie2/align/main.nf b/modules/nf-core/bowtie2/align/main.nf
similarity index 77%
rename from modules/nf-core/modules/bowtie2/align/main.nf
rename to modules/nf-core/bowtie2/align/main.nf
index c74e376f..3d851866 100644
--- a/modules/nf-core/modules/bowtie2/align/main.nf
+++ b/modules/nf-core/bowtie2/align/main.nf
@@ -2,14 +2,14 @@ process BOWTIE2_ALIGN {
tag "$meta.id"
label "process_high"
- conda (params.enable_conda ? "bioconda::bowtie2=2.4.4 bioconda::samtools=1.15.1 conda-forge::pigz=2.6" : null)
- container "${ workflow.containerEngine == "singularity" && !task.ext.singularity_pull_docker_container ?
- "https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" :
- "quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:1744f68fe955578c63054b55309e05b41c37a80d-0" }"
+ conda "bioconda::bowtie2=2.4.4 bioconda::samtools=1.16.1 conda-forge::pigz=2.6"
+ container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
+ 'https://depot.galaxyproject.org/singularity/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' :
+ 'quay.io/biocontainers/mulled-v2-ac74a7f02cebcfcc07d8e8d1d750af9c83b4d45a:a0ffedb52808e102887f6ce600d092675bf3528a-0' }"
input:
- tuple val(meta), path(reads)
- path index
+ tuple val(meta) , path(reads)
+ tuple val(meta2), path(index)
val save_unaligned
val sort_bam
@@ -40,8 +40,8 @@ process BOWTIE2_ALIGN {
def samtools_command = sort_bam ? 'sort' : 'view'
"""
- INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/.rev.1.bt2//"`
- [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/.rev.1.bt2l//"`
+ INDEX=`find -L ./ -name "*.rev.1.bt2" | sed "s/\\.rev.1.bt2\$//"`
+ [ -z "\$INDEX" ] && INDEX=`find -L ./ -name "*.rev.1.bt2l" | sed "s/\\.rev.1.bt2l\$//"`
[ -z "\$INDEX" ] && echo "Bowtie2 index files not found" 1>&2 && exit 1
bowtie2 \\
diff --git a/modules/nf-core/modules/bowtie2/align/meta.yml b/modules/nf-core/bowtie2/align/meta.yml
similarity index 92%
rename from modules/nf-core/modules/bowtie2/align/meta.yml
rename to modules/nf-core/bowtie2/align/meta.yml
index 42ba0f96..c8e9a001 100644
--- a/modules/nf-core/modules/bowtie2/align/meta.yml
+++ b/modules/nf-core/bowtie2/align/meta.yml
@@ -27,6 +27,11 @@ input:
description: |
List of input FastQ files of size 1 and 2 for single-end and paired-end data,
respectively.
+ - meta2:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test', single_end:false ]
- index:
type: file
description: Bowtie2 genome index files
diff --git a/modules/nf-core/modules/bowtie2/build/main.nf b/modules/nf-core/bowtie2/build/main.nf
similarity index 80%
rename from modules/nf-core/modules/bowtie2/build/main.nf
rename to modules/nf-core/bowtie2/build/main.nf
index a4da62d0..551893af 100644
--- a/modules/nf-core/modules/bowtie2/build/main.nf
+++ b/modules/nf-core/bowtie2/build/main.nf
@@ -2,17 +2,17 @@ process BOWTIE2_BUILD {
tag "$fasta"
label 'process_high'
- conda (params.enable_conda ? 'bioconda::bowtie2=2.4.4' : null)
+ conda "bioconda::bowtie2=2.4.4"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/bowtie2:2.4.4--py39hbb4e92a_0' :
'quay.io/biocontainers/bowtie2:2.4.4--py39hbb4e92a_0' }"
input:
- path fasta
+ tuple val(meta), path(fasta)
output:
- path 'bowtie2' , emit: index
- path "versions.yml" , emit: versions
+ tuple val(meta), path('bowtie2') , emit: index
+ path "versions.yml" , emit: versions
when:
task.ext.when == null || task.ext.when
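Switching the input from a bare `path fasta` to `tuple val(meta), path(fasta)` means call sites must wrap the reference in a meta map. A hedged sketch of a caller update (`ch_fasta` and the `id` key are illustrative assumptions):

```groovy
// Hypothetical caller update for the new tuple signature.
BOWTIE2_BUILD (
    ch_fasta.map { fasta -> [ [ id: fasta.baseName ], fasta ] }
)
ch_index = BOWTIE2_BUILD.out.index // now emits [ meta, path('bowtie2') ]
```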
diff --git a/modules/nf-core/modules/bowtie2/build/meta.yml b/modules/nf-core/bowtie2/build/meta.yml
similarity index 74%
rename from modules/nf-core/modules/bowtie2/build/meta.yml
rename to modules/nf-core/bowtie2/build/meta.yml
index 2da9a217..0240224d 100644
--- a/modules/nf-core/modules/bowtie2/build/meta.yml
+++ b/modules/nf-core/bowtie2/build/meta.yml
@@ -16,10 +16,20 @@ tools:
doi: 10.1038/nmeth.1923
licence: ["GPL-3.0-or-later"]
input:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test', single_end:false ]
- fasta:
type: file
description: Input genome fasta file
output:
+ - meta:
+ type: map
+ description: |
+ Groovy Map containing reference information
+ e.g. [ id:'test', single_end:false ]
- index:
type: file
description: Bowtie2 genome index files
diff --git a/modules/nf-core/modules/cat/fastq/main.nf b/modules/nf-core/cat/fastq/main.nf
similarity index 59%
rename from modules/nf-core/modules/cat/fastq/main.nf
rename to modules/nf-core/cat/fastq/main.nf
index b6854895..8a0b5600 100644
--- a/modules/nf-core/modules/cat/fastq/main.nf
+++ b/modules/nf-core/cat/fastq/main.nf
@@ -1,8 +1,8 @@
process CAT_FASTQ {
tag "$meta.id"
- label 'process_low'
+ label 'process_single'
- conda (params.enable_conda ? "conda-forge::sed=4.7" : null)
+ conda "conda-forge::sed=4.7"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/ubuntu:20.04' :
'ubuntu:20.04' }"
@@ -20,9 +20,9 @@ process CAT_FASTQ {
script:
def args = task.ext.args ?: ''
def prefix = task.ext.prefix ?: "${meta.id}"
- def readList = reads.collect{ it.toString() }
+ def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
if (meta.single_end) {
- if (readList.size > 1) {
+ if (readList.size >= 1) {
"""
cat ${readList.join(' ')} > ${prefix}.merged.fastq.gz
@@ -33,7 +33,7 @@ process CAT_FASTQ {
"""
}
} else {
- if (readList.size > 2) {
+ if (readList.size >= 2) {
def read1 = []
def read2 = []
readList.eachWithIndex{ v, ix -> ( ix & 1 ? read2 : read1 ) << v }
@@ -48,4 +48,33 @@ process CAT_FASTQ {
"""
}
}
+
+ stub:
+ def prefix = task.ext.prefix ?: "${meta.id}"
+ def readList = reads instanceof List ? reads.collect{ it.toString() } : [reads.toString()]
+ if (meta.single_end) {
+ if (readList.size > 1) {
+ """
+ touch ${prefix}.merged.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+ END_VERSIONS
+ """
+ }
+ } else {
+ if (readList.size > 2) {
+ """
+ touch ${prefix}_1.merged.fastq.gz
+ touch ${prefix}_2.merged.fastq.gz
+
+ cat <<-END_VERSIONS > versions.yml
+ "${task.process}":
+ cat: \$(echo \$(cat --version 2>&1) | sed 's/^.*coreutils) //; s/ .*\$//')
+ END_VERSIONS
+ """
+ }
+ }
+
}
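The paired-end branch above relies on a compact even/odd split of the interleaved read list. Traced on hypothetical file names:

```groovy
// The even/odd split used in CAT_FASTQ, on hypothetical file names.
def readList = [ 'a_R1.fq.gz', 'a_R2.fq.gz', 'b_R1.fq.gz', 'b_R2.fq.gz' ]
def read1 = [], read2 = []
readList.eachWithIndex { v, ix -> ( ix & 1 ? read2 : read1 ) << v }
assert read1 == [ 'a_R1.fq.gz', 'b_R1.fq.gz' ] // all first mates, input order kept
assert read2 == [ 'a_R2.fq.gz', 'b_R2.fq.gz' ] // all second mates
```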
diff --git a/modules/nf-core/modules/cat/fastq/meta.yml b/modules/nf-core/cat/fastq/meta.yml
similarity index 100%
rename from modules/nf-core/modules/cat/fastq/meta.yml
rename to modules/nf-core/cat/fastq/meta.yml
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf b/modules/nf-core/custom/dumpsoftwareversions/main.nf
similarity index 79%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
rename to modules/nf-core/custom/dumpsoftwareversions/main.nf
index 12293efc..800a6099 100644
--- a/modules/nf-core/modules/custom/dumpsoftwareversions/main.nf
+++ b/modules/nf-core/custom/dumpsoftwareversions/main.nf
@@ -1,11 +1,11 @@
process CUSTOM_DUMPSOFTWAREVERSIONS {
- label 'process_low'
+ label 'process_single'
// Requires `pyyaml` which does not have a dedicated container but is in the MultiQC container
- conda (params.enable_conda ? "bioconda::multiqc=1.12" : null)
+ conda "bioconda::multiqc=1.14"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
- 'https://depot.galaxyproject.org/singularity/multiqc:1.12--pyhdfd78af_0' :
- 'quay.io/biocontainers/multiqc:1.12--pyhdfd78af_0' }"
+ 'https://depot.galaxyproject.org/singularity/multiqc:1.14--pyhdfd78af_0' :
+ 'quay.io/biocontainers/multiqc:1.14--pyhdfd78af_0' }"
input:
path versions
diff --git a/modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml b/modules/nf-core/custom/dumpsoftwareversions/meta.yml
similarity index 100%
rename from modules/nf-core/modules/custom/dumpsoftwareversions/meta.yml
rename to modules/nf-core/custom/dumpsoftwareversions/meta.yml
diff --git a/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
new file mode 100755
index 00000000..da033408
--- /dev/null
+++ b/modules/nf-core/custom/dumpsoftwareversions/templates/dumpsoftwareversions.py
@@ -0,0 +1,101 @@
+#!/usr/bin/env python
+
+
+"""Provide functions to merge multiple versions.yml files."""
+
+
+import yaml
+import platform
+from textwrap import dedent
+
+
+def _make_versions_html(versions):
+ """Generate a tabular HTML output of all versions for MultiQC."""
+ html = [
+ dedent(
+ """\\
+
+
+
+
+
Process Name
+
Software
+
Version
+
+
+ """
+ )
+ ]
+ for process, tmp_versions in sorted(versions.items()):
+ html.append("")
+ for i, (tool, version) in enumerate(sorted(tmp_versions.items())):
+ html.append(
+ dedent(
+ f"""\\
+