Skip to content

Commit

Permalink
update
Browse files Browse the repository at this point in the history
  • Loading branch information
xiaoli-dong committed Jan 30, 2024
1 parent 23cb2a7 commit b2bc2e0
Show file tree
Hide file tree
Showing 26 changed files with 1,053 additions and 244 deletions.
501 changes: 501 additions & 0 deletions bin/GBS-SBG.pl

Large diffs are not rendered by default.

3 changes: 2 additions & 1 deletion bin/combine_xml.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,8 @@ def main():
xml_element_tree = None
for xml_file in xml_files:
data = ET.tostring(ET.parse(xml_file).getroot()).decode("utf-8")
fout.write(data)
fout.write(data)
fout.write('\n')
fout.close()


Expand Down
61 changes: 61 additions & 0 deletions bin/xml2csv.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
#!/usr/bin/env python

import csv
import argparse
import xml.etree.ElementTree as ET

def xml_to_csv(element, csv_writer):
    """Write *element* and every descendant to *csv_writer*, one row per element.

    Rows are emitted in document order (parent before its children). Each row
    is laid out as: tag, text (``None`` when the element has no text), then
    the attribute values in the order the element stores them.
    """
    # Element.iter() yields the element itself first, then all descendants
    # depth-first, which is the same order a parent-then-children recursion
    # visits them in.
    for node in element.iter():
        csv_writer.writerow([node.tag, node.text, *node.attrib.values()])

def main():
    """Command-line entry point: flatten one XML file into a CSV file.

    Reads the XML document named by ``-i/--input`` and writes one CSV row per
    XML element (tag, text, attribute values) to the file named by
    ``-o/--output``.

    Raises:
        xml.etree.ElementTree.ParseError: if the input is not well-formed XML.
        OSError: if the input cannot be read or the output cannot be written.
    """
    description = "Convert an XML file into a CSV file (one row per XML element)"
    parser = argparse.ArgumentParser(description=description)

    # Exactly one input document is parsed, so this is a single file name,
    # not a list. Both options are mandatory, so neither carries a default.
    parser.add_argument("-i", "--input", required=True, help="Input XML file name")
    parser.add_argument("-o", "--output", required=True, help="Output CSV file name")

    args = parser.parse_args()

    root = ET.parse(args.input).getroot()

    # NOTE(review): this loop only echoes the "type"/"value" attributes found
    # two levels below the root's second child to stdout; it looks like
    # leftover debugging output. It is kept so stdout is unchanged, but it is
    # guarded so documents with fewer than two top-level children no longer
    # abort with IndexError before any CSV is written.
    if len(root) > 1:
        for result in root[1]:
            for detail in result:
                print(detail.attrib.get("type"))
                print(detail.attrib.get("value"))

    # newline="" lets the csv module control line endings itself.
    with open(args.output, "w", newline="") as csv_file:
        xml_to_csv(root, csv.writer(csv_file))

if __name__ == "__main__":
main()

35 changes: 34 additions & 1 deletion conf/modules.config
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,14 @@ if(!params.skip_illumina_reads_qc){
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
]
}
// Publish the output of CSVTK_CONCAT_STATS_NOT_ASSEMBLED into the shared report directory.
withName: CSVTK_CONCAT_STATS_NOT_ASSEMBLED{
publishDir = [
path: { "${params.outdir}/report" },
mode: params.publish_dir_mode,
// Returning null from saveAs keeps versions.yml out of the published files.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },

]
}
}

if(params.illumina_reads_qc_tool == 'bbduk'){
Expand Down Expand Up @@ -276,7 +284,7 @@ if(! params.skip_tbprofiler){
if(! params.skip_pneumocat){
process{
withName: PNEUMOCAT {
ext.prefix = { "${meta.id}.pneumocat" }
//ext.prefix = { "${meta.id}.pneumocat" }
ext.args = '--cleanup'
publishDir = [
path: { "${params.outdir}/${meta.id}/pneumocat/illumina" },
Expand Down Expand Up @@ -628,7 +636,32 @@ if(! params.skip_emmtyper){

}
}
// GBS-SBG settings; the whole section is skipped when params.skip_gbssbg is set.
if(! params.skip_gbssbg){
process{
withName: GBS_SBG {
// Output file stem becomes "<sample id>.gbssbg".
ext.prefix = { "${meta.id}.gbssbg" }

publishDir = [
// Per-sample results directory.
path: { "${params.outdir}/${meta.id}/gbssbg" },
mode: params.publish_dir_mode,
// Returning null from saveAs keeps versions.yml out of the published files.
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: true
]

}

withName: CSVTK_CONCAT_GBSSBG{
// NOTE(review): -C sets csvtk's comment character. Moving it to '$' presumably
// stops csvtk from discarding the GBS-SBG header row, which starts with '#'.
// -I / -E are csvtk's ignore-illegal-row / ignore-empty-row switches; confirm
// against the csvtk version pinned in the module.
ext.args = '-C \'$\' -I -E '
publishDir = [
// Combined table goes to the shared report directory.
path: { "${params.outdir}/report" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename },
enabled: true
]
}

}
}

if (params.platform == 'nanopore') {
includeConfig 'modules_nanopore.config'
Expand Down
2 changes: 2 additions & 0 deletions conf/modules_illumina.config
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@

if(!params.skip_illumina_reads_assembly){
process {


withName: SKESA {
publishDir = [
path: { "${params.outdir}/${meta.id}/assembly/illumina/skesa" },
Expand Down
40 changes: 40 additions & 0 deletions modules/local/csvtk/concat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
// Concatenate the staged CSV/TSV tables into a single file with `csvtk concat`.
process CSVTK_CONCAT {
tag "$meta.id"
label 'process_low'

conda "bioconda::csvtk=0.23.0"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/csvtk:0.23.0--h9ee0642_0' :
'biocontainers/csvtk:0.23.0--h9ee0642_0' }"

input:
// meta: sample map (meta.id is used for the tag and the default prefix); csv: tables to concatenate.
tuple val(meta), path(csv)
// NOTE(review): in_format is accepted but never referenced in the script below.
val in_format
// out_format only selects the output file extension ("tsv" -> .tsv, anything else -> .csv).
val out_format

output:
tuple val(meta), path("${prefix}.${out_extension}"), emit: csv
path "versions.yml" , emit: versions

when:
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
// prefix and out_extension are assigned without `def`; presumably so the output
// block above can interpolate them -- confirm before adding `def`.
prefix = task.ext.prefix ?: "${meta.id}"

// No delimiter option is derived from in_format/out_format here, so any
// delimiter flags would have to arrive through task.ext.args (TODO confirm callers do so).
out_extension = out_format == "tsv" ? 'tsv' : 'csv'
"""
csvtk \\
concat \\
$args \\
--num-cpus $task.cpus \\
--out-file ${prefix}.${out_extension} \\
$csv
cat <<-END_VERSIONS > versions.yml
"${task.process}":
csvtk: \$(echo \$( csvtk version | sed -e "s/csvtk v//g" ))
END_VERSIONS
"""
}
51 changes: 51 additions & 0 deletions modules/local/csvtk/concat/meta.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
name: csvtk_concat
description: Concatenate two or more CSV (or TSV) tables into a single table
keywords:
- concatenate
- tsv
- csv
tools:
- csvtk:
description: A cross-platform, efficient, practical CSV/TSV toolkit
homepage: http://bioinf.shenwei.me/csvtk
documentation: http://bioinf.shenwei.me/csvtk
tool_dev_url: https://github.com/shenwei356/csvtk

licence: ["MIT"]

input:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- csv:
type: file
description: CSV/TSV formatted files
pattern: "*.{csv,tsv}"
- in_format:
type: string
description: Input format (csv, tab, or a delimiting character)
pattern: "*"
- out_format:
type: string
description: Output format (csv, tab, or a delimiting character)
pattern: "*"

output:
- meta:
type: map
description: |
Groovy Map containing sample information
e.g. [ id:'test', single_end:false ]
- versions:
type: file
description: File containing software versions
pattern: "versions.yml"
- csv:
type: file
description: Concatenated CSV/TSV file
pattern: "*.{csv,tsv}"

authors:
- "@rpetit3"
6 changes: 3 additions & 3 deletions modules/local/fastp/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ process FASTP {
val save_merged

output:
tuple val(meta), path('*fastp.*fastq.gz') , optional:true, emit: reads
tuple val(meta), path('*fastp*fastq.gz') , optional:true, emit: reads
tuple val(meta), path('*.json') , emit: json
tuple val(meta), path('*.html') , emit: html
tuple val(meta), path('*.log') , emit: log
Expand Down Expand Up @@ -81,8 +81,8 @@ process FASTP {
fastp \\
--in1 ${prefix}_1.fastq.gz \\
--in2 ${prefix}_2.fastq.gz \\
--out1 ${prefix}_fastp.R1.fastq.gz \\
--out2 ${prefix}_fastp.R2.fastq.gz \\
--out1 ${prefix}_fastp_1.fastq.gz \\
--out2 ${prefix}_fastp_2.fastq.gz \\
--json ${prefix}.fastp.json \\
--html ${prefix}.fastp.html \\
$adapter_list \\
Expand Down
43 changes: 43 additions & 0 deletions modules/local/gbs/sbg/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,43 @@
// Serotype Group B Streptococcus assemblies by piping the contigs into GBS-SBG.pl.
process GBS_SBG {
    tag "$meta.id"
    label 'process_low'

    conda "bioconda::blast=2.15.0"
    // The Docker image reference needs a literal ':' before the tag; the
    // URL-encoded form ('%3A') is only meaningful inside the https URL used
    // for the Singularity image.
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/blast%3A2.15.0--pl5321h6f7f691_1':
        'biocontainers/blast:2.15.0--pl5321h6f7f691_1' }"

    input:
    // meta: sample map (meta.id is passed as -name); fasta: contigs, plain or gzipped
    tuple val(meta), path(fasta)
    // Reference passed to GBS-SBG.pl via -ref; the option is omitted when ref is falsy
    path(ref)

    output:
    // Example of the emitted table:
    //   # Name  Serotype    Uncertainty
    //   S18     NT          MaxCov:0;MaxID:0
    //   S17     GBS-SBG:Ia
    tuple val(meta), path("*.tsv"), emit: tsv
    path "versions.yml"           , emit: versions

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // Decompress on the fly when the contigs are gzipped; GBS-SBG.pl reads them from stdin.
    def gzipped = fasta.toString().endsWith('.gz')
    def cmd_input = gzipped ? "zcat ${fasta}" : "cat ${fasta}"
    def cmd_refdb = ref ? "-ref ${ref}" : ""

    """
    ${cmd_input} | GBS-SBG.pl \\
        -name ${meta.id} \\
        ${cmd_refdb} \\
        ${args} \\
        > ${prefix}.tsv

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        blast: \$(blastn -version 2>&1 | sed 's/^.*blastn: //; s/ .*\$//')
    END_VERSIONS
    """
}
8 changes: 4 additions & 4 deletions modules/local/hostile/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -46,8 +46,8 @@ process HOSTILE {
then
mv ${simplename}*.clean.fastq.gz ${prefix}.dehost.fastq.gz
else
mv ${simplename}*.clean_1.fastq.gz ${prefix}.dehost.R1.fastq.gz
mv ${simplename}*.clean_2.fastq.gz ${prefix}.dehost.R2.fastq.gz
mv *clean_1.fastq.gz ${prefix}.dehost_1.fastq.gz
mv *clean_2.fastq.gz ${prefix}.dehost_2.fastq.gz
fi
cat <<-END_VERSIONS > versions.yml
Expand All @@ -71,8 +71,8 @@ process HOSTILE {
then
mv ${simplename}*.clean.fastq.gz ${prefix}.dehost.fastq.gz
else
mv ${simplename}*.clean_1.fastq.gz ${prefix}.dehost.R1.fastq.gz
mv ${simplename}*.clean_2.fastq.gz ${prefix}.dehost.R2.fastq.gz
mv ${simplename}*.clean_1.fastq.gz ${prefix}.dehost_1.fastq.gz
mv ${simplename}*.clean_2.fastq.gz ${prefix}.dehost_2.fastq.gz
fi
cat <<-END_VERSIONS > versions.yml
Expand Down
File renamed without changes.
73 changes: 73 additions & 0 deletions modules/local/pneumocat/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
VERSION = '1.2.1' // WARN: Version information not provided by tool on CLI. Please update this string when bumping container versions.
// Run PneumoCaT on the staged reads and expose a single final-result XML per sample.
process PNEUMOCAT {
    tag "$meta.id"
    label 'process_low'
    // A failing task is ignored rather than aborting the run.
    errorStrategy 'ignore'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/pneumocat:1.2.1--0':
        'biocontainers/pneumocat:1.2.1--0' }"

    input:
    // meta: sample map (meta.id is the default prefix); reads: files staged into the work dir
    tuple val(meta), path(reads)

    output:
    tuple val(meta), path("*.final_results.xml"),  emit: results
    tuple val(meta), path("coverage_summary.txt"), emit: coverage
    path "versions.yml"                          , emit: versions
    /*
    If only one capsular type is matched with more than 90% coverage
    then the report from step 1 contained in this xml file is considered
    the final result (result type="Serotype") and no further folders
    will appear within the PneumoCaT output folder. If more than one
    capsular type is matched with more than 90% coverage then the
    software moves to step two and a SNP_based_serotyping folder is
    created containing a second XML file with the final result
    - see STEP 2 - VARIANT-BASED APPROACH.
    Note that the output XML file from step 1 only reports two capsular types,
    when actually more could be matched and all will pass to step 2 for
    further distinction. Further information on mapped serotypes in
    stage 1 can be found in "Coverage_summary.txt". If the top hit
    coverage is < 90% then no serotypes are reported and 'Failed'
    appears instead.
    */

    when:
    task.ext.when == null || task.ext.when

    script:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    """
    PneumoCaT.py \\
        --input_directory ./ \\
        $args \\
        --threads $task.cpus \\
        --output_dir ./

    # Prefer the step-2 (SNP-based) report when PneumoCaT produced one;
    # otherwise the step-1 report is the final result.
    if [ -d "SNP_based_serotyping" ]
    then
        cp SNP_based_serotyping/${prefix}.results.xml ${prefix}.final_results.xml
    else
        cp ${prefix}.results.xml ${prefix}.final_results.xml
    fi

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pneumocat: $VERSION
    END_VERSIONS
    """

    stub:
    def args = task.ext.args ?: ''
    def prefix = task.ext.prefix ?: "${meta.id}"
    // The stub must create every file named in the output block above,
    // otherwise a stub run cannot satisfy the declared outputs.
    """
    touch ${prefix}.final_results.xml
    touch coverage_summary.txt

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        pneumocat: $VERSION
    END_VERSIONS
    """
}
File renamed without changes.
Loading

0 comments on commit b2bc2e0

Please sign in to comment.