Skip to content

Commit

Permalink
Merge branch 'custom_gtf' into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
svarona authored Jan 17, 2024
2 parents ce72f48 + cbab17b commit b57d083
Show file tree
Hide file tree
Showing 9 changed files with 182 additions and 17 deletions.
16 changes: 9 additions & 7 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,18 @@ Thank you to everyone else that has contributed by reporting bugs, enhancements
- [[PR #387](https://github.com/nf-core/viralrecon/pull/387)] - Software closes gracefully when encountering an error
- [[PR #395](https://github.com/nf-core/viralrecon/pull/395)] - Remove minia from default assemblers because it is unreliable
- [[PR #393](https://github.com/nf-core/viralrecon/pull/393)] - Changed primer set to params
- [[PR #401](https://github.com/nf-core/viralrecon/pull/401)] - Added option to add a custom annotation

### Parameters

| Old parameter | New parameter |
| ------------------- | ------------- |
| `--skip_freyja` | |
| `--freyja_repeats` | |
| `--freyja_db_name` | |
| `--freyja_barcodes` | |
| `--freyja_lineages` | |
| Old parameter | New parameter |
| ------------------- | -------------------- |
| `--skip_freyja` | |
| `--freyja_repeats` | |
| `--freyja_db_name` | |
| `--freyja_barcodes` | |
| `--freyja_lineages` | |
| | `--additional_annot` |

> **NB:** Parameter has been **updated** if both old and new parameter information is present.
> **NB:** Parameter has been **added** if just the new parameter information is present.
Expand Down
51 changes: 46 additions & 5 deletions bin/make_variants_long_table.py
Original file line number Diff line number Diff line change
Expand Up @@ -236,11 +236,7 @@ def snpsift_to_table(snpsift_file):
new_colnames = [x.replace("ANN[*].", "") for x in old_colnames]
table.rename(columns=dict(zip(old_colnames, new_colnames)), inplace=True)
table = table.loc[:, ["CHROM", "POS", "REF", "ALT", "GENE", "EFFECT", "HGVS_C", "HGVS_P"]]

## Split by comma and get first value in cols = ['ALT','GENE','EFFECT','HGVS_C','HGVS_P']
for i in range(len(table)):
for j in range(3, 8):
table.iloc[i, j] = str(table.iloc[i, j]).split(",")[0]
table = one_effect_per_line(table)

## Amino acid substitution
aa = []
Expand All @@ -252,6 +248,51 @@ def snpsift_to_table(snpsift_file):
return table


def one_effect_per_line(table):
    """Expand comma-separated annotation fields into one row per effect.

    The first eight columns of ``table`` are read positionally as
    CHROM, POS, REF, ALT, GENE, EFFECT, HGVS_C and HGVS_P. The last four
    hold comma-separated values that are paired up by position, so the
    n-th gene goes with the n-th effect and the n-th HGVS strings.

    For each input row:

    * annotations whose effect contains "upstream" or "downstream" are
      dropped, and every remaining annotation becomes its own output row;
    * if *all* annotations are upstream/downstream, exactly one row holding
      the first annotation is kept so the variant is not lost.

    Args:
        table: ``pandas.DataFrame`` whose first eight columns are laid out
            as described above.

    Returns:
        A new ``pandas.DataFrame`` with columns CHROM, POS, REF, ALT, GENE,
        EFFECT, HGVS_C and HGVS_P and a fresh 0..n-1 index. An empty input
        yields an empty frame that still carries these columns.
    """
    columns = ["CHROM", "POS", "REF", "ALT", "GENE", "EFFECT", "HGVS_C", "HGVS_P"]

    # Collect plain dicts and build the frame once at the end; concatenating
    # a one-row frame per annotation copies the whole result every time.
    rows = []
    for record in table.iloc[:, :8].itertuples(index=False, name=None):
        chrom, pos, ref, alt = record[:4]

        # str() keeps non-string cells from breaking .split().
        # zip() pairs the four fields position by position.
        annotations = list(zip(*(str(cell).split(",") for cell in record[4:8])))

        informative = [
            ann for ann in annotations if "upstream" not in ann[1] and "downstream" not in ann[1]
        ]
        # Only upstream/downstream hits: keep a single representative row
        # (the first annotation) rather than one copy per annotation.
        selected = informative if informative else annotations[:1]

        for gene, effect, hgvs_c, hgvs_p in selected:
            rows.append(
                {
                    "CHROM": chrom,
                    "POS": pos,
                    "REF": ref,
                    "ALT": alt,
                    "GENE": gene,
                    "EFFECT": effect,
                    "HGVS_C": hgvs_c,
                    "HGVS_P": hgvs_p,
                }
            )

    return pd.DataFrame(rows, columns=columns)


def main(args=None):
args = parser_args(args)

Expand Down
8 changes: 8 additions & 0 deletions conf/modules_illumina.config
Original file line number Diff line number Diff line change
Expand Up @@ -564,6 +564,14 @@ if (!params.skip_variants) {
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
// Settings for the MAKE_VARIANTS_LONG_TABLE_ADDITIONAL process.
// ext.args passes the active variant caller and sets the output file name to
// 'additional_variants_long_table.csv'.
// Results are published to <outdir>/variants/<variant_caller>; the saveAs
// closure returns null for 'versions.yml' and the unchanged name for any other file.
withName: 'MAKE_VARIANTS_LONG_TABLE_ADDITIONAL' {
ext.args = "--variant_caller ${variant_caller} --output_file 'additional_variants_long_table.csv'"
publishDir = [
path: { "${params.outdir}/variants/${variant_caller}" },
mode: params.publish_dir_mode,
saveAs: { filename -> filename.equals('versions.yml') ? null : filename }
]
}
}
}
}
Expand Down
1 change: 1 addition & 0 deletions docs/output.md
Original file line number Diff line number Diff line change
Expand Up @@ -289,6 +289,7 @@ As described in the documentation, [ASCIIGenome](https://asciigenome.readthedocs

- `<CALLER>/`
- `variants_long_table.csv`: Long format table collating per-sample information for individual variants, functional effect prediction and lineage analysis.
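- `additional_variants_long_table.csv`: Long format table with the same layout as `variants_long_table.csv`, generated from the additional annotation file supplied with `--additional_annot` (for example, an annotation with overlapping features). Only present when `--additional_annot` is provided.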

**NB:** The value of `<CALLER>` in the output directory name above is determined by the `--artic_minion_caller` parameter (Default: 'nanopolish').

Expand Down
12 changes: 10 additions & 2 deletions modules/local/snpeff_build.nf
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,14 @@ process SNPEFF_BUILD {
task.ext.when == null || task.ext.when

script:
def args = task.ext.args ?: ''
def basename = fasta.baseName
def extension = gff.getExtension()
if (extension == "gtf") {
format = "gtf22"
} else {
format = "gff3"
}

def avail_mem = 4
if (!task.memory) {
Expand All @@ -36,7 +43,7 @@ process SNPEFF_BUILD {
cd ../../
mkdir -p snpeff_db/${basename}/
cd snpeff_db/${basename}/
ln -s ../../$gff genes.gff
ln -s ../../$gff genes.$extension
cd ../../
echo "${basename}.genome : ${basename}" > snpeff.config
Expand All @@ -46,7 +53,8 @@ process SNPEFF_BUILD {
build \\
-config snpeff.config \\
-dataDir ./snpeff_db \\
-gff3 \\
-${format} \\
$args \\
-v \\
${basename}
Expand Down
1 change: 1 addition & 0 deletions nextflow.config
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ params {
primer_left_suffix = '_LEFT'
primer_right_suffix = '_RIGHT'
save_reference = false
additional_annot = null

// Nanopore options
fastq_dir = null
Expand Down
8 changes: 8 additions & 0 deletions nextflow_schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,14 @@
"description": "Full path to GFF annotation file.",
"fa_icon": "fas fa-file-invoice"
},
"additional_annot": {
"type": "string",
"format": "file-path",
"mimetype": "text/plain",
"pattern": "^\\S+(\\.gff|\\.gtf)(\\.gz)?$",
"description": "Full path to additional annotation file in GTF or GFF format.",
"fa_icon": "fas fa-file-invoice"
},
"bowtie2_index": {
"type": "string",
"format": "path",
Expand Down
79 changes: 79 additions & 0 deletions subworkflows/local/additional_annot.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
//
// Run snpEff, bgzip, tabix, stats and SnpSift commands
//

include { SNPEFF_BUILD } from '../../modules/local/snpeff_build'
include { SNPEFF_ANN } from '../../modules/local/snpeff_ann'
include { SNPSIFT_EXTRACTFIELDS } from '../../modules/local/snpsift_extractfields'
include { VCF_BGZIP_TABIX_STATS } from './vcf_bgzip_tabix_stats'
include { BCFTOOLS_QUERY } from '../../modules/nf-core/bcftools/query/main'
include { MAKE_VARIANTS_LONG_TABLE as MAKE_VARIANTS_LONG_TABLE_ADDITIONAL } from '../../modules/local/make_variants_long_table'


//
// Build a snpEff database from the additional annotation file, annotate the
// variant calls with it and collate the result into a long-format table.
//
workflow ADDITIONAL_ANNOT {
    take:
    vcf      // channel: [ val(meta), [ vcf ] ]
    tbi      // channel: [ val(meta), [ tbi ] ]
    fasta    // path   : genome.fasta
    annot    // path   : additional_annot
    pangolin // channel: [ val(meta), [ csv ] ]

    main:

    ch_versions = Channel.empty()

    //
    // Make snpEff database
    //
    // The build always runs in this subworkflow, so its outputs are used
    // directly; no empty placeholder channels are needed beforehand.
    SNPEFF_BUILD (
        fasta,
        annot
    )
    ch_snpeff_db     = SNPEFF_BUILD.out.db
    ch_snpeff_config = SNPEFF_BUILD.out.config
    ch_versions      = ch_versions.mix(SNPEFF_BUILD.out.versions)

    //
    // Annotate the input VCFs using the database and config built above
    //
    SNPEFF_ANN (
        vcf,
        ch_snpeff_db,
        ch_snpeff_config,
        fasta
    )
    ch_versions = ch_versions.mix(SNPEFF_ANN.out.versions.first())

    //
    // Run the bgzip / tabix / stats subworkflow on the annotated VCFs
    //
    VCF_BGZIP_TABIX_STATS (
        SNPEFF_ANN.out.vcf,
        [],
        [],
        []
    )
    ch_versions = ch_versions.mix(VCF_BGZIP_TABIX_STATS.out.versions)

    //
    // Extract annotation fields from the VCFs emitted by the previous step
    //
    SNPSIFT_EXTRACTFIELDS (
        VCF_BGZIP_TABIX_STATS.out.vcf
    )
    ch_versions = ch_versions.mix(SNPSIFT_EXTRACTFIELDS.out.versions.first())

    //
    // Query the original (un-annotated) VCFs, paired with their index by meta
    //
    BCFTOOLS_QUERY (
        vcf.join(tbi, by: [0]),
        [],
        [],
        []
    )
    ch_versions = ch_versions.mix(BCFTOOLS_QUERY.out.versions.first())

    //
    // Collate all samples into one table; only the file element (it[1]) of
    // each tuple is passed on, and optional inputs fall back to an empty list
    //
    MAKE_VARIANTS_LONG_TABLE_ADDITIONAL (
        BCFTOOLS_QUERY.out.txt.collect{it[1]},
        SNPSIFT_EXTRACTFIELDS.out.txt.collect{it[1]}.ifEmpty([]),
        pangolin.collect{it[1]}.ifEmpty([])
    )
    ch_versions = ch_versions.mix(MAKE_VARIANTS_LONG_TABLE_ADDITIONAL.out.versions)

    emit:
    long_table = MAKE_VARIANTS_LONG_TABLE_ADDITIONAL.out.csv // channel: [ val(meta), [ csv ] ]

    versions   = ch_versions                                 // channel: [ versions.yml ]
}
23 changes: 20 additions & 3 deletions workflows/illumina.nf
Original file line number Diff line number Diff line change
Expand Up @@ -35,12 +35,13 @@ def checkPathParamList = [
params.input, params.fasta, params.gff, params.bowtie2_index,
params.kraken2_db, params.primer_bed, params.primer_fasta,
params.blast_db, params.spades_hmm, params.multiqc_config,
params.freyja_barcodes, params.freyja_lineages
params.freyja_barcodes, params.freyja_lineages, params.additional_annot
]
for (param in checkPathParamList) { if (param) { file(param, checkIfExists: true) } }

if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet file not specified!' }
if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = [] }
if (params.input) { ch_input = file(params.input) } else { exit 1, 'Input samplesheet file not specified!' }
if (params.spades_hmm) { ch_spades_hmm = file(params.spades_hmm) } else { ch_spades_hmm = [] }
// Optional additional annotation file: resolve it when given, otherwise fall
// back to an empty list so ch_additional_gtf is always defined.
if (params.additional_annot) { ch_additional_gtf = file(params.additional_annot) } else { ch_additional_gtf = [] }

def assemblers = params.assemblers ? params.assemblers.split(',').collect{ it.trim().toLowerCase() } : []

Expand Down Expand Up @@ -84,6 +85,7 @@ include { VARIANTS_BCFTOOLS } from '../subworkflows/local/variants_bcftool
include { CONSENSUS_IVAR } from '../subworkflows/local/consensus_ivar'
include { CONSENSUS_BCFTOOLS } from '../subworkflows/local/consensus_bcftools'
include { VARIANTS_LONG_TABLE } from '../subworkflows/local/variants_long_table'
include { ADDITIONAL_ANNOT } from '../subworkflows/local/additional_annot'
include { ASSEMBLY_SPADES } from '../subworkflows/local/assembly_spades'
include { ASSEMBLY_UNICYCLER } from '../subworkflows/local/assembly_unicycler'
include { ASSEMBLY_MINIA } from '../subworkflows/local/assembly_minia'
Expand Down Expand Up @@ -560,6 +562,21 @@ workflow ILLUMINA {
ch_versions = ch_versions.mix(VARIANTS_LONG_TABLE.out.versions)
}

//
// SUBWORKFLOW: Create variants long table report for additional annotation file
//
if (params.additional_annot) {
ADDITIONAL_ANNOT (
ch_vcf,
ch_tbi,
PREPARE_GENOME.out.fasta,
ch_additional_gtf,
ch_pangolin_multiqc

)
ch_versions = ch_versions.mix(ADDITIONAL_ANNOT.out.versions)
}

//
// MODULE: Primer trimming with Cutadapt
//
Expand Down

0 comments on commit b57d083

Please sign in to comment.