From 6697b53b1d8327fbbec217c91c484b492c24c045 Mon Sep 17 00:00:00 2001
From: "C. Titus Brown"
Date: Thu, 20 Jan 2022 15:48:27 -0800
Subject: [PATCH] initial commit

---
 README.md        |  1 +
 Snakefile        | 59 ++++++++++++++++++++++++++++++++++++++++++++++++
 env-bcftools.yml |  7 ++++++
 env-minimap.yml  |  7 ++++++
 env-wget.yml     |  6 +++++
 environment.yml  | 10 ++++++++
 6 files changed, 90 insertions(+)
 create mode 100644 README.md
 create mode 100644 Snakefile
 create mode 100644 env-bcftools.yml
 create mode 100644 env-minimap.yml
 create mode 100644 env-wget.yml
 create mode 100644 environment.yml

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c84eba6
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+[![Binder](https://binder.pangeo.io/badge_logo.svg)](https://binder.pangeo.io/v2/gh/ngs-docs/2022-ggg-201b-variant-calling/stable?urlpath=rstudio)
diff --git a/Snakefile b/Snakefile
new file mode 100644
index 0000000..8c5e8a4
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,59 @@
+# Default rule: tells snakemake to build the .vcf file when it is
+# run without a specific rule or file request.
+rule all:  # default target: requesting the final VCF pulls in every rule below
+    input:
+        "SRR2584857_1.ecoli-rel606.vcf"
+
+rule download_data:  # fetch the gzipped FASTQ reads with wget
+    conda: "env-wget.yml"
+    output: "SRR2584857_1.fastq.gz"
+    shell: """
+        wget https://osf.io/4rdza/download -O {output}
+    """
+
+rule download_genome:  # fetch the gzipped reference genome with wget
+    conda: "env-wget.yml"
+    output: "ecoli-rel606.fa.gz"
+    shell:
+        "wget https://osf.io/8sm92/download -O {output}"
+
+rule map_reads:  # align reads to the reference; minimap2 -a emits SAM, -x sr is the short-read preset
+    conda: "env-minimap.yml"
+    input: ref="ecoli-rel606.fa.gz", reads="SRR2584857_1.fastq.gz"  # used as {input.ref} / {input.reads}
+    output: "SRR2584857_1.ecoli-rel606.sam"
+    shell: """
+        minimap2 -ax sr {input.ref} {input.reads} > {output}
+    """
+
+rule sam_to_bam:  # convert SAM to BAM (-b) and drop unmapped reads (-F 4)
+    conda: "env-minimap.yml"
+    input: "SRR2584857_1.ecoli-rel606.sam"
+    output: "SRR2584857_1.ecoli-rel606.bam"
+    shell: """
+        samtools view -b -F 4 {input} > {output}
+    """
+
+rule sort_bam:  # sort the BAM; samtools sort writes to stdout, which is redirected to the output
+    conda: "env-minimap.yml"
+    input: "SRR2584857_1.ecoli-rel606.bam"
+    output: "SRR2584857_1.ecoli-rel606.bam.sorted"  # sorted BAM despite the .sorted suffix
+    shell: """
+        samtools sort {input} > {output}
+    """
+
+rule call_variants:  # decompress the reference, pile up, call variants, convert BCF to VCF
+    conda: "env-bcftools.yml"
+    input:
+        ref="ecoli-rel606.fa.gz",  # produced by download_genome
+        bamsort="SRR2584857_1.ecoli-rel606.bam.sorted"  # produced by sort_bam
+    output:
+        refout="ecoli-rel606.fa",  # uncompressed copy of the reference written by gunzip -c
+        pileup="SRR2584857_1.ecoli-rel606.pileup",  # bcftools mpileup output (-Ou: uncompressed BCF)
+        bcf="SRR2584857_1.ecoli-rel606.bcf",  # bcftools call output (-Ob: compressed BCF)
+        vcf="SRR2584857_1.ecoli-rel606.vcf"  # final target requested by rule all
+    shell: """
+        gunzip -c {input.ref} > {output.refout}
+        bcftools mpileup -Ou -f {output.refout} {input.bamsort} > {output.pileup}
+        bcftools call -mv -Ob {output.pileup} -o {output.bcf}
+        bcftools view {output.bcf} > {output.vcf}
+    """
diff --git a/env-bcftools.yml b/env-bcftools.yml
new file mode 100644
index 0000000..8ddce27
--- /dev/null
+++ b/env-bcftools.yml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:  # conda env for rule call_variants
+  - bcftools=1.11
+  - samtools  # NOTE(review): unpinned, and call_variants never invokes samtools - confirm it is needed
diff --git a/env-minimap.yml b/env-minimap.yml
new file mode 100644
index 0000000..c3ab28c
--- /dev/null
+++ b/env-minimap.yml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:  # conda env for rules map_reads, sam_to_bam and sort_bam
+  - minimap2=2.17
+  - samtools=1.10
diff --git a/env-wget.yml b/env-wget.yml
new file mode 100644
index 0000000..aa48f6f
--- /dev/null
+++ b/env-wget.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:  # conda env for rules download_data and download_genome
+  - wget
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..fa3a336
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,10 @@
+name: vc  # name of the conda environment this file defines
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - r-base=3.6
+  - snakemake-minimal=5.30.1  # runs the Snakefile; per-rule tools come from the env-*.yml files
+  - wget
+  - samtools=1.10  # same pin as env-minimap.yml