Patch summary (text before the first diff header is ignored by git apply):
this patch creates six new files for a small Snakemake variant-calling
workflow. README.md holds a Binder launch badge. Snakefile defines the
rules all, download_data, download_genome, map_reads, sam_to_bam,
sort_bam and call_variants. env-wget.yml, env-minimap.yml and
env-bcftools.yml are the per-rule conda environments named by the
rules' conda directives. environment.yml is the top-level "vc"
environment.

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c84eba6
--- /dev/null
+++ b/README.md
@@ -0,0 +1 @@
+[![Binder](https://binder.pangeo.io/badge_logo.svg)](https://binder.pangeo.io/v2/gh/ngs-docs/2022-ggg-201b-variant-calling/stable?urlpath=rstudio)
diff --git a/Snakefile b/Snakefile
new file mode 100644
index 0000000..8c5e8a4
--- /dev/null
+++ b/Snakefile
@@ -0,0 +1,59 @@
+# default rule that tells snakemake to create the .vcf file if
+# it is not run with any specific rule or file request.
+rule all:
+    input:
+        "SRR2584857_1.ecoli-rel606.vcf"
+
+rule download_data:
+    conda: "env-wget.yml"
+    output: "SRR2584857_1.fastq.gz"
+    shell: """
+        wget https://osf.io/4rdza/download -O {output}
+    """
+
+rule download_genome:
+    conda: "env-wget.yml"
+    output: "ecoli-rel606.fa.gz"
+    shell:
+        "wget https://osf.io/8sm92/download -O {output}"
+
+rule map_reads:
+    conda: "env-minimap.yml"
+    input: ref="ecoli-rel606.fa.gz", reads="SRR2584857_1.fastq.gz"
+    output: "SRR2584857_1.ecoli-rel606.sam"
+    shell: """
+        minimap2 -ax sr {input.ref} {input.reads} > {output}
+    """
+
+rule sam_to_bam:
+    conda: "env-minimap.yml"
+    input: "SRR2584857_1.ecoli-rel606.sam"
+    output: "SRR2584857_1.ecoli-rel606.bam"
+    shell: """
+        samtools view -b -F 4 {input} > {output}
+    """
+
+rule sort_bam:
+    conda: "env-minimap.yml"
+    input: "SRR2584857_1.ecoli-rel606.bam"
+    output: "SRR2584857_1.ecoli-rel606.bam.sorted"
+    shell: """
+        samtools sort {input} > {output}
+    """
+
+rule call_variants:
+    conda: "env-bcftools.yml"
+    input:
+        ref="ecoli-rel606.fa.gz",
+        bamsort="SRR2584857_1.ecoli-rel606.bam.sorted"
+    output:
+        refout="ecoli-rel606.fa",
+        pileup="SRR2584857_1.ecoli-rel606.pileup",
+        bcf="SRR2584857_1.ecoli-rel606.bcf",
+        vcf="SRR2584857_1.ecoli-rel606.vcf"
+    shell: """
+        gunzip -c {input.ref} > {output.refout}
+        bcftools mpileup -Ou -f {output.refout} {input.bamsort} > {output.pileup}
+        bcftools call -mv -Ob {output.pileup} -o {output.bcf}
+        bcftools view {output.bcf} > {output.vcf}
+    """
diff --git a/env-bcftools.yml b/env-bcftools.yml
new file mode 100644
index 0000000..8ddce27
--- /dev/null
+++ b/env-bcftools.yml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - bcftools=1.11
+  - samtools
diff --git a/env-minimap.yml b/env-minimap.yml
new file mode 100644
index 0000000..c3ab28c
--- /dev/null
+++ b/env-minimap.yml
@@ -0,0 +1,7 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - minimap2=2.17
+  - samtools=1.10
diff --git a/env-wget.yml b/env-wget.yml
new file mode 100644
index 0000000..aa48f6f
--- /dev/null
+++ b/env-wget.yml
@@ -0,0 +1,6 @@
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - wget
diff --git a/environment.yml b/environment.yml
new file mode 100644
index 0000000..fa3a336
--- /dev/null
+++ b/environment.yml
@@ -0,0 +1,10 @@
+name: vc
+channels:
+  - conda-forge
+  - bioconda
+  - defaults
+dependencies:
+  - r-base=3.6
+  - snakemake-minimal=5.30.1
+  - wget
+  - samtools=1.10
\ No newline at end of file