forked from guanxiangliang/sunbeam_neutrino
-
Notifications
You must be signed in to change notification settings - Fork 0
/
fungi.rules
96 lines (85 loc) · 3.18 KB
/
fungi.rules
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
# -*- mode: Snakemake -*-
def bt2_index_files(genome, idx_fp):
fwd = expand('{idx_fp}/{genome}.{index}.bt2',
genome=genome,
idx_fp=idx_fp,
index=range(1,5))
rev = expand('{idx_fp}/{genome}.rev.{index}.bt2',
genome=genome,
idx_fp=idx_fp,
index=range(1,3))
return fwd + rev
rule _test_bt2_index:
input:
expand(str(GENOME_DIR/'{genome}.{s}.bt2'), genome=GENOMES_DICT.keys(), s=['1','2','3','4','rev.1','rev.2'])
rule bt2_index:
input:
lambda wildcards: GENOMES_DICT[wildcards.genome]
output:
expand(str(GENOME_DIR/'{{genome}}.{s}.bt2'), s=['1','2','3','4','rev.1','rev.2'])
params:
prefix=str(GENOME_DIR/'{genome}')
shell:
"bowtie2-build {input} {params.prefix}"
rule bt2_align:
input:
lambda wildcards: bt2_index_files(wildcards.genome, Cfg['mapping']['genomes_fp']),
r1 = str(QC_FP/'decontam'/'{sample}_R1.fastq'),
r2 = str(QC_FP/'decontam'/'{sample}_R2.fastq')
output:
temp(str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}/reads.sam'))
threads:
Cfg['mapping']['threads']
params:
index=str(GENOME_DIR/'{genome}')
shell:
"bowtie2 -x {params.index} -1 {input.r1} -2 {input.r2} -S {output} -p {threads}"
rule sam_to_bam:
input: "{fname}.sam"
output: temp("{fname}.bam")
shell: "samtools view -bS {input} > {output}"
rule sort_bam:
input: "{fname}.bam"
output: "{fname}.sorted.bam"
shell: "samtools sort -o {output} {input}"
rule index_bam:
input: "{fname}.sorted.bam"
output: "{fname}.sorted.bam.bai"
shell: "samtools index {input}"
rule bam_read_counts:
input:
bam=str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}'/'reads.sorted.bam'),
bai=str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}'/'reads.sorted.bam.bai')
output:
str(MAPPING_FP/'bowtie'/'{genome}'/'summary'/'{sample}_counts')
shell:
"samtools idxstats {input.bam} | cut -f 1,3 | sed '$ d' > {output}"
rule bam_coverage:
input:
bam = str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}'/'reads.sorted.bam'),
genome = str(GENOME_DIR/"{genome}.fasta")
output:
str(MAPPING_FP/'bowtie'/'{genome}'/'summary'/'{sample}_coverage')
shell:
"bedtools genomecov -ibam {input.bam} -g {input.genome} -bg > {output}"
rule _map_all:
input:
expand(
str(MAPPING_FP/'bowtie'/'{genome}'/'summary'/'{sample}_counts'),
sample=Samples.keys(), genome=GENOMES_DICT.keys()),
expand(
str(MAPPING_FP/'bowtie'/'{genome}'/'summary'/'{sample}_coverage'),
sample=Samples.keys(), genome=GENOMES_DICT.keys())
rule plot_coverage:
input:
s = str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}'/'reads.sorted.bam'),
Rscript = str(Cfg['mapping']['Rscript_fp']/'script.R')
output:
f = str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}'/'{sample}.coverage.pdf')
params:
t = str(MAPPING_FP/'bowtie'/'{genome}'/'{sample}'/'reads.coverage')
shell:
"""
samtools depth {input.s} > {params.t}
Rscript {input.Rscript} {params.t} {output.f}
"""