forked from cbib/MICADo
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
106 lines (87 loc) · 4.47 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
import random
# frac=["03",'035','040','045',"05","10","30","50"])
rule specific:
input:expand("data/synthetic/C_FOOFOO_{seed}_{nreads}_{frac}_{nalt}_1-1-1.combined_alterations.json data/synthetic/C_FOOFOO_{seed}_{nreads}_{frac}_{nalt}_1-1-1_C_model_GMAPno40.sorted.bam".split(),\
seed=[9584],\
nreads=1000,\
nalt=[2],\
alt_type=['1-1-1'],\
frac=['50'])
rule all:
input:expand("data/synthetic/C_FOOFOO_{seed}_{nreads}_{frac}_{nalt}_1-1-1.combined_alterations.json",\
seed=random.sample(range(10000),k=8),\
nreads=[150,500,700,1000],\
nalt=[1,2,3],\
alt_type=['1-1-1','1-1-0','0-1-1'],\
frac=['035','040','045',"05","10","50"])
rule clean:
shell:"""
rm data/synthetic/*.json
rm data/synthetic/*.fastq
rm data/synthetic/*.bam
rm data/synthetic/*.sam
rm data/synthetic/*.bai
"""
rule run_micado:
priority :2
input : fasta_ref="data/reference/reference_TP53_C.fasta",\
random_sample="data/synthetic/{sample}.fastq",\
snp_data="data/reference/snp_TP53.tab"
params : sample_name= "data/synthetic/{sample}"
output:
micado_results=temp("data/synthetic/{sample}.significant_alterations.json"),\
shell:"""
source ~/.virtualenvs/micado/bin/activate
export PYTHONPATH=`pwd`/src
# run micado
python src/principal.py --fastq {input.random_sample} --experiment TP53 \
--fasta {input.fasta_ref} \
--samplekey synth2 \
--snp {input.snp_data} \
--npermutations 20 --pvalue 0.1 \
--results {output.micado_results}
"""
rule generate_sample:
input:input_sam="data/alignments/C_model_GMAPno40_NM_000546.5.sam"
params:
sample_name="data/synthetic/{sample}_{seed}_{nreads}_{frac}_{nalt}_{altw}"
output:random_alt=temp("data/synthetic/{sample}_{seed,\d+}_{nreads,\d+}_{frac,\d+}_{nalt,\d+}_{altw}.fastq"),
non_alt=temp("data/synthetic/{sample}_{seed,\d+}_{nreads,\d+}_{frac,\d+}_{nalt,\d+}_{altw}_non_alt.fastq"),\
sampler_results=temp("data/synthetic/{sample}_{seed,\d+}_{nreads,\d+}_{frac,\d+}_{nalt,\d+}_{altw}.alterations.json")
shell:"""
source ~/.virtualenvs/micado/bin/activate
export PYTHONPATH=`pwd`/src
# build a sample
python src/read_sampler/altered_reads_sampler.py --input_sam {input.input_sam} \
--output_file_prefix "{params.sample_name}" \
--n_reads {wildcards.nreads} --fraction_altered 0.{wildcards.frac} --n_alterations {wildcards.nalt} --alt_weight {wildcards.altw} \
--seed {wildcards.seed} \
# --output_lowercase \
--systematic_offset -202
"""
rule combine_json :
priority : 50
input : micado_results="data/synthetic/{sample}.significant_alterations.json",\
sampler_results="data/synthetic/{sample}.alterations.json"
output:combined_json=temp("data/synthetic/{sample}.combined_alterations.temp.json"),
cleaned_json="data/synthetic/{sample}.combined_alterations.json" # we move them outside
shell:"""
# merge known alterations and identified alterations
source ~/.virtualenvs/micado/bin/activate
export PYTHONPATH=`pwd`/src
bin/merge_json_objects.py {input.sampler_results} {input.micado_results} > {output.combined_json}
jq "." < {output.combined_json} > {output.cleaned_json}
"""
rule align_synthetic_data:
input: fastq="data/synthetic/{sample}.fastq"
params: TGTGENOME="NM_000546.5",sample="data/synthetic/{sample}"
output: "data/synthetic/{sample}_C_model_GMAPno40.sorted.bam"
shell:"""
gmap --min-intronlength=15000 -t 32 -D data/gmap_genomes/{params.TGTGENOME} \
-d {params.TGTGENOME} -f samse --read-group-id=EORTC10994 \
--read-group-name=GemSim_test --read-group-library=MWG1 \
--read-group-platform=PACBIO {input.fastq} > {params.sample}_C_model_GMAPno40.sam
samtools view -b -S {params.sample}_C_model_GMAPno40.sam > {params.sample}_C_model_GMAPno40.bam
samtools sort {params.sample}_C_model_GMAPno40.bam {params.sample}_C_model_GMAPno40.sorted
samtools index {params.sample}_C_model_GMAPno40.sorted.bam
"""