-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathSnakefile
130 lines (111 loc) · 3.28 KB
/
Snakefile
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
import os
from pathlib import Path
from shlex import join # for quoting file paths
# Additional setup for running with git-bash on Windows.
# Snakemake runs `shell:` commands through bash; on Windows we point it at
# Git Bash. The default location below is specific to one machine, so it can
# be overridden by setting the SNAKEMAKE_BASH environment variable to the
# full path of another bash.exe.
if os.name == 'nt':
    from snakemake.shell import shell

    shell.executable(
        os.environ.get(
            "SNAKEMAKE_BASH",
            r'C:\Users\tomliao\AppData\Local\Programs\Git\bin\bash.exe',
        )
    )
    # The prefix is prepended to every shell command: it sources the user's
    # bash profile, then changes back to the directory the command started in
    # (remembered in `lastwd`) in case the profile changed directory.
    shell.prefix("""
        # Load bash predefined functions
        lastwd=$(pwd)
        source ~/.bash_profile
        cd "$lastwd"
    """)
# Protect Raw data (read-only permissions for all files in `raw/`)
# from stat import S_IREAD, S_IRGRP, S_IROTH
# for fp in Path("raw").rglob("*"):
# if fp.is_file():
# os.chmod(fp, S_IREAD|S_IRGRP|S_IROTH)
# 6. Collect model-based estimates of AoA into data
# Runs src/model_based_analysis.R with no arguments; the Excel workbook listed
# under `output:` is what Snakemake expects to exist once the script finishes.
# NOTE(review): only the script itself is declared as input, so Snakemake
# cannot see any data or model files the script reads (presumably the
# outputs of `fit_stan` -- confirm and declare them if so).
# NOTE(review): "produciton" in the output file name looks like a typo of
# "production". The name must match what the R script writes, so check the
# script before renaming.
rule model_based_analysis:
    input:
        script = "src/model_based_analysis.R"
    output:
        "made/MCDI.age.m-produciton.xlsx"
    shell:
        """
        Rscript {input.script}
        """
# 5. Fit model
# Calls src/fit_stan.R for the production data only. The understanding fit is
# disabled: its outputs and its command are commented out below.
rule fit_stan:
    input:
        script = "src/fit_stan.R",
        # The Stan model is not passed on the command line; listing it here
        # makes the rule re-run when the model file changes.
        model = "src/model.stan",
        p = "made/MCDI.age-production.csv",
        # NOTE(review): `u` is declared but not used by the active command, so
        # this rule still requires the understanding CSV to be built first.
        u = "made/MCDI.age-understanding.csv"
    output:
        p = [
            "made/dat.fit-production.RDS",
            "made/model-production.RDS"
        ],
        # u = [
        #     "made/dat.fit-understanding.RDS",
        #     "made/model-understanding.RDS"
        # ]
    shell:
        # `{output.p}` is a list, so it expands to both paths separated by a
        # space. The script therefore receives: input CSV, the two RDS output
        # paths, then the literal 48 (its meaning is defined by
        # src/fit_stan.R -- TODO confirm).
        """
        Rscript {input.script} {input.p} {output.p} 48  # production
        """
        # Rscript {input.script} {input.u} {output.u} 48  # understanding
# 4. Plot empirical distribution per word
# Runs src/distribution_per_word.R twice, once with the argument UNDERSTANDING
# and once with PRODUCTION; the two PDFs under `output:` are expected to exist
# afterwards.
# NOTE(review): the rule name is spelled "distrbution" (missing "i"). It is
# kept as-is because the rule name is what callers pass to `snakemake` as a
# target.
# NOTE(review): no data files are declared as input; presumably the script
# reads the made/MCDI.age-*.csv files -- confirm and declare them if so.
rule distrbution_per_word:
    input:
        src = "src/distribution_per_word.R"
    output:
        "made/distribution_per_word-understanding.pdf",
        "made/distribution_per_word-production.pdf"
    shell:
        """
        Rscript {input.src} UNDERSTANDING
        Rscript {input.src} PRODUCTION
        """
# 3. Compute data for estimating AoA (understanding)
# Runs src/age_distribution_understanding.R with no arguments. The files under
# `data` are not passed to the script; they are listed so that Snakemake
# builds made/word_id.csv first and re-runs this rule when any of them changes.
rule age_distribution_understanding:
    input:
        src = "src/age_distribution_understanding.R",
        data = [
            "made/word_id.csv",
            "raw/wordbank_instrument_data_MandarinWG.csv",
        ]
    output:
        "made/MCDI.age-understanding.csv"
    shell:
        """
        Rscript {input.src}
        """
# 2. Compute data for estimating AoA (production)
# Runs src/age_distribution_production.R with no arguments. The files under
# `data` (word metadata plus the raw WG and WS instrument exports) are not
# passed to the script; they are listed so that Snakemake builds
# made/word_id.csv first and re-runs this rule when any of them changes.
rule age_distribution_production:
    input:
        src = "src/age_distribution_production.R",
        data = [
            "made/word_id.csv",
            "raw/wordbank_instrument_data_MandarinWG.csv",
            "raw/wordbank_instrument_data_MandarinWS.csv"
        ]
    output:
        "made/MCDI.age-production.csv"
    shell:
        """
        Rscript {input.src}
        """
# 1. Word metadata across WG & WS forms
# Runs src/word_id.R with no arguments; made/word_id.csv is expected to exist
# afterwards. It is an input of both age_distribution_* rules.
# NOTE(review): only the script is declared as input; if it reads files under
# raw/, declare them so changes there trigger a rebuild.
rule word_id:
    input:
        src = "src/word_id.R"
    output:
        "made/word_id.csv"
    shell:
        """
        Rscript {input.src}
        """
########################################
"""
[Tips]
DO NOT use Snakemake's built-in wildcards:
they have lots of limitations and change the default behaviour of for loops
inside `run:` blocks.
[How relative paths work in Snakefile]
`input:`, `output:`, and `shell:` have the project root as the working dir.
All other directives (e.g. `script:`, `include:`, and `notebook:`) have
the directory where `Snakefile` is located as the working dir.
"""