-
Notifications
You must be signed in to change notification settings - Fork 1
/
config.yaml
198 lines (180 loc) · 10.8 KB
/
config.yaml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
# YAML configuration file for the analysis
# Upper bound on the number of CPUs that any single rule may use.
max_cpus: 16
# ---------------------------------------------------------------------------
# Output directories (all located under `results/`)
# ---------------------------------------------------------------------------
# Logs for snakemake rules.
log_dir: results/logs
# FASTQs & QC for 10x transcriptomic runs.
fastq10x_dir: results/fastq10x
# FASTQs for PacBio runs.
pacbio_dir: results/pacbio
# `cellranger mkfastq` output.
mkfastq10x_dir: results/fastq10x/mkfastq_output
# Location of downloaded genomes and annotations.
genome_dir: results/genomes
# STAR reference genome directory.
refgenome: results/genomes/refgenome
# Aligned 10x transcriptomic reads.
aligned_fastq10x_dir: results/aligned_fastq10x
# Viral tags / barcodes in 10x transcriptomics.
viral_fastq10x_dir: results/viral_fastq10x
# Calling viral tags / barcodes by cell.
viral_tags_bcs_in_cells_dir: results/viral_tags_bcs_in_cells
# Viral barcodes from progeny.
viral_progeny_dir: results/viral_progeny
# PacBio alignment and parsing output.
align_and_parse_dir: results/pacbio/CA09_align_and_parse
# FTP locations of the cellular genome (FASTA) and its annotation (GTF).
# Both URLs point at Ensembl release 98, Canis familiaris assembly CanFam3.1.
cell_genome_ftp: ftp://ftp.ensembl.org/pub/release-98/fasta/canis_familiaris/dna/Canis_familiaris.CanFam3.1.dna.toplevel.fa.gz
cell_gtf_ftp: ftp://ftp.ensembl.org/pub/release-98/gtf/canis_familiaris/Canis_familiaris.CanFam3.1.98.gtf.gz
# Viral genome and transcript information.
# All of these are input files kept under `data/flu_sequences/`.
viral_genome: data/flu_sequences/flu-CA09.fasta
viral_gtf: data/flu_sequences/flu-CA09.gtf
viral_genbank: data/flu_sequences/flu-CA09.gb
viral_tag_identities: data/flu_sequences/flu-CA09_viral_tags.yaml
# Inputs describing the PacBio amplicons (`pacbio_amplicons/` subdirectory).
viral_amplicons: data/flu_sequences/pacbio_amplicons/fluCA09.gb
amplicon_features: data/flu_sequences/pacbio_amplicons/fluCA09_features.yaml
amplicon_linearization_primers: data/flu_sequences/pacbio_amplicons/linearization_primers.tsv
amplicon_to_reference: data/flu_sequences/pacbio_amplicons/amplicon_to_reference.csv
# STAR alignment parameters for transcriptomics.
# The penalty for non-canonical splice sites is reduced. That is probably
# bad for mapping cellular reads, but it is good for mapping viral reads,
# which will have deletions that do not correspond to splice sites.
scoreGapNoncan: -4
scoreGapGCAG: -4
scoreGapATAC: -4
# URL location of the 10X barcode whitelist.
# **This is for the v3 kit.**
cb_whitelist_10x_url: https://raw.githubusercontent.com/10XGenomics/cellranger/master/lib/python/cellranger/barcodes/3M-february-2018.txt.gz
# Whitelist path inside the results tree (presumably the local copy of the
# file at the URL above; confirm against the workflow rule that creates it).
cb_whitelist_10x: results/aligned_fastq10x/cb_whitelist_10x.txt
# Length of 10X cell barcode.
cb_len_10x: 16
# Length of 10X UMI. **This is for the v3 kit.**
umi_len_10x: 12
# Total UMIs per cell are filtered using this many median absolute
# deviations (MADs).
total_UMI_deviations: 2.5
# False discovery rate used when assigning viral tags. It is based on the
# viral tag counts expected in uninfected cells.
viral_tag_fdr: 0.0001
# Viral barcode parsing parameters for supernatant and second
# infection sequencing.
# Length of the upstream sequence used to check identity.
viral_barcode_upstream_length: 28
# Number of mismatches allowed in the upstream sequence.
viral_barcode_mismatch: 0
# Minimum quality threshold for bases in the viral barcode.
viral_barcode_minq: 30
# Detection limit for supernatant and second infection sequencing.
# The value is written with an explicit decimal point on purpose: YAML 1.1
# loaders (PyYAML among them) only resolve a plain scalar to a float when it
# contains a ".", so a bare `1e-5` would be loaded as the string "1e-5".
progeny_detection_limit: 1.0e-5
# False discovery rate used when calling viral barcodes. It is based on the
# viral tag counts expected in uninfected cells.
# NOTE(review): "viral tag counts" may be a carry-over from the
# `viral_tag_fdr` comment; possibly viral *barcode* counts are meant. Confirm.
viral_bc_fdr: 0.0001
# Parameters for building PacBio CCSs.
# The key names suggest length bounds and a minimum `rq` value for a CCS to
# be kept (presumably `rq` is the read-quality score; confirm against the
# rule that consumes these settings).
ccs_min_length: 50
ccs_max_length: 5000
ccs_min_rq: 0.999
#----------------------------------------------------------------------------
# configuration of experiments
#
# Layout of each entry:
#   experiments:
#     <experiment name>:
#       description, lab_notes, expect_ncells, infection_threshold
#       transcriptomics:
#         <run date>: index, bcl_folder, lane, index_sequencing
#       pacbio_viral_sequencing:      (only present for some experiments)
#         <run name>: path to a PacBio subreads BAM
#       viral_barcodes:
#         <source>:                   supernatant | second_infection
#           <viral tag variant>:      wt | syn
#             <gene>:                 fluHA | fluNA
#               <replicate>:
#                 <run date>: path to an R1 FASTQ
#
# NOTE(review): the run-date keys are plain scalars, so YAML 1.1 loaders
# such as PyYAML resolve them to date objects rather than strings. They are
# left unquoted so the loaded key types stay as they are; confirm what the
# consuming code expects before quoting them.
#----------------------------------------------------------------------------
experiments:

  scProgenyProduction_trial1:
    description: >-
      Single-cell transcriptomics performed on low MOI-infected sample.
      Innoculum volumes based on `hashing_trial2` data. Loaded
      all collected cells from two 24-well wells, each infected with either
      wildtype or dblySn viral tag variants.
    lab_notes: https://benchling.com/s/etr-20lOO7l1FdEJJIpxV6vD
    expect_ncells: 10000
    # Fraction of viral UMIs required to call cell as infected.
    infection_threshold: 0.025
    transcriptomics:
      2020-09-25:
        index: SI-GA-B8
        bcl_folder: /shared/ngs/illumina/bloom_lab/200925_D00300_1065_AHHL7NBCX3/raw/200925_D00300_1065_AHHL7NBCX3/
        lane: 2
        index_sequencing: single
    viral_barcodes:
      supernatant:
        wt:
          fluHA:
            replicate_1:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-WT-Sup-fluHA-A_S1_L001_R1_001.fastq.gz
            replicate_2:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-WT-Sup-fluHA-B_S2_L001_R1_001.fastq.gz
          fluNA:
            replicate_1:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-WT-Sup-fluNA-A_S17_L001_R1_001.fastq.gz
            replicate_2:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-WT-Sup-fluNA-B_S18_L001_R1_001.fastq.gz
        syn:
          fluHA:
            replicate_1:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-dblSyn-Sup-fluHA-A_S3_L001_R1_001.fastq.gz
            replicate_2:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-dblSyn-Sup-fluHA-B_S4_L001_R1_001.fastq.gz
          fluNA:
            replicate_1:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-dblSyn-Sup-fluNA-A_S19_L001_R1_001.fastq.gz
            replicate_2:
              2020-10-13: /shared/ngs/illumina/bloom_lab/201013_M03100_0625_000000000-JB2KP/Data/Intensities/BaseCalls/Trial1-dblSyn-Sup-fluNA-B_S20_L001_R1_001.fastq.gz

  scProgenyProduction_trial3:
    description: >-
      Single-cell transcriptomics performed on even lower MOI-infected
      sample. Innoculum volumes based on `scProgenyProduction_trial1` and
      `scProgenyProduction_trial2` data. Loaded all collected cells from
      two 24-well wells, each infected with either wildtype or dblySn
      viral tag variants.
    lab_notes: https://benchling.com/s/etr-N4laiIAy3AUePJdig7wR
    expect_ncells: 10000
    # Fraction of viral UMIs required to call cell as infected.
    infection_threshold: 0.01
    transcriptomics:
      2021-03-12:
        index: SI-TT-A3
        bcl_folder: /shared/ngs/illumina/bloom_lab/210312_D00300_1192_BHKHNYBCX3/raw/210312_D00300_1192_BHKHNYBCX3/
        lane: 1
        index_sequencing: single
      2021-04-09:
        index: SI-TT-A3
        bcl_folder: /shared/ngs/illumina/bloom_lab/210409_D00300_1210_BHKC7KBCX3/raw/210409_D00300_1210_BHKC7KBCX3/
        lane: '*'
        index_sequencing: single
      2021-08-25:
        index: SI-TT-A3
        bcl_folder: /shared/ngs/illumina/bloom_lab/210825_D00300_1310_BHMH5TBCX3/raw/210825_D00300_1310_BHMH5TBCX3/
        lane: '*'
        # Plain `none` is the string "none", not a YAML null.
        index_sequencing: none
      2021-09-15:
        index: SI-TT-A3
        bcl_folder: /shared/ngs/illumina/bloom_lab/210915_VH00699_2_AAAMCL7M5/raw/210915_VH00699_2_AAAMCL7M5/
        lane: '*'
        index_sequencing: none
    pacbio_viral_sequencing:
      2021-08-05_all_segments_run1: /fh/fast/bloom_j/SR/ngs/pacbio/210730_SequelIIe/r64272e_20210730_182729/1_A01/m64272e_210730_193026.subreads.bam
      2021-09-17_all_segments_run2: /fh/fast/bloom_j/SR/ngs/pacbio/210917_SequelIIe/r64272e_20210917_174421/1_A01/m64272e_210917_175514.subreads.bam
    viral_barcodes:
      supernatant:
        wt:
          fluHA:
            replicate_1:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/WT-sup-fluHA-rep1_S9_L001_R1_001.fastq.gz
            replicate_2:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/WT-sup-fluHA-rep2_S13_L001_R1_001.fastq.gz
          fluNA:
            replicate_1:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/WT-sup-fluNA-rep1_S1_L001_R1_001.fastq.gz
            replicate_2:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/WT-sup-fluNA-rep2_S1_L001_R1_001.fastq.gz
        syn:
          fluHA:
            replicate_1:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/dblSyn-sup-fluHA-rep1_S10_L001_R1_001.fastq.gz
            replicate_2:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/dblSyn-sup-fluHA-rep2_S14_L001_R1_001.fastq.gz
          fluNA:
            replicate_1:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/dblSyn-sup-fluNA-rep1_S2_L001_R1_001.fastq.gz
            replicate_2:
              2021-04-08: /shared/ngs/illumina/bloom_lab/210408_M04866_0464_000000000-JL5M4_new-demux/Data/Intensities/BaseCalls/dblSyn-sup-fluNA-rep2_S6_L001_R1_001.fastq.gz
      second_infection:
        wt:
          fluHA:
            replicate_1:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/WT-SI-fluHA-rep1_S2_L001_R1_001.fastq.gz
            replicate_2:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/WT-SI-fluHA-rep2_S3_L001_R1_001.fastq.gz
          fluNA:
            replicate_1:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/WT-SI-fluNA-rep1_S11_L001_R1_001.fastq.gz
            replicate_2:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/WT-SI-fluNA-rep2_S12_L001_R1_001.fastq.gz
        syn:
          fluHA:
            replicate_1:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/dblSyn-SI-fluHA-rep1_S4_L001_R1_001.fastq.gz
            replicate_2:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/dblSyn-SI-fluHA-rep2_S5_L001_R1_001.fastq.gz
          fluNA:
            replicate_1:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/dblSyn-SI-fluNA-rep1_S13_L001_R1_001.fastq.gz
            replicate_2:
              2021-05-17: /shared/ngs/illumina/bloom_lab/210517_M03100_0681_000000000-JL4TM/Data/Intensities/BaseCalls/dblSyn-SI-fluNA-rep2_S14_L001_R1_001.fastq.gz