-
Notifications
You must be signed in to change notification settings - Fork 0
/
2_runChangeO.sh
117 lines (103 loc) · 4.55 KB
/
2_runChangeO.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
#!/usr/bin/env bash
#
# 2_runChangeO.sh
#
# Run a set of Change-O scripts (plus light_cluster.py) on the files generated
# by 1_load_QC_demultiplex_clean.Rmd. The outputs end up in the same directory
# as the target files and are used as input for 3_analyze_BCRs.Rmd.
#
# Every path below is only a default. If the locations of the data files, the
# python executables, igblast, or the IMGT reference files differ on your
# machine, either edit the default here or set a variable of the same name in
# the environment when calling the script, e.g. for the AggregateMice versions:
#
#   dirFastaFiles=../data_saved/filteredContigFastaAggregateMiceFiltered \
#   dirContigAnnotationsFiles=../data_saved/filteredContigAnnotationsAggregateMiceFiltered \
#     bash 2_runChangeO.sh
#
# NOTE: the ${name:-default} expansions are deliberately left unquoted so that
# a leading ~ in a default is still tilde-expanded (assignments are never
# word-split, so paths containing spaces remain intact).

# set paths to files; defaults are the versions split by mouse
dirFastaFiles=${dirFastaFiles:-../data_saved/filteredContigFastaFilteredSplit}
dirContigAnnotationsFiles=${dirContigAnnotationsFiles:-../data_saved/filteredContigAnnotationsFilteredSplit}
# dirFastaFiles="../data_saved/filteredContigFastaAggregateMiceFiltered" # for AggregateMice versions
# dirContigAnnotationsFiles="../data_saved/filteredContigAnnotationsAggregateMiceFiltered" # for AggregateMice versions

# set paths to python executables; dirPythonBin is the directory shared by the
# Change-O scripts, each individual path can still be overridden on its own
dirPythonBin=${dirPythonBin:-~/Library/Python/3.8/bin}
pathAssignGenes=${pathAssignGenes:-${dirPythonBin}/AssignGenes.py}
pathMakeDb=${pathMakeDb:-${dirPythonBin}/MakeDb.py}
pathParseDb=${pathParseDb:-${dirPythonBin}/ParseDb.py}
pathDefineClones=${pathDefineClones:-${dirPythonBin}/DefineClones.py}
pathLightCluster=${pathLightCluster:-/usr/local/bin/light_cluster.py}
pathCreateGermlines=${pathCreateGermlines:-${dirPythonBin}/CreateGermlines.py}

# set path to IMGT reference files to use
dirRefImgt=${dirRefImgt:-~/Library/CloudStorage/Box-Box/Tools/IMGT_germlines/mouse/vdj}

# set path to igblast
pathIgBlast=${pathIgBlast:-~/Library/CloudStorage/Box-Box/Tools/igblast}
# run AssignGenes.py on each .fasta file
#######################################
# Run `AssignGenes.py igblast` once for every file in dirFastaFiles whose name
# ends in .fasta.
# The files are found with a quoted glob instead of parsing `ls | grep`, so
# paths containing whitespace are passed through intact and names that merely
# contain ".fasta" (e.g. x.fasta.bak) are no longer picked up.
# Globals:   dirFastaFiles, pathAssignGenes, pathIgBlast (read)
# Arguments: none
# Notes:     a failing AssignGenes.py run does not stop the remaining files
#######################################
runAssignGenes() {
  local currFastaPath
  for currFastaPath in "${dirFastaFiles}"/*.fasta; do
    # an unmatched glob is left as the literal pattern; skip it
    [[ -f "${currFastaPath}" ]] || continue
    python3 "${pathAssignGenes}" igblast \
      -s "${currFastaPath}" \
      -b "${pathIgBlast}" \
      --organism mouse --loci ig --format blast
  done
}
runAssignGenes
# run MakeDb.py on each pair of _igblast.fmt7 and contigAnnotations files
#######################################
# Run `MakeDb.py igblast` once for every *.fasta file in dirFastaFiles.
# For <stem>.fasta the IgBlast output is taken to be <stem>_igblast.fmt7 in the
# same directory, and the 10x annotations file is <stem>.csv in
# dirContigAnnotationsFiles with the first "ContigFasta" in the name replaced
# by "ContigAnnotations".
# Files are found with a quoted glob and the names are derived with parameter
# expansion, so paths containing whitespace are handled and only a trailing
# ".fasta" is treated as the extension.
# Globals:   dirFastaFiles, dirContigAnnotationsFiles, dirRefImgt,
#            pathMakeDb (read)
# Arguments: none
# Notes:     a failing MakeDb.py run does not stop the remaining files
#######################################
runMakeDb() {
  local currFastaPath currStem currIgBlastFile currContigAnnotationsFile
  # IMGT germline references handed to -r, heavy chain first, then kappa, lambda
  local -a refFiles=(
    "${dirRefImgt}/imgt_mouse_IGHV.fasta"
    "${dirRefImgt}/imgt_mouse_IGHD.fasta"
    "${dirRefImgt}/imgt_mouse_IGHJ.fasta"
    "${dirRefImgt}/imgt_mouse_IGKV.fasta"
    "${dirRefImgt}/imgt_mouse_IGKJ.fasta"
    "${dirRefImgt}/imgt_mouse_IGLV.fasta"
    "${dirRefImgt}/imgt_mouse_IGLJ.fasta"
  )
  for currFastaPath in "${dirFastaFiles}"/*.fasta; do
    # an unmatched glob is left as the literal pattern; skip it
    [[ -f "${currFastaPath}" ]] || continue
    # file name without directory and without the .fasta extension
    currStem=${currFastaPath##*/}
    currStem=${currStem%.fasta}
    # define IgBlast output file and matching contig annotations file
    currIgBlastFile="${currStem}_igblast.fmt7"
    currContigAnnotationsFile="${currStem/ContigFasta/ContigAnnotations}.csv"
    python3 "${pathMakeDb}" igblast \
      -i "${dirFastaFiles}/${currIgBlastFile}" \
      -s "${currFastaPath}" \
      -r "${refFiles[@]}" \
      --10x "${dirContigAnnotationsFiles}/${currContigAnnotationsFile}" \
      --extended
  done
}
runMakeDb
# run ParseDb.py on each _igblast_db-pass.tsv file
#######################################
# Split every <stem>_igblast_db-pass.tsv file in dirFastaFiles into a heavy
# chain file (locus matching "IGH") and a light chain file (locus matching
# "IG[KL]") with `ParseDb.py select`, written next to the input as
# <stem>.heavy_igblast_db-pass.tsv and <stem>.light_igblast_db-pass.tsv.
# Files whose stem already ends in .heavy or .light are outputs of an earlier
# run of this step; they also match the input pattern, so they are skipped to
# keep a rerun from producing <stem>.heavy.heavy_... files.
# Globals:   dirFastaFiles, pathParseDb (read)
# Arguments: none
# Notes:     a failing ParseDb.py run does not stop the remaining files
#######################################
runParseDb() {
  local dbSuffix="_igblast_db-pass.tsv"
  local currDbPath currStem
  for currDbPath in "${dirFastaFiles}"/*"${dbSuffix}"; do
    # an unmatched glob is left as the literal pattern; skip it
    [[ -f "${currDbPath}" ]] || continue
    # full path without the _igblast_db-pass.tsv suffix
    currStem=${currDbPath%"${dbSuffix}"}
    case "${currStem}" in
      *.heavy | *.light) continue ;; # already split by a previous run
    esac
    # parse heavy chain files
    python3 "${pathParseDb}" select \
      -d "${currDbPath}" \
      -f locus -u "IGH" \
      --logic all --regex \
      -o "${currStem}.heavy${dbSuffix}"
    # parse light chain files
    python3 "${pathParseDb}" select \
      -d "${currDbPath}" \
      -f locus -u "IG[KL]" \
      --logic all --regex \
      -o "${currStem}.light${dbSuffix}"
  done
}
runParseDb
# run DefineClones.py on heavy chain files
#######################################
# Run DefineClones.py once, from inside dirFastaFiles, on all
# *.heavy_igblast_db-pass.tsv files found there (passed as bare file names).
# The directory change happens in a subshell, so the caller's working
# directory is never altered and there is nothing to restore afterwards.
# Nothing is run when the directory is missing or holds no heavy chain files;
# previously a failed cd let DefineClones.py run against whatever directory
# the script happened to be in.
# Globals:   dirFastaFiles, pathDefineClones (read)
# Arguments: none
# Returns:   1 if the directory or the input files are missing, otherwise the
#            exit status of DefineClones.py
#######################################
runDefineClones() {
  if [[ ! -d "${dirFastaFiles}" ]]; then
    printf 'DefineClones: directory not found: %s\n' "${dirFastaFiles}" >&2
    return 1
  fi
  (
    cd "${dirFastaFiles}" || exit 1
    # with nullglob an unmatched pattern yields an empty array
    shopt -s nullglob
    dbFiles=(*.heavy_igblast_db-pass.tsv)
    if ((${#dbFiles[@]} == 0)); then
      printf 'DefineClones: no heavy chain db-pass files in %s\n' "${dirFastaFiles}" >&2
      exit 1
    fi
    python3 "${pathDefineClones}" \
      -d "${dbFiles[@]}" --act set --model ham --norm len --dist 0.045
  )
}
runDefineClones
# run light_cluster.py on each heavy chain file
#######################################
# Run light_cluster.py once for every
# <prefix>heavy_igblast_db-pass_clone-pass.tsv file in dirFastaFiles, pairing it
# with <prefix>light_igblast_db-pass.tsv and writing
# <prefix>igblast_final-clone-pass.tsv, all in the same directory.
# The glob matches exactly the suffix that is stripped to build the other two
# names, so the output path can never end up identical to the input path
# (the former grep filter was looser than the sed substitution that followed).
# Globals:   dirFastaFiles, pathLightCluster (read)
# Arguments: none
# Notes:     a failing light_cluster.py run does not stop the remaining files
#######################################
runLightCluster() {
  local cloneSuffix="heavy_igblast_db-pass_clone-pass.tsv"
  local currClonePath currPrefix
  for currClonePath in "${dirFastaFiles}"/*"${cloneSuffix}"; do
    # an unmatched glob is left as the literal pattern; skip it
    [[ -f "${currClonePath}" ]] || continue
    # everything up to (not including) "heavy_igblast_db-pass_clone-pass.tsv"
    currPrefix=${currClonePath%"${cloneSuffix}"}
    python3 "${pathLightCluster}" \
      -d "${currClonePath}" \
      -e "${currPrefix}light_igblast_db-pass.tsv" \
      -o "${currPrefix}igblast_final-clone-pass.tsv"
  done
}
runLightCluster
# run CreateGermlines on heavy chain files: full germline, then D region masked
#######################################
# Run CreateGermlines.py twice, from inside dirFastaFiles, on all
# *igblast_final-clone-pass.tsv files found there (passed as bare file names):
#   1. -g full  --cloned, output directory germline_full
#   2. -g dmask --cloned, output directory germline_d_mask
# Both output directories are given as relative names, i.e. relative to
# dirFastaFiles. The directory change happens in a subshell, so the caller's
# working directory is left alone. Nothing is run when the directory is
# missing or holds no clone files.
# NOTE: the reference files are read after the cd, so a relative dirRefImgt
# would be resolved against dirFastaFiles.
# Globals:   dirFastaFiles, dirRefImgt, pathCreateGermlines (read)
# Arguments: none
# Returns:   1 if the directory or the input files are missing, otherwise the
#            exit status of the second (dmask) CreateGermlines.py run
#######################################
runCreateGermlines() {
  if [[ ! -d "${dirFastaFiles}" ]]; then
    printf 'CreateGermlines: directory not found: %s\n' "${dirFastaFiles}" >&2
    return 1
  fi
  (
    cd "${dirFastaFiles}" || exit 1
    # with nullglob an unmatched pattern yields an empty array
    shopt -s nullglob
    cloneFiles=(*igblast_final-clone-pass.tsv)
    if ((${#cloneFiles[@]} == 0)); then
      printf 'CreateGermlines: no final clone-pass files in %s\n' "${dirFastaFiles}" >&2
      exit 1
    fi
    # heavy chain IMGT references shared by both runs
    refFiles=(
      "${dirRefImgt}/imgt_mouse_IGHV.fasta"
      "${dirRefImgt}/imgt_mouse_IGHD.fasta"
      "${dirRefImgt}/imgt_mouse_IGHJ.fasta"
    )
    # full germline
    python3 "${pathCreateGermlines}" \
      -d "${cloneFiles[@]}" \
      -g full --cloned \
      --outdir germline_full \
      -r "${refFiles[@]}"
    # germline with D region masked
    python3 "${pathCreateGermlines}" \
      -d "${cloneFiles[@]}" \
      -g dmask --cloned \
      --outdir germline_d_mask \
      -r "${refFiles[@]}"
  )
}
runCreateGermlines