@@ -54,7 +54,7 @@ Tables.columnnames(m::ClassificationResult) = names(m)
54
54
55
55
56
56
# ### Underlying algorithm
57
- function naieve_bayes (seqs:: Vector ,refs:: Vector ,k, n_bootstrap,lp= false )
57
+ function naieve_bayes (seqs:: Vector ,refs:: Vector ,k, n_bootstrap,lp,genera )
58
58
t = time ()
59
59
N = length (refs)
60
60
n = length (seqs)
@@ -76,14 +76,14 @@ function naieve_bayes(seqs::Vector,refs::Vector,k, n_bootstrap,lp=false)
76
76
assignment = assign (kmer_array,log_probs)
77
77
assignments[i] = assignment
78
78
sample_size = sum (kmer_array) ÷ k
79
- confs[i] = bootstrap (vec (kmer_array),log_probs,assignment,sample_size,n_bootstrap)
79
+ confs[i] = bootstrap (vec (kmer_array),log_probs,genera[ assignment] ,sample_size,n_bootstrap,genera )
80
80
end
81
81
return assignments, confs, log_probs
82
82
end
83
83
84
84
85
85
"""
    naieve_bayes(seqs::Vector, refs::Vector, taxa::Array, k, n_bootstrap, lp=false)

Classify `seqs` against `refs` and attach the matching rows of `taxa` to the result.

The last column of `taxa` (`taxa[:, end]`) is forwarded to the index-based method, which
passes it on to `bootstrap` as the labels that resampled assignments are compared on.

# Returns
A tuple `(table, log_probs)`: `table` is `taxa[assigned, :]` with the confidences
appended as an extra column via `hcat`; `log_probs` is returned unchanged from the
index-based method.
"""
function naieve_bayes(seqs::Vector, refs::Vector, taxa::Array, k, n_bootstrap, lp=false)
    genera = taxa[:, end]
    assigned, confidences, log_probs = naieve_bayes(seqs, refs, k, n_bootstrap, lp, genera)
    return hcat(taxa[assigned, :], confidences), log_probs
end
@@ -93,12 +93,12 @@ function assign(seq_mask,log_probs)
93
93
return findmax (cond_probs)[2 ]
94
94
end
95
95
96
- function bootstrap (kmer_vec,log_probs,assignment, sample_size,n_bootstrap)
96
+ function bootstrap (kmer_vec,log_probs,assignment, sample_size,n_bootstrap,genera )
97
97
hits = 0
98
98
seq_inds = eachindex (kmer_vec)[kmer_vec]
99
99
for i in 1 : n_bootstrap
100
100
inds = rand (seq_inds,sample_size)
101
- if assign (inds,log_probs) == assignment
101
+ if genera[ assign (inds,log_probs)] == assignment
102
102
hits += 1
103
103
end
104
104
end
@@ -160,22 +160,22 @@ end
160
160
161
161
# ## Alternatives for working without fastas
162
162
"""
    assign_taxonomy(seqs::Vector, ids, refs, taxa; k=8, n_bootstrap=100, keep_lp=false, lp=false)

Classify each sequence in `seqs` against `refs`/`taxa` and wrap the assignments, together
with `ids` and `seqs`, in a `classification_result`.

# Keywords
- `k`, `n_bootstrap`, `lp`: forwarded positionally to `naieve_bayes`
  (NOTE(review): the meaning of `lp` is not visible here; confirm against `naieve_bayes`).
- `keep_lp`: when `true`, return `(result, log_probs)` instead of just `result`.
"""
function assign_taxonomy(
    seqs::Vector, ids, refs, taxa; k=8, n_bootstrap=100, keep_lp=false, lp=false
)
    # The taxonomy-aware `naieve_bayes` method takes the label column from `taxa` itself,
    # so `taxa[:, end]` must not be passed as an extra positional argument here.
    assignments, log_probs = naieve_bayes(seqs, refs, taxa, k, n_bootstrap, lp)
    res = classification_result(ids, seqs, assignments)
    return keep_lp ? (res, log_probs) : res
end
167
167
"""
    assign_taxonomy(seqs::Vector, refs::Vector, taxa::Array; k=8, n_bootstrap=100, keep_lp=false, lp=false)

Classify each sequence in `seqs` against `refs`/`taxa` when no identifiers are supplied;
every record in the resulting `classification_result` gets the same placeholder id string.

# Keywords
- `k`, `n_bootstrap`, `lp`: forwarded positionally to `naieve_bayes`
  (NOTE(review): the meaning of `lp` is not visible here; confirm against `naieve_bayes`).
- `keep_lp`: when `true`, return `(result, log_probs)` instead of just `result`.
"""
function assign_taxonomy(
    seqs::Vector, refs::Vector, taxa::Array; k=8, n_bootstrap=100, keep_lp=false, lp=false
)
    # The taxonomy-aware `naieve_bayes` method takes the label column from `taxa` itself,
    # so `taxa[:, end]` must not be passed as an extra positional argument here.
    assignments, log_probs = naieve_bayes(seqs, refs, taxa, k, n_bootstrap, lp)
    placeholder_ids = fill(" ", length(seqs))
    res = classification_result(placeholder_ids, seqs, assignments)
    return keep_lp ? (res, log_probs) : res
end
172
172
"""
    assign_taxonomy(seq, id, refs, taxa; k=8, n_bootstrap=100, keep_lp=false, lp=false)

Classify a single sequence `seq` with identifier `id` against `refs`/`taxa`.

`seq` is wrapped in a one-element vector for `naieve_bayes`; `id` and `seq` are passed to
`classification_result` unwrapped.

# Keywords
- `k`, `n_bootstrap`, `lp`: forwarded positionally to `naieve_bayes`
  (NOTE(review): the meaning of `lp` is not visible here; confirm against `naieve_bayes`).
- `keep_lp`: when `true`, return `(result, log_probs)` instead of just `result`.
"""
function assign_taxonomy(seq, id, refs, taxa; k=8, n_bootstrap=100, keep_lp=false, lp=false)
    # The taxonomy-aware `naieve_bayes` method takes the label column from `taxa` itself,
    # so `taxa[:, end]` must not be passed as an extra positional argument here.
    assignments, log_probs = naieve_bayes([seq], refs, taxa, k, n_bootstrap, lp)
    res = classification_result(id, seq, assignments)
    return keep_lp ? (res, log_probs) : res
end
177
177
"""
    assign_taxonomy(seq, refs, taxa; k=8, n_bootstrap=100, keep_lp=false, lp=false)

Classify a single sequence `seq` against `refs`/`taxa` when no identifier is supplied; a
placeholder id string is passed to `classification_result` instead.

`seq` is wrapped in a one-element vector for `naieve_bayes`.

# Keywords
- `k`, `n_bootstrap`, `lp`: forwarded positionally to `naieve_bayes`
  (NOTE(review): the meaning of `lp` is not visible here; confirm against `naieve_bayes`).
- `keep_lp`: when `true`, return `(result, log_probs)` instead of just `result`.
"""
function assign_taxonomy(seq, refs, taxa; k=8, n_bootstrap=100, keep_lp=false, lp=false)
    # The taxonomy-aware `naieve_bayes` method takes the label column from `taxa` itself,
    # so `taxa[:, end]` must not be passed as an extra positional argument here.
    assignments, log_probs = naieve_bayes([seq], refs, taxa, k, n_bootstrap, lp)
    res = classification_result(" ", seq, assignments)
    return keep_lp ? (res, log_probs) : res
end
0 commit comments