# hiddenMarkovModel.jl
# Hidden Markov model: index maps for observations and states, the initial
# distribution pi_, the transition matrix A, the emission matrix B, and the
# observed data (one row per time step).
mutable struct Hmm
    e_to_i::Dict{String, Int}   # observation symbol -> index
    i_to_e::Dict{Int, String}   # index -> observation symbol
    s_to_i::Dict{String, Int}   # state name -> index
    i_to_s::Dict{Int, String}   # index -> state name
    pi_::Vector                 # initial state probabilities
    A::Matrix                   # state transition probabilities
    B::Matrix                   # emission (observation) probabilities
    data::Matrix                # observations, one row per time step
end
# Normalize a vector so that its entries sum to one.
function normalize(x)
    return x ./ sum(x)
end
# Forward-backward algorithm for scoring and conditional probability
# (smoothing).
# Input:  the HMM (state and observation maps, and probabilities), whose data
#         field holds a list of T observations E(0), E(1), ..., E(T-1).
#
# Output: the posterior probability distribution over each state given all of
#         the observations, P(X(k) | E(0), ..., E(T-1)) for 0 <= k <= T-1,
#         returned as an n_state x T matrix (one column per time step).
function scoring(model::Hmm)
    n_sample = size(model.data, 1)
    n_state = size(model.A, 1)
    # forward pass: alpha[:, t] = P(E(0), ..., E(t), X(t))
    alpha = zeros(n_state, n_sample)
    alpha[:, 1] = vec(model.pi_) .* model.B[:, model.e_to_i[model.data[1, 2]]]
    for i = 2:n_sample
        alpha[:, i] = vec(alpha[:, i-1]' * model.A) .* model.B[:, model.e_to_i[model.data[i, 2]]]
    end
    score_alpha = sum(alpha[:, end])
    # backward pass: beta[:, t] = P(E(t+1), ..., E(T-1) | X(t))
    beta = zeros(n_state, n_sample)
    beta[:, n_sample] = ones(n_state)
    for i = (n_sample-1):-1:1
        beta[:, i] = model.A * (beta[:, i+1] .* model.B[:, model.e_to_i[model.data[i+1, 2]]])
    end
    score_beta = sum(beta[:, 1] .* model.pi_ .* model.B[:, model.e_to_i[model.data[1, 2]]])
    # both scores estimate P(E(0), ..., E(T-1)) and should agree
    phi = alpha .* beta
    for i = 1:size(phi, 2)
        phi[:, i] = normalize(phi[:, i])
    end
    return phi
end
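# Hedged helper sketch (not in the original file): pick, for every time step,
# the state with the highest smoothed posterior returned by scoring(). The
# name smoothed_states is an illustrative choice; it only relies on the Hmm
# fields defined above.
function smoothed_states(model::Hmm)
    phi = scoring(model)
    return [model.i_to_s[argmax(phi[:, t])] for t = 1:size(phi, 2)]
end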
# Viterbi algorithm: most likely state sequence given the observations.
function matching(model::Hmm)
    n_sample = size(model.data, 1)
    n_state = size(model.A, 1)
    # phi[:, t] holds the (normalized) probability of the best path ending in
    # each state at time t; eta[:, t] stores the best predecessor for backtracking
    phi = zeros(n_state, n_sample)
    phi[:, 1] = normalize(model.pi_ .* model.B[:, model.e_to_i[model.data[1, 2]]])
    eta = zeros(n_state, n_sample)
    for i = 2:n_sample
        for j = 1:n_state
            temp = phi[:, i-1] .* model.A[:, j]
            eta[j, i] = argmax(temp)
            phi[j, i] = maximum(temp)
        end
        phi[:, i] = phi[:, i] .* model.B[:, model.e_to_i[model.data[i, 2]]]
        phi[:, i] = normalize(phi[:, i])
    end
    # backtrack from the most likely final state
    state_optim = zeros(n_sample)
    state_optim[n_sample] = argmax(phi[:, n_sample])
    for i = (n_sample-1):-1:1
        state_optim[i] = eta[convert(Int, state_optim[i+1]), i+1]
    end
    state_optim = map(x -> model.i_to_s[convert(Int, x)], state_optim)
    return state_optim
end
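# Hedged usage sketch (not in the original file): assuming column 1 of
# model.data holds the ground-truth state names (column 2 holds the
# observations used above, so this is an assumption about the data layout),
# the Viterbi path can be compared against it like this.
function matching_accuracy(model::Hmm)
    path = matching(model)
    truth = model.data[:, 1]
    return sum(path .== truth) / length(truth)
end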
# Learning: estimate the parameters pi_, A and B from the observations.
# Not implemented yet; a hedged Baum-Welch sketch follows below.
function learning(model::Hmm)
end
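# Hedged sketch of what learning() could do (not part of the original file):
# Baum-Welch / EM re-estimation of pi_, A and B from the observations in
# model.data, mirroring the field layout used above (observations in column 2).
# Per-step scaling keeps long sequences from underflowing. The function name
# baum_welch! and the n_iter keyword are illustrative choices.
function baum_welch!(model::Hmm; n_iter::Int = 10)
    n_sample = size(model.data, 1)
    n_state = size(model.A, 1)
    n_obs = size(model.B, 2)
    obs = [model.e_to_i[model.data[t, 2]] for t = 1:n_sample]
    for _ = 1:n_iter
        # E-step: scaled forward pass
        alpha = zeros(n_state, n_sample)
        c = zeros(n_sample)
        alpha[:, 1] = vec(model.pi_) .* model.B[:, obs[1]]
        c[1] = sum(alpha[:, 1])
        alpha[:, 1] /= c[1]
        for t = 2:n_sample
            alpha[:, t] = vec(alpha[:, t-1]' * model.A) .* model.B[:, obs[t]]
            c[t] = sum(alpha[:, t])
            alpha[:, t] /= c[t]
        end
        # E-step: scaled backward pass
        beta = zeros(n_state, n_sample)
        beta[:, n_sample] = ones(n_state)
        for t = (n_sample-1):-1:1
            beta[:, t] = model.A * (beta[:, t+1] .* model.B[:, obs[t+1]]) / c[t+1]
        end
        # state posteriors gamma and pairwise transition posteriors xi
        gamma = alpha .* beta
        xi = zeros(n_state, n_state, n_sample - 1)
        for t = 1:(n_sample-1)
            m = (alpha[:, t] * (beta[:, t+1] .* model.B[:, obs[t+1]])') .* model.A
            xi[:, :, t] = m / sum(m)
        end
        # M-step: re-estimate the parameters
        model.pi_ = gamma[:, 1]
        A_new = dropdims(sum(xi, dims = 3), dims = 3)
        model.A = A_new ./ sum(A_new, dims = 2)
        B_new = zeros(n_state, n_obs)
        for k = 1:n_obs
            B_new[:, k] = vec(sum(gamma[:, obs .== k], dims = 2))
        end
        model.B = B_new ./ sum(B_new, dims = 2)
    end
    return model
end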
# Read a data file whose first line gives the number of records and whose
# remaining lines are comma-separated records; returns a matrix with one
# record per row.
function load_data(filename)
    num_ = readline(filename)
    num = parse(Int64, num_)          # declared number of records (header line)
    data = []
    open(filename, "r") do f
        for (count, line) in enumerate(readlines(f))
            count == 1 && continue    # skip the header line
            str = split(chomp(line), ",")
            push!(data, permutedims(str))
        end
    end
    return reduce(vcat, data)
end
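# Hedged example of the file layout load_data() appears to expect, inferred
# from the parser above and from test_HMM() below (an assumption, not taken
# from the data file itself): a first line with the number of records, then
# one comma-separated record per line, e.g.
#
#   1000
#   sunny,no
#   rainy,yes
#
# where the second field is the observation looked up via e_to_i.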
# Generic test harness; it relies on helpers (make_cla, train!, predict,
# accuracy) and a no-argument Hmm constructor that are not defined in this
# file.
function test_Hmm()
    X_train, X_test, y_train, y_test = make_cla()
    model = Hmm()
    train!(model, X_train, y_train)
    predictions = predict(model, X_test)
    println("classification accuracy: ", accuracy(y_test, predictions))
end
####################################
## there is some problem to be fixed
####################################
function test_HMM()
    # state map
    weatherStateMap = Dict([("sunny", 1), ("rainy", 2), ("foggy", 3)])
    weatherStateIndex = Dict([(1, "sunny"), (2, "rainy"), (3, "foggy")])
    # observation map
    weatherObsMap = Dict([("no", 1), ("yes", 2)])
    weatherObsIndex = Dict([(1, "no"), (2, "yes")])
    # prior probabilities
    weatherprob = [0.5, 0.25, 0.25]
    # transition probabilities
    weather_trans = [0.8 0.05 0.15;
                     0.2 0.6 0.2;
                     0.2 0.3 0.5]
    # observation (emission) probabilities
    weather_obs = [0.9 0.1; 0.2 0.8; 0.7 0.3]
    data = load_data("data/weather-test1-1000.txt")
    model = Hmm(weatherObsMap, weatherObsIndex, weatherStateMap,
                weatherStateIndex, weatherprob,
                weather_trans, weather_obs, data)
    phi = scoring(model)
    matching_state = matching(model)
end