StateSpace_v1.m
function [myState, myMap]=StateSpace_v1(myState,myMap,opt)
% input: myState (with current ".state_history" and ".action_history")
% parameters: myMap.action (transition ".prob"/".connection"), myMap.reward, myMap.reward_prob
% output: myState (with updated ".index", ".state_history" and ".reward_history")
% opt.use_data=0; >> normal (simulated) state transition
% opt.use_data=1; >> state transition read directly from saved behavior data in myMap.data
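%
% Example (a minimal usage sketch; the field layout below is only inferred from
% how this function reads myState and myMap, so treat it as an assumption):
%   opt.use_data=0;                        % simulate the transition
%   myMap.index=1;  myState.index=1;       % clocks start in sync
%   myState.state_history(1)=1;            % current state s
%   myState.action_history(1)=1;           % action a already chosen
%   myState.SARSA=zeros(1,4);              % slots for (s,a,r,s')
%   [myState, myMap]=StateSpace_v1(myState,myMap,opt);
%   s_prime=myState.state_history(myState.index);  % sampled next state
%   r=myState.reward_history(myState.index);       % sampled reward
% (myMap must already contain .IsTerminal, .action(1,a).prob/.connection,
%  .reward and .reward_prob; see the construction sketch after the function.)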
%% read out the current state and the action just taken
current_state=myState.state_history(myState.index);
current_action=myState.action_history(myState.index);
if(opt.use_data==0)
    if(myMap.IsTerminal(current_state)~=1)
        %% clock+1 (keep myState and myMap indices synchronized)
        myMap.index=myMap.index+1; % index advances to 2, then 3, over the trial
        myState.index=myMap.index;
        %% state transition
        prob_mat=myMap.action(1,current_action).prob(current_state,:); % (ex) [... 0 0.3 0 0.7]
        state_mat=myMap.action(1,current_action).connection(current_state,:); % (ex) [... 0 1 0 1]
        [tmp, state_cand]=find(state_mat==1); % indices of candidate next states
        if(rand<=prob_mat(state_cand(1))) % assumes two candidates: take the first with its probability, otherwise the second
            % state transition
            myState.state_history(myState.index)=state_cand(1);
            % reward: delivered probabilistically on arrival
            if(rand<=myMap.reward_prob(state_cand(1)))
                myState.reward_history(myState.index)=myMap.reward(state_cand(1));
            else
                myState.reward_history(myState.index)=0;
            end
        else
            % state transition
            myState.state_history(myState.index)=state_cand(2);
            % reward: delivered probabilistically on arrival
            if(rand<=myMap.reward_prob(state_cand(2)))
                myState.reward_history(myState.index)=myMap.reward(state_cand(2));
            else
                myState.reward_history(myState.index)=0;
            end
        end
        %% fill in the (r,s') slots of the SARSA tuple
        myState.SARSA(3:4)=[myState.reward_history(myState.index) myState.state_history(myState.index)];
        %% if the new state is terminal, flag the trial as complete
        if(myMap.IsTerminal(myState.state_history(myState.index))==1)
            myState.JobComplete=1;
            myMap.JobComplete=1;
        end
    end
end
if(opt.use_data==1) % replay the state transition from saved behavior data
    if(myMap.IsTerminal(current_state)~=1)
        %% clock+1 (keep myState and myMap indices synchronized)
        myMap.index=myMap.index+1; % index advances to 2, then 3, over the trial
        myState.index=myMap.index;
        %% state transition: next state is read from the saved trial data
        state_selected=myMap.data(myMap.trial,3+myMap.index);
        myState.state_history(myState.index)=state_selected;
        % reward: read from column 16 of the saved trial data
        myState.reward_history(myState.index)=myMap.data(myMap.trial,16);
        %% fill in the (r,s') slots of the SARSA tuple
        myState.SARSA(3:4)=[myState.reward_history(myState.index) myState.state_history(myState.index)];
        %% if the new state is terminal, flag the trial as complete
        if(myMap.IsTerminal(myState.state_history(myState.index))==1)
            myState.JobComplete=1;
            myMap.JobComplete=1;
        end
    end
end
end
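
% Minimal map-construction sketch (an illustration under assumptions, not part of
% the original project): a tiny task with states 1..3, one action, state 3 terminal.
% The field names mirror what StateSpace_v1 reads above; sizes and values are made up.
%   nS=3;
%   myMap.index=1;
%   myMap.IsTerminal=[0 0 1];                    % state 3 ends the trial
%   myMap.action(1,1).connection=zeros(nS);      % reachable next states per (s,a)
%   myMap.action(1,1).connection(1,[2 3])=1;     % from state 1, action 1 -> {2,3}
%   myMap.action(1,1).prob=zeros(nS);
%   myMap.action(1,1).prob(1,[2 3])=[0.3 0.7];   % transition probabilities
%   myMap.reward_prob=[0 0 0.5];                 % chance of payoff on arrival
%   myMap.reward=[0 0 10];                       % payoff magnitude per state
%   myMap.JobComplete=0;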