-
Notifications
You must be signed in to change notification settings - Fork 1
/
next_env_0.m
70 lines (60 loc) · 1.95 KB
/
next_env_0.m
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
function [env, s, o, r] = next_env_0(env, a)
% NEXT_ENV_0 Advance the environment one time step after taking action a.
%
% Computes the next state, observation, and reward for "track 2" of a
% Pavlovian conditioning task, then writes them back into the env struct.
%
% Inputs:
%   env - environment struct; fields read/written here:
%         .s        current state index
%         .ITI      state index of the inter-trial-interval state
%         .ITI_len  mean ITI length (used for geometric self-transition)
%         .ISI_len  inter-stimulus-interval length (reward at end of ISI)
%         .first    first in-trial state (entered on odor onset)
%         .last     last track state (should be unreachable; see assert)
%         .started  whether the current trial has started
%         .ended    set true when the trial ends
%         .omission whether reward is omitted on this trial
%         .nS, .nO  number of states / observations (for sanity checks)
%   a   - action; must be 1 (Pavlovian: the agent has no real choice)
%
% Outputs:
%   env - updated environment (s, o, r, started/ended flags)
%   s   - next state
%   o   - next observation (1 = blank, 2 = odor)
%   r   - reward (1 on rewarded trials at end of ISI, else 0)

assert(a == 1); % Pavlovian-only: action is forced, so index 1 is always used

if env.s == env.ITI
    if env.started && rand() < 1 - 1.0/env.ITI_len
        % Self-transition with prob 1 - 1/ITI_len so the ITI dwell time is
        % geometric with mean ITI_len (discrete analogue of an exponential
        % ITI). This happens only at the end of a trial: order is ISI->ITI.
        nexts = [env.ITI];
        obs = [1];
        rews = [0];
    elseif ~env.started
        % Trial start: odor comes on and we enter the track.
        env.started = true;
        nexts = [env.first];
        obs = [2]; % odor
        rews = [0];
    else
        % Trial is ending (started was true, and the geometric ITI is done).
        env.ended = true;
        nexts = [env.ITI];
        obs = [1];
        rews = [0];
    end
else
    % Mid trial.
    if env.s+1 == env.ISI_len
        % Next state would be the end of the ISI: deliver reward (unless
        % this is an omission trial) and jump straight to the ITI. Note the
        % state holding the reward is never actually visited -- see
        % next_env_1 for justification; this may change here. (Original
        % comment was truncated at this point -- TODO confirm intent.)
        nexts = [env.ITI];
        obs = [1];
        if env.omission
            rews = [0];
        else
            rews = [1]; % reward
        end
    elseif env.s == env.last
        % End of track: unreachable because the trial always ends at the
        % end of the ISI above.
        assert(false); % this should never happen -- trial always ends
        nexts = [NaN];
        obs = [NaN];
        rews = [NaN];
    else
        % Advance along the track. nexts has a second entry (env.ITI) but
        % obs/rews do not; this is safe only because a == 1 is asserted.
        nexts = [env.s+1 env.ITI];
        obs = [1];
        rews = [0];
    end
end

% Commit the transition chosen by action a.
s = nexts(a);
r = rews(a);
o = obs(a);
env.s = s;
env.o = o;
env.r = r;
assert(env.s >= 1 && env.s <= env.nS);
assert(env.o >= 1 && env.o <= env.nO);