-
Notifications
You must be signed in to change notification settings - Fork 7
/
rl_paths.go
38 lines (28 loc) · 1.14 KB
/
rl_paths.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
// Copyright (c) 2020, The Emergent Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
package axon
//gosl:start rl_paths
// RLPredPathParams does dopamine-modulated learning for reward prediction:
// Da * Send.Act. It is used by RWPath and TDPredPath within the corresponding
// RWPredLayer or TDPredLayer to generate reward predictions based on its
// incoming weights, using a linear activation function.
// It has no weight bounds or limits on sign etc.
type RLPredPathParams struct {
// how much to learn on opposite DA sign coding neuron (0..1)
OppSignLRate float32
// tolerance on DA: if the absolute value of DA is below this, then DA goes
// to zero and there is no learning. This prevents the prediction from
// exactly learning to cancel out the reward value, retaining a residual
// valence of signal.
DaTol float32
// pad and pad1 are not referenced in this section; with them the struct
// holds four float32 fields.
// NOTE(review): presumably padding for GPU struct alignment -- confirm.
pad, pad1 float32
}
// Defaults sets OppSignLRate to 1.0.
// DaTol is not assigned here, so it keeps whatever value it already has.
func (pj *RLPredPathParams) Defaults() {
pj.OppSignLRate = 1.0
}
// Update currently does nothing: its body is empty.
// NOTE(review): presumably kept so this type offers the same Defaults / Update
// method pair as other parameter structs -- confirm.
func (pj *RLPredPathParams) Update() {
}
//gosl:end rl_paths
// RLPredDefaults applies the synaptic-weight (SWts) settings used for
// reward-prediction paths: Adapt.SigGain is set to 1, Init.Mean and Init.Var
// are both set to 0, and Init.Sym is set to false.
func (pj *PathParams) RLPredDefaults() {
	sw := &pj.SWts
	sw.Adapt.SigGain = 1

	ini := &sw.Init
	ini.Mean = 0
	ini.Var = 0
	ini.Sym.SetBool(false)
}