// NN.h
#pragma once

#include <stdio.h>

#include "ILayer.h"
#include "costs.cuh"
#include "functionality.h"
#include "DenseConnections.h"
#include "NeatConnections.h"
#include "NeuronLayer.h"
#include "LSTMLayer.h"
#include "kernel_macros.h"
#include "NN_enums.h"
#include "GAE.cuh"
#include "neuron_operations.cuh"
#include "evolution_info.h"

class NN
{
private:
ILayer **layers = 0;
size_t layer_count = 0;
size_t neuron_count = 0;
size_t input_length = 0;
size_t output_length = 0;
size_t* output_activations_start = 0;
size_t execution_value_count = 0;
size_t derivative_count = 0;
short contains_recurrent_layers = 0;
size_t gradient_count = 0;
// State derivatives now default to 1 (variable), so there is no need to save derivatives
//data_t* activations_since_memory_deletion = 0;
//data_t* execution_values_since_memory_deletion = 0;
//data_t* derivatives_since_memory_deletion = 0;
//size_t since_memory_deletion_t_count = 0;
protected:
void set_fields();
public:
NN();
evolution_metadata evolution_values;
bool stateful = false;
size_t get_input_length();
size_t get_output_length();
~NN();
NN(ILayer** layers, size_t input_length, size_t layer_count);
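// Forward execution: single-timestep and batched inference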
void execute(data_t* input, data_t* execution_values, data_t *activations, size_t t, data_t* output_start_pointer, short copy_output_to_host);
void set_up_execution_arrays(data_t** execution_values, data_t** activations, size_t t_count);
data_t* batch_execute(data_t* input, size_t t_count);
data_t* inference_execute(data_t* input);
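// Training: cost evaluation, backpropagation and gradient application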
data_t adjust_learning_rate(
data_t learning_rate,
data_t cost,
LearningRateAdjusters adjuster,
data_t max_learning_rate,
data_t previous_cost = 0
);
data_t calculate_output_costs(
CostFunctions cost_function,
size_t t_count,
data_t* Y_hat,
data_t* activations, size_t activations_start,
data_t* costs, size_t costs_start
);
void training_execute(
size_t t_count,
data_t* X,
data_t** Y,
bool copy_Y_to_host,
data_t** execution_values,
data_t** activations,
size_t old_arrays_t_length = 0
);
data_t train(
size_t t_count,
data_t* execution_values,
data_t* activations,
data_t* Y_hat,
bool is_Y_hat_on_host_memory,
size_t Y_hat_value_count,
CostFunctions cost_function,
data_t learning_rate,
data_t gradient_clip,
float dropout_rate
);
data_t training_batch(
size_t t_count,
data_t* X,
data_t* Y_hat,
bool is_Y_hat_on_host_memory,
size_t Y_hat_value_count,
CostFunctions cost_function,
data_t learning_rate,
data_t** Y,
bool copy_Y_to_host,
data_t gradient_clip,
float dropout_rate = .2
);
/// <param name="gradients">- pointer to cero and to a valid array are valid</param>
void backpropagate(
size_t t_count,
data_t* costs,
data_t* activations,
data_t* execution_values,
data_t** gradients
);
void calculate_derivatives(
data_t* activations, size_t activations_start,
data_t* derivatives, size_t previous_derivatives_start, size_t derivatives_start,
data_t* execution_values, size_t execution_values_start
);
void calculate_gradients(
data_t* activations, size_t activations_start,
data_t* execution_values, size_t execution_values_start,
data_t* costs, size_t costs_start,
data_t* gradients, size_t gradients_start, size_t next_gradients_start,
data_t* derivatives, size_t derivatives_start, size_t previous_derivatives_start
);
data_t *calculate_GAE_advantage(
size_t t_count,
data_t gamma, data_t lambda,
NN *value_function_estimator, data_t *value_function_state,
data_t *rewards
);
void subtract_gradients(
data_t* gradients, size_t gradients_start, data_t learning_rate, float dropout_rate, data_t gradient_clip
);
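// Topology evolution (adding/removing neurons and layers)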
void evolve();
void add_layer(size_t insert_i, ILayer* layer);
void add_output_neuron();
void add_input_neuron();
void add_neuron(size_t layer_i);
/// <param name="neuron_i">in respect to the whole network</param>
void adjust_to_added_neuron(int layer_i, size_t neuron_i);
void remove_neuron(size_t layer_i);
void remove_neuron(size_t layer_i, size_t layer_neuron_i);
void delete_memory();
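// Cloning, persistence and utilities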
NN* clone();
void save(const char *pathname);
void save(FILE* file);
static NN* load(const char *pathname, bool load_state = true);
static NN* load(FILE* file);
void deallocate();
void print_shape();
};
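
// ---------------------------------------------------------------------------
// Usage sketch (not part of the original header). It illustrates the intended
// call order of the public API declared above, using only signatures visible
// in this file. The layer array is taken as a parameter so no layer
// constructor has to be assumed; the timestep count, the input layout and the
// ownership of the returned output buffer are illustrative assumptions, not
// guarantees of this API.
// ---------------------------------------------------------------------------
/*
inline data_t* batch_inference_sketch(
	ILayer** layers, size_t input_length, size_t layer_count,
	data_t* X, size_t t_count)
{
	// Build the network from pre-constructed layers
	// (see NN(ILayer** layers, size_t input_length, size_t layer_count)).
	NN net(layers, input_length, layer_count);
	net.print_shape();

	// Run the network over t_count timesteps of input; batch_execute is
	// assumed to return t_count * get_output_length() output values.
	data_t* outputs = net.batch_execute(X, t_count);

	// For recurrent/stateful use, delete_memory() resets the hidden state
	// between independent sequences (assumption based on the declaration).
	net.delete_memory();
	return outputs;
}
*/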