Skip to content

Commit 0e5db84

Browse files
committed
[Layers] Modify layers for data type
It is assumed that activations and weights are fully compatible, so no conversion between them is necessary. Input layers and loss layers are handled differently, because input data and label data are assumed to always be float32 type for now. Signed-off-by: Jiho Chu <jiho.chu@samsung.com>
1 parent cd1f7cf commit 0e5db84

18 files changed

+586
-223
lines changed

nntrainer/layers/bn_layer.cpp

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -111,6 +111,12 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
111111
context.requestWeight(dim, bnparams_beta, WeightRegularizer::NONE, 1.0f,
112112
bias_decay, "beta", true);
113113

114+
/**
115+
* @note declare weight dimension with activation datatype
116+
*/
117+
TensorDim w_dim = dim;
118+
w_dim.setDataType(in_dim.getDataType());
119+
114120
/**
115121
* caches the deviation -> input - avg(input)
116122
* @todo check if avoiding this storage and adding dependency on input (no
@@ -121,7 +127,7 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
121127
TensorLifespan::ITERATION_LIFESPAN);
122128
/** caches the inverse standard deviation */
123129
wt_idx[BNParams::invstd] =
124-
context.requestTensor(dim, "invstd", Tensor::Initializer::NONE, false,
130+
context.requestTensor(w_dim, "invstd", Tensor::Initializer::NONE, false,
125131
TensorLifespan::ITERATION_LIFESPAN);
126132
/**
127133
* Temporary tensor to store the full sized tensors in order to allow batch
@@ -136,13 +142,13 @@ void BatchNormalizationLayer::finalize(InitLayerContext &context) {
136142
* caches variance + epsilon as well.
137143
*/
138144
wt_idx[BNParams::cvar] =
139-
context.requestTensor(dim, "cvar", Tensor::Initializer::NONE, false,
145+
context.requestTensor(w_dim, "cvar", Tensor::Initializer::NONE, false,
140146
TensorLifespan::ITERATION_LIFESPAN);
141147
/**
142148
* Temporary tensor to store the reduced tensors along the axes_to_reduce.
143149
*/
144150
wt_idx[BNParams::t_reduced] =
145-
context.requestTensor(dim, "tensor_reduced", Tensor::Initializer::NONE,
151+
context.requestTensor(w_dim, "tensor_reduced", Tensor::Initializer::NONE,
146152
false, TensorLifespan::FORWARD_DERIV_LIFESPAN);
147153
}
148154

nntrainer/layers/conv2d_layer.cpp

Lines changed: 101 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -38,7 +38,8 @@ namespace {
3838
static TensorDim calcCol2ImOutputDim(const TensorDim &out,
3939
const TensorDim &kdim) {
4040

41-
return TensorDim({kdim.getFeatureLen(), out.width() * out.height()});
41+
return TensorDim({kdim.getFeatureLen(), out.width() * out.height()},
42+
out.getTensorType());
4243
}
4344

4445
/**
@@ -56,7 +57,10 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
5657
const std::array<props::Stride, CONV2D_DIM> &mstride,
5758
const std::array<props::Dilation, CONV2D_DIM> &dilation,
5859
Tensor &image) {
59-
auto [pt, pb, pl, pr] = padding;
60+
auto pt = padding[0];
61+
auto pb = padding[1];
62+
auto pl = padding[2];
63+
auto pr = padding[3];
6064

6165
unsigned k_height = kdim.height();
6266
unsigned k_width = kdim.width();
@@ -84,32 +88,48 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
8488
int h_stride_end = im_eff_height - eff_k_height - pt;
8589
int w_stride_end = im_eff_width - eff_k_width - pl;
8690

87-
unsigned col_w = 0;
88-
for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
89-
for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
90-
unsigned col_h = 0;
91-
int patch_height_end = hs + eff_k_height;
92-
int patch_width_end = ws + eff_k_width;
93-
for (unsigned c = 0; c < im_channel; c++) {
94-
for (int h = hs; h < patch_height_end; h += hdilation) {
95-
if (h < 0 || im_height <= h) {
96-
col_h += k_width;
97-
continue;
98-
}
99-
for (int w = ws; w < patch_width_end; w += wdilation) {
100-
if (w < 0 || im_width <= w) {
101-
col_h++;
91+
auto apply_data = [&]<typename T>(T *val) {
92+
unsigned col_w = 0;
93+
for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
94+
for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
95+
unsigned col_h = 0;
96+
int patch_height_end = hs + eff_k_height;
97+
int patch_width_end = ws + eff_k_width;
98+
for (unsigned c = 0; c < im_channel; c++) {
99+
for (int h = hs; h < patch_height_end; h += hdilation) {
100+
if (h < 0 || im_height <= h) {
101+
col_h += k_width;
102102
continue;
103103
}
104-
105-
float *val = image.getAddress<float>(0, c, h, w);
106-
*val += col_matrix.getValue<float>(0, 0, col_h, col_w);
107-
col_h++;
104+
for (int w = ws; w < patch_width_end; w += wdilation) {
105+
if (w < 0 || im_width <= w) {
106+
col_h++;
107+
continue;
108+
}
109+
110+
val = image.getAddress<T>(0, c, h, w);
111+
*val += col_matrix.getValue<T>(0, 0, col_h, col_w);
112+
col_h++;
113+
}
108114
}
109115
}
116+
col_w++;
110117
}
111-
col_w++;
112118
}
119+
};
120+
121+
if (image.getDataType() == nntrainer::Tdatatype::FP32) {
122+
float val;
123+
apply_data(&val);
124+
}
125+
#ifdef ENABLE_FP16
126+
else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
127+
_FP16 val;
128+
apply_data(&val);
129+
}
130+
#endif
131+
else {
132+
throw std::runtime_error("Not supported datatype");
113133
}
114134
}
115135

@@ -179,7 +199,10 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
179199
// }
180200
*/
181201

182-
auto [pt, pb, pl, pr] = padding;
202+
auto pt = padding[0];
203+
auto pb = padding[1];
204+
auto pl = padding[2];
205+
auto pr = padding[3];
183206

184207
unsigned int channel = in.channel();
185208
int in_height = in.height();
@@ -198,46 +221,62 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
198221
unsigned int out_width = (width - eff_k_width) / mstride[1] + 1;
199222

200223
out.reshape(
201-
TensorDim({out_height * out_width, in.channel() * k_height * k_width}));
202-
float *out_data = out.getData();
203-
204-
int h_stride_end = height - eff_k_height - pt;
205-
int w_stride_end = width - eff_k_width - pl;
206-
207-
/// get a patch, size of kernel
208-
/// hs is height_strided, ws is width_strided
209-
unsigned int owidth = out.width();
210-
unsigned int base_im_w = 0;
211-
for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
212-
unsigned int base_im_h = 0;
213-
int patch_height_end = eff_k_height + hs;
214-
/// map the patch to a single line looping through channel
215-
for (unsigned int c = 0; c < channel; ++c) {
216-
for (int h = hs; h < patch_height_end; h += dilation[0]) {
217-
if (h < 0 || in_height <= h) {
218-
base_im_h += k_width;
219-
continue;
220-
}
221-
222-
unsigned int im_w = base_im_w;
223-
for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
224-
unsigned int im_h = base_im_h;
225-
int patch_width_end = eff_k_width + ws;
224+
TensorDim({out_height * out_width, in.channel() * k_height * k_width},
225+
in.getTensorType()));
226+
227+
auto apply_data = [&]<typename T>(T *out_data) {
228+
int h_stride_end = height - eff_k_height - pt;
229+
int w_stride_end = width - eff_k_width - pl;
230+
231+
/// get a patch, size of kernel
232+
/// hs is height_strided, ws is width_strided
233+
unsigned int owidth = out.width();
234+
unsigned int base_im_w = 0;
235+
for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
236+
unsigned int base_im_h = 0;
237+
int patch_height_end = eff_k_height + hs;
238+
/// map the patch to a single line looping through channel
239+
for (unsigned int c = 0; c < channel; ++c) {
240+
for (int h = hs; h < patch_height_end; h += dilation[0]) {
241+
if (h < 0 || in_height <= h) {
242+
base_im_h += k_width;
243+
continue;
244+
}
226245

227-
for (int w = ws; w < patch_width_end; w += dilation[1]) {
228-
if (w < 0 || in_width <= w) {
246+
unsigned int im_w = base_im_w;
247+
for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
248+
unsigned int im_h = base_im_h;
249+
int patch_width_end = eff_k_width + ws;
250+
251+
for (int w = ws; w < patch_width_end; w += dilation[1]) {
252+
if (w < 0 || in_width <= w) {
253+
im_h++;
254+
continue;
255+
}
256+
out_data[im_w * owidth + im_h] = in.getValue<T>(0, c, h, w);
229257
im_h++;
230-
continue;
231258
}
232-
out_data[im_w * owidth + im_h] = in.getValue<float>(0, c, h, w);
233-
im_h++;
259+
im_w++;
234260
}
235-
im_w++;
261+
base_im_h += k_width;
236262
}
237-
base_im_h += k_width;
238263
}
264+
base_im_w += out_width;
239265
}
240-
base_im_w += out_width;
266+
};
267+
268+
if (out.getDataType() == nntrainer::Tdatatype::FP32) {
269+
float *out_data = out.getData<float>();
270+
apply_data(out_data);
271+
}
272+
#ifdef ENABLE_FP16
273+
else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
274+
_FP16 *out_data = out.getData<_FP16>();
275+
apply_data(out_data);
276+
}
277+
#endif
278+
else {
279+
throw std::runtime_error("Not supported datatype");
241280
}
242281
}
243282

@@ -279,9 +318,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
279318
auto &dilation =
280319
std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
281320

282-
TensorDim kernel_dim =
283-
TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1]);
284-
TensorDim bias_dim = TensorDim(1, filter_size, 1, 1);
321+
auto in_t_type = in_dim.getTensorType();
322+
in_t_type.data_type = context.getWeightDataType();
323+
TensorDim kernel_dim = TensorDim(filter_size, in_dim.channel(),
324+
kernel_size[0], kernel_size[1], in_t_type);
325+
TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_t_type);
285326

286327
padding = std::get<props::Padding2D>(conv_props)
287328
.compute(in_dim, kernel_dim, {stride[0], stride[1]},
@@ -309,6 +350,7 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
309350
out_dim.channel(filter_size);
310351
out_dim.height((eff_in_height - eff_k_height) / stride[0] + 1);
311352
out_dim.width((eff_in_width - eff_k_width) / stride[1] + 1);
353+
out_dim.setTensorType(in_dim.getTensorType());
312354
context.setOutputDimensions({out_dim});
313355

314356
NNTR_THROW_IF(eff_in_height < kernel_size[0] || eff_in_width < kernel_size[1],

nntrainer/layers/input_layer.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -33,8 +33,7 @@ namespace nntrainer {
3333
static constexpr size_t SINGLE_INOUT_IDX = 0;
3434

3535
InputLayer::InputLayer() :
36-
Layer(),
37-
input_props(props::Normalization(), props::Standardization()) {}
36+
Layer(), input_props(props::Normalization(), props::Standardization()) {}
3837

3938
void InputLayer::setProperty(const std::vector<std::string> &values) {
4039
auto remain_props = loadProperties(values, input_props);
@@ -47,7 +46,7 @@ void InputLayer::forwarding(RunLayerContext &context, bool training) {
4746
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
4847
if (!context.executeInPlace()) {
4948
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
50-
hidden_.copy(input_);
49+
hidden_.copyData(input_);
5150
}
5251

5352
if (std::get<props::Normalization>(input_props))
@@ -70,6 +69,10 @@ void InputLayer::finalize(InitLayerContext &context) {
7069

7170
std::vector<TensorDim> output_dims = context.getInputDimensions();
7271

72+
NNTR_THROW_IF(output_dims.size() != 1, std::invalid_argument);
73+
74+
output_dims[0].setTensorType(
75+
{context.getFormat(), context.getActivationDataType()});
7376
context.setOutputDimensions(output_dims);
7477
}
7578

nntrainer/layers/layer_context.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -152,6 +152,16 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
152152
return weights[idx]->getVariableRef();
153153
}
154154

155+
/**
156+
* @brief Get the Weight tensor object
157+
*
158+
* @param idx Identifier of the weight
159+
* @return Tensor* Pointer to the master weight tensor
160+
*/
161+
Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
162+
return weights[idx]->getVariableMasterRef();
163+
}
164+
155165
/**
156166
* @brief Get the Weight Gradient tensor object
157167
*
@@ -177,6 +187,18 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
177187
return weights[idx]->getOptimizerVariableRef(jdx);
178188
}
179189

190+
/**
191+
* @brief Get the Weight Optimizer Variable tensor object
192+
*
193+
* @param idx Identifier of the weight
194+
* @param jdx Identifier of the optimizer variables
195+
* @return Tensor& Reference to the weight optimizer variable tensor
196+
*/
197+
Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
198+
unsigned int jdx) const {
199+
return weights[idx]->getOptimizerMasterVariableRef(jdx);
200+
}
201+
180202
/**
181203
* @brief Get the Number of Weight Optimizer Variable tensor object
182204
*
@@ -187,6 +209,16 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
187209
return weights[idx]->getNumOptVariable();
188210
}
189211

212+
/**
213+
* @brief Get the Number of Weight Optimizer Variable tensor object
214+
*
215+
* @param idx Identifier of the weight
216+
* @return int Number of the weight optimizer variable
217+
*/
218+
unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
219+
return weights[idx]->getNumOptMasterVariable();
220+
}
221+
190222
/**
191223
* @brief Get regularization loss for the weight
192224
*
@@ -326,6 +358,25 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
326358
return getInputGrad(idx);
327359
}
328360

361+
bool RunLayerContext::validateDerivatives() {
362+
auto num_in = getNumInputs();
363+
auto num_out = getNumOutputs();
364+
365+
for (unsigned int i = 0; i < num_in; ++i) {
366+
auto deriv = getIncomingDerivative(i);
367+
if (deriv.checkDataValidation(false) == false)
368+
return false;
369+
}
370+
371+
for (unsigned int i = 0; i < num_out; ++i) {
372+
auto deriv = getOutgoingDerivative(i);
373+
if (deriv.checkDataValidation(false) == false)
374+
return false;
375+
}
376+
377+
return true;
378+
}
379+
329380
/**
330381
* @brief Get the Tensor object
331382
*

0 commit comments

Comments
 (0)