Commit 9468bbc

[Mixed] Mixed Precision Layer update
This PR updates the mixed precision layer.
- integrate nnstreamer#2568 & nnstreamer#2455
- more tests will be added

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <donghak.park@samsung.com>
1 parent 26b75ad commit 9468bbc

12 files changed: +37 -315 lines
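
The code removed in this commit implemented two pieces of mixed-precision plumbing: FP32 "master" copies of low-precision weights (getWeightMaster, getWeightOptMasterVar) and a loss-scale factor that loss layers multiplied into their derivative (loss_scale, applyLossScale). As background, here is a minimal self-contained sketch of the loss-scaling arithmetic; the names, values, and stand-in variables are illustrative assumptions, not nntrainer API:

```cpp
// Illustrative sketch of static loss scaling in mixed precision training.
// Not nntrainer code: loss_scale, d_loss, and the stand-ins below only mirror
// what the removed loss_scale / applyLossScale() plumbing expressed.
#include <iostream>

int main() {
  const float loss_scale = 1024.0f; // assumed scale factor
  float d_loss = 3.0e-8f;           // a tiny loss derivative that would
                                    // underflow if cast straight to FP16

  float scaled = d_loss * loss_scale;     // what applyLossScale() did before backprop
  float scaled_grad = scaled;             // stand-in for the backward pass result
  float grad = scaled_grad / loss_scale;  // unscale before the optimizer step

  std::cout << "recovered gradient: " << grad << '\n'; // prints 3e-08
  return 0;
}
```

Scaling keeps small FP16 gradients above the underflow threshold during backpropagation; dividing by the same factor before the optimizer step restores their true magnitude.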

nntrainer/layers/layer_context.cpp

Lines changed: 0 additions & 51 deletions
@@ -156,16 +156,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
   return weights[idx]->getVariableRef();
 }
 
-/**
- * @brief Get the Weight tensor object
- *
- * @param idx Identifier of the weight
- * @return Tensor& Reference to the weight tensor
- */
-Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
-  return weights[idx]->getVariableMasterRef();
-}
-
 /**
  * @brief Get the Weight Gradient tensor object
  *
@@ -204,18 +194,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   return weights[idx]->getOptimizerVariableRef(jdx);
 }
 
-/**
- * @brief Get the Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @param jdx Identifier of the optimizer variables
- * @return Tensor& Reference to the weight optimizer variable tensor
- */
-Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
-                                               unsigned int jdx) const {
-  return weights[idx]->getOptimizerMasterVariableRef(jdx);
-}
-
 /**
  * @brief Get the Number of Weight Optimizer Variable tensor object
  *
@@ -226,16 +204,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
   return weights[idx]->getNumOptVariable();
 }
 
-/**
- * @brief Get the Number of Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @return int Number of the weight optimizer variable
- */
-unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
-  return weights[idx]->getNumOptMasterVariable();
-}
-
 /**
  * @brief Get regularization loss for the weight
  *
@@ -375,25 +343,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
   return getInputGrad(idx);
 }
 
-bool RunLayerContext::validateDerivatives() {
-  auto num_in = getNumInputs();
-  auto num_out = getNumOutputs();
-
-  for (unsigned int i = 0; i < num_in; ++i) {
-    auto deriv = getIncomingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  for (unsigned int i = 0; i < num_out; ++i) {
-    auto deriv = getOutgoingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  return true;
-}
-
 /**
  * @brief Get the Tensor object
  *
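
The removed RunLayerContext::validateDerivatives() walked every incoming and outgoing derivative and returned false if checkDataValidation() failed; this is the kind of hook a dynamic loss-scaling loop uses to detect FP16 overflow. A rough sketch of that control flow, under the assumption that "invalid" means NaN/Inf — every name below is hypothetical, not nntrainer API:

```cpp
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical stand-in for a derivative validity check such as
// Tensor::checkDataValidation(): true only if every value is finite.
bool all_finite(const std::vector<float> &deriv) {
  for (float v : deriv)
    if (!std::isfinite(v))
      return false;
  return true;
}

int main() {
  std::vector<float> deriv = {0.5f, -1.25f,
                              std::numeric_limits<float>::infinity()};
  float loss_scale = 1024.0f;

  // Sketch of how a dynamic loss-scaling step might react (illustrative only):
  if (!all_finite(deriv)) {
    loss_scale *= 0.5f; // overflow detected: shrink the scale, skip the update
    std::cout << "skipped step, new loss scale: " << loss_scale << '\n';
  } else {
    // ... unscale the gradients by loss_scale and apply the optimizer step ...
  }
  return 0;
}
```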

nntrainer/layers/layer_context.h

Lines changed: 0 additions & 38 deletions
@@ -453,14 +453,6 @@ class RunLayerContext {
    */
   Tensor &getWeight(unsigned int idx) const;
 
-  /**
-   * @brief Get the Weight master tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return Tensor& Reference to the weight tensor
-   */
-  Tensor *getWeightMaster(unsigned int idx) const;
-
   /**
    * @brief Get the Weight Gradient tensor object
    *
@@ -488,15 +480,6 @@
    */
   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
 
-  /**
-   * @brief Get the Weight Optimizer Master Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer master variable
-   * @return Tensor& Reference to the weight optimizer tensor
-   */
-  Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;
-
   /**
    * @brief Get the Weight name
    *
@@ -607,11 +590,6 @@
    */
   Tensor &getOutgoingDerivative(unsigned int idx);
 
-  /**
-   * @brief validate input/output derivatives of the layer
-   */
-  bool validateDerivatives();
-
   /**
    * @brief Get the Tensor object
    *
@@ -727,29 +705,13 @@
    */
   unsigned int getNumWeightOptVar(unsigned int idx) const;
 
-  /**
-   * @brief Get the Number of Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return unsigned int Number of the weight optimizer variable
-   */
-  unsigned int getNumWeightOptMasterVar(unsigned int idx) const;
-
   /**
    * @brief Get the number of requested tensors objects
    *
    * @return unsigned int number of requested tensors
    */
   unsigned int getNumTensors() const { return tensors.size(); }
 
-  /**
-   * @brief Set the Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer variable
-   */
-  void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);
-
   /**
    * @brief Set the batch for the run context
    *

nntrainer/layers/layer_node.cpp

Lines changed: 4 additions & 23 deletions
@@ -16,7 +16,6 @@
 #include <cmath>
 #include <iterator>
 #include <stdexcept>
-#include <tuple>
 #include <utility>
 
 #include <activation_layer.h>
@@ -466,24 +465,16 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      if (run_context->isGradientLastAccess(i) && getTrainable()) {
        /// @note read optimizer variables
-       auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-         if (num_w_opt_m > 0)
-           run_context->getWeightOptMasterVar(i, j).read(file);
-         else
-           run_context->getWeightOptVar(i, j).read(file);
+         run_context->getWeightOptVar(i, j).read(file);
        }
      }
    }
  } else {
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      /// @note shared weights are only be read at the first acecss
      if (run_context->isGradientLastAccess(i)) {
-       auto w = run_context->getWeightMaster(i);
-       if (w)
-         w->read(file);
-       else
-         run_context->getWeight(i).read(file);
+       run_context->getWeight(i).read(file);
      }
    }
  }
@@ -498,13 +489,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
      if (run_context->isGradientLastAccess(i) && getTrainable()) {
        // @note save optimizer variables
        if (run_context->weightHasGradient(i)) {
-         auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
               ++j) {
-           if (num_w_opt_m > 0)
-             run_context->getWeightOptMasterVar(i, j).save(file);
-           else
-             run_context->getWeightOptVar(i, j).save(file);
+           run_context->getWeightOptVar(i, j).save(file);
          }
        }
      }
@@ -513,13 +500,7 @@
    // @note shared weights are only be saved at the first access
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      if (run_context->isGradientLastAccess(i)) {
-       if (run_context->getNumWeights()) {
-         auto w = run_context->getWeightMaster(i);
-         if (w)
-           w->save(file);
-         else
-           run_context->getWeight(i).save(file);
-       }
+       run_context->getWeight(i).save(file);
      }
    }
  }

nntrainer/layers/layer_node.h

Lines changed: 0 additions & 5 deletions
@@ -899,11 +899,6 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
    */
   bool needsCalcGradient() { return needs_calc_gradient; }
 
-  /**
-   * @brief Set loss scale factor
-   */
-  void setLossScale(float scale) { layer->setLossScale(scale); }
-
 private:
   /**
    * @brief Get the Input Layers object

nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp

Lines changed: 0 additions & 3 deletions
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);
-
-  applyLossScale(ret_derivative);
-
   ret_derivative.subtract_i(y2);
   if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "

nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp

Lines changed: 7 additions & 29 deletions
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   // fill the output
-  auto out_type = hidden_.getDataType();
-  if (out_type == ml::train::TensorDim::DataType::FP32) {
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
-    }
+  auto dataType = y.getDataType();
+  if (dataType == ml::train::TensorDim::DataType::FP32) {
+    hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
       // update the loss value
       LossLayer::updateLoss(context, l);
     }
-  } else if (out_type == ml::train::TensorDim::DataType::FP16) {
+  } else if (dataType == ml::train::TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
-    }
+    hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   auto dataType = y.getDataType();
-
-  Tensor ret(y.getDim());
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   /// operation
   // TODO: verify y and ret_derivative must not be same as loss layer is not
   // working in-place
-  if (ret.getDataType() != y2.getDataType()) {
-    ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
-  } else {
-    ret.subtract(y2, ret_derivative);
-  }
-
-  /**
-   * loss scale is applied for mixed precision
-   * every loss layers need to specify this applying code.
-   */
-  applyLossScale(ret_derivative);
-
+  ret.subtract(y2, ret_derivative);
   if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
                              "Error when calculating loss");

nntrainer/layers/loss/loss_layer.cpp

Lines changed: 1 addition & 4 deletions
@@ -15,17 +15,14 @@
 #include <loss_layer.h>
 
 namespace nntrainer {
-
-LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}
-
 void LossLayer::finalize(InitLayerContext &context) {
   std::vector<TensorDim> input_dim = context.getInputDimensions();
   std::vector<TensorDim> output_dim = input_dim;
   for (auto &d : output_dim)
     d.setDataType(
       str_converter<enum_class_prop_tag,
                     nntrainer::TensorDataTypeInfo>::from_string("FP32"));
-
+
   context.setOutputDimensions(output_dim);
 }
 

nntrainer/layers/loss/loss_layer.h

Lines changed: 0 additions & 21 deletions
@@ -27,11 +27,6 @@ namespace nntrainer {
  */
 class LossLayer : public Layer {
 public:
-  /**
-   * @brief Constructor of Loss Layer
-   */
-  LossLayer();
-
   /**
    * @brief Destructor of Loss Layer
    */
@@ -52,19 +47,11 @@
    */
   virtual bool supportBackwarding() const override { return true; }
 
-  /**
-   * @brief Set loss scale factor
-   */
-  virtual void setLossScale(float scale) override { loss_scale = scale; }
-
-private:
   /**
    * @copydoc Layer::requireLabel()
    */
   bool requireLabel() const override { return true; }
 
-  float loss_scale; /**< loss scale factor */
-
 protected:
   /**
    * @brief update loss
@@ -73,14 +60,6 @@
    */
   void updateLoss(RunLayerContext &context, const Tensor &l);
 
-  /**
-   * @brief apply loss scale
-   */
-  void applyLossScale(Tensor &derivative) {
-    if (loss_scale != 0.0f)
-      derivative.multiply_i(loss_scale);
-  }
-
   Tensor
     l; /**< loss tensor to store intermediate value to calculate loss value */
 };
