
Commit 1d4b135

[Mixed] Mixed Precision Layer update
This PR updates the mixed precision layer.
- integrate nnstreamer#2568 & nnstreamer#2455
- will update more tests

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <donghak.park@samsung.com>
1 parent 600f996 commit 1d4b135

12 files changed (+38, -292 lines changed)

nntrainer/layers/layer_context.cpp

Lines changed: 0 additions & 51 deletions
@@ -157,16 +157,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
   return weights[idx]->getVariableRef();
 }
 
-/**
- * @brief Get the Weight tensor object
- *
- * @param idx Identifier of the weight
- * @return Tensor& Reference to the weight tensor
- */
-Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
-  return weights[idx]->getVariableMasterRef();
-}
-
 /**
  * @brief Get the Weight Gradient tensor object
  *
@@ -205,18 +195,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   return weights[idx]->getOptimizerVariableRef(jdx);
 }
 
-/**
- * @brief Get the Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @param jdx Identifier of the optimizer variables
- * @return Tensor& Reference to the weight optimizer variable tensor
- */
-Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
-                                               unsigned int jdx) const {
-  return weights[idx]->getOptimizerMasterVariableRef(jdx);
-}
-
 /**
  * @brief Get the Number of Weight Optimizer Variable tensor object
  *
@@ -227,16 +205,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
   return weights[idx]->getNumOptVariable();
 }
 
-/**
- * @brief Get the Number of Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @return int Number of the weight optimizer variable
- */
-unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
-  return weights[idx]->getNumOptMasterVariable();
-}
-
 /**
  * @brief Get regularization loss for the weight
  *
@@ -376,25 +344,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
   return getInputGrad(idx);
 }
 
-bool RunLayerContext::validateDerivatives() {
-  auto num_in = getNumInputs();
-  auto num_out = getNumOutputs();
-
-  for (unsigned int i = 0; i < num_in; ++i) {
-    auto deriv = getIncomingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  for (unsigned int i = 0; i < num_out; ++i) {
-    auto deriv = getOutgoingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  return true;
-}
-
 /**
  * @brief Get the Tensor object
  *
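The accessors deleted here (getWeightMaster, getWeightOptMasterVar, getNumWeightOptMasterVar) exposed the full-precision "master" copy that mixed-precision training keeps next to each low-precision weight. A minimal standalone sketch of that master-copy pattern, not nntrainer's implementation (names are illustrative; double stands in for the FP32 master and float for the FP16 compute copy):

// Standalone illustration of the master-weight pattern (not nntrainer code).
// `double` stands in for the FP32 master copy, `float` for the FP16 compute copy.
#include <cstddef>
#include <iostream>
#include <vector>

struct MixedPrecisionWeight {
  std::vector<float> compute; // low-precision copy used in forward/backward
  std::vector<double> master; // full-precision copy owned by the optimizer

  explicit MixedPrecisionWeight(std::size_t n)
    : compute(n, 0.1f), master(n, 0.1) {}

  // Optimizer step: update the master copy with the unscaled gradient,
  // then round the result back down into the compute copy.
  void applyGradient(const std::vector<float> &grad, double lr) {
    for (std::size_t i = 0; i < master.size(); ++i) {
      master[i] -= lr * static_cast<double>(grad[i]);
      compute[i] = static_cast<float>(master[i]);
    }
  }
};

int main() {
  MixedPrecisionWeight w(4);
  std::vector<float> grad = {0.01f, -0.02f, 0.03f, -0.04f};
  w.applyGradient(grad, 0.1);
  for (float v : w.compute)
    std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}

Accumulating updates in the master copy and re-quantizing into the compute copy is what keeps repeated small updates from being lost to low-precision rounding.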

nntrainer/layers/layer_context.h

Lines changed: 0 additions & 38 deletions
@@ -474,14 +474,6 @@ class RunLayerContext {
    */
   Tensor &getWeight(unsigned int idx) const;
 
-  /**
-   * @brief Get the Weight master tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return Tensor& Reference to the weight tensor
-   */
-  Tensor *getWeightMaster(unsigned int idx) const;
-
   /**
    * @brief Get the Weight Gradient tensor object
    *
@@ -509,15 +501,6 @@ class RunLayerContext {
    */
   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
 
-  /**
-   * @brief Get the Weight Optimizer Master Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer master variable
-   * @return Tensor& Reference to the weight optimizer tensor
-   */
-  Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;
-
   /**
    * @brief Get the Weight name
    *
@@ -628,11 +611,6 @@ class RunLayerContext {
    */
   Tensor &getOutgoingDerivative(unsigned int idx);
 
-  /**
-   * @brief validate input/output derivatives of the layer
-   */
-  bool validateDerivatives();
-
   /**
    * @brief Get the Tensor object
    *
@@ -762,29 +740,13 @@ class RunLayerContext {
    */
   unsigned int getNumWeightOptVar(unsigned int idx) const;
 
-  /**
-   * @brief Get the Number of Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return unsigned int Number of the weight optimizer variable
-   */
-  unsigned int getNumWeightOptMasterVar(unsigned int idx) const;
-
   /**
    * @brief Get the number of requested tensors objects
    *
    * @return unsigned int number of requested tensors
    */
   unsigned int getNumTensors() const { return tensors.size(); }
 
-  /**
-   * @brief Set the Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer variable
-   */
-  void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);
-
   /**
    * @brief Set the batch for the run context
    *

nntrainer/layers/layer_node.cpp

Lines changed: 3 additions & 18 deletions
@@ -16,7 +16,6 @@
 #include <cmath>
 #include <iterator>
 #include <stdexcept>
-#include <tuple>
 #include <utility>
 
 #include <activation_layer.h>
@@ -468,12 +467,8 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
         /// @note read optimizer variables
-        auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
         for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-          if (num_w_opt_m > 0)
-            run_context->getWeightOptMasterVar(i, j).read(file);
-          else
-            run_context->getWeightOptVar(i, j).read(file);
+          run_context->getWeightOptVar(i, j).read(file);
         }
       }
     }
@@ -499,13 +494,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
      if (run_context->isGradientLastAccess(i) && getTrainable()) {
        // @note save optimizer variables
        if (run_context->weightHasGradient(i)) {
-          auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
               ++j) {
-            if (num_w_opt_m > 0)
-              run_context->getWeightOptMasterVar(i, j).save(file);
-            else
-              run_context->getWeightOptVar(i, j).save(file);
+            run_context->getWeightOptVar(i, j).save(file);
          }
        }
      }
@@ -514,13 +505,7 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
    // @note shared weights are only be saved at the first access
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      if (run_context->isGradientLastAccess(i)) {
-        if (run_context->getNumWeights()) {
-          auto w = run_context->getWeightMaster(i);
-          if (w)
-            w->save(file);
-          else
-            run_context->getWeight(i).save(file);
-        }
+        run_context->getWeight(i).save(file);
      }
    }
  }

nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp

Lines changed: 0 additions & 3 deletions
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);
-
-  applyLossScale(ret_derivative);
-
   ret_derivative.subtract_i(y2);
   if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "

nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp

Lines changed: 7 additions & 29 deletions
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   // fill the output
-  auto out_type = hidden_.getDataType();
-  if (out_type == ml::train::TensorDim::DataType::FP32) {
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
-    }
+  auto dataType = y.getDataType();
+  if (dataType == ml::train::TensorDim::DataType::FP32) {
+    hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
       // update the loss value
       LossLayer::updateLoss(context, l);
     }
-  } else if (out_type == ml::train::TensorDim::DataType::FP16) {
+  } else if (dataType == ml::train::TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
-    }
+    hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   auto dataType = y.getDataType();
-
-  Tensor ret(y.getDim());
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   /// operation
   // TODO: verify y and ret_derivative must not be same as loss layer is not
   // working in-place
-  if (ret.getDataType() != y2.getDataType()) {
-    ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
-  } else {
-    ret.subtract(y2, ret_derivative);
-  }
-
-  /**
-   * loss scale is applied for mixed precision
-   * every loss layers need to specify this applying code.
-   */
-  applyLossScale(ret_derivative);
-
+  ret.subtract(y2, ret_derivative);
   if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
                              "Error when calculating loss");

nntrainer/layers/loss/loss_layer.cpp

Lines changed: 0 additions & 3 deletions
@@ -15,9 +15,6 @@
 #include <loss_layer.h>
 
 namespace nntrainer {
-
-LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}
-
 void LossLayer::finalize(InitLayerContext &context) {
   std::vector<TensorDim> input_dim = context.getInputDimensions();
   std::vector<TensorDim> output_dim = input_dim;

nntrainer/layers/loss/loss_layer.h

Lines changed: 0 additions & 13 deletions
@@ -27,11 +27,6 @@ namespace nntrainer {
  */
 class LossLayer : public Layer {
 public:
-  /**
-   * @brief Constructor of Loss Layer
-   */
-  LossLayer();
-
   /**
    * @brief Destructor of Loss Layer
    */
@@ -54,19 +49,11 @@ class LossLayer : public Layer {
 
   bool supportInPlace() const override {return is_inplace;}
 
-  /**
-   * @brief Set loss scale factor
-   */
-  virtual void setLossScale(float scale) override { loss_scale = scale; }
-
-private:
   /**
    * @copydoc Layer::requireLabel()
    */
   bool requireLabel() const override { return true; }
 
-  float loss_scale; /**< loss scale factor */
-
 protected:
   /**
    * @brief update loss

nntrainer/layers/loss/meson.build

Lines changed: 1 addition & 3 deletions
@@ -7,9 +7,7 @@ loss_layer_sources = [
   'constant_derivative_loss_layer.cpp'
 ]
 
-loss_layer_headers = [
-  'loss_layer.h'
-]
+loss_layer_headers = []
 
 loss_layer_deps = []
 