
Commit 1d4b135

[Mixed] Mixed Precision Layer update
This PR updates the mixed precision layer.
- integrate nnstreamer#2568 & nnstreamer#2455
- will update more tests

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <donghak.park@samsung.com>
1 parent 600f996 commit 1d4b135

12 files changed (+38, -292 lines changed)

nntrainer/layers/layer_context.cpp

Lines changed: 0 additions & 51 deletions
@@ -157,16 +157,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
   return weights[idx]->getVariableRef();
 }
 
-/**
- * @brief Get the Weight tensor object
- *
- * @param idx Identifier of the weight
- * @return Tensor& Reference to the weight tensor
- */
-Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
-  return weights[idx]->getVariableMasterRef();
-}
-
 /**
  * @brief Get the Weight Gradient tensor object
  *
@@ -205,18 +195,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   return weights[idx]->getOptimizerVariableRef(jdx);
 }
 
-/**
- * @brief Get the Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @param jdx Identifier of the optimizer variables
- * @return Tensor& Reference to the weight optimizer variable tensor
- */
-Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
-                                               unsigned int jdx) const {
-  return weights[idx]->getOptimizerMasterVariableRef(jdx);
-}
-
 /**
  * @brief Get the Number of Weight Optimizer Variable tensor object
  *
@@ -227,16 +205,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
   return weights[idx]->getNumOptVariable();
 }
 
-/**
- * @brief Get the Number of Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @return int Number of the weight optimizer variable
- */
-unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
-  return weights[idx]->getNumOptMasterVariable();
-}
-
 /**
  * @brief Get regularization loss for the weight
  *
@@ -376,25 +344,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
   return getInputGrad(idx);
 }
 
-bool RunLayerContext::validateDerivatives() {
-  auto num_in = getNumInputs();
-  auto num_out = getNumOutputs();
-
-  for (unsigned int i = 0; i < num_in; ++i) {
-    auto deriv = getIncomingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  for (unsigned int i = 0; i < num_out; ++i) {
-    auto deriv = getOutgoingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  return true;
-}
-
 /**
  * @brief Get the Tensor object
  *
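The accessors deleted here (getWeightMaster, getWeightOptMasterVar, getNumWeightOptMasterVar) exposed the full-precision "master" copy that mixed-precision training keeps next to each low-precision weight. A minimal standalone sketch of that master-copy pattern, not nntrainer's implementation (names are illustrative; double stands in for the FP32 master and float for the FP16 compute copy):

// Standalone illustration of the master-weight pattern (not nntrainer code).
// `double` stands in for the FP32 master copy, `float` for the FP16 compute copy.
#include <cstddef>
#include <iostream>
#include <vector>

struct MixedPrecisionWeight {
  std::vector<float> compute; // low-precision copy used in forward/backward
  std::vector<double> master; // full-precision copy owned by the optimizer

  explicit MixedPrecisionWeight(std::size_t n)
    : compute(n, 0.1f), master(n, 0.1) {}

  // Optimizer step: update the master copy with the unscaled gradient,
  // then round the result back down into the compute copy.
  void applyGradient(const std::vector<float> &grad, double lr) {
    for (std::size_t i = 0; i < master.size(); ++i) {
      master[i] -= lr * static_cast<double>(grad[i]);
      compute[i] = static_cast<float>(master[i]);
    }
  }
};

int main() {
  MixedPrecisionWeight w(4);
  std::vector<float> grad = {0.01f, -0.02f, 0.03f, -0.04f};
  w.applyGradient(grad, 0.1);
  for (float v : w.compute)
    std::cout << v << ' ';
  std::cout << '\n';
  return 0;
}

Accumulating updates in the master copy and re-quantizing into the compute copy is what keeps repeated small updates from being lost to low-precision rounding.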

nntrainer/layers/layer_context.h

Lines changed: 0 additions & 38 deletions
@@ -474,14 +474,6 @@ class RunLayerContext {
    */
   Tensor &getWeight(unsigned int idx) const;
 
-  /**
-   * @brief Get the Weight master tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return Tensor& Reference to the weight tensor
-   */
-  Tensor *getWeightMaster(unsigned int idx) const;
-
   /**
    * @brief Get the Weight Gradient tensor object
    *
@@ -509,15 +501,6 @@ class RunLayerContext {
    */
   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
 
-  /**
-   * @brief Get the Weight Optimizer Master Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer master variable
-   * @return Tensor& Reference to the weight optimizer tensor
-   */
-  Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;
-
   /**
    * @brief Get the Weight name
    *
@@ -628,11 +611,6 @@ class RunLayerContext {
    */
   Tensor &getOutgoingDerivative(unsigned int idx);
 
-  /**
-   * @brief validate input/output derivatives of the layer
-   */
-  bool validateDerivatives();
-
   /**
    * @brief Get the Tensor object
    *
@@ -762,29 +740,13 @@ class RunLayerContext {
    */
   unsigned int getNumWeightOptVar(unsigned int idx) const;
 
-  /**
-   * @brief Get the Number of Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return unsigned int Number of the weight optimizer variable
-   */
-  unsigned int getNumWeightOptMasterVar(unsigned int idx) const;
-
   /**
    * @brief Get the number of requested tensors objects
    *
    * @return unsigned int number of requested tensors
    */
   unsigned int getNumTensors() const { return tensors.size(); }
 
-  /**
-   * @brief Set the Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer variable
-   */
-  void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);
-
   /**
    * @brief Set the batch for the run context
    *

nntrainer/layers/layer_node.cpp

Lines changed: 3 additions & 18 deletions
@@ -16,7 +16,6 @@
 #include <cmath>
 #include <iterator>
 #include <stdexcept>
-#include <tuple>
 #include <utility>
 
 #include <activation_layer.h>
@@ -468,12 +467,8 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
         /// @note read optimizer variables
-        auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
         for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-          if (num_w_opt_m > 0)
-            run_context->getWeightOptMasterVar(i, j).read(file);
-          else
-            run_context->getWeightOptVar(i, j).read(file);
+          run_context->getWeightOptVar(i, j).read(file);
         }
       }
     }
@@ -499,13 +494,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
      if (run_context->isGradientLastAccess(i) && getTrainable()) {
        // @note save optimizer variables
        if (run_context->weightHasGradient(i)) {
-          auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
               ++j) {
-            if (num_w_opt_m > 0)
-              run_context->getWeightOptMasterVar(i, j).save(file);
-            else
-              run_context->getWeightOptVar(i, j).save(file);
+            run_context->getWeightOptVar(i, j).save(file);
          }
        }
      }
@@ -514,13 +505,7 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
    // @note shared weights are only be saved at the first access
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      if (run_context->isGradientLastAccess(i)) {
-        if (run_context->getNumWeights()) {
-          auto w = run_context->getWeightMaster(i);
-          if (w)
-            w->save(file);
-          else
-            run_context->getWeight(i).save(file);
-        }
+        run_context->getWeight(i).save(file);
      }
    }
  }

nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp

Lines changed: 0 additions & 3 deletions
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);
-
-  applyLossScale(ret_derivative);
-
   ret_derivative.subtract_i(y2);
   if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "

nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp

Lines changed: 7 additions & 29 deletions
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   // fill the output
-  auto out_type = hidden_.getDataType();
-  if (out_type == ml::train::TensorDim::DataType::FP32) {
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
-    }
+  auto dataType = y.getDataType();
+  if (dataType == ml::train::TensorDim::DataType::FP32) {
+    hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
       // update the loss value
       LossLayer::updateLoss(context, l);
     }
-  } else if (out_type == ml::train::TensorDim::DataType::FP16) {
+  } else if (dataType == ml::train::TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
-    }
+    hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   auto dataType = y.getDataType();
-
-  Tensor ret(y.getDim());
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   /// operation
   // TODO: verify y and ret_derivative must not be same as loss layer is not
   // working in-place
-  if (ret.getDataType() != y2.getDataType()) {
-    ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
-  } else {
-    ret.subtract(y2, ret_derivative);
-  }
-
-  /**
-   * loss scale is applied for mixed precision
-   * every loss layers need to specify this applying code.
-   */
-  applyLossScale(ret_derivative);
-
+  ret.subtract(y2, ret_derivative);
   if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
                              "Error when calculating loss");

nntrainer/layers/loss/loss_layer.cpp

Lines changed: 0 additions & 3 deletions
@@ -15,9 +15,6 @@
 #include <loss_layer.h>
 
 namespace nntrainer {
-
-LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}
-
 void LossLayer::finalize(InitLayerContext &context) {
   std::vector<TensorDim> input_dim = context.getInputDimensions();
   std::vector<TensorDim> output_dim = input_dim;

nntrainer/layers/loss/loss_layer.h

Lines changed: 0 additions & 13 deletions
@@ -27,11 +27,6 @@ namespace nntrainer {
  */
 class LossLayer : public Layer {
 public:
-  /**
-   * @brief Constructor of Loss Layer
-   */
-  LossLayer();
-
   /**
    * @brief Destructor of Loss Layer
    */
@@ -54,19 +49,11 @@ class LossLayer : public Layer {
 
   bool supportInPlace() const override {return is_inplace;}
 
-  /**
-   * @brief Set loss scale factor
-   */
-  virtual void setLossScale(float scale) override { loss_scale = scale; }
-
-private:
   /**
    * @copydoc Layer::requireLabel()
    */
   bool requireLabel() const override { return true; }
 
-  float loss_scale; /**< loss scale factor */
-
 protected:
   /**
    * @brief update loss

nntrainer/layers/loss/meson.build

Lines changed: 1 addition & 3 deletions
@@ -7,9 +7,7 @@ loss_layer_sources = [
   'constant_derivative_loss_layer.cpp'
 ]
 
-loss_layer_headers = [
-  'loss_layer.h'
-]
+loss_layer_headers = []
 
 loss_layer_deps = []
 