Commit 9468bbc

[Mixed] Mixed Precision Layer update
This PR updates the mixed precision layer.
- integrate nnstreamer#2568 & nnstreamer#2455
- more tests will be added

**Self evaluation:**
1. Build test: [X]Passed [ ]Failed [ ]Skipped
2. Run test: [X]Passed [ ]Failed [ ]Skipped

Signed-off-by: Donghak PARK <donghak.park@samsung.com>
1 parent 26b75ad commit 9468bbc

12 files changed: +37 -315 lines
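
The code removed in this commit implemented two pieces of mixed-precision plumbing: FP32 "master" copies of low-precision weights (getWeightMaster, getWeightOptMasterVar) and a loss-scale factor that loss layers multiplied into their derivative (loss_scale, applyLossScale). As background, here is a minimal self-contained sketch of the loss-scaling arithmetic; the names, values, and stand-in variables are illustrative assumptions, not nntrainer API:

```cpp
// Illustrative sketch of static loss scaling in mixed precision training.
// Not nntrainer code: loss_scale, d_loss, and the stand-ins below only mirror
// what the removed loss_scale / applyLossScale() plumbing expressed.
#include <iostream>

int main() {
  const float loss_scale = 1024.0f; // assumed scale factor
  float d_loss = 3.0e-8f;           // a tiny loss derivative that would
                                    // underflow if cast straight to FP16

  float scaled = d_loss * loss_scale;     // what applyLossScale() did before backprop
  float scaled_grad = scaled;             // stand-in for the backward pass result
  float grad = scaled_grad / loss_scale;  // unscale before the optimizer step

  std::cout << "recovered gradient: " << grad << '\n'; // prints 3e-08
  return 0;
}
```

Scaling keeps small FP16 gradients above the underflow threshold during backpropagation; dividing by the same factor before the optimizer step restores their true magnitude.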

nntrainer/layers/layer_context.cpp

Lines changed: 0 additions & 51 deletions
@@ -156,16 +156,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
   return weights[idx]->getVariableRef();
 }
 
-/**
- * @brief Get the Weight tensor object
- *
- * @param idx Identifier of the weight
- * @return Tensor& Reference to the weight tensor
- */
-Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
-  return weights[idx]->getVariableMasterRef();
-}
-
 /**
  * @brief Get the Weight Gradient tensor object
  *
@@ -204,18 +194,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   return weights[idx]->getOptimizerVariableRef(jdx);
 }
 
-/**
- * @brief Get the Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @param jdx Identifier of the optimizer variables
- * @return Tensor& Reference to the weight optimizer variable tensor
- */
-Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
-                                               unsigned int jdx) const {
-  return weights[idx]->getOptimizerMasterVariableRef(jdx);
-}
-
 /**
  * @brief Get the Number of Weight Optimizer Variable tensor object
  *
@@ -226,16 +204,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
   return weights[idx]->getNumOptVariable();
 }
 
-/**
- * @brief Get the Number of Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @return int Number of the weight optimizer variable
- */
-unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
-  return weights[idx]->getNumOptMasterVariable();
-}
-
 /**
  * @brief Get regularization loss for the weight
  *
@@ -375,25 +343,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
   return getInputGrad(idx);
 }
 
-bool RunLayerContext::validateDerivatives() {
-  auto num_in = getNumInputs();
-  auto num_out = getNumOutputs();
-
-  for (unsigned int i = 0; i < num_in; ++i) {
-    auto deriv = getIncomingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  for (unsigned int i = 0; i < num_out; ++i) {
-    auto deriv = getOutgoingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  return true;
-}
-
 /**
  * @brief Get the Tensor object
  *
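
The removed RunLayerContext::validateDerivatives() walked every incoming and outgoing derivative and returned false if checkDataValidation() failed; this is the kind of hook a dynamic loss-scaling loop uses to detect FP16 overflow. A rough sketch of that control flow, under the assumption that "invalid" means NaN/Inf — every name below is hypothetical, not nntrainer API:

```cpp
#include <cmath>
#include <iostream>
#include <limits>
#include <vector>

// Hypothetical stand-in for a derivative validity check such as
// Tensor::checkDataValidation(): true only if every value is finite.
bool all_finite(const std::vector<float> &deriv) {
  for (float v : deriv)
    if (!std::isfinite(v))
      return false;
  return true;
}

int main() {
  std::vector<float> deriv = {0.5f, -1.25f,
                              std::numeric_limits<float>::infinity()};
  float loss_scale = 1024.0f;

  // Sketch of how a dynamic loss-scaling step might react (illustrative only):
  if (!all_finite(deriv)) {
    loss_scale *= 0.5f; // overflow detected: shrink the scale, skip the update
    std::cout << "skipped step, new loss scale: " << loss_scale << '\n';
  } else {
    // ... unscale the gradients by loss_scale and apply the optimizer step ...
  }
  return 0;
}
```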

nntrainer/layers/layer_context.h

Lines changed: 0 additions & 38 deletions
@@ -453,14 +453,6 @@ class RunLayerContext {
    */
   Tensor &getWeight(unsigned int idx) const;
 
-  /**
-   * @brief Get the Weight master tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return Tensor& Reference to the weight tensor
-   */
-  Tensor *getWeightMaster(unsigned int idx) const;
-
   /**
    * @brief Get the Weight Gradient tensor object
    *
@@ -488,15 +480,6 @@
    */
   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;
 
-  /**
-   * @brief Get the Weight Optimizer Master Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer master variable
-   * @return Tensor& Reference to the weight optimizer tensor
-   */
-  Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;
-
   /**
    * @brief Get the Weight name
    *
@@ -607,11 +590,6 @@
    */
   Tensor &getOutgoingDerivative(unsigned int idx);
 
-  /**
-   * @brief validate input/output derivatives of the layer
-   */
-  bool validateDerivatives();
-
   /**
    * @brief Get the Tensor object
    *
@@ -727,29 +705,13 @@
    */
   unsigned int getNumWeightOptVar(unsigned int idx) const;
 
-  /**
-   * @brief Get the Number of Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return unsigned int Number of the weight optimizer variable
-   */
-  unsigned int getNumWeightOptMasterVar(unsigned int idx) const;
-
   /**
    * @brief Get the number of requested tensors objects
    *
    * @return unsigned int number of requested tensors
    */
   unsigned int getNumTensors() const { return tensors.size(); }
 
-  /**
-   * @brief Set the Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer variable
-   */
-  void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);
-
   /**
    * @brief Set the batch for the run context
    *

nntrainer/layers/layer_node.cpp

Lines changed: 4 additions & 23 deletions
@@ -16,7 +16,6 @@
 #include <cmath>
 #include <iterator>
 #include <stdexcept>
-#include <tuple>
 #include <utility>
 
 #include <activation_layer.h>
@@ -466,24 +465,16 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      if (run_context->isGradientLastAccess(i) && getTrainable()) {
        /// @note read optimizer variables
-       auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
        for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-         if (num_w_opt_m > 0)
-           run_context->getWeightOptMasterVar(i, j).read(file);
-         else
-           run_context->getWeightOptVar(i, j).read(file);
+         run_context->getWeightOptVar(i, j).read(file);
        }
      }
    }
  } else {
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      /// @note shared weights are only be read at the first acecss
      if (run_context->isGradientLastAccess(i)) {
-       auto w = run_context->getWeightMaster(i);
-       if (w)
-         w->read(file);
-       else
-         run_context->getWeight(i).read(file);
+       run_context->getWeight(i).read(file);
      }
    }
  }
@@ -498,13 +489,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
      if (run_context->isGradientLastAccess(i) && getTrainable()) {
        // @note save optimizer variables
        if (run_context->weightHasGradient(i)) {
-         auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
          for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
               ++j) {
-           if (num_w_opt_m > 0)
-             run_context->getWeightOptMasterVar(i, j).save(file);
-           else
-             run_context->getWeightOptVar(i, j).save(file);
+           run_context->getWeightOptVar(i, j).save(file);
          }
        }
      }
@@ -513,13 +500,7 @@
    // @note shared weights are only be saved at the first access
    for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
      if (run_context->isGradientLastAccess(i)) {
-       if (run_context->getNumWeights()) {
-         auto w = run_context->getWeightMaster(i);
-         if (w)
-           w->save(file);
-         else
-           run_context->getWeight(i).save(file);
-       }
+       run_context->getWeight(i).save(file);
      }
    }
  }

nntrainer/layers/layer_node.h

Lines changed: 0 additions & 5 deletions
@@ -899,11 +899,6 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
    */
   bool needsCalcGradient() { return needs_calc_gradient; }
 
-  /**
-   * @brief Set loss scale factor
-   */
-  void setLossScale(float scale) { layer->setLossScale(scale); }
-
 private:
   /**
    * @brief Get the Input Layers object

nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp

Lines changed: 0 additions & 3 deletions
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   y.apply<float>(ActiFunc::sigmoid<float>, ret_derivative);
-
-  applyLossScale(ret_derivative);
-
   ret_derivative.subtract_i(y2);
   if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "

nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp

Lines changed: 7 additions & 29 deletions
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   // fill the output
-  auto out_type = hidden_.getDataType();
-  if (out_type == ml::train::TensorDim::DataType::FP32) {
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
-    }
+  auto dataType = y.getDataType();
+  if (dataType == ml::train::TensorDim::DataType::FP32) {
+    hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
       // update the loss value
       LossLayer::updateLoss(context, l);
     }
-  } else if (out_type == ml::train::TensorDim::DataType::FP16) {
+  } else if (dataType == ml::train::TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
-    }
+    hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
 
     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
 
   auto dataType = y.getDataType();
-
-  Tensor ret(y.getDim());
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   /// operation
   // TODO: verify y and ret_derivative must not be same as loss layer is not
   // working in-place
-  if (ret.getDataType() != y2.getDataType()) {
-    ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
-  } else {
-    ret.subtract(y2, ret_derivative);
-  }
-
-  /**
-   * loss scale is applied for mixed precision
-   * every loss layers need to specify this applying code.
-   */
-  applyLossScale(ret_derivative);
-
+  ret.subtract(y2, ret_derivative);
   if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
                              "Error when calculating loss");

nntrainer/layers/loss/loss_layer.cpp

Lines changed: 1 addition & 4 deletions
@@ -15,17 +15,14 @@
 #include <loss_layer.h>
 
 namespace nntrainer {
-
-LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}
-
 void LossLayer::finalize(InitLayerContext &context) {
   std::vector<TensorDim> input_dim = context.getInputDimensions();
   std::vector<TensorDim> output_dim = input_dim;
   for (auto &d : output_dim)
     d.setDataType(
       str_converter<enum_class_prop_tag,
                     nntrainer::TensorDataTypeInfo>::from_string("FP32"));
-
+
   context.setOutputDimensions(output_dim);
 }
 

nntrainer/layers/loss/loss_layer.h

Lines changed: 0 additions & 21 deletions
@@ -27,11 +27,6 @@ namespace nntrainer {
  */
 class LossLayer : public Layer {
 public:
-  /**
-   * @brief Constructor of Loss Layer
-   */
-  LossLayer();
-
   /**
    * @brief Destructor of Loss Layer
    */
@@ -52,19 +47,11 @@
    */
   virtual bool supportBackwarding() const override { return true; }
 
-  /**
-   * @brief Set loss scale factor
-   */
-  virtual void setLossScale(float scale) override { loss_scale = scale; }
-
-private:
   /**
    * @copydoc Layer::requireLabel()
    */
   bool requireLabel() const override { return true; }
 
-  float loss_scale; /**< loss scale factor */
-
 protected:
   /**
    * @brief update loss
@@ -73,14 +60,6 @@
    */
   void updateLoss(RunLayerContext &context, const Tensor &l);
 
-  /**
-   * @brief apply loss scale
-   */
-  void applyLossScale(Tensor &derivative) {
-    if (loss_scale != 0.0f)
-      derivative.multiply_i(loss_scale);
-  }
-
   Tensor
     l; /**< loss tensor to store intermediate value to calculate loss value */
 };
