diff --git a/nntrainer/layers/layer_context.cpp b/nntrainer/layers/layer_context.cpp
index 648eae30ee..5862e6af14 100644
--- a/nntrainer/layers/layer_context.cpp
+++ b/nntrainer/layers/layer_context.cpp
@@ -157,16 +157,6 @@ Tensor &RunLayerContext::getWeight(unsigned int idx) const {
   return weights[idx]->getVariableRef();
 }

-/**
- * @brief Get the Weight tensor object
- *
- * @param idx Identifier of the weight
- * @return Tensor& Reference to the weight tensor
- */
-Tensor *RunLayerContext::getWeightMaster(unsigned int idx) const {
-  return weights[idx]->getVariableMasterRef();
-}
-
 /**
  * @brief Get the Weight Gradient tensor object
  *
@@ -205,18 +195,6 @@ Tensor &RunLayerContext::getWeightOptVar(unsigned int idx,
   return weights[idx]->getOptimizerVariableRef(jdx);
 }

-/**
- * @brief Get the Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @param jdx Identifier of the optimizer variables
- * @return Tensor& Reference to the weight optimizer variable tensor
- */
-Tensor &RunLayerContext::getWeightOptMasterVar(unsigned int idx,
-                                               unsigned int jdx) const {
-  return weights[idx]->getOptimizerMasterVariableRef(jdx);
-}
-
 /**
  * @brief Get the Number of Weight Optimizer Variable tensor object
  *
@@ -227,16 +205,6 @@ unsigned int RunLayerContext::getNumWeightOptVar(unsigned int idx) const {
   return weights[idx]->getNumOptVariable();
 }

-/**
- * @brief Get the Number of Weight Optimizer Variable tensor object
- *
- * @param idx Identifier of the weight
- * @return int Number of the weight optimizer variable
- */
-unsigned int RunLayerContext::getNumWeightOptMasterVar(unsigned int idx) const {
-  return weights[idx]->getNumOptMasterVariable();
-}
-
 /**
  * @brief Get regularization loss for the weight
  *
@@ -376,25 +344,6 @@ Tensor &RunLayerContext::getOutgoingDerivative(unsigned int idx) {
   return getInputGrad(idx);
 }

-bool RunLayerContext::validateDerivatives() {
-  auto num_in = getNumInputs();
-  auto num_out = getNumOutputs();
-
-  for (unsigned int i = 0; i < num_in; ++i) {
-    auto deriv = getIncomingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  for (unsigned int i = 0; i < num_out; ++i) {
-    auto deriv = getOutgoingDerivative(i);
-    if (deriv.checkDataValidation(false) == false)
-      return false;
-  }
-
-  return true;
-}
-
 /**
  * @brief Get the Tensor object
  *
diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h
index 3e2e3d0339..c68c42f11d 100644
--- a/nntrainer/layers/layer_context.h
+++ b/nntrainer/layers/layer_context.h
@@ -474,14 +474,6 @@ class RunLayerContext {
    */
   Tensor &getWeight(unsigned int idx) const;

-  /**
-   * @brief Get the Weight master tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return Tensor& Reference to the weight tensor
-   */
-  Tensor *getWeightMaster(unsigned int idx) const;
-
   /**
    * @brief Get the Weight Gradient tensor object
    *
@@ -509,15 +501,6 @@ class RunLayerContext {
    */
   Tensor &getWeightOptVar(unsigned int idx, unsigned int jdx) const;

-  /**
-   * @brief Get the Weight Optimizer Master Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer master variable
-   * @return Tensor& Reference to the weight optimizer tensor
-   */
-  Tensor &getWeightOptMasterVar(unsigned int idx, unsigned int jdx) const;
-
   /**
    * @brief Get the Weight name
    *
@@ -628,11 +611,6 @@ class RunLayerContext {
    */
   Tensor &getOutgoingDerivative(unsigned int idx);

-  /**
-   * @brief validate input/output derivatives of the layer
-   */
-  bool validateDerivatives();
-
   /**
    * @brief Get the Tensor object
    *
@@ -762,14 +740,6 @@ class RunLayerContext {
    */
   unsigned int getNumWeightOptVar(unsigned int idx) const;

-  /**
-   * @brief Get the Number of Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @return unsigned int Number of the weight optimizer variable
-   */
-  unsigned int getNumWeightOptMasterVar(unsigned int idx) const;
-
   /**
    * @brief Get the number of requested tensors objects
    *
@@ -777,14 +747,6 @@
   unsigned int getNumTensors() const { return tensors.size(); }

-  /**
-   * @brief Set the Weight Optimizer Variable tensor object
-   *
-   * @param idx Identifier of the weight
-   * @param jdx Identifier of the weight optimizer variable
-   */
-  void setWeightOptVars(unsigned int idx, std::vector<Tensor *> opts);
-
   /**
    * @brief Set the batch for the run context
    *
diff --git a/nntrainer/layers/layer_node.cpp b/nntrainer/layers/layer_node.cpp
index 6eb4b279de..f41752a4d8 100644
--- a/nntrainer/layers/layer_node.cpp
+++ b/nntrainer/layers/layer_node.cpp
@@ -16,7 +16,6 @@
 #include
 #include
 #include
-#include
 #include
 #include
@@ -466,12 +465,8 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
         /// @note read optimizer variables
-        auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
         for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i); ++j) {
-          if (num_w_opt_m > 0)
-            run_context->getWeightOptMasterVar(i, j).read(file);
-          else
-            run_context->getWeightOptVar(i, j).read(file);
+          run_context->getWeightOptVar(i, j).read(file);
         }
       }
     }
@@ -479,11 +474,7 @@ void LayerNode::read(std::ifstream &file, bool opt_var) {
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       /// @note shared weights are only be read at the first acecss
       if (run_context->isGradientLastAccess(i)) {
-        auto w = run_context->getWeightMaster(i);
-        if (w)
-          w->read(file);
-        else
-          run_context->getWeight(i).read(file);
+        run_context->getWeight(i).read(file);
       }
     }
   }
@@ -498,13 +489,9 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
       if (run_context->isGradientLastAccess(i) && getTrainable()) {
         // @note save optimizer variables
         if (run_context->weightHasGradient(i)) {
-          auto num_w_opt_m = run_context->getNumWeightOptMasterVar(i);
           for (unsigned int j = 0; j < run_context->getNumWeightOptVar(i);
                ++j) {
-            if (num_w_opt_m > 0)
-              run_context->getWeightOptMasterVar(i, j).save(file);
-            else
-              run_context->getWeightOptVar(i, j).save(file);
+            run_context->getWeightOptVar(i, j).save(file);
           }
         }
       }
@@ -513,13 +500,7 @@ void LayerNode::save(std::ofstream &file, bool opt_var) const {
     // @note shared weights are only be saved at the first access
     for (unsigned int i = 0; i < run_context->getNumWeights(); ++i) {
       if (run_context->isGradientLastAccess(i)) {
-        if (run_context->getNumWeights()) {
-          auto w = run_context->getWeightMaster(i);
-          if (w)
-            w->save(file);
-          else
-            run_context->getWeight(i).save(file);
-        }
+        run_context->getWeight(i).save(file);
       }
     }
   }
diff --git a/nntrainer/layers/layer_node.h b/nntrainer/layers/layer_node.h
index 6be31f536e..3fd2d55b97 100644
--- a/nntrainer/layers/layer_node.h
+++ b/nntrainer/layers/layer_node.h
@@ -900,11 +900,6 @@ class LayerNode final : public ml::train::Layer, public GraphNode {
    */
   bool needsCalcGradient() { return needs_calc_gradient; }

-  /**
-   * @brief Set loss scale factor
-   */
-  void setLossScale(float scale) { layer->setLossScale(scale); }
-
 private:
   /**
    * @brief Get the Input Layers object
diff --git a/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp b/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
index feeff2b3d8..60ea113418 100644
--- a/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
+++ b/nntrainer/layers/loss/cross_entropy_sigmoid_loss_layer.cpp
@@ -61,9 +61,6 @@ void CrossEntropySigmoidLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   y.apply(ActiFunc::sigmoid<float>, ret_derivative);
-
-  applyLossScale(ret_derivative);
-
   ret_derivative.subtract_i(y2);
   if (ret_derivative.divide_i(ret_derivative.size()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySigmoidLossLayer::calcDerivative] "
diff --git a/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp b/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
index c181c60b9a..53854662ae 100644
--- a/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
+++ b/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -30,14 +30,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   // fill the output
-  auto out_type = hidden_.getDataType();
-  if (out_type == ml::train::TensorDim::DataType::FP32) {
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<float>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);
-    }
+  auto dataType = y.getDataType();
+  if (dataType == ml::train::TensorDim::DataType::FP32) {
+    hidden_ = y.apply(ActiFunc::softmax<float>, hidden_);

     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -48,14 +43,9 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
       // update the loss value
       LossLayer::updateLoss(context, l);
     }
-  } else if (out_type == ml::train::TensorDim::DataType::FP16) {
+  } else if (dataType == ml::train::TensorDim::DataType::FP16) {
 #ifdef ENABLE_FP16
-    if (y.getDataType() != out_type) {
-      Tensor y_ = y.clone(out_type);
-      hidden_ = y_.apply(ActiFunc::softmax<_FP16>, hidden_);
-    } else {
-      hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);
-    }
+    hidden_ = y.apply(ActiFunc::softmax<_FP16>, hidden_);

     if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
       Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
@@ -78,8 +68,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

   auto dataType = y.getDataType();
-
-  Tensor ret(y.getDim());
+  Tensor ret = Tensor("ret", y.getFormat(), y.getDataType());
   if (dataType == ml::train::TensorDim::DataType::FP32) {
     y.apply(ActiFunc::softmax<float>, ret);
   } else if (dataType == ml::train::TensorDim::DataType::FP16) {
@@ -94,18 +83,7 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
   /// operation
   // TODO: verify y and ret_derivative must not be same as loss layer is not
   // working in-place
-  if (ret.getDataType() != y2.getDataType()) {
-    ret.subtract(y2.clone(ret.getDataType()), ret_derivative);
-  } else {
-    ret.subtract(y2, ret_derivative);
-  }
-
-  /**
-   * loss scale is applied for mixed precision
-   * every loss layers need to specify this applying code.
-   */
-  applyLossScale(ret_derivative);
-
+  ret.subtract(y2, ret_derivative);
   if (ret_derivative.divide_i(ret.batch()) != ML_ERROR_NONE) {
     throw std::runtime_error("[CrossEntropySoftmaxLossLayer::calcDerivative] "
                              "Error when calculating loss");
diff --git a/nntrainer/layers/loss/loss_layer.cpp b/nntrainer/layers/loss/loss_layer.cpp
index 422037b9e9..ab2ccf8be2 100644
--- a/nntrainer/layers/loss/loss_layer.cpp
+++ b/nntrainer/layers/loss/loss_layer.cpp
@@ -15,9 +15,6 @@
 #include

 namespace nntrainer {
-
-LossLayer::LossLayer() : Layer(), loss_scale(0.0f) {}
-
 void LossLayer::finalize(InitLayerContext &context) {
   std::vector<TensorDim> input_dim = context.getInputDimensions();
   std::vector<TensorDim> output_dim = input_dim;
diff --git a/nntrainer/layers/loss/loss_layer.h b/nntrainer/layers/loss/loss_layer.h
index 84a1112864..581e9477a8 100644
--- a/nntrainer/layers/loss/loss_layer.h
+++ b/nntrainer/layers/loss/loss_layer.h
@@ -27,11 +27,6 @@ namespace nntrainer {
  */
 class LossLayer : public Layer {
 public:
-  /**
-   * @brief Constructor of Loss Layer
-   */
-  LossLayer();
-
   /**
    * @brief Destructor of Loss Layer
    */
@@ -52,19 +47,11 @@ class LossLayer : public Layer {
    */
   virtual bool supportBackwarding() const override { return true; }

-  /**
-   * @brief Set loss scale factor
-   */
-  virtual void setLossScale(float scale) override { loss_scale = scale; }
-
-private:
   /**
    * @copydoc Layer::requireLabel()
    */
   bool requireLabel() const override { return true; }

-  float loss_scale; /**< loss scale factor */
-
 protected:
   /**
    * @brief update loss
diff --git a/nntrainer/layers/loss/meson.build b/nntrainer/layers/loss/meson.build
index 8ec9928101..9fccd0290d 100644
--- a/nntrainer/layers/loss/meson.build
+++ b/nntrainer/layers/loss/meson.build
@@ -7,9 +7,7 @@ loss_layer_sources = [
   'constant_derivative_loss_layer.cpp'
 ]

-loss_layer_headers = [
-  'loss_layer.h'
-]
+loss_layer_headers = []

 loss_layer_deps = []
diff --git a/nntrainer/layers/loss/mse_loss_layer.cpp b/nntrainer/layers/loss/mse_loss_layer.cpp
index b6cf141a69..4edcf83281 100644
--- a/nntrainer/layers/loss/mse_loss_layer.cpp
+++ b/nntrainer/layers/loss/mse_loss_layer.cpp
@@ -10,13 +10,9 @@
  * @bug No known bugs except for NYI items
  *
  */
-
-#include "tensor.h"
 #include
 #include
-
 namespace nntrainer {
-
 static constexpr size_t SINGLE_INOUT_IDX = 0;

 void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
@@ -33,41 +29,19 @@ void MSELossLayer::forwarding(RunLayerContext &context, bool training) {
       .clone(ml::train::TensorDim::DataType::FP32);

   // hidden_ <- y2 - y;
-  auto out_type = hidden_.getDataType();
-  if (out_type != y_.getDataType()) {
-    Tensor y = y_.clone(out_type);
-    if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
-      Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
-      y2.subtract(y, hidden_);
-
-      /** calculate sum of squares normalized by size */
-      float l2norm = hidden_.l2norm();
-      l2norm *= l2norm / hidden_.size();
-
-      /** wrap in tensor for update loss */
-      Tensor l = Tensor(TensorDim(1, 1, 1, 1), &l2norm);
-      LossLayer::updateLoss(context, l);
-    }
-    // fill the output
-    hidden_.fill(y);
-  } else {
-    if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
-      Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
-      y2.subtract(y_, hidden_);
-
-      /** calculate sum of squares normalized by size */
-      float l2norm = hidden_.l2norm();
-      l2norm *= l2norm / hidden_.size();
-
-      /** wrap in tensor for update loss */
-      Tensor l = Tensor(TensorDim(1, 1, 1, 1), &l2norm);
-      LossLayer::updateLoss(context, l);
-    }
-    // fill the output
-    hidden_.fill(y_);
+  if (context.isLabelAvailable(SINGLE_INOUT_IDX)) {
+    Tensor &y2 = context.getLabel(SINGLE_INOUT_IDX);
+    y2.subtract(y, hidden_);
+    /** calculate sum of squares normalized by size */
+    float l2norm = hidden_.l2norm();
+    l2norm *= l2norm / hidden_.size();
+    /** wrap in tensor for update loss */
+    Tensor l = Tensor(TensorDim(1, 1, 1, 1), &l2norm);
+    LossLayer::updateLoss(context, l);
   }
+  // fill the output
+  hidden_.fill(y);
 }
-
 void MSELossLayer::calcDerivative(RunLayerContext &context) {
   Tensor empty_tensor;
   Tensor &ret_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
@@ -81,39 +55,13 @@ void MSELossLayer::calcDerivative(RunLayerContext &context) {
   y2 = y2_.clone(ret_derivative.getDataType());

   Tensor &y = context.getInput(SINGLE_INOUT_IDX);
-
-  const auto &in_type = y.getDataType();
-  if (in_type != y2.getDataType()) {
-    Tensor y2_ = y2.clone(in_type);
-    y.subtract(y2_, ret_derivative);
-  } else {
-    y.subtract(y2, ret_derivative);
-  }
-
-  applyLossScale(ret_derivative);
-
+  y.subtract(y2, ret_derivative);
   float divider = ((float)y.size()) / 2;
-
-  /**
-   * ret_derivative may be eliminated by big divider with fp16 calculation.
-   * So, it calcuated with larger precision.
-   */
-  int ret;
-  if (ret_derivative.getDataType() != ml::train::TensorDim::DataType::FP32) {
-    Tensor ret_derivative_ =
-      ret_derivative.clone(ml::train::TensorDim::DataType::FP32);
-    ret = ret_derivative_.divide_i(divider);
-    ret_derivative.copyData(ret_derivative_);
-  } else {
-    ret = ret_derivative.divide_i(divider);
-  }
-
-  if (ret != ML_ERROR_NONE) {
+  if (ret_derivative.divide_i(divider) != ML_ERROR_NONE) {
     throw std::runtime_error(
       "[MSELossLayer::calcDerivative] Error when calculating loss");
   }

   LossLayer::applyLossScale(context, ret_derivative);
 }
-
 } // namespace nntrainer
diff --git a/nntrainer/optimizers/adam.cpp b/nntrainer/optimizers/adam.cpp
index e2d611b7fd..530e7fdf31 100644
--- a/nntrainer/optimizers/adam.cpp
+++ b/nntrainer/optimizers/adam.cpp
@@ -95,60 +95,26 @@ void Adam::applyGradient(RunOptimizerContext &context) {
   Tensor &wm = context.getOptimizerVariable(AdamParams::wm);
   Tensor &wv = context.getOptimizerVariable(AdamParams::wv);

-  if (context.getNumOptMasterVariable() != 0) {
-    Tensor &wm_m = context.getOptimizerMasterVariable(AdamParams::wm);
-    Tensor &wv_m = context.getOptimizerMasterVariable(AdamParams::wv);
-    Tensor x_grad_ = x_grad.clone(wm_m.getDataType());
+  wm.multiply_i(beta1);
+  wm.add_i(x_grad, 1.0f - beta1);

-    wm_m.multiply_i(beta1);
-    wm_m.add_i(x_grad_, 1.0f - beta1);
-
-    wv_m.multiply_i(beta2);
-    wv_m.add_i(x_grad_.multiply(x_grad_), 1.0f - beta2);
-
-    wm.copyData(wm_m);
-    wv.copyData(wv_m);
-  } else {
-    wm.multiply_i(beta1);
-    wm.add_i(x_grad, 1.0f - beta1);
-
-    wv.multiply_i(beta2);
-    wv.add_i(x_grad.multiply(x_grad), 1.0f - beta2);
-  }
+  wv.multiply_i(beta2);
+  wv.add_i(x_grad.multiply(x_grad), 1.0f - beta2);

   if (torch_ref) {
-    if (x_grad.getDataType() == ml::train::TensorDim::DataType::FP32) {
-      Tensor denom = wv.apply(sqrtFloat);
-      denom.divide_i(sqrtFloat(biasCorrection2));
-      denom.add_i(epsilon);
-      wm.divide(denom, x_grad);
-#ifdef ENABLE_FP16
-    } else if (x_grad.getDataType() == ml::train::TensorDim::DataType::FP16) {
-      Tensor denom = wv.apply<_FP16>(sqrtFloat<_FP16>);
-      denom.divide_i(sqrtFloat(biasCorrection2));
-      denom.add_i(epsilon);
-      wm.divide(denom, x_grad);
-#endif
-    } else {
-      throw std::runtime_error("Not supported datatype");
-    }
+    Tensor denom = wv.apply(sqrtFloat);
+    denom.divide_i(sqrtFloat(biasCorrection2));
+    denom.add_i(epsilon);
+    wm.divide(denom, x_grad);
     context.applyGradient(context.getLearningRate() / biasCorrection1, x_grad);
   } else {
-    auto sqrtEps = [epsilon](T f) -> T {
-      return 1 / (static_cast<T>(sqrtDouble(f)) + static_cast<T>(epsilon));
+    std::function<double(double)> sqrtEps = [epsilon](double f) {
+      return 1 / (sqrtDouble(f) + epsilon);
     };
-    if (x_grad.getDataType() == ml::train::TensorDim::DataType::FP32)
-      x_grad = wv.apply(sqrtEps, x_grad);
-#ifdef ENABLE_FP16
-    else if (x_grad.getDataType() == ml::train::TensorDim::DataType::FP16)
-      x_grad = wv.apply<_FP16>(sqrtEps, x_grad);
-#endif
-    else
-      throw std::runtime_error("Not supported datatype");
-
+    x_grad = wv.apply(sqrtEps, x_grad);
     x_grad.multiply_i(wm);
     context.applyGradient(
       getUpdatedLearningRate(context.getIteration(), context.getLearningRate()),
diff --git a/nntrainer/optimizers/optimizer_context.cpp b/nntrainer/optimizers/optimizer_context.cpp
index f0817c2a75..f70ab773a9 100644
--- a/nntrainer/optimizers/optimizer_context.cpp
+++ b/nntrainer/optimizers/optimizer_context.cpp
@@ -36,21 +36,6 @@ Tensor &RunOptimizerContext::getOptimizerVariable(unsigned int idx) const {
   return weight->getOptimizerVariableRef(idx);
 }

-/**
- * @brief Get the optimizer variable associated to this weight
- */
-Tensor &
-RunOptimizerContext::getOptimizerMasterVariable(unsigned int idx) const {
-  return weight->getOptimizerMasterVariableRef(idx);
-}
-
-/**
- * @brief Get number of optimizer master variable
- */
-int RunOptimizerContext::getNumOptMasterVariable() {
-  return weight->getNumOptMasterVariable();
-}
-
 /**
  * @brief Apply the gradient with the given learning rate
  */
diff --git a/nntrainer/optimizers/optimizer_context.h b/nntrainer/optimizers/optimizer_context.h
index ca30c36b94..6b4b983e35 100644
--- a/nntrainer/optimizers/optimizer_context.h
+++ b/nntrainer/optimizers/optimizer_context.h
@@ -59,21 +59,6 @@ class RunOptimizerContext {
    */
   Tensor &getOptimizerVariable(unsigned int idx) const;

-  /**
-   * @brief Get the optimizer Master variable associated to this weight
-   *
-   * @param idx Identifier of the associated weight
-   * @return Tensor& Reference to the optimizer variable
-   */
-  Tensor &getOptimizerMasterVariable(unsigned int idx) const;
-
-  /**
-   * @brief Get number of the optimizer Master variable
-   *
-   * @return number of optimizer master variable
-   */
-  int getNumOptMasterVariable();
-
   /**
    * @brief Check if run context is set and is ready to use
   *