From ce33c3153a38a53a4cca22ab7a6be7e6365f1977 Mon Sep 17 00:00:00 2001
From: Jiho Chu
Date: Fri, 2 Feb 2024 15:27:11 +0900
Subject: [PATCH] [Layer] Modify Layer for mixed type

fc, conv2d, softmax, and pooling layers are modified for mixed type.
In/Out tensors are supported as 32- or 16-bit float types, and so are
the weights.

Signed-off-by: Jiho Chu
---
 nntrainer/layers/common_properties.h          |  4 +-
 nntrainer/layers/conv2d_layer.cpp             | 65 +++++++++++++++++--
 nntrainer/layers/fc_layer.cpp                 | 49 ++++++++++++--
 .../loss/cross_entropy_softmax_loss_layer.cpp |  5 +-
 nntrainer/layers/pooling2d_layer.cpp          | 43 ++++++++++--
 5 files changed, 143 insertions(+), 23 deletions(-)

diff --git a/nntrainer/layers/common_properties.h b/nntrainer/layers/common_properties.h
index cac69b960f..f3b5702a5d 100644
--- a/nntrainer/layers/common_properties.h
+++ b/nntrainer/layers/common_properties.h
@@ -1398,10 +1398,12 @@ class PropsUserData final : public Property<void *> {
 /**
  * @brief LossScale property, loss is scaled by this value
  *
+ * @note It is used to enable/disable loss scale;
+ * a value of '0.0f' means it is disabled.
  */
 class LossScale : public nntrainer::Property<float> {
 public:
-  LossScale(float value = 1.0f);
+  LossScale(float value = 0.0f);
   static constexpr const char *key = "loss_scale"; /**< unique key to access */
   using prop_tag = float_prop_tag;                 /**< property type */
 };
diff --git a/nntrainer/layers/conv2d_layer.cpp b/nntrainer/layers/conv2d_layer.cpp
index ff44afeaf6..f98df2276d 100644
--- a/nntrainer/layers/conv2d_layer.cpp
+++ b/nntrainer/layers/conv2d_layer.cpp
@@ -118,10 +118,16 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
   if (image.getDataType() == nntrainer::Tdatatype::FP32) {
     float val;
     apply_data(&val);
-  } else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
     _FP16 val;
     apply_data(&val);
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }

 /**
@@ -256,10 +262,16 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
   if (out.getDataType() == nntrainer::Tdatatype::FP32) {
     float *out_data = out.getData();
     apply_data(out_data);
-  } else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
     _FP16 *out_data = out.getData<_FP16>();
     apply_data(out_data);
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }

 } // namespace
@@ -358,7 +370,14 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {

   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
-  Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
+  Tensor &filter_kernel_ = context.getWeight(wt_idx[ConvParams::weight]);
+
+  Tensor filter_kernel;
+  if (filter_kernel_.getDataType() != input_.getDataType()) {
+    filter_kernel = filter_kernel_.clone(input_.getDataType());
+  } else {
+    filter_kernel = filter_kernel_;
+  }

   /** Calculate Convolution 2D
    *
@@ -434,12 +453,22 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
   filter_kernel.reshape(filter_dim);
   if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
       disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &bias_kernel = context.getWeight(wt_idx[ConvParams::bias]);
+    Tensor &bias_kernel_ = context.getWeight(wt_idx[ConvParams::bias]);
+    Tensor bias_kernel;
+    if (bias_kernel_.getDataType() != hidden_.getDataType()) {
+      bias_kernel = bias_kernel_.clone(hidden_.getDataType());
+    } else {
+      bias_kernel = bias_kernel_;
+    }
+
     status = hidden_.add_i(bias_kernel);
     if (status != ML_ERROR_NONE) {
       throw std::invalid_argument("[Conv2D] adding bias failed");
     }
   }
+  // export_to_file(input_, std::string("debug/var/") + input_.getName() +
+  // ".var"); export_to_file(hidden_, std::string("debug/var/") +
+  // hidden_.getName() + ".var");
 }

 void Conv2DLayer::calcDerivative(RunLayerContext &context) {
@@ -450,7 +479,14 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {

   const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &input_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
-  Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
+  Tensor &filter_kernel_ = context.getWeight(wt_idx[ConvParams::weight]);
+
+  Tensor filter_kernel;
+  if (filter_kernel_.getDataType() != input_derivative.getDataType()) {
+    filter_kernel = filter_kernel_.clone(input_derivative.getDataType());
+  } else {
+    filter_kernel = filter_kernel_;
+  }

   TensorDim filter_dim = filter_kernel.getDim();
   TensorDim filter_dim_squeezed{filter_kernel.batch(),
@@ -498,7 +534,14 @@ void Conv2DLayer::calcGradient(RunLayerContext &context) {
   const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

-  Tensor &delK = context.getWeightGrad(wt_idx[ConvParams::weight]);
+  Tensor &delK_ = context.getWeightGrad(wt_idx[ConvParams::weight]);
+  Tensor delK;
+  if (delK_.getDataType() != input_.getDataType()) {
+    delK = delK_.clone(input_.getDataType());
+  } else {
+    delK = delK_;
+  }
+
   delK.setZero();

   TensorDim filter_dim = delK.getDim();
@@ -582,9 +625,17 @@ void Conv2DLayer::calcGradient(RunLayerContext &context) {
   delK.reshape(filter_dim);
   if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
       disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &delBias = context.getWeightGrad(wt_idx[ConvParams::bias]);
+    Tensor &delBias_ = context.getWeightGrad(wt_idx[ConvParams::bias]);
+    Tensor delBias;
+    if (delBias_.getDataType() != derivative.getDataType()) {
+      delBias = delBias_.clone(derivative.getDataType());
+    } else {
+      delBias = delBias_;
+    }
     derivative.sum({0, 2, 3}, delBias);
+    delBias_.copyData(delBias);
   }
+  delK_.copyData(delK);
 }

 void Conv2DLayer::exportTo(Exporter &exporter,
diff --git a/nntrainer/layers/fc_layer.cpp b/nntrainer/layers/fc_layer.cpp
index 93610e1fcc..25ff651ca8 100644
--- a/nntrainer/layers/fc_layer.cpp
+++ b/nntrainer/layers/fc_layer.cpp
@@ -39,8 +39,7 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
 enum FCParams { weight, bias };

 FullyConnectedLayer::FullyConnectedLayer() :
-  LayerImpl(),
-  fc_props(props::Unit()) {
+  LayerImpl(), fc_props(props::Unit()) {
   weight_idx.fill(std::numeric_limits<unsigned>::max());
 }

@@ -116,10 +115,17 @@ void FullyConnectedLayer::setProperty(const std::vector<std::string> &values) {
 }

 void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
-  Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
+  Tensor &weight_ = context.getWeight(weight_idx[FCParams::weight]);
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

+  Tensor weight;
+  if (weight_.getDataType() != input_.getDataType()) {
+    weight = weight_.clone(input_.getDataType());
+  } else {
+    weight = weight_;
+  }
+
   if (weight.getDataType() == nntrainer::Tdatatype::QINT4 ||
       weight.getDataType() == nntrainer::Tdatatype::QINT8) {
     Tdatatype dtype = input_.getDataType();
@@ -141,7 +147,13 @@ void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
   if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
       disable_bias.empty() || disable_bias.get() == false) {
     Tensor &bias = context.getWeight(weight_idx[FCParams::bias]);
-    hidden_.add_i(bias);
+    Tensor b;
+    if (bias.getDataType() != input_.getDataType()) {
+      b = bias.clone(input_.getDataType());
+    } else {
+      b = bias;
+    }
+    hidden_.add_i(b);
   }
 }

@@ -187,23 +199,44 @@ void FullyConnectedLayer::incremental_forwarding(RunLayerContext &context,
 }

 void FullyConnectedLayer::calcDerivative(RunLayerContext &context) {
-  Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
+  Tensor &weight_ = context.getWeight(weight_idx[FCParams::weight]);

   const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);

+  Tensor weight;
+  if (weight_.getDataType() != derivative_.getDataType()) {
+    weight = weight_.clone(derivative_.getDataType());
+  } else {
+    weight = weight_;
+  }
+
   ret_.dot_deriv_wrt_1(weight, derivative_, false, false);
 }

 void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
-  Tensor &djdw = context.getWeightGrad(weight_idx[FCParams::weight]);
+  Tensor &djdw_ = context.getWeightGrad(weight_idx[FCParams::weight]);

   const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
   Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

+  Tensor djdw;
+  if (djdw_.getDataType() != derivative_.getDataType()) {
+    djdw = djdw_.clone(derivative_.getDataType());
+  } else {
+    djdw = djdw_;
+  }
+
   if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
       disable_bias.empty() || disable_bias.get() == false) {
-    Tensor &djdb = context.getWeightGrad(weight_idx[FCParams::bias]);
+    Tensor &djdb_ = context.getWeightGrad(weight_idx[FCParams::bias]);
+
+    Tensor djdb;
+    if (djdb_.getDataType() != input_.getDataType()) {
+      djdb = djdb_.clone(input_.getDataType());
+    } else {
+      djdb = djdb_;
+    }

     if (context.isGradientFirstAccess(weight_idx[FCParams::bias])) {
       derivative_.sum({0, 1, 2}, djdb);
@@ -212,11 +245,13 @@ void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
       Tensor t = derivative_.sum({0, 1, 2});
       djdb.add_i(t);
     }
+    djdb_.copyData(djdb);
   }

   input_.dot_deriv_wrt_2(
     djdw, derivative_, false, false,
     !context.isGradientFirstAccess(weight_idx[FCParams::weight]));
+  djdw_.copyData(djdw);
 }

 } /* namespace nntrainer */
diff --git a/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp b/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
index 11d4567709..df3c256531 100644
--- a/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
+++ b/nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
@@ -29,6 +29,8 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
   Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
   Tensor &y = context.getInput(SINGLE_INOUT_IDX);

+  // export_to_file(y, std::string("debug/var/") + y.getName() + ".var");
+
   // fill the output
   auto out_type = hidden_.getDataType();
   if (out_type == ml::train::TensorDim::DataType::FP32) {
@@ -93,7 +95,8 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
                     "Error when calculating loss");
   }

-  ret_derivative.multiply_i(loss_scale);
+  if (loss_scale != 0.0f)
+    ret_derivative.multiply_i(loss_scale);
 }

 } // namespace nntrainer
diff --git a/nntrainer/layers/pooling2d_layer.cpp b/nntrainer/layers/pooling2d_layer.cpp
index 036a933c42..764885196c 100644
--- a/nntrainer/layers/pooling2d_layer.cpp
+++ b/nntrainer/layers/pooling2d_layer.cpp
@@ -155,6 +155,10 @@ void Pooling2DLayer::forwarding(RunLayerContext &context, bool training) {
   } else {
     forwarding_job(0, in_dim.batch(), 0, nullptr);
   }
+
+  // export_to_file(input_, std::string("debug/var/") + input_.getName() +
+  // ".var"); export_to_file(hidden_, std::string("debug/var/") +
+  // hidden_.getName() + ".var");
 }

 void Pooling2DLayer::calcDerivative(RunLayerContext &context) {
@@ -185,7 +189,7 @@ void Pooling2DLayer::calcDerivative(RunLayerContext &context) {
   unsigned int out_map_size = deriv.height() * deriv.width();
   unsigned int in_map_size = height * width;

-  auto apply_max = [&](T *result_data) {
+  auto apply_max = [&](T * result_data) {
     const int *iter = pool_helper.getData();
     const T *deriv_data = deriv.getData();
     for (unsigned int b = 0; b < batch; ++b) {
@@ -204,7 +208,7 @@ void Pooling2DLayer::calcDerivative(RunLayerContext &context) {
     }
   };

-  auto apply_average = [&](T *result_data) {
+  auto apply_average = [&](T * result_data) {
     int height_stride_end = height - p_height + pt;
     int width_stride_end = width - p_width + pl;
     const int *iter = pool_helper.getData();
@@ -236,7 +240,7 @@ void Pooling2DLayer::calcDerivative(RunLayerContext &context) {
     }
   };

-  auto apply_global_max = [&](T *result_data) {
+  auto apply_global_max = [&](T * result_data) {
     const T *deriv_data = deriv.getData();
     for (unsigned int b = 0; b < batch; b++) {
       for (unsigned int c = 0; c < channel; c++) {
@@ -258,21 +262,33 @@ void Pooling2DLayer::calcDerivative(RunLayerContext &context) {
   case props::PoolingTypeInfo::Enum::max:
     if (in_dim.getDataType() == ml::train::TensorDim::DataType::FP32)
       apply_max(result.getData());
+#ifdef ENABLE_FP16
     else if (in_dim.getDataType() == ml::train::TensorDim::DataType::FP16)
       apply_max(result.getData<_FP16>());
+#endif
+    else
+      throw std::runtime_error("Not supported datatype");
     break;
   case props::PoolingTypeInfo::Enum::global_average:
   case props::PoolingTypeInfo::Enum::average:
     if (in_dim.getDataType() == ml::train::TensorDim::DataType::FP32)
       apply_average(result.getData());
+#ifdef ENABLE_FP16
     else if (in_dim.getDataType() == ml::train::TensorDim::DataType::FP16)
       apply_average(result.getData<_FP16>());
+#endif
+    else
+      throw std::runtime_error("Not supported datatype");
     break;
   case props::PoolingTypeInfo::Enum::global_max:
     if (in_dim.getDataType() == ml::train::TensorDim::DataType::FP32)
       apply_global_max(result.getData());
+#ifdef ENABLE_FP16
     else if (in_dim.getDataType() == ml::train::TensorDim::DataType::FP16)
       apply_global_max(result.getData<_FP16>());
+#endif
+    else
+      throw std::runtime_error("Not supported datatype");
     break;
   default:
     throw std::runtime_error("Error: Unknown Pooling Type");
@@ -320,7 +336,9 @@ void Pooling2DLayer::pooling2d(Tensor &in, bool training, Tensor &output,
    * @return result value of pooling
    */
   PoolFunc<float>::Type pool_fn_fp32;
+#ifdef ENABLE_FP16
   PoolFunc<_FP16>::Type pool_fn_fp16;
+#endif

   unsigned int max_idx_count = 0;

@@ -355,9 +373,8 @@ void Pooling2DLayer::pooling2d(Tensor &in, bool training, Tensor &output,
     return max_val;
   };

-  auto pool_fn_global_max = [&, this](const T *in_data,
-                                      int channel_idx, int start_h,
-                                      int start_w) {
+  auto pool_fn_global_max = [&, this ](
+    const T *in_data, int channel_idx, int start_h, int start_w) {
     int end_h = start_h + patch_height;
     int end_w = start_w + patch_width;

@@ -412,16 +429,22 @@ void Pooling2DLayer::pooling2d(Tensor &in, bool training, Tensor &output,
   switch (pooling_type) {
   case props::PoolingTypeInfo::Enum::max:
     pool_fn_fp32 = pool_fn_max;
+#ifdef ENABLE_FP16
     pool_fn_fp16 = pool_fn_max;
+#endif
     break;
   case props::PoolingTypeInfo::Enum::global_max:
     pool_fn_fp32 = pool_fn_global_max;
+#ifdef ENABLE_FP16
     pool_fn_fp16 = pool_fn_global_max;
+#endif
     break;
   case props::PoolingTypeInfo::Enum::global_average:
   case props::PoolingTypeInfo::Enum::average:
     pool_fn_fp32 = pool_fn_average;
+#ifdef ENABLE_FP16
     pool_fn_fp16 = pool_fn_average;
+#endif
     break;
   case props::PoolingTypeInfo::Enum::unknown:
   default:
@@ -447,7 +470,9 @@ void Pooling2DLayer::pooling2d(Tensor &in, bool training, Tensor &output,
         }
       }
     }
-  } else if (in.getDataType() == ml::train::TensorDim::DataType::FP16) {
+  }
+#ifdef ENABLE_FP16
+  else if (in.getDataType() == ml::train::TensorDim::DataType::FP16) {
     const _FP16 *in_data = in.getData<_FP16>();
     _FP16 *out_data = output.getData<_FP16>();

@@ -466,6 +491,10 @@ void Pooling2DLayer::pooling2d(Tensor &in, bool training, Tensor &output,
       }
     }
   }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
+  }
 }

 void Pooling2DLayer::setBatch(RunLayerContext &context, unsigned int batch) {
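
Every layer in this change follows the same mixed-precision pattern: the stored weight or gradient keeps its own data type, it is cloned to the activation (or derivative) data type when the two differ, the computation runs on the clone, and the result is copied back into the stored tensor with copyData(). Below is a minimal, self-contained sketch of that pattern for illustration only; MiniTensor, DType, and accumulate_grad are hypothetical stand-ins, not nntrainer's Tensor API (the diff itself goes through Tensor::getDataType(), Tensor::clone(), and Tensor::copyData()).

#include <cstddef>
#include <iostream>
#include <vector>

// Hypothetical stand-in types, used only to illustrate the pattern above.
enum class DType { FP32, FP16 };

struct MiniTensor {
  DType dtype{DType::FP32};
  std::vector<float> data; // payload kept as float to keep the sketch simple

  DType getDataType() const { return dtype; }

  // Copy of this tensor tagged with the requested data type (a real
  // implementation would convert the stored values here).
  MiniTensor clone(DType target) const { return MiniTensor{target, data}; }

  // Copy values back from `src`, keeping this tensor's own data type.
  void copyData(const MiniTensor &src) { data = src.data; }
};

// Mirrors the per-layer pattern of the patch: compute in the derivative's
// data type, then push the result back into the stored gradient tensor.
void accumulate_grad(MiniTensor &djdw_master, const MiniTensor &derivative) {
  MiniTensor djdw = (djdw_master.getDataType() != derivative.getDataType())
                      ? djdw_master.clone(derivative.getDataType())
                      : djdw_master;

  for (std::size_t i = 0; i < djdw.data.size(); ++i)
    djdw.data[i] += derivative.data[i]; // stand-in for the real dot/sum kernels

  djdw_master.copyData(djdw); // write the result back to the stored gradient
}

int main() {
  MiniTensor grad{DType::FP32, {0.0f, 0.0f}};
  MiniTensor deriv{DType::FP16, {1.0f, 2.0f}};
  accumulate_grad(grad, deriv);
  std::cout << grad.data[0] << ", " << grad.data[1] << '\n'; // prints 1, 2
}

The sketch always copies for brevity; the patch itself assigns the original tensor when the types already match and only clones on the mixed-type path, so the extra copy-back is the price paid for computing in a different precision than the stored weight or gradient.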