[Layer] Modify Layer for mixed type
fc, conv2d, softmax, and pooling layers are modified for mixed type.
In/Out tensors are supported as 32- or 16-bit float, and so are the weights.

Signed-off-by: Jiho Chu <jiho.chu@samsung.com>
jihochu committed Feb 2, 2024
1 parent 818cb94 commit ce33c31
Showing 5 changed files with 143 additions and 23 deletions.
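
As a rough sketch of the pattern this commit applies in the layers below (not the actual nntrainer API; Tensor, Tdatatype, and prepare_weight here are simplified stand-ins for illustration), a weight is cloned to the activation's data type whenever the two differ, and used as-is otherwise:

#include <iostream>

// Simplified stand-ins for nntrainer's types, for illustration only.
enum class Tdatatype { FP16, FP32 };

struct Tensor {
  Tdatatype dtype;
  Tdatatype getDataType() const { return dtype; }
  // nntrainer's clone(type) copies the data into a tensor of the requested
  // type; this mock only mimics the type change.
  Tensor clone(Tdatatype target) const { return Tensor{target}; }
};

// Mirrors the forwarding paths below: use the weight directly when the types
// already match, otherwise work on a copy converted to the input's type.
Tensor prepare_weight(const Tensor &weight, const Tensor &input) {
  if (weight.getDataType() != input.getDataType())
    return weight.clone(input.getDataType());
  return weight;
}

int main() {
  Tensor fp16_weight{Tdatatype::FP16};
  Tensor fp32_input{Tdatatype::FP32};
  Tensor w = prepare_weight(fp16_weight, fp32_input);
  std::cout << (w.getDataType() == Tdatatype::FP32) << '\n'; // prints 1
}
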
4 changes: 3 additions & 1 deletion nntrainer/layers/common_properties.h
@@ -1398,10 +1398,12 @@ class PropsUserData final : public Property<void *> {
/**
* @brief LossScale property, loss is scaled by this value
*
* @note It is used to enable/disable loss scale,
* and '0.0f' means disabled.
*/
class LossScale : public nntrainer::Property<float> {
public:
LossScale(float value = 1.0f);
LossScale(float value = 0.0f);
static constexpr const char *key = "loss_scale"; /**< unique key to access */
using prop_tag = float_prop_tag; /**< property type */
};
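
A minimal sketch of what the new default means for callers (LossScale and apply_loss_scale below are simplified mocks, not the real property class or layer code): a value of 0.0f is treated as "scaling disabled", so consumers guard the multiply, matching the calcDerivative change further down.

#include <cassert>

// Simplified mock of the property: a float whose default of 0.0f now means
// "loss scaling disabled".
struct LossScale {
  float value;
  explicit LossScale(float v = 0.0f) : value(v) {}
  float get() const { return value; }
};

// The guard the loss layer uses in this commit: only scale when enabled.
float apply_loss_scale(float derivative, const LossScale &scale) {
  if (scale.get() != 0.0f)
    return derivative * scale.get();
  return derivative;
}

int main() {
  assert(apply_loss_scale(2.0f, LossScale()) == 2.0f);         // disabled by default
  assert(apply_loss_scale(2.0f, LossScale(128.0f)) == 256.0f); // scaled when set
  return 0;
}
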
65 changes: 58 additions & 7 deletions nntrainer/layers/conv2d_layer.cpp
@@ -118,10 +118,16 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
if (image.getDataType() == nntrainer::Tdatatype::FP32) {
float val;
apply_data(&val);
} else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
}
#ifdef ENABLE_FP16
else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
_FP16 val;
apply_data(&val);
}
#endif
else {
throw std::runtime_error("Not supported datatype");
}
}

/**
@@ -256,10 +262,16 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
if (out.getDataType() == nntrainer::Tdatatype::FP32) {
float *out_data = out.getData<float>();
apply_data(out_data);
} else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
}
#ifdef ENABLE_FP16
else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
_FP16 *out_data = out.getData<_FP16>();
apply_data(out_data);
}
#endif
else {
throw std::runtime_error("Not supported datatype");
}
}

} // namespace
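
The col2im/im2col changes above follow a compile-time dispatch pattern; below is a self-contained sketch of it (dispatch_by_type and the half_t alias are illustrative, not nntrainer code): FP32 is always handled, FP16 only when the build defines ENABLE_FP16, and any other type throws.

#include <stdexcept>

enum class Tdatatype { FP32, FP16 };

#ifdef ENABLE_FP16
using half_t = _Float16; // stand-in for nntrainer's _FP16 alias
#endif

// Sketch of the dispatch used in col2im/im2col: pick a typed local buffer,
// then hand it to the generic apply_data routine.
template <typename Fn>
void dispatch_by_type(Tdatatype dtype, Fn &&apply_data) {
  if (dtype == Tdatatype::FP32) {
    float val;
    apply_data(&val);
  }
#ifdef ENABLE_FP16
  else if (dtype == Tdatatype::FP16) {
    half_t val;
    apply_data(&val);
  }
#endif
  else {
    throw std::runtime_error("Not supported datatype");
  }
}

int main() {
  // The generic lambda compiles against every pointer type that is enabled.
  dispatch_by_type(Tdatatype::FP32, [](auto *val) { *val = 1; });
  return 0;
}
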
@@ -358,7 +370,14 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);

Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
Tensor &filter_kernel_ = context.getWeight(wt_idx[ConvParams::weight]);

Tensor filter_kernel;
if (filter_kernel_.getDataType() != input_.getDataType()) {
filter_kernel = filter_kernel_.clone(input_.getDataType());
} else {
filter_kernel = filter_kernel_;
}

/** Calculate Convolution 2D
*
@@ -434,12 +453,22 @@ void Conv2DLayer::forwarding(RunLayerContext &context, bool training) {
filter_kernel.reshape(filter_dim);
if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
disable_bias.empty() || disable_bias.get() == false) {
Tensor &bias_kernel = context.getWeight(wt_idx[ConvParams::bias]);
Tensor &bias_kernel_ = context.getWeight(wt_idx[ConvParams::bias]);
Tensor bias_kernel;
if (bias_kernel_.getDataType() != hidden_.getDataType()) {
bias_kernel = bias_kernel_.clone(hidden_.getDataType());
} else {
bias_kernel = bias_kernel_;
}

status = hidden_.add_i(bias_kernel);
if (status != ML_ERROR_NONE) {
throw std::invalid_argument("[Conv2D] adding bias failed");
}
}
// export_to_file(input_, std::string("debug/var/") + input_.getName() +
// ".var"); export_to_file(hidden_, std::string("debug/var/") +
// hidden_.getName() + ".var");
}

void Conv2DLayer::calcDerivative(RunLayerContext &context) {
@@ -450,7 +479,14 @@ void Conv2DLayer::calcDerivative(RunLayerContext &context) {

const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &input_derivative = context.getOutgoingDerivative(SINGLE_INOUT_IDX);
Tensor &filter_kernel = context.getWeight(wt_idx[ConvParams::weight]);
Tensor &filter_kernel_ = context.getWeight(wt_idx[ConvParams::weight]);

Tensor filter_kernel;
if (filter_kernel_.getDataType() != input_derivative.getDataType()) {
filter_kernel = filter_kernel_.clone(input_derivative.getDataType());
} else {
filter_kernel = filter_kernel_;
}

TensorDim filter_dim = filter_kernel.getDim();
TensorDim filter_dim_squeezed{filter_kernel.batch(),
@@ -498,7 +534,14 @@ void Conv2DLayer::calcGradient(RunLayerContext &context) {
const Tensor &derivative = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

Tensor &delK = context.getWeightGrad(wt_idx[ConvParams::weight]);
Tensor &delK_ = context.getWeightGrad(wt_idx[ConvParams::weight]);
Tensor delK;
if (delK_.getDataType() != input_.getDataType()) {
delK = delK_.clone(input_.getDataType());
} else {
delK = delK_;
}

delK.setZero();

TensorDim filter_dim = delK.getDim();
@@ -582,9 +625,17 @@ void Conv2DLayer::calcGradient(RunLayerContext &context) {
delK.reshape(filter_dim);
if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
disable_bias.empty() || disable_bias.get() == false) {
Tensor &delBias = context.getWeightGrad(wt_idx[ConvParams::bias]);
Tensor &delBias_ = context.getWeightGrad(wt_idx[ConvParams::bias]);
Tensor delBias;
if (delBias_.getDataType() != derivative.getDataType()) {
delBias = delBias_.clone(derivative.getDataType());
} else {
delBias = delBias_;
}
derivative.sum({0, 2, 3}, delBias);
delBias_.copyData(delBias);
}
delK_.copyData(delK);
}
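
In calcGradient the direction is reversed: the gradient is computed in the derivative's data type and then written back into the weight-typed gradient tensor via copyData. A minimal sketch of that round trip (again with a mock Tensor and copyData, not the real API):

#include <cassert>

enum class Tdatatype { FP32, FP16 };

// Mock tensor with just enough state to show the write-back; copyData keeps
// the destination's type and converts the payload, as in nntrainer.
struct Tensor {
  Tdatatype dtype;
  float value;
  Tdatatype getDataType() const { return dtype; }
  Tensor clone(Tdatatype t) const { return Tensor{t, value}; }
  void copyData(const Tensor &src) { value = src.value; }
};

int main() {
  Tensor delK_{Tdatatype::FP16, 0.0f};      // weight gradient stored in FP16
  Tensor derivative{Tdatatype::FP32, 3.0f}; // incoming derivative in FP32

  // Compute in the derivative's type...
  Tensor delK = (delK_.getDataType() != derivative.getDataType())
                  ? delK_.clone(derivative.getDataType())
                  : delK_;
  delK.value = derivative.value * 2.0f;     // stand-in for the real gradient math

  // ...then copy the result back into the FP16-typed gradient tensor.
  delK_.copyData(delK);
  assert(delK_.getDataType() == Tdatatype::FP16);
  assert(delK_.value == 6.0f);
  return 0;
}
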

void Conv2DLayer::exportTo(Exporter &exporter,
49 changes: 42 additions & 7 deletions nntrainer/layers/fc_layer.cpp
@@ -39,8 +39,7 @@ static constexpr size_t SINGLE_INOUT_IDX = 0;
enum FCParams { weight, bias };

FullyConnectedLayer::FullyConnectedLayer() :
LayerImpl(),
fc_props(props::Unit()) {
LayerImpl(), fc_props(props::Unit()) {
weight_idx.fill(std::numeric_limits<unsigned>::max());
}

@@ -116,10 +115,17 @@ void FullyConnectedLayer::setProperty(const std::vector<std::string> &values) {
}

void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
Tensor &weight_ = context.getWeight(weight_idx[FCParams::weight]);
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

Tensor weight;
if (weight_.getDataType() != input_.getDataType()) {
weight = weight_.clone(input_.getDataType());
} else {
weight = weight_;
}

if (weight.getDataType() == nntrainer::Tdatatype::QINT4 ||
weight.getDataType() == nntrainer::Tdatatype::QINT8) {
Tdatatype dtype = input_.getDataType();
@@ -141,7 +147,13 @@ void FullyConnectedLayer::forwarding(RunLayerContext &context, bool training) {
if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
disable_bias.empty() || disable_bias.get() == false) {
Tensor &bias = context.getWeight(weight_idx[FCParams::bias]);
hidden_.add_i(bias);
Tensor b;
if (bias.getDataType() != input_.getDataType()) {
b = bias.clone(input_.getDataType());
} else {
b = bias;
}
hidden_.add_i(b);
}
}

@@ -187,23 +199,44 @@ void FullyConnectedLayer::incremental_forwarding(RunLayerContext &context,
}

void FullyConnectedLayer::calcDerivative(RunLayerContext &context) {
Tensor &weight = context.getWeight(weight_idx[FCParams::weight]);
Tensor &weight_ = context.getWeight(weight_idx[FCParams::weight]);

const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &ret_ = context.getOutgoingDerivative(SINGLE_INOUT_IDX);

Tensor weight;
if (weight_.getDataType() != derivative_.getDataType()) {
weight = weight_.clone(derivative_.getDataType());
} else {
weight = weight_;
}

ret_.dot_deriv_wrt_1(weight, derivative_, false, false);
}

void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
Tensor &djdw = context.getWeightGrad(weight_idx[FCParams::weight]);
Tensor &djdw_ = context.getWeightGrad(weight_idx[FCParams::weight]);

const Tensor &derivative_ = context.getIncomingDerivative(SINGLE_INOUT_IDX);
Tensor &input_ = context.getInput(SINGLE_INOUT_IDX);

Tensor djdw;
if (djdw_.getDataType() != derivative_.getDataType()) {
djdw = djdw_.clone(derivative_.getDataType());
} else {
djdw = djdw_;
}

if (auto &disable_bias = std::get<props::DisableBias>(*layer_impl_props);
disable_bias.empty() || disable_bias.get() == false) {
Tensor &djdb = context.getWeightGrad(weight_idx[FCParams::bias]);
Tensor &djdb_ = context.getWeightGrad(weight_idx[FCParams::bias]);

Tensor djdb;
if (djdb_.getDataType() != input_.getDataType()) {
djdb = djdb_.clone(input_.getDataType());
} else {
djdb = djdb_;
}

if (context.isGradientFirstAccess(weight_idx[FCParams::bias])) {
derivative_.sum({0, 1, 2}, djdb);
@@ -212,11 +245,13 @@ void FullyConnectedLayer::calcGradient(RunLayerContext &context) {
Tensor t = derivative_.sum({0, 1, 2});
djdb.add_i(t);
}
djdb_.copyData(djdb);
}

input_.dot_deriv_wrt_2(
djdw, derivative_, false, false,
!context.isGradientFirstAccess(weight_idx[FCParams::weight]));
djdw_.copyData(djdw);
}

} /* namespace nntrainer */
5 changes: 4 additions & 1 deletion nntrainer/layers/loss/cross_entropy_softmax_loss_layer.cpp
Original file line number Diff line number Diff line change
@@ -29,6 +29,8 @@ void CrossEntropySoftmaxLossLayer::forwarding(RunLayerContext &context,
Tensor &hidden_ = context.getOutput(SINGLE_INOUT_IDX);
Tensor &y = context.getInput(SINGLE_INOUT_IDX);

// export_to_file(y, std::string("debug/var/") + y.getName() + ".var");

// fill the output
auto out_type = hidden_.getDataType();
if (out_type == ml::train::TensorDim::DataType::FP32) {
@@ -93,7 +95,8 @@ void CrossEntropySoftmaxLossLayer::calcDerivative(RunLayerContext &context) {
"Error when calculating loss");
}

ret_derivative.multiply_i(loss_scale);
if (loss_scale != 0.0f)
ret_derivative.multiply_i(loss_scale);
}

} // namespace nntrainer
