@@ -38,7 +38,8 @@ namespace {
 static TensorDim calcCol2ImOutputDim(const TensorDim &out,
                                      const TensorDim &kdim) {
 
-  return TensorDim({kdim.getFeatureLen(), out.width() * out.height()});
+  return TensorDim({kdim.getFeatureLen(), out.width() * out.height()},
+                   out.getTensorType());
 }
 
 /**
@@ -56,7 +57,10 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
                    const std::array<props::Stride, CONV2D_DIM> &mstride,
                    const std::array<props::Dilation, CONV2D_DIM> &dilation,
                    Tensor &image) {
-  auto [pt, pb, pl, pr] = padding;
+  auto pt = padding[0];
+  auto pb = padding[1];
+  auto pl = padding[2];
+  auto pr = padding[3];
 
   unsigned k_height = kdim.height();
   unsigned k_width = kdim.width();
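
Note on the padding change above: the structured binding auto [pt, pb, pl, pr] = padding; is unpacked into ordinary locals, plausibly because the loop body below now lives inside a lambda and C++17 does not allow lambdas to capture structured bindings. A minimal illustration, using a hypothetical std::array that is not part of this patch:

    #include <array>

    int main() {
      std::array<unsigned, 4> padding{1, 1, 2, 2};
      auto [pt, pb, pl, pr] = padding;   // structured binding
      auto pt2 = padding[0];             // ordinary local variable
      // auto f = [&]() { return pt; };  // ill-formed in C++17: structured bindings cannot be captured
      auto g = [&]() { return pt2; };    // fine
      return static_cast<int>(g());
    }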
@@ -84,32 +88,48 @@ static void col2im(const Tensor &col_matrix, const TensorDim &kdim,
   int h_stride_end = im_eff_height - eff_k_height - pt;
   int w_stride_end = im_eff_width - eff_k_width - pl;
 
-  unsigned col_w = 0;
-  for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
-    for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
-      unsigned col_h = 0;
-      int patch_height_end = hs + eff_k_height;
-      int patch_width_end = ws + eff_k_width;
-      for (unsigned c = 0; c < im_channel; c++) {
-        for (int h = hs; h < patch_height_end; h += hdilation) {
-          if (h < 0 || im_height <= h) {
-            col_h += k_width;
-            continue;
-          }
-          for (int w = ws; w < patch_width_end; w += wdilation) {
-            if (w < 0 || im_width <= w) {
-              col_h++;
+  auto apply_data = [&]<typename T>(T *val) {
+    unsigned col_w = 0;
+    for (int hs = -pt; hs <= h_stride_end; hs += hstride) {
+      for (int ws = -pl; ws <= w_stride_end; ws += wstride) {
+        unsigned col_h = 0;
+        int patch_height_end = hs + eff_k_height;
+        int patch_width_end = ws + eff_k_width;
+        for (unsigned c = 0; c < im_channel; c++) {
+          for (int h = hs; h < patch_height_end; h += hdilation) {
+            if (h < 0 || im_height <= h) {
+              col_h += k_width;
               continue;
             }
-
-            float *val = image.getAddress<float>(0, c, h, w);
-            *val += col_matrix.getValue<float>(0, 0, col_h, col_w);
-            col_h++;
+            for (int w = ws; w < patch_width_end; w += wdilation) {
+              if (w < 0 || im_width <= w) {
+                col_h++;
+                continue;
+              }
+
+              val = image.getAddress<T>(0, c, h, w);
+              *val += col_matrix.getValue<T>(0, 0, col_h, col_w);
+              col_h++;
+            }
           }
         }
+        col_w++;
       }
-      col_w++;
     }
+  };
+
+  if (image.getDataType() == nntrainer::Tdatatype::FP32) {
+    float val;
+    apply_data(&val);
+  }
+#ifdef ENABLE_FP16
+  else if (image.getDataType() == nntrainer::Tdatatype::FP16) {
+    _FP16 val;
+    apply_data(&val);
+  }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
   }
 }
 
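
The FP32/FP16 handling introduced above follows one pattern throughout this patch: the existing loop body becomes a generic lambda with an explicit template parameter (a C++20 feature), and the tensor's runtime data type picks which instantiation gets called. A stripped-down sketch of that dispatch, with a hypothetical enum and raw buffers standing in for nntrainer's Tensor and Tdatatype:

    #include <cstddef>
    #include <cstdint>
    #include <stdexcept>

    enum class DataType { FP32, FP16 };   // stand-in for nntrainer::Tdatatype
    using half_t = std::uint16_t;         // placeholder; nntrainer uses _FP16 behind ENABLE_FP16

    // Doubles every element of a buffer whose element type is only known at runtime.
    void scale_buffer(void *data, DataType dtype, std::size_t len) {
      // The loop is written once; T is fixed at each call site.
      auto apply_data = [&]<typename T>(T *ptr) {
        for (std::size_t i = 0; i < len; ++i)
          ptr[i] = static_cast<T>(ptr[i] * 2);
      };

      if (dtype == DataType::FP32)
        apply_data(static_cast<float *>(data));
      else if (dtype == DataType::FP16)
        apply_data(static_cast<half_t *>(data));
      else
        throw std::runtime_error("Not supported datatype");
    }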
@@ -179,7 +199,10 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
   // }
   */
 
-  auto [pt, pb, pl, pr] = padding;
+  auto pt = padding[0];
+  auto pb = padding[1];
+  auto pl = padding[2];
+  auto pr = padding[3];
 
   unsigned int channel = in.channel();
   int in_height = in.height();
@@ -198,46 +221,62 @@ static void im2col(const Tensor &in, const TensorDim &kdim,
   unsigned int out_width = (width - eff_k_width) / mstride[1] + 1;
 
   out.reshape(
-    TensorDim({out_height * out_width, in.channel() * k_height * k_width}));
-  float *out_data = out.getData();
-
-  int h_stride_end = height - eff_k_height - pt;
-  int w_stride_end = width - eff_k_width - pl;
-
-  /// get a patch, size of kernel
-  /// hs is height_strided, ws is width_strided
-  unsigned int owidth = out.width();
-  unsigned int base_im_w = 0;
-  for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
-    unsigned int base_im_h = 0;
-    int patch_height_end = eff_k_height + hs;
-    /// map the patch to a single line looping through channel
-    for (unsigned int c = 0; c < channel; ++c) {
-      for (int h = hs; h < patch_height_end; h += dilation[0]) {
-        if (h < 0 || in_height <= h) {
-          base_im_h += k_width;
-          continue;
-        }
-
-        unsigned int im_w = base_im_w;
-        for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
-          unsigned int im_h = base_im_h;
-          int patch_width_end = eff_k_width + ws;
+    TensorDim({out_height * out_width, in.channel() * k_height * k_width},
+              in.getTensorType()));
+
+  auto apply_data = [&]<typename T>(T *out_data) {
+    int h_stride_end = height - eff_k_height - pt;
+    int w_stride_end = width - eff_k_width - pl;
+
+    /// get a patch, size of kernel
+    /// hs is height_strided, ws is width_strided
+    unsigned int owidth = out.width();
+    unsigned int base_im_w = 0;
+    for (int hs = -pt; hs <= h_stride_end; hs += mstride[0]) {
+      unsigned int base_im_h = 0;
+      int patch_height_end = eff_k_height + hs;
+      /// map the patch to a single line looping through channel
+      for (unsigned int c = 0; c < channel; ++c) {
+        for (int h = hs; h < patch_height_end; h += dilation[0]) {
+          if (h < 0 || in_height <= h) {
+            base_im_h += k_width;
+            continue;
+          }
 
-        for (int w = ws; w < patch_width_end; w += dilation[1]) {
-          if (w < 0 || in_width <= w) {
+          unsigned int im_w = base_im_w;
+          for (int ws = -pl; ws <= w_stride_end; ws += mstride[1]) {
+            unsigned int im_h = base_im_h;
+            int patch_width_end = eff_k_width + ws;
+
+            for (int w = ws; w < patch_width_end; w += dilation[1]) {
+              if (w < 0 || in_width <= w) {
+                im_h++;
+                continue;
+              }
+              out_data[im_w * owidth + im_h] = in.getValue<T>(0, c, h, w);
               im_h++;
-              continue;
             }
-            out_data[im_w * owidth + im_h] = in.getValue<float>(0, c, h, w);
-            im_h++;
+            im_w++;
           }
-          im_w++;
+          base_im_h += k_width;
         }
-        base_im_h += k_width;
       }
+      base_im_w += out_width;
     }
-    base_im_w += out_width;
+  };
+
+  if (out.getDataType() == nntrainer::Tdatatype::FP32) {
+    float *out_data = out.getData<float>();
+    apply_data(out_data);
+  }
+#ifdef ENABLE_FP16
+  else if (out.getDataType() == nntrainer::Tdatatype::FP16) {
+    _FP16 *out_data = out.getData<_FP16>();
+    apply_data(out_data);
+  }
+#endif
+  else {
+    throw std::runtime_error("Not supported datatype");
   }
 }
 
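
For reference on the reshape target above: each row of the column matrix holds one flattened patch, so out becomes (out_height * out_width) x (channel * k_height * k_width) and now carries the input's tensor type. A worked example with made-up sizes: a 3-channel 5x5 input, a 3x3 kernel, stride 1, padding 1 on every side, and dilation 1 give eff_k_height = eff_k_width = 3 and out_height = out_width = (5 + 1 + 1 - 3) / 1 + 1 = 5, so the matrix is reshaped to {25, 27}.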
@@ -279,9 +318,11 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
   auto &dilation =
     std::get<std::array<props::Dilation, CONV2D_DIM>>(conv_props);
 
-  TensorDim kernel_dim =
-    TensorDim(filter_size, in_dim.channel(), kernel_size[0], kernel_size[1]);
-  TensorDim bias_dim = TensorDim(1, filter_size, 1, 1);
+  auto in_t_type = in_dim.getTensorType();
+  in_t_type.data_type = context.getWeightDataType();
+  TensorDim kernel_dim = TensorDim(filter_size, in_dim.channel(),
+                                   kernel_size[0], kernel_size[1], in_t_type);
+  TensorDim bias_dim = TensorDim(1, filter_size, 1, 1, in_t_type);
 
   padding = std::get<props::Padding2D>(conv_props)
               .compute(in_dim, kernel_dim, {stride[0], stride[1]},
@@ -309,6 +350,7 @@ void Conv2DLayer::finalize(InitLayerContext &context) {
   out_dim.channel(filter_size);
   out_dim.height((eff_in_height - eff_k_height) / stride[0] + 1);
   out_dim.width((eff_in_width - eff_k_width) / stride[1] + 1);
+  out_dim.setTensorType(in_dim.getTensorType());
   context.setOutputDimensions({out_dim});
 
   NNTR_THROW_IF(eff_in_height < kernel_size[0] || eff_in_width < kernel_size[1],