[OpenCL] Addressed review comments

s-debadri · s-debadri · commit 9ef8f3a8114a · 2024-03-06T12:19:08.000+05:30
Modified doc for OpenCL buffer move constructor
Addressed review comments

Signed-off-by: Debadri Samaddar <s.debadri@samsung.com>
diff --git a/nntrainer/cl_context.cpp b/nntrainer/cl_context.cpp
@@ -22,10 +22,9 @@ std::mutex cl_factory_mutex;
 std::once_flag global_cl_context_init_flag;
 
 static void add_default_object(ClContext &cc) {
-  using LayerType = ml::train::LayerType;
 
   cc.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,
-                     FullyConnectedLayer::type, LayerType::LAYER_FC);
+                     FullyConnectedLayer::type, ml::train::LayerType::LAYER_FC);
 }
 
 static void registerer(ClContext &cc) noexcept {
diff --git a/nntrainer/layers/layer_context.h b/nntrainer/layers/layer_context.h
@@ -808,6 +808,11 @@ class RunLayerContext {
   std::vector<Weight *> getWeights() { return weights; }
 
 #ifdef ENABLE_OPENCL
+
+  // getting static instances of commandqueue, context and kernel
+  opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
+  opencl::Kernel kernel_;
+
   /**
    * @brief set the compute engine for this node
    * @param compute engine: (CPU/GPU)
@@ -854,12 +859,6 @@ class RunLayerContext {
   ml::train::LayerComputeEngine compute_engine =
     ml::train::LayerComputeEngine::CPU;
 
-#ifdef ENABLE_OPENCL
-  // getting static instances of commandqueue, context and kernel
-  opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
-  opencl::Kernel kernel_;
-#endif
-
   // flag to check whether opencl kernel is initialized or not
   bool kernel_initialized = false;
 
diff --git a/nntrainer/opencl/opencl_buffer.cpp b/nntrainer/opencl/opencl_buffer.cpp
@@ -48,7 +48,7 @@ Buffer::Buffer(ContextManager &context_manager, int size_in_bytes,
 }
 
 /**
- * @brief Copy constructor for buffer
+ * @brief Move constructor for buffer by deleting the previous buffer
  *
  * @param buffer
  */
diff --git a/nntrainer/opencl/opencl_buffer.h b/nntrainer/opencl/opencl_buffer.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef GPU_CL_OPENCL_BUFFER_HPP_
-#define GPU_CL_OPENCL_BUFFER_HPP_
+#ifndef __OPENCL_BUFFER_H__
+#define __OPENCL_BUFFER_H__
 
 #include "opencl_command_queue_manager.h"
 #include "opencl_context_manager.h"
@@ -58,7 +58,7 @@ class Buffer {
          void *data);
 
   /**
-   * @brief Copy constructor for buffer
+   * @brief Move constructor for buffer by deleting the previous buffer
    *
    * @param buffer
    */
@@ -116,4 +116,4 @@ class Buffer {
   bool ReadData(CommandQueueManager &command_queue_inst, void *data);
 };
 } // namespace nntrainer::opencl
-#endif // GPU_CL_OPENCL_BUFFER_HPP_
+#endif // __OPENCL_BUFFER_H__
diff --git a/nntrainer/opencl/opencl_command_queue_manager.h b/nntrainer/opencl/opencl_command_queue_manager.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
-#define GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
+#ifndef __OPENCL_COMMAND_QUEUE_MANAGER_H__
+#define __OPENCL_COMMAND_QUEUE_MANAGER_H__
 
 #include "opencl_kernel.h"
 #include "third_party/cl.h"
@@ -126,4 +126,4 @@ class CommandQueueManager {
 };
 } // namespace nntrainer::opencl
 
-#endif // GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
+#endif // __OPENCL_COMMAND_QUEUE_MANAGER_H__
diff --git a/nntrainer/opencl/opencl_context_manager.h b/nntrainer/opencl/opencl_context_manager.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
-#define GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
+#ifndef __OPENCL_CONTEXT_MANAGER_H__
+#define __OPENCL_CONTEXT_MANAGER_H__
 
 #include <mutex>
 
@@ -97,4 +97,4 @@ class ContextManager {
   ~ContextManager();
 };
 } // namespace nntrainer::opencl
-#endif // GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
+#endif // __OPENCL_CONTEXT_MANAGER_H__
diff --git a/nntrainer/opencl/opencl_kernel.h b/nntrainer/opencl/opencl_kernel.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef GPU_CL_OPENCL_KERNEL_HPP_
-#define GPU_CL_OPENCL_KERNEL_HPP_
+#ifndef __OPENCL_KERNEL_H__
+#define __OPENCL_KERNEL_H__
 
 #include <string>
 
@@ -59,4 +59,4 @@ class Kernel {
   const cl_kernel GetKernel();
 };
 } // namespace nntrainer::opencl
-#endif // GPU_CL_OPENCL_KERNEL_HPP_
+#endif // __OPENCL_KERNEL_H__
diff --git a/nntrainer/opencl/opencl_loader.h b/nntrainer/opencl/opencl_loader.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef GPU_CL_OPENCL_LOADER_HPP_
-#define GPU_CL_OPENCL_LOADER_HPP_
+#ifndef __OPENCL_LOADER_H__
+#define __OPENCL_LOADER_H__
 
 #include "third_party/cl.h"
 
@@ -141,4 +141,4 @@ extern PFN_clReleaseMemObject clReleaseMemObject;
 
 } // namespace nntrainer::opencl
 
-#endif // GPU_CL_OPENCL_LOADER_HPP_
+#endif // __OPENCL_LOADER_H__
diff --git a/nntrainer/opencl/opencl_op_interface.h b/nntrainer/opencl/opencl_op_interface.h
@@ -13,8 +13,8 @@
  *
  */
 
-#ifndef GPU_CL_OP_INTERFACE_HPP_
-#define GPU_CL_OP_INTERFACE_HPP_
+#ifndef __OPENCL_OP_INTERFACE_H__
+#define __OPENCL_OP_INTERFACE_H__
 
 #include <cstdint>
 #include <string>
@@ -58,4 +58,4 @@ class GpuCLOpInterface {
 };
 } // namespace nntrainer::opencl
 
-#endif // GPU_CL_OP_INTERFACE_HPP_
+#endif // __OPENCL_OP_INTERFACE_H__
diff --git a/nntrainer/opencl/opencl_program.h b/nntrainer/opencl/opencl_program.h
@@ -11,8 +11,8 @@
  *
  */
 
-#ifndef GPU_CL_OPENCL_PROGRAM_HPP_
-#define GPU_CL_OPENCL_PROGRAM_HPP_
+#ifndef __OPENCL_PROGRAM_H__
+#define __OPENCL_PROGRAM_H__
 
 #include <string>
 
@@ -70,4 +70,4 @@ class Program {
   const cl_program &GetProgram();
 };
 } // namespace nntrainer::opencl
-#endif // GPU_CL_OPENCL_PROGRAM_HPP_
+#endif // __OPENCL_PROGRAM_H__
diff --git a/nntrainer/tensor/cl_operations/cl_interface.h b/nntrainer/tensor/cl_operations/cl_interface.h
@@ -33,7 +33,7 @@ namespace nntrainer {
 void gpu_sgemv(const float *A, const float *X, float *Y, float alpha,
                float beta, unsigned int rows, unsigned int cols) {
   static internal::GpuCLSgemv cl_gpu_sgemv;
-  cl_gpu_sgemv.CLSgemv(A, X, Y, alpha, beta, rows, cols);
+  cl_gpu_sgemv.cLSgemv(A, X, Y, alpha, beta, rows, cols);
 }
 } // namespace nntrainer
 
diff --git a/nntrainer/tensor/cl_operations/cl_sgemv.cpp b/nntrainer/tensor/cl_operations/cl_sgemv.cpp
@@ -14,15 +14,14 @@
  */
 
 #include "cl_sgemv.h"
-#include <iostream>
 #include <opencl_buffer.h>
 
 #include <nntrainer_log.h>
 
 namespace nntrainer::internal {
 
 template <typename T>
-T *GpuCLSgemv::CLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
+T *GpuCLSgemv::cLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
                        T alpha, T beta, unsigned int dim1, unsigned int dim2) {
 
   ml_logi("GpuCLSgemv::CLSgemv");
@@ -112,7 +111,11 @@ T *GpuCLSgemv::CLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
   return vecYdata;
 }
 
-template float *GpuCLSgemv::CLSgemv<float>(const float *matAdata,
+/**
+ * @brief Explicit template instantiation of cLSgemv for float
+ *
+ */
+template float *GpuCLSgemv::cLSgemv<float>(const float *matAdata,
                                            const float *vecXdata,
                                            float *vecYdata, float alpha,
                                            float beta, unsigned int dim1,
diff --git a/nntrainer/tensor/cl_operations/cl_sgemv.h b/nntrainer/tensor/cl_operations/cl_sgemv.h
@@ -13,12 +13,17 @@
  *
  */
 
-#ifndef GPU_CL_SGEMV_HPP_
-#define GPU_CL_SGEMV_HPP_
+#ifndef __CL_SGEMV_H__
+#define __CL_SGEMV_H__
 
 #include <opencl_op_interface.h>
 
 namespace nntrainer::internal {
+/**
+ * @class   GpuCLSgemv class
+ * @brief   Kernel and implementation of naive SGEMV. Used for
+ * testing/experimentation.
+ */
 class GpuCLSgemv : public nntrainer::opencl::GpuCLOpInterface {
   std::string sgemv_kernel_ =
     R"(__kernel void sgemv(const __global float* A, const __global float* X,
@@ -31,10 +36,23 @@ class GpuCLSgemv : public nntrainer::opencl::GpuCLOpInterface {
     })";
 
 public:
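+/**
+ * @brief Function to set buffers and kernel arguments for SGEMV
+ *
+ * @tparam T element type of the matrix and vectors (instantiated for float)
+ * @param matAdata pointer to the data of matrix A
+ * @param vecXdata pointer to the data of vector X
+ * @param vecYdata pointer to the data of vector Y
+ * @param alpha scalar coefficient alpha
+ * @param beta scalar coefficient beta
+ * @param dim1 first dimension (gpu_sgemv passes the row count)
+ * @param dim2 second dimension (gpu_sgemv passes the column count)
+ * @return T* pointer to the vector Y data (vecYdata)
+ */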
   template <typename T>
-  T *CLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata, T alpha, T beta,
+  T *cLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata, T alpha, T beta,
              unsigned int dim1, unsigned int dim2);
 };
 } // namespace nntrainer::internal
 
-#endif // GPU_CL_SGEMV_HPP_
+#endif // __CL_SGEMV_H__

Original file line number	Diff line number	Diff line change
`@@ -22,10 +22,9 @@ std::mutex cl_factory_mutex;`
`22`	`22`	`std::once_flag global_cl_context_init_flag;`
`23`	`23`
`24`	`24`	`static void add_default_object(ClContext &cc) {`
`25`		`- using LayerType = ml::train::LayerType;`
`26`	`25`
`27`	`26`	`cc.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,`
`28`		`- FullyConnectedLayer::type, LayerType::LAYER_FC);`
	`27`	`+ FullyConnectedLayer::type, ml::train::LayerType::LAYER_FC);`
`29`	`28`	`}`
`30`	`29`
`31`	`30`	`static void registerer(ClContext &cc) noexcept {`
Original file line number	Diff line number	Diff line change
`@@ -48,7 +48,7 @@ Buffer::Buffer(ContextManager &context_manager, int size_in_bytes,`
`48`	`48`	`}`
`49`	`49`
`50`	`50`	`/**`
`51`		`- * @brief Copy constructor for buffer`
	`51`	`+ * @brief Move constructor for buffer by deleting the previous buffer`
`52`	`52`	`*`
`53`	`53`	`* @param buffer`
`54`	`54`	`*/`
Original file line number	Diff line number	Diff line change
`@@ -33,7 +33,7 @@ namespace nntrainer {`
`33`	`33`	`void gpu_sgemv(const float *A, const float *X, float *Y, float alpha,`
`34`	`34`	`float beta, unsigned int rows, unsigned int cols) {`
`35`	`35`	`static internal::GpuCLSgemv cl_gpu_sgemv;`
`36`		`- cl_gpu_sgemv.CLSgemv(A, X, Y, alpha, beta, rows, cols);`
	`36`	`+ cl_gpu_sgemv.cLSgemv(A, X, Y, alpha, beta, rows, cols);`
`37`	`37`	`}`
`38`	`38`	`} // namespace nntrainer`
`39`	`39`