Skip to content

Commit 9ef8f3a

Browse files
committed
[OpenCL] Addressed review comments
Modified doc for OpenCL buffer move constructor. Addressed review comments. Signed-off-by: Debadri Samaddar <s.debadri@samsung.com>
1 parent 4ac41f0 commit 9ef8f3a

13 files changed

+58
-39
lines changed

nntrainer/cl_context.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,10 +22,9 @@ std::mutex cl_factory_mutex;
2222
std::once_flag global_cl_context_init_flag;
2323

2424
static void add_default_object(ClContext &cc) {
25-
using LayerType = ml::train::LayerType;
2625

2726
cc.registerFactory(nntrainer::createLayer<FullyConnectedLayer>,
28-
FullyConnectedLayer::type, LayerType::LAYER_FC);
27+
FullyConnectedLayer::type, ml::train::LayerType::LAYER_FC);
2928
}
3029

3130
static void registerer(ClContext &cc) noexcept {

nntrainer/layers/layer_context.h

Lines changed: 5 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -808,6 +808,11 @@ class RunLayerContext {
808808
std::vector<Weight *> getWeights() { return weights; }
809809

810810
#ifdef ENABLE_OPENCL
811+
812+
// getting static instances of commandqueue, context and kernel
813+
opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
814+
opencl::Kernel kernel_;
815+
811816
/**
812817
* @brief set the compute engine for this node
813818
* @param compute engine: (CPU/GPU)
@@ -854,12 +859,6 @@ class RunLayerContext {
854859
ml::train::LayerComputeEngine compute_engine =
855860
ml::train::LayerComputeEngine::CPU;
856861

857-
#ifdef ENABLE_OPENCL
858-
// getting static instances of commandqueue, context and kernel
859-
opencl::ContextManager &context_inst_ = opencl::ContextManager::GetInstance();
860-
opencl::Kernel kernel_;
861-
#endif
862-
863862
// flag to check whether opencl kernel is initialized or not
864863
bool kernel_initialized = false;
865864

nntrainer/opencl/opencl_buffer.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -48,7 +48,7 @@ Buffer::Buffer(ContextManager &context_manager, int size_in_bytes,
4848
}
4949

5050
/**
51-
* @brief Copy constructor for buffer
51+
* @brief Move constructor for buffer by deleting the previous buffer
5252
*
5353
* @param buffer
5454
*/

nntrainer/opencl/opencl_buffer.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
*
1212
*/
1313

14-
#ifndef GPU_CL_OPENCL_BUFFER_HPP_
15-
#define GPU_CL_OPENCL_BUFFER_HPP_
14+
#ifndef __OPENCL_BUFFER_H__
15+
#define __OPENCL_BUFFER_H__
1616

1717
#include "opencl_command_queue_manager.h"
1818
#include "opencl_context_manager.h"
@@ -58,7 +58,7 @@ class Buffer {
5858
void *data);
5959

6060
/**
61-
* @brief Copy constructor for buffer
61+
* @brief Move constructor for buffer by deleting the previous buffer
6262
*
6363
* @param buffer
6464
*/
@@ -116,4 +116,4 @@ class Buffer {
116116
bool ReadData(CommandQueueManager &command_queue_inst, void *data);
117117
};
118118
} // namespace nntrainer::opencl
119-
#endif // GPU_CL_OPENCL_BUFFER_HPP_
119+
#endif // __OPENCL_BUFFER_H__

nntrainer/opencl/opencl_command_queue_manager.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
*
1212
*/
1313

14-
#ifndef GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
15-
#define GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
14+
#ifndef __OPENCL_COMMAND_QUEUE_MANAGER_H__
15+
#define __OPENCL_COMMAND_QUEUE_MANAGER_H__
1616

1717
#include "opencl_kernel.h"
1818
#include "third_party/cl.h"
@@ -126,4 +126,4 @@ class CommandQueueManager {
126126
};
127127
} // namespace nntrainer::opencl
128128

129-
#endif // GPU_CL_OPENCL_COMMAND_QUEUE_MANAGER_HPP_
129+
#endif // __OPENCL_COMMAND_QUEUE_MANAGER_H__

nntrainer/opencl/opencl_context_manager.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
*
1212
*/
1313

14-
#ifndef GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
15-
#define GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
14+
#ifndef __OPENCL_CONTEXT_MANAGER_H__
15+
#define __OPENCL_CONTEXT_MANAGER_H__
1616

1717
#include <mutex>
1818

@@ -97,4 +97,4 @@ class ContextManager {
9797
~ContextManager();
9898
};
9999
} // namespace nntrainer::opencl
100-
#endif // GPU_CL_OPENCL_CONTEXT_MANAGER_HPP_
100+
#endif // __OPENCL_CONTEXT_MANAGER_H__

nntrainer/opencl/opencl_kernel.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
*
1212
*/
1313

14-
#ifndef GPU_CL_OPENCL_KERNEL_HPP_
15-
#define GPU_CL_OPENCL_KERNEL_HPP_
14+
#ifndef __OPENCL_KERNEL_H__
15+
#define __OPENCL_KERNEL_H__
1616

1717
#include <string>
1818

@@ -59,4 +59,4 @@ class Kernel {
5959
const cl_kernel GetKernel();
6060
};
6161
} // namespace nntrainer::opencl
62-
#endif // GPU_CL_OPENCL_KERNEL_HPP_
62+
#endif // __OPENCL_KERNEL_H__

nntrainer/opencl/opencl_loader.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
*
1212
*/
1313

14-
#ifndef GPU_CL_OPENCL_LOADER_HPP_
15-
#define GPU_CL_OPENCL_LOADER_HPP_
14+
#ifndef __OPENCL_LOADER_H__
15+
#define __OPENCL_LOADER_H__
1616

1717
#include "third_party/cl.h"
1818

@@ -141,4 +141,4 @@ extern PFN_clReleaseMemObject clReleaseMemObject;
141141

142142
} // namespace nntrainer::opencl
143143

144-
#endif // GPU_CL_OPENCL_LOADER_HPP_
144+
#endif // __OPENCL_LOADER_H__

nntrainer/opencl/opencl_op_interface.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
*
1414
*/
1515

16-
#ifndef GPU_CL_OP_INTERFACE_HPP_
17-
#define GPU_CL_OP_INTERFACE_HPP_
16+
#ifndef __OPENCL_OP_INTERFACE_H__
17+
#define __OPENCL_OP_INTERFACE_H__
1818

1919
#include <cstdint>
2020
#include <string>
@@ -58,4 +58,4 @@ class GpuCLOpInterface {
5858
};
5959
} // namespace nntrainer::opencl
6060

61-
#endif // GPU_CL_OP_INTERFACE_HPP_
61+
#endif // __OPENCL_OP_INTERFACE_H__

nntrainer/opencl/opencl_program.h

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
*
1212
*/
1313

14-
#ifndef GPU_CL_OPENCL_PROGRAM_HPP_
15-
#define GPU_CL_OPENCL_PROGRAM_HPP_
14+
#ifndef __OPENCL_PROGRAM_H__
15+
#define __OPENCL_PROGRAM_H__
1616

1717
#include <string>
1818

@@ -70,4 +70,4 @@ class Program {
7070
const cl_program &GetProgram();
7171
};
7272
} // namespace nntrainer::opencl
73-
#endif // GPU_CL_OPENCL_PROGRAM_HPP_
73+
#endif // __OPENCL_PROGRAM_H__

nntrainer/tensor/cl_operations/cl_interface.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ namespace nntrainer {
3333
void gpu_sgemv(const float *A, const float *X, float *Y, float alpha,
3434
float beta, unsigned int rows, unsigned int cols) {
3535
static internal::GpuCLSgemv cl_gpu_sgemv;
36-
cl_gpu_sgemv.CLSgemv(A, X, Y, alpha, beta, rows, cols);
36+
cl_gpu_sgemv.cLSgemv(A, X, Y, alpha, beta, rows, cols);
3737
}
3838
} // namespace nntrainer
3939

nntrainer/tensor/cl_operations/cl_sgemv.cpp

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -14,15 +14,14 @@
1414
*/
1515

1616
#include "cl_sgemv.h"
17-
#include <iostream>
1817
#include <opencl_buffer.h>
1918

2019
#include <nntrainer_log.h>
2120

2221
namespace nntrainer::internal {
2322

2423
template <typename T>
25-
T *GpuCLSgemv::CLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
24+
T *GpuCLSgemv::cLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
2625
T alpha, T beta, unsigned int dim1, unsigned int dim2) {
2726

2827
ml_logi("GpuCLSgemv::CLSgemv");
@@ -112,7 +111,11 @@ T *GpuCLSgemv::CLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata,
112111
return vecYdata;
113112
}
114113

115-
template float *GpuCLSgemv::CLSgemv<float>(const float *matAdata,
114+
/**
115+
* @brief Explicit template instantiation of cLSgemv for float
116+
*
117+
*/
118+
template float *GpuCLSgemv::cLSgemv<float>(const float *matAdata,
116119
const float *vecXdata,
117120
float *vecYdata, float alpha,
118121
float beta, unsigned int dim1,

nntrainer/tensor/cl_operations/cl_sgemv.h

Lines changed: 22 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -13,12 +13,17 @@
1313
*
1414
*/
1515

16-
#ifndef GPU_CL_SGEMV_HPP_
17-
#define GPU_CL_SGEMV_HPP_
16+
#ifndef __CL_SGEMV_H__
17+
#define __CL_SGEMV_H__
1818

1919
#include <opencl_op_interface.h>
2020

2121
namespace nntrainer::internal {
22+
/**
23+
* @class GpuCLSgemv
24+
* @brief Kernel and implementation of naive SGEMV. Used for
25+
* testing/experimentation.
26+
*/
2227
class GpuCLSgemv : public nntrainer::opencl::GpuCLOpInterface {
2328
std::string sgemv_kernel_ =
2429
R"(__kernel void sgemv(const __global float* A, const __global float* X,
@@ -31,10 +36,23 @@ class GpuCLSgemv : public nntrainer::opencl::GpuCLOpInterface {
3136
})";
3237

3338
public:
39+
/**
40+
* @brief Function to set buffers and kernel arguments for SGEMV
41+
*
42+
* @tparam T
43+
* @param matAdata
44+
* @param vecXdata
45+
* @param vecYdata
46+
* @param alpha
47+
* @param beta
48+
* @param dim1
49+
* @param dim2
50+
* @return T*
51+
*/
3452
template <typename T>
35-
T *CLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata, T alpha, T beta,
53+
T *cLSgemv(const T *matAdata, const T *vecXdata, T *vecYdata, T alpha, T beta,
3654
unsigned int dim1, unsigned int dim2);
3755
};
3856
} // namespace nntrainer::internal
3957

40-
#endif // GPU_CL_SGEMV_HPP_
58+
#endif // __CL_SGEMV_H__

0 commit comments

Comments
 (0)