fix: resolve some warnings and remove the sync operation from inside the operators
Chamberlain0w0 authored and YdrMaster committed Jan 31, 2024
1 parent 2583eb3 commit 848eb19
Showing 35 changed files with 67 additions and 65 deletions.
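This commit makes two kinds of mechanical fixes across the kernel sources (byte arithmetic on void * and auto i = 0 loop counters, both of which trigger compiler warnings) and one behavioral change: the CNNL kernel routines no longer call cnrtQueueSync themselves, and callers such as the unit tests now synchronize once via kernel::bang::sync() before reading results. The following is a minimal, self-contained analogue of that second change in plain C++ (std::async stands in for the device queue; no CNNL/CNRT types are used), not the project's actual implementation:

#include <future>
#include <iostream>
#include <vector>

int main() {
    std::vector<int> output(4, 0);

    // "routine": enqueue the work and return immediately, like the kernel
    // lambdas below once the sync was removed from their bodies.
    auto pending = std::async(std::launch::async, [&output] {
        for (auto &v : output) v = 42;
    });

    // caller-side synchronization, playing the role of kernel::bang::sync()
    pending.wait();

    // only now is it safe to read the outputs back
    for (int v : output) std::cout << v << ' ';
    std::cout << '\n';
    return 0;
}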
5 changes: 2 additions & 3 deletions src/04kernel/src/kernels/batch_normalization/cnnl_kernel.cc
@@ -128,8 +128,8 @@ namespace refactor::kernel {
auto y = outputs[0];

void *xTrans = workspace;
- void *yTrans = xTrans + xTransSize;
- void *cursor = yTrans + xTransSize;
+ void *yTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
+ void *cursor = reinterpret_cast<uint8_t *>(yTrans) + xTransSize;

// transpose NCHW input to NHWC
CNNL_ASSERT(cnnlTranspose_v2(handle, d->NCHW2NHWC, d->inDesc, x,
@@ -147,7 +147,6 @@ namespace refactor::kernel {
CNNL_ASSERT(cnnlTranspose_v2(handle, d->NHWC2NCHW, d->inDescTrans, yTrans,
d->inDesc, y, cursor, workspaceSize));

- BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), totalWorkspaceSize};
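The change above replaces pointer arithmetic on void * (accepted only as a GNU extension, and one source of the warnings this commit addresses) with arithmetic on uint8_t *. A standalone illustration in plain C++, independent of the CNNL workspace types:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

int main() {
    const std::size_t xTransSize = 256;
    void *workspace = std::malloc(2 * xTransSize);

    // void * has no element size, so "workspace + xTransSize" compiles only as
    // a GNU extension; casting to a byte pointer makes the offset explicit.
    void *xTrans = workspace;
    void *yTrans = reinterpret_cast<std::uint8_t *>(xTrans) + xTransSize;

    std::printf("offset = %td bytes\n",
                reinterpret_cast<std::uint8_t *>(yTrans) -
                    reinterpret_cast<std::uint8_t *>(xTrans));
    std::free(workspace);
    return 0;
}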
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/cast/cnnl_kernel.cc
@@ -65,7 +65,6 @@ namespace refactor::kernel {
return [d = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
CNNL_ASSERT(cnnlCastDataType(res.fetchOrStore<CnnlContext>()->handle,
d->inDesc, inputs[0], d->cast, d->outDesc, outputs[0]));
- // BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};
}

1 change: 0 additions & 1 deletion src/04kernel/src/kernels/clip/cnnl_kernel.cc
@@ -57,7 +57,6 @@ namespace refactor::kernel {
CNNL_POINTER_MODE_DEVICE, d->t,
inputs[0], inputs[1], hasMax ? inputs[2] : nullptr,
d->t, outputs[0]));
- BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};
}

4 changes: 2 additions & 2 deletions src/04kernel/src/kernels/concat/cnnl_kernel.cc
@@ -52,7 +52,7 @@ namespace refactor::kernel {
}
~Descriptors() noexcept(false) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(in));
- for (auto i = 0; i < out.size(); i++) {
+ for (size_t i = 0; i < out.size(); i++) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(out[i]));
}
}
@@ -62,7 +62,7 @@
};
auto d = std::make_shared<Descriptors>(info.num, info.dataType != DT::F64);
setCnnlTensor(d->in, info.dataType, slice(info.inDim.data(), info.inDim.size()));
- for (auto i = 0; i < info.outDims.size(); i++) {
+ for (size_t i = 0; i < info.outDims.size(); i++) {
setCnnlTensor(d->out[i], info.dataType, slice(info.outDims[i].data(), info.outDims[i].size()));
}
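In the loops above, auto i = 0 deduces int, so the comparison with out.size() (a size_t) mixes signed and unsigned and draws -Wsign-compare; switching the counter to size_t is the fix repeated in the concat, slice, split, and gather changes in this commit. A standalone illustration:

#include <cstdio>
#include <vector>

int main() {
    std::vector<int> out{1, 2, 3, 4};
    // "for (auto i = 0; ...)" deduces int, and the comparison with out.size()
    // warns under -Wsign-compare; a size_t counter keeps both sides unsigned.
    for (size_t i = 0; i < out.size(); i++) {
        std::printf("out[%zu] = %d\n", i, out[i]);
    }
    return 0;
}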

6 changes: 3 additions & 3 deletions src/04kernel/src/kernels/conv/cnnl_kernel.cc
@@ -209,9 +209,9 @@ namespace refactor::kernel {
// }

void *xTrans = workspace;
- void *wTrans = xTrans + xTransSize;
- void *yTrans = wTrans + wTransSize;
- void *opWorkspace = yTrans + yTransSize;
+ void *wTrans = reinterpret_cast<uint8_t *>(xTrans) + xTransSize;
+ void *yTrans = reinterpret_cast<uint8_t *>(wTrans) + wTransSize;
+ void *opWorkspace = reinterpret_cast<uint8_t *>(yTrans) + yTransSize;

// transpose NCHW input to NHWC
CNNL_ASSERT(cnnlTranspose_v2(handle, d->NCHW2NHWC, d->x, x,
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/expand/cnnl_kernel.cc
@@ -60,7 +60,6 @@ namespace refactor::kernel {
return [d = std::move(d)](Resources &res, void *workspace, void const *const *inputs, void *const *outputs) {
CNNL_ASSERT(cnnlExpand(res.fetchOrStore<CnnlContext>()->handle,
d->inDesc, inputs[0], d->outDesc, outputs[0]));
- // BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};
}
#endif
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/gather/cnnl_kernel.cc
@@ -79,7 +79,6 @@ namespace refactor::kernel {
d->inDesc, inputs[0], reinterpret_cast<const int *>(workspace),
d->indexDesc, reinterpret_cast<const int *>(inputs[1]),
d->outDesc, outputs[0]));
- BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), workspaceSize};
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/mat_mul/cnnl_kernel.cc
@@ -141,7 +141,6 @@ namespace refactor::kernel {
workspace, algoWorkspaceSize));
}

- BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), algoWorkspaceSize};
4 changes: 2 additions & 2 deletions src/04kernel/src/kernels/pool/cnnl_kernel.cc
@@ -130,7 +130,7 @@ namespace refactor::kernel {
auto handle = res.fetchOrStore<CnnlContext>()->handle;

void *extraInputDev = workspace;
- void *poolWorkSpace = workspace + extraInputSize;
+ void *poolWorkSpace = reinterpret_cast<uint8_t *>(workspace) + extraInputSize;

void *extraInputHost = malloc(extraInputSize);
CNNL_ASSERT(cnnlInitPoolingExtraInput(handle, d->pooling, d->x, d->y, extraInputHost));
@@ -145,7 +145,7 @@
&b, extraInputDev, d->y, outputs[0],
poolWorkSpace, workspaceSize));

- BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
+ res.fetchOrStore<CnnlContext>()->queueSync();

free(extraInputHost);
};
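Unlike the other kernels, the pooling routine keeps a synchronization point, now expressed through the context's queueSync() member rather than a raw cnrtQueueSync call; presumably the queue has to drain before free(extraInputHost) so the host buffer is not released while enqueued work could still depend on it. The body of queueSync() is not part of this diff; it is assumed here to be a thin wrapper along these lines:

// Presumed shape only -- CnnlContext::queueSync() is not shown in this commit.
void CnnlContext::queueSync() {
    BANG_ASSERT(cnrtQueueSync(queue));
}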
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/simple_binary/binary_cnnl.cc
@@ -180,7 +180,6 @@ namespace refactor::kernel {
workspace, workspaceSize));
}

- BANG_ASSERT(cnrtQueueSync(res.fetchOrStore<CnnlContext>()->queue));
};

return {std::move(routine), workspaceSize};
2 changes: 1 addition & 1 deletion src/04kernel/src/kernels/slice/cnnl_kernel.cc
@@ -64,7 +64,7 @@ namespace refactor::kernel {
CNNL_ASSERT(cnnlSetTensorDescriptor(d->in, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.inDim.size(), info.inDim.data()));
CNNL_ASSERT(cnnlSetTensorDescriptor(d->out, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.outDim.size(), info.outDim.data()));
std::vector<int> begin, end, stride;
- for (auto i = 0; i < info.dims.size(); i++) {
+ for (size_t i = 0; i < info.dims.size(); i++) {
// [begin, end), end is not included
begin.push_back(info.dims[i].start);
auto sign = info.dims[i].step > 0 ? 1 : -1;
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/softmax/cnnl_kernel.cc
@@ -80,7 +80,6 @@ namespace refactor::kernel {
CNNL_COMPUTATION_ULTRAHIGH_PRECISION,
&a, d->t, inputs[0],
&b, d->t, outputs[0]));
- res.fetchOrStore<CnnlContext>()->queueSync();
};
}

4 changes: 2 additions & 2 deletions src/04kernel/src/kernels/split/cnnl_kernel.cc
@@ -69,7 +69,7 @@ namespace refactor::kernel {
}
~Descriptors() noexcept(false) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(in));
- for (auto i = 0; i < out.size(); i++) {
+ for (size_t i = 0; i < out.size(); i++) {
CNNL_ASSERT(cnnlDestroyTensorDescriptor(out[i]));
}
}
@@ -81,7 +81,7 @@
// setCnnlTensor(d->in, info.dataType, slice(info.inDim.data(), info.inDim.size()));
CNNL_ASSERT(cnnlSetTensorDescriptor(d->in, CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.inDim.size(), info.inDim.data()));

- for (auto i = 0; i < info.outDims.size(); i++) {
+ for (size_t i = 0; i < info.outDims.size(); i++) {
// setCnnlTensor(d->out[i], info.dataType, slice(info.outDims[i].data(), info.outDims[i].size()));
CNNL_ASSERT(cnnlSetTensorDescriptor(d->out[i], CNNL_LAYOUT_NCHW, cnnlDataTypeConvert(info.dataType), info.outDims[i].size(), info.outDims[i].data()));
}
1 change: 0 additions & 1 deletion src/04kernel/src/kernels/where/cnnl_kernel.cc
@@ -102,7 +102,6 @@ namespace refactor::kernel {
d->y, inputs[2], workspace, workspaceSize,
d->ans, outputs[0]));

- res.fetchOrStore<CnnlContext>()->queueSync();
};

return {std::move(routine), workspaceSize};
4 changes: 2 additions & 2 deletions src/04kernel/src/utilities/bang/cnrt_functions.cc
@@ -4,7 +4,7 @@
#include <cnrt.h>
#include <cstdio>

- namespace refactor::kernel::cnnl {
+ namespace refactor::kernel::bang {

int currentDevice() {
int device;
@@ -22,6 +22,6 @@ namespace refactor::kernel::cnnl {
CNRT_MEM_TRANS_DIR_DEV2HOST));
}

- }// namespace refactor::kernel::cnnl
+ }// namespace refactor::kernel::bang

#endif
4 changes: 2 additions & 2 deletions src/04kernel/src/utilities/bang/cnrt_functions.h
@@ -3,14 +3,14 @@

#include "common.h"

- namespace refactor::kernel::cnnl {
+ namespace refactor::kernel::bang {

int currentDevice();

void sync();

void copyOut(void *dst, const void *src, size_t size);

- }// namespace refactor::kernel::cnnl
+ }// namespace refactor::kernel::bang

#endif// KERNEL_CNRT_FUNCTIONS_H
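The two files above rename the CNRT helper namespace from refactor::kernel::cnnl to refactor::kernel::bang, matching the utilities/bang directory they live in. The test changes below pick the helpers up through that namespace; the call sequence they now follow (taken directly from the test diffs in this commit, with unrelated setup elided) is:

#include "../src/utilities/bang/cnrt_functions.h"

// ... build the routine and device buffers ...
routine(res, *workspace, inputs, outputs); // enqueue the kernel
kernel::bang::sync();                      // wait for the device
// ... copy results to host and compare against the CPU reference ...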
2 changes: 2 additions & 0 deletions src/04kernel/test/kernels/batch_normalization/test_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../../../src/kernels/batch_normalization/cnnl_kernel.hh"
#include "../../../src/kernels/batch_normalization/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

@@ -57,6 +58,7 @@ TEST(kernel, BatchNormalizationCnnl) {
void const *inputs[]{*mluIn, *mluScale, *mluBias, *mluMean, *mluVar};
void *outputs[]{*mluOut};
rMlu(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
// take output data
std::vector<float> result(outTensor->elementsSize());
4 changes: 3 additions & 1 deletion src/04kernel/test/kernels/cast/test_cnnl.cpp
@@ -1,7 +1,8 @@
#ifdef USE_BANG

#include "../../../src/kernels/cast/cpu_kernel.hh"
#include "../../../src/kernels/cast/cnnl_kernel.hh"
#include "../../../src/kernels/cast/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -34,6 +35,7 @@ TEST(kernel, CastCnnl) {
void const *inputs[]{*xMlu};
void *outputs[]{*yMlu};
routine(res, nullptr, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{x_.data()};
4 changes: 3 additions & 1 deletion src/04kernel/test/kernels/clip/test_cnnl.cpp
@@ -1,7 +1,8 @@
#ifdef USE_BANG

#include "../../../src/kernels/clip/cpu_kernel.hh"
#include "../../../src/kernels/clip/cnnl_kernel.hh"
#include "../../../src/kernels/clip/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -36,6 +37,7 @@ TEST(kernel, ClipCnnl) {
void const *inputs[]{*mluMem, *mluMin, *mluMax};
void *outputs[]{*mluMem};
routine(res, nullptr, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{value.data(), &min, &max};
4 changes: 3 additions & 1 deletion src/04kernel/test/kernels/concat/test_cnnl.cpp
@@ -1,7 +1,8 @@
#ifdef USE_BANG

#include "../../../src/kernels/concat/cpu_kernel.hh"
#include "../../../src/kernels/concat/cnnl_kernel.hh"
#include "../../../src/kernels/concat/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -65,6 +66,7 @@ TEST(kernel, ConcatCnnl) {
void const *inputs[]{*mluIns[0], *mluIns[1], *mluIns[2], *mluIns[3]};
void *outputs[]{*mluOut};
routine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{cpuIns[0].data(), cpuIns[1].data(), cpuIns[2].data(), cpuIns[3].data()};
11 changes: 2 additions & 9 deletions src/04kernel/test/kernels/conv/test_cnnl.cpp
@@ -1,6 +1,7 @@
#ifdef USE_BANG

#include "../../../src/kernels/conv/cnnl_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -39,15 +40,7 @@ void testConvCnnl(int rank, const int64_t *pads, const int64_t *strides, const i
void const *inputs[]{*xMlu, *wMlu};
void *outputs[]{*yMlu};
routine(res, *workspace, inputs, outputs);

- xMlu->copyToHost(xData.data(), xTensor->bytesSize());
- wMlu->copyToHost(wData.data(), wTensor->bytesSize());
- // fmt::println("{}", vec2str(xData));
- // fmt::println("{}", vec2str(wData));
-
- // std::vector<float> ws(workspaceSize);
- // workspace->copyToHost(ws.data(), workspaceSize);
- // fmt::println("{}", vec2str(ws));
+ kernel::bang::sync();

// take output data
std::vector<float> result(yTensor->elementsSize());
2 changes: 2 additions & 0 deletions src/04kernel/test/kernels/expand/test_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../../../src/kernels/expand/cnnl_kernel.hh"
#include "../../../src/kernels/expand/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>
#include <numeric>
@@ -36,6 +37,7 @@ TEST(kernel, ExpandCnnl) {
void const *inputs[]{*mluIn};
void *outputs[]{*mluOut};
routine(res, nullptr, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{data.data()};
6 changes: 5 additions & 1 deletion src/04kernel/test/kernels/gather/test_gather_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../src/kernels/gather/cnnl_kernel.hh"
#include "../src/kernels/gather/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

@@ -39,6 +40,7 @@ TEST(kernel, GatherCnnl) {
void const *inputs[]{*aMLU, *bMLU};
void *outputs[]{*cMLU};
cnnlRoutine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{a.data(), b.data()};
@@ -81,6 +83,7 @@ TEST(kernel, GatherCnnl) {
void const *inputs[]{*aMLU, *bMLU};
void *outputs[]{*cMLU};
cnnlRoutine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{a.data(), b.data()};
@@ -110,7 +113,7 @@ TEST(kernel, GatherCnnl) {
auto cpuRoutine = cpuKernel->lower(res).routine;
// Init inputs and outputs
std::vector<float> a;
- for (auto i = 0; i < data->elementsSize(); i++) {
+ for (size_t i = 0; i < data->elementsSize(); i++) {
a.push_back(i + 0.1f);
}
std::vector<int64_t> b(indices->elementsSize(), 0);
@@ -126,6 +129,7 @@
void const *inputs[]{*aMLU, *bMLU};
void *outputs[]{*cMLU};
cnnlRoutine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{a.data(), b.data()};
5 changes: 5 additions & 0 deletions src/04kernel/test/kernels/mat_mul/test_cnnl.cpp
@@ -2,6 +2,7 @@

#include "../src/kernels/mat_mul/cnnl_kernel.hh"
#include "../src/kernels/mat_mul/cpu_kernel.hh"
#include "../src/utilities/bang/cnrt_functions.h"
#include "hardware/device_manager.h"
#include <gtest/gtest.h>

@@ -48,6 +49,7 @@ TEST(kernel, MatMulCnnl_OnlyBias) {
void const *inputs[]{*ma, *mb, *mc};
void *outputs[]{*my};
routine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
// take output data
std::vector<float> result(Y->elementsSize());
my->copyToHost(result.data(), Y->bytesSize());
@@ -91,6 +93,7 @@ TEST(kernel, MatMulCnnl_Broadcast) {
void const *inputs[]{*ma, *mb, *mc};
void *outputs[]{*my};
mluRoutine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{dataA.data(), dataB.data(), dataC.data()};
@@ -135,6 +138,7 @@ TEST(kernel, MatMulCnnl_TransABNoBias) {
void const *inputs[]{*ma, *mb};
void *outputs[]{*my};
mluRoutine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{dataA.data(), dataB.data()};
@@ -189,6 +193,7 @@ TEST(kernel, MatMulCnnl_Large) {
void const *inputs[]{*ma, *mb, *mc};
void *outputs[]{*my};
mluRoutine(res, *workspace, inputs, outputs);
+ kernel::bang::sync();
}
{
void const *inputs[]{dataA.data(), dataB.data(), dataC.data()};