feat: 添加 DepthToSpace 算子，并在 kernel 层转发给 Transpose

Signed-off-by: YdrMaster <ydrml@hotmail.com>
InfiniTensor · Apr 24, 2024 · fc7e08d · fc7e08d
1 parent eca616f
commit fc7e08d
Show file tree

Hide file tree

Showing 6 changed files with 201 additions and 0 deletions.
diff --git a/src/04kernel/include/kernel/collectors/depth_to_space.h b/src/04kernel/include/kernel/collectors/depth_to_space.h
@@ -0,0 +1,25 @@
+#ifndef KERNEL_DEPTH_TO_SPACE_H
+#define KERNEL_DEPTH_TO_SPACE_H
+
+#include "../collector.h"
+
+namespace refactor::kernel {
+
+    /// Selects how DepthToSpace views the channel dimension before the transpose.
+    enum class DepthToSpaceMode : uint8_t {
+        /// Channels are viewed as (blocksize, blocksize, C / blocksize^2).
+        DCR,
+        /// Channels are viewed as (C / blocksize^2, blocksize, blocksize).
+        CRD,
+    };
+
+    /// Kernel collector for DepthToSpace: keeps the operator attributes and
+    /// offers the kernels able to execute it on the configured target.
+    struct DepthToSpaceCollector final : public InfoCollector {
+        /// Edge length of the square block moved from depth into space.
+        uint32_t blocksize;
+        /// Channel layout convention (DCR or CRD).
+        DepthToSpaceMode mode;
+
+        /// Stores the target together with both operator attributes.
+        DepthToSpaceCollector(decltype(_target) target,
+                              uint32_t blocksize,
+                              DepthToSpaceMode mode) noexcept;
+
+        /// Returns the kernel candidates for the given input/output tensors.
+        std::vector<KernelBox> filter(TensorRefs inputs, TensorRefs outputs) const final;
+    };
+
+}// namespace refactor::kernel
+
+#endif// KERNEL_DEPTH_TO_SPACE_H
diff --git a/src/04kernel/src/collectors/depth_to_space.cc b/src/04kernel/src/collectors/depth_to_space.cc
@@ -0,0 +1,50 @@
+#include "kernel/collectors/depth_to_space.h"
+#include "../kernels/transpose/cpu_kernel.hh"
+#include "../kernels/transpose/cuda_kernel.hh"
+
+namespace refactor::kernel {
+
+    /// Forwards the target to the base collector and records the attributes.
+    DepthToSpaceCollector::DepthToSpaceCollector(decltype(_target) target,
+                                                 decltype(blocksize) blockEdge,
+                                                 decltype(mode) layout) noexcept
+        : InfoCollector(target), blocksize(blockEdge), mode(layout) {}
+
+    /// Expresses DepthToSpace as a 6-D transpose and returns the transpose
+    /// kernels available for the configured target.
+    ///
+    /// The first input's leading four dimensions are read as n, c, h, w.
+    /// `outputs` is not inspected.
+    std::vector<KernelBox>
+    DepthToSpaceCollector::filter(TensorRefs inputs, TensorRefs outputs) const {
+        auto const &x = inputs[0].get();
+        auto const batch = x.shape[0];
+        auto const channels = x.shape[1];
+        auto const height = x.shape[2];
+        auto const width = x.shape[3];
+        // Channels remaining once a blocksize x blocksize block has been factored out.
+        auto const depth = channels / blocksize / blocksize;
+
+        // The two modes differ only in where the block axes sit inside the channel dimension.
+        auto info = mode == DepthToSpaceMode::DCR
+                        ? TransposeInfo(x.dataType,
+                                        {batch, blocksize, blocksize, depth, height, width},
+                                        {0, 3, 4, 1, 5, 2})
+                        : TransposeInfo(x.dataType,
+                                        {batch, depth, blocksize, blocksize, height, width},
+                                        {0, 1, 4, 2, 5, 3});
+
+        std::vector<KernelBox> candidates;
+        // Only kernels that were successfully built (non-null) are kept.
+        auto const keep = [&candidates](auto kernel) {
+            if (kernel) {
+                candidates.emplace_back(std::move(kernel));
+            }
+        };
+        switch (_target) {
+            case decltype(_target)::Cpu:
+                keep(TransposeCpu::build(info));
+                break;
+            case decltype(_target)::Nvidia:
+                keep(TransposeCuda::build(info));
+                break;
+            default:
+                UNREACHABLEX(void, "Unknown target");
+        }
+        return candidates;
+    }
+
+}// namespace refactor::kernel
diff --git a/src/05computation/include/computation/operators/depth_to_space.h b/src/05computation/include/computation/operators/depth_to_space.h
@@ -0,0 +1,25 @@
+#ifndef COMPUTATION_DEPTH_TO_SPACE_H
+#define COMPUTATION_DEPTH_TO_SPACE_H
+
+#include "../operator.h"
+#include "kernel/collectors/depth_to_space.h"
+
+namespace refactor::computation {
+    using kernel::DepthToSpaceMode;
+
+    /// Computation-layer DepthToSpace operator carrying its two attributes.
+    struct DepthToSpace final : public Operator {
+        /// Edge length of the square block moved from depth into space.
+        uint32_t blocksize;
+        /// Channel layout convention (DCR or CRD).
+        DepthToSpaceMode mode;
+
+        /// Stores both attributes.
+        DepthToSpace(uint32_t blocksize, DepthToSpaceMode mode) noexcept;
+
+        /// Identifier shared by every instance of this operator type.
+        static size_t typeId() noexcept;
+        /// Same value as typeId().
+        size_t opTypeId() const noexcept final;
+        /// Operator name used for display and serialization.
+        std::string_view name() const noexcept final;
+        /// Creates the kernel collector for the given target.
+        kernel::CollectorBox candidateKernels(Target target) const noexcept final;
+        /// Textual form "name(blocksize, mode)".
+        std::string serialize() const noexcept final;
+    };
+
+}// namespace refactor::computation
+
+#endif// COMPUTATION_DEPTH_TO_SPACE_H
diff --git a/src/05computation/src/operators/depth_to_space.cc b/src/05computation/src/operators/depth_to_space.cc
@@ -0,0 +1,23 @@
+#include "computation/operators/depth_to_space.h"
+
+namespace refactor::computation {
+    using Op = DepthToSpace;
+
+    /// Records the block edge length and the layout mode.
+    Op::DepthToSpace(decltype(blocksize) blockEdge, decltype(mode) layout) noexcept
+        : Operator(),
+          blocksize(blockEdge),
+          mode(layout) {}
+
+    /// Returns the address of a function-local static as the type identifier;
+    /// distinct objects have distinct addresses, so the value is unique to this function.
+    size_t Op::typeId() noexcept {
+        static uint8_t ID = 1;
+        auto *const anchor = &ID;
+        return reinterpret_cast<size_t>(anchor);
+    }
+    /// Instance-level accessor for the static type identifier.
+    size_t Op::opTypeId() const noexcept {
+        return typeId();
+    }
+    /// Name of this operator.
+    std::string_view Op::name() const noexcept {
+        return "DepthToSpace";
+    }
+    /// Creates a DepthToSpace kernel collector bound to `target` and this operator's attributes.
+    kernel::CollectorBox Op::candidateKernels(Target target) const noexcept {
+        return std::make_unique<kernel::DepthToSpaceCollector>(target, blocksize, mode);
+    }
+    /// Formats the operator as "name(blocksize, mode)" with the mode spelled "DCR" or "CRD".
+    std::string Op::serialize() const noexcept {
+        auto const modeText = mode == DepthToSpaceMode::DCR ? "DCR" : "CRD";
+        return fmt::format("{}({}, {})", name(), blocksize, modeText);
+    }
+
+}// namespace refactor::computation
diff --git a/src/07onnx/src/operators/depth_to_space.cc b/src/07onnx/src/operators/depth_to_space.cc
@@ -0,0 +1,51 @@
+#include "depth_to_space.hh"
+#include "common.h"
+#include "computation/operators/depth_to_space.h"
+
+namespace refactor::onnx {
+    using Op = DepthToSpace;
+
+    /// Stores the block edge length and takes ownership of the mode string.
+    Op::DepthToSpace(Int blockEdge, String layout)
+        : Operator(),
+          blocksize(blockEdge),
+          mode(std::move(layout)) {}
+
+    /// Builds the operator from its attributes: reads "blocksize" and
+    /// "mode", using "DCR" for the mode when the attribute is absent.
+    auto Op::build(ModelContext const &, std::string_view, Attributes attributes) -> OpBox {
+        // Read in this order and as separate statements: getOrInsert may modify `attributes`.
+        auto blockEdge = attributes["blocksize"].int_();
+        auto layout = attributes.getOrInsert("mode", {"DCR"}).string();
+        auto op = std::make_unique<Op>(blockEdge, layout);
+        return OpBox(std::move(op));
+    }
+    /// Returns the address of a function-local static as the type identifier;
+    /// distinct objects have distinct addresses, so the value is unique to this function.
+    size_t Op::typeId() {
+        static uint8_t ID = 1;
+        auto *const anchor = &ID;
+        return reinterpret_cast<size_t>(anchor);
+    }
+
+    /// Instance-level accessor for the static type identifier.
+    size_t Op::opTypeId() const {
+        return typeId();
+    }
+    /// Qualified operator name.
+    std::string_view Op::opTypeName() const {
+        return "onnx::DepthToSpace";
+    }
+    /// DepthToSpace takes a single input (index 0) and infers its output from
+    /// that input's shape and data type only, so no input is value-dependent.
+    /// Index 1 would refer to an input this operator does not have.
+    auto Op::valueDependentInputs() const -> InputVec { return {}; }
+
+    /// Infers the output tensor of DepthToSpace.
+    ///
+    /// Expects exactly one rank-4 input whose dimensions are read as
+    /// (n, c, h, w). The output keeps the input data type and has shape
+    /// (n, c / blocksize^2, h * blocksize, w * blocksize).
+    ///
+    /// Returns an InferError when the mode is neither "DCR" nor "CRD", when
+    /// blocksize is not positive, when the input is not rank 4, or when c is
+    /// not divisible by blocksize^2.
+    /// NOTE(review): EXPECT_SIZE / EXPECT_VAL are defined outside this file;
+    /// presumably they return early on a wrong input count or on a dimension
+    /// without a known value — confirm against common.h.
+    auto Op::infer(TensorRefs inputs, InferOptions const &options) const -> InferResult {
+        EXPECT_SIZE(1)
+
+        if (mode != "DCR" && mode != "CRD") {
+            return Err(InferError(ERROR_MSG("Invalid mode for DepthToSpace operator")));
+        }
+        // Reject this up front: a zero blocksize would divide by zero below,
+        // and a negative one yields a meaningless shape.
+        if (blocksize <= 0) {
+            return Err(InferError(ERROR_MSG("Blocksize for DepthToSpace operator should be positive")));
+        }
+
+        auto const &input = inputs[0];
+        if (input.rank() != 4) {
+            return Err(InferError(ERROR_MSG("Input layout should be NCHW")));
+        }
+        auto n = input.shape[0];
+        EXPECT_VAL(input.shape[1], c)
+        EXPECT_VAL(input.shape[2], h)
+        EXPECT_VAL(input.shape[3], w)
+        // Without this check the integer division would truncate silently and
+        // the output would hold a different number of elements than the input.
+        if (c % (blocksize * blocksize) != 0) {
+            return Err(InferError(ERROR_MSG("Input channels should be divisible by blocksize * blocksize")));
+        }
+        return Ok(Tensors{Tensor::share(
+            input.dataType,
+            Shape{n, DimExpr(c / blocksize / blocksize), DimExpr(h * blocksize), DimExpr(w * blocksize)},
+            extractDependency(inputs))});
+    }
+
+    /// Lowers to the computation-layer operator, mapping the mode string
+    /// "DCR" to DepthToSpaceMode::DCR and any other value to CRD.
+    auto Op::lower(TensorRefs) const -> computation::OpBox {
+        using computation::DepthToSpaceMode;
+        auto const loweredMode = mode == "DCR" ? DepthToSpaceMode::DCR : DepthToSpaceMode::CRD;
+        return std::make_unique<computation::DepthToSpace>(blocksize, loweredMode);
+    }
+
+}// namespace refactor::onnx
diff --git a/src/07onnx/src/operators/depth_to_space.hh b/src/07onnx/src/operators/depth_to_space.hh
@@ -0,0 +1,27 @@
+#ifndef ONNX_DEPTH_TO_SPACE_HH
+#define ONNX_DEPTH_TO_SPACE_HH
+
+#include "frontend/operator.h"
+
+namespace refactor::onnx {
+    using namespace frontend;
+
+    /// Frontend representation of the ONNX DepthToSpace operator.
+    struct DepthToSpace final : public Operator {
+        /// Value of the "blocksize" attribute.
+        Int blocksize;
+        /// Value of the "mode" attribute; infer() accepts "DCR" and "CRD".
+        String mode;
+
+        /// Stores both attributes.
+        explicit DepthToSpace(Int blocksize, String mode);
+
+        /// Creates the operator from a node's attributes.
+        static OpBox build(ModelContext const &context, std::string_view opType, Attributes attributes);
+        /// Identifier shared by every instance of this operator type.
+        static size_t typeId();
+
+        /// Same value as typeId().
+        size_t opTypeId() const final;
+        /// Qualified operator name.
+        std::string_view opTypeName() const final;
+        /// Indices of inputs whose values are required for inference.
+        InputVec valueDependentInputs() const final;
+        /// Infers the output tensor from the inputs.
+        InferResult infer(TensorRefs inputs, InferOptions const &options) const final;
+        /// Converts this operator into its computation-layer counterpart.
+        computation::OpBox lower(TensorRefs inputs) const final;
+    };
+
+}// namespace refactor::onnx
+
+#endif// ONNX_DEPTH_TO_SPACE_HH