From ab6fd82dbe88939fb7ee384e15469d1b0a5bdac7 Mon Sep 17 00:00:00 2001 From: xinetzone Date: Mon, 25 Sep 2023 13:40:15 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0=20doc/vta?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- doc/deploy/build.ipynb | 179 +++--- doc/deploy/get_source.ipynb | 40 +- .../basic/tests/python/tvm_ext/__init__.py | 17 - doc/tutorials/basic/tests/src/tvm_ext.cc | 4 +- doc/tutorials/index.md | 2 +- doc/tutorials/vta/ops/index.md | 2 - doc/{tutorials => }/vta/config.ipynb | 2 +- doc/{tutorials => }/vta/index.md | 2 +- doc/vta/insn/alu.ipynb | 246 ++++++++ doc/vta/insn/gemm.ipynb | 256 +++++++++ doc/vta/insn/index.md | 11 + doc/vta/insn/padded-load.ipynb | 246 ++++++++ doc/vta/insn/relu.ipynb | 142 +++++ doc/vta/insn/runtime-array.ipynb | 79 +++ doc/vta/insn/save-load-out.ipynb | 154 +++++ doc/vta/insn/shift-and-scale.ipynb | 140 +++++ doc/vta/ops/conv2d-transpose.ipynb | 19 + doc/vta/ops/conv2d.ipynb | 19 + doc/vta/ops/dense.ipynb | 218 +++++++ doc/vta/ops/gemm.ipynb | 531 ++++++++++++++++++ doc/vta/ops/group-conv2d.ipynb | 19 + doc/vta/ops/index.md | 9 + doc/vta/ops/test.py | 0 tests/test.ipynb | 49 ++ .../vta/test.ipynb => tests/tvmc.ipynb | 0 25 files changed, 2262 insertions(+), 124 deletions(-) delete mode 100644 doc/tutorials/vta/ops/index.md rename doc/{tutorials => }/vta/config.ipynb (99%) rename doc/{tutorials => }/vta/index.md (98%) create mode 100644 doc/vta/insn/alu.ipynb create mode 100644 doc/vta/insn/gemm.ipynb create mode 100644 doc/vta/insn/index.md create mode 100644 doc/vta/insn/padded-load.ipynb create mode 100644 doc/vta/insn/relu.ipynb create mode 100644 doc/vta/insn/runtime-array.ipynb create mode 100644 doc/vta/insn/save-load-out.ipynb create mode 100644 doc/vta/insn/shift-and-scale.ipynb create mode 100644 doc/vta/ops/conv2d-transpose.ipynb create mode 100644 doc/vta/ops/conv2d.ipynb create mode 100644 doc/vta/ops/dense.ipynb create mode 100644 doc/vta/ops/gemm.ipynb create mode 100644 doc/vta/ops/group-conv2d.ipynb create mode 100644 doc/vta/ops/index.md create mode 100644 doc/vta/ops/test.py create mode 100644 tests/test.ipynb rename doc/tutorials/vta/test.ipynb => tests/tvmc.ipynb (100%) diff --git a/doc/deploy/build.ipynb b/doc/deploy/build.ipynb index c37edb1b..2ee12623 100644 --- a/doc/deploy/build.ipynb +++ b/doc/deploy/build.ipynb @@ -40,7 +40,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -62,7 +62,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -81,7 +81,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 5, "metadata": {}, "outputs": [], "source": [ @@ -98,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 6, "metadata": {}, "outputs": [], "source": [ @@ -116,7 +116,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -136,7 +136,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -155,7 +155,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ @@ -171,7 +171,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +190,7 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 11, "metadata": {}, "outputs": [ { @@ -947,100 +947,100 @@ "!20 = !{!\"ctx_ptr\", !21, i64 0}\n", "!21 = !{!\"tvm-tbaa\"}\n", "!22 = !{!23, !23, i64 0}\n", - "!23 = !{!\"0x4bcbc20.w4.b0\", !24, i64 0}\n", - "!24 = !{!\"0x4bcbc20.w8.b0\", !25, i64 0}\n", - "!25 = !{!\"0x4bcbc20.w16.b0\", !26, i64 0}\n", - "!26 = !{!\"0x4bcbc20.w32.b0\", !27, i64 0}\n", - "!27 = !{!\"0x4bcbc20.w64.b0\", !28, i64 0}\n", - "!28 = !{!\"0x4bcbc20.w128.b0\", !29, i64 0}\n", - "!29 = !{!\"0x4bcbc20.w256.b0\", !30, i64 0}\n", - "!30 = !{!\"0x4bcbc20.w512.b0\", !31, i64 0}\n", - "!31 = !{!\"0x4bcbc20.w1024.b0\", !32, i64 0}\n", - "!32 = !{!\"0x4bcbc20\", !21, i64 0}\n", + "!23 = !{!\"0x4b308b0.w4.b0\", !24, i64 0}\n", + "!24 = !{!\"0x4b308b0.w8.b0\", !25, i64 0}\n", + "!25 = !{!\"0x4b308b0.w16.b0\", !26, i64 0}\n", + "!26 = !{!\"0x4b308b0.w32.b0\", !27, i64 0}\n", + "!27 = !{!\"0x4b308b0.w64.b0\", !28, i64 0}\n", + "!28 = !{!\"0x4b308b0.w128.b0\", !29, i64 0}\n", + "!29 = !{!\"0x4b308b0.w256.b0\", !30, i64 0}\n", + "!30 = !{!\"0x4b308b0.w512.b0\", !31, i64 0}\n", + "!31 = !{!\"0x4b308b0.w1024.b0\", !32, i64 0}\n", + "!32 = !{!\"0x4b308b0\", !21, i64 0}\n", "!33 = !{!34, !34, i64 0}\n", - "!34 = !{!\"0x4bcbc20.w4.b4\", !24, i64 0}\n", + "!34 = !{!\"0x4b308b0.w4.b4\", !24, i64 0}\n", "!35 = !{!36, !36, i64 0}\n", - "!36 = !{!\"0x4bcbc20.w4.b8\", !37, i64 0}\n", - "!37 = !{!\"0x4bcbc20.w8.b8\", !25, i64 0}\n", + "!36 = !{!\"0x4b308b0.w4.b8\", !37, i64 0}\n", + "!37 = !{!\"0x4b308b0.w8.b8\", !25, i64 0}\n", "!38 = !{!39, !39, i64 0}\n", - "!39 = !{!\"0x342c800.w8.b0\", !40, i64 0}\n", - "!40 = !{!\"0x342c800.w16.b0\", !41, i64 0}\n", - "!41 = !{!\"0x342c800.w32.b0\", !42, i64 0}\n", - "!42 = !{!\"0x342c800.w64.b0\", !43, i64 0}\n", - "!43 = !{!\"0x342c800.w128.b0\", !44, i64 0}\n", - "!44 = !{!\"0x342c800.w256.b0\", !45, i64 0}\n", - "!45 = !{!\"0x342c800.w512.b0\", !46, i64 0}\n", - "!46 = !{!\"0x342c800.w1024.b0\", !47, i64 0}\n", - "!47 = !{!\"0x342c800\", !21, i64 0}\n", + "!39 = !{!\"0x4c2e650.w8.b0\", !40, i64 0}\n", + "!40 = !{!\"0x4c2e650.w16.b0\", !41, i64 0}\n", + "!41 = !{!\"0x4c2e650.w32.b0\", !42, i64 0}\n", + "!42 = !{!\"0x4c2e650.w64.b0\", !43, i64 0}\n", + "!43 = !{!\"0x4c2e650.w128.b0\", !44, i64 0}\n", + "!44 = !{!\"0x4c2e650.w256.b0\", !45, i64 0}\n", + "!45 = !{!\"0x4c2e650.w512.b0\", !46, i64 0}\n", + "!46 = !{!\"0x4c2e650.w1024.b0\", !47, i64 0}\n", + "!47 = !{!\"0x4c2e650\", !21, i64 0}\n", "!48 = !{!49, !49, i64 0}\n", - "!49 = !{!\"0x342c800.w8.b8\", !40, i64 0}\n", + "!49 = !{!\"0x4c2e650.w8.b8\", !40, i64 0}\n", "!50 = !{!\"branch_weights\", i32 1, i32 1048576}\n", "!51 = !{!52, !52, i64 0}\n", - "!52 = !{!\"0x4bce400.w8.b0\", !53, i64 0}\n", - "!53 = !{!\"0x4bce400.w16.b0\", !54, i64 0}\n", - "!54 = !{!\"0x4bce400.w32.b0\", !55, i64 0}\n", - "!55 = !{!\"0x4bce400.w64.b0\", !56, i64 0}\n", - "!56 = !{!\"0x4bce400.w128.b0\", !57, i64 0}\n", - "!57 = !{!\"0x4bce400.w256.b0\", !58, i64 0}\n", - "!58 = !{!\"0x4bce400.w512.b0\", !59, i64 0}\n", - "!59 = !{!\"0x4bce400.w1024.b0\", !60, i64 0}\n", - "!60 = !{!\"0x4bce400\", !21, i64 0}\n", + "!52 = !{!\"0x4c154b0.w8.b0\", !53, i64 0}\n", + "!53 = !{!\"0x4c154b0.w16.b0\", !54, i64 0}\n", + "!54 = !{!\"0x4c154b0.w32.b0\", !55, i64 0}\n", + "!55 = !{!\"0x4c154b0.w64.b0\", !56, i64 0}\n", + "!56 = !{!\"0x4c154b0.w128.b0\", !57, i64 0}\n", + "!57 = !{!\"0x4c154b0.w256.b0\", !58, i64 0}\n", + "!58 = !{!\"0x4c154b0.w512.b0\", !59, i64 0}\n", + "!59 = !{!\"0x4c154b0.w1024.b0\", !60, i64 0}\n", + "!60 = !{!\"0x4c154b0\", !21, i64 0}\n", "!61 = !{!62, !62, i64 0}\n", - "!62 = !{!\"0x4bce400.w8.b8\", !53, i64 0}\n", + "!62 = !{!\"0x4c154b0.w8.b8\", !53, i64 0}\n", "!63 = !{!64, !64, i64 0}\n", - "!64 = !{!\"0x4d29ca0.w8.b0\", !65, i64 0}\n", - "!65 = !{!\"0x4d29ca0.w16.b0\", !66, i64 0}\n", - "!66 = !{!\"0x4d29ca0.w32.b0\", !67, i64 0}\n", - "!67 = !{!\"0x4d29ca0.w64.b0\", !68, i64 0}\n", - "!68 = !{!\"0x4d29ca0.w128.b0\", !69, i64 0}\n", - "!69 = !{!\"0x4d29ca0.w256.b0\", !70, i64 0}\n", - "!70 = !{!\"0x4d29ca0.w512.b0\", !71, i64 0}\n", - "!71 = !{!\"0x4d29ca0.w1024.b0\", !72, i64 0}\n", - "!72 = !{!\"0x4d29ca0\", !21, i64 0}\n", + "!64 = !{!\"0x4ce8170.w8.b0\", !65, i64 0}\n", + "!65 = !{!\"0x4ce8170.w16.b0\", !66, i64 0}\n", + "!66 = !{!\"0x4ce8170.w32.b0\", !67, i64 0}\n", + "!67 = !{!\"0x4ce8170.w64.b0\", !68, i64 0}\n", + "!68 = !{!\"0x4ce8170.w128.b0\", !69, i64 0}\n", + "!69 = !{!\"0x4ce8170.w256.b0\", !70, i64 0}\n", + "!70 = !{!\"0x4ce8170.w512.b0\", !71, i64 0}\n", + "!71 = !{!\"0x4ce8170.w1024.b0\", !72, i64 0}\n", + "!72 = !{!\"0x4ce8170\", !21, i64 0}\n", "!73 = !{!74, !74, i64 0}\n", - "!74 = !{!\"0x4d29ca0.w8.b8\", !65, i64 0}\n", + "!74 = !{!\"0x4ce8170.w8.b8\", !65, i64 0}\n", "!75 = !{!76, !76, i64 0}\n", - "!76 = !{!\"0x4eabdc0.w8.b0\", !77, i64 0}\n", - "!77 = !{!\"0x4eabdc0.w16.b0\", !78, i64 0}\n", - "!78 = !{!\"0x4eabdc0.w32.b0\", !79, i64 0}\n", - "!79 = !{!\"0x4eabdc0.w64.b0\", !80, i64 0}\n", - "!80 = !{!\"0x4eabdc0.w128.b0\", !81, i64 0}\n", - "!81 = !{!\"0x4eabdc0.w256.b0\", !82, i64 0}\n", - "!82 = !{!\"0x4eabdc0.w512.b0\", !83, i64 0}\n", - "!83 = !{!\"0x4eabdc0.w1024.b0\", !84, i64 0}\n", - "!84 = !{!\"0x4eabdc0\", !21, i64 0}\n", + "!76 = !{!\"0x4c4bfb0.w8.b0\", !77, i64 0}\n", + "!77 = !{!\"0x4c4bfb0.w16.b0\", !78, i64 0}\n", + "!78 = !{!\"0x4c4bfb0.w32.b0\", !79, i64 0}\n", + "!79 = !{!\"0x4c4bfb0.w64.b0\", !80, i64 0}\n", + "!80 = !{!\"0x4c4bfb0.w128.b0\", !81, i64 0}\n", + "!81 = !{!\"0x4c4bfb0.w256.b0\", !82, i64 0}\n", + "!82 = !{!\"0x4c4bfb0.w512.b0\", !83, i64 0}\n", + "!83 = !{!\"0x4c4bfb0.w1024.b0\", !84, i64 0}\n", + "!84 = !{!\"0x4c4bfb0\", !21, i64 0}\n", "!85 = !{!86, !86, i64 0}\n", - "!86 = !{!\"0x4eabdc0.w8.b8\", !77, i64 0}\n", + "!86 = !{!\"0x4c4bfb0.w8.b8\", !77, i64 0}\n", "!87 = !{!88, !88, i64 0}\n", - "!88 = !{!\"0x5075980.w8.b0\", !89, i64 0}\n", - "!89 = !{!\"0x5075980.w16.b0\", !90, i64 0}\n", - "!90 = !{!\"0x5075980.w32.b0\", !91, i64 0}\n", - "!91 = !{!\"0x5075980.w64.b0\", !92, i64 0}\n", - "!92 = !{!\"0x5075980.w128.b0\", !93, i64 0}\n", - "!93 = !{!\"0x5075980.w256.b0\", !94, i64 0}\n", - "!94 = !{!\"0x5075980.w512.b0\", !95, i64 0}\n", - "!95 = !{!\"0x5075980.w1024.b0\", !96, i64 0}\n", - "!96 = !{!\"0x5075980\", !21, i64 0}\n", + "!88 = !{!\"0x4c63e90.w8.b0\", !89, i64 0}\n", + "!89 = !{!\"0x4c63e90.w16.b0\", !90, i64 0}\n", + "!90 = !{!\"0x4c63e90.w32.b0\", !91, i64 0}\n", + "!91 = !{!\"0x4c63e90.w64.b0\", !92, i64 0}\n", + "!92 = !{!\"0x4c63e90.w128.b0\", !93, i64 0}\n", + "!93 = !{!\"0x4c63e90.w256.b0\", !94, i64 0}\n", + "!94 = !{!\"0x4c63e90.w512.b0\", !95, i64 0}\n", + "!95 = !{!\"0x4c63e90.w1024.b0\", !96, i64 0}\n", + "!96 = !{!\"0x4c63e90\", !21, i64 0}\n", "!97 = !{!98, !98, i64 0}\n", - "!98 = !{!\"0x5075980.w8.b8\", !89, i64 0}\n", + "!98 = !{!\"0x4c63e90.w8.b8\", !89, i64 0}\n", "!99 = !{!100, !100, i64 0}\n", - "!100 = !{!\"0x4afbc10.w8.b0\", !101, i64 0}\n", - "!101 = !{!\"0x4afbc10.w16.b0\", !102, i64 0}\n", - "!102 = !{!\"0x4afbc10.w32.b0\", !103, i64 0}\n", - "!103 = !{!\"0x4afbc10.w64.b0\", !104, i64 0}\n", - "!104 = !{!\"0x4afbc10.w128.b0\", !105, i64 0}\n", - "!105 = !{!\"0x4afbc10.w256.b0\", !106, i64 0}\n", - "!106 = !{!\"0x4afbc10.w512.b0\", !107, i64 0}\n", - "!107 = !{!\"0x4afbc10.w1024.b0\", !108, i64 0}\n", - "!108 = !{!\"0x4afbc10\", !21, i64 0}\n", + "!100 = !{!\"0x4b45120.w8.b0\", !101, i64 0}\n", + "!101 = !{!\"0x4b45120.w16.b0\", !102, i64 0}\n", + "!102 = !{!\"0x4b45120.w32.b0\", !103, i64 0}\n", + "!103 = !{!\"0x4b45120.w64.b0\", !104, i64 0}\n", + "!104 = !{!\"0x4b45120.w128.b0\", !105, i64 0}\n", + "!105 = !{!\"0x4b45120.w256.b0\", !106, i64 0}\n", + "!106 = !{!\"0x4b45120.w512.b0\", !107, i64 0}\n", + "!107 = !{!\"0x4b45120.w1024.b0\", !108, i64 0}\n", + "!108 = !{!\"0x4b45120\", !21, i64 0}\n", "!109 = !{!110, !110, i64 0}\n", - "!110 = !{!\"0x4afbc10.w8.b8\", !101, i64 0}\n", + "!110 = !{!\"0x4b45120.w8.b8\", !101, i64 0}\n", "!111 = !{!112, !112, i64 0}\n", - "!112 = !{!\"0x49f16f0\", !21, i64 0}\n", + "!112 = !{!\"0x4ca5fd0\", !21, i64 0}\n", "!113 = !{!114, !114, i64 0}\n", - "!114 = !{!\"0x4d7dbe0\", !21, i64 0}\n", + "!114 = !{!\"0x4b9ce30\", !21, i64 0}\n", "!115 = !{!116, !116, i64 0}\n", - "!116 = !{!\"0x4aa1a30\", !21, i64 0}\n", + "!116 = !{!\"0x4c9f880\", !21, i64 0}\n", "\n" ] } @@ -1048,13 +1048,6 @@ "source": [ "print(compiled_lib.lib.get_source())" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -1073,7 +1066,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.9" + "version": "3.10.12" }, "orig_nbformat": 4, "vscode": { diff --git a/doc/deploy/get_source.ipynb b/doc/deploy/get_source.ipynb index de648398..963dd31f 100644 --- a/doc/deploy/get_source.ipynb +++ b/doc/deploy/get_source.ipynb @@ -11,8 +11,17 @@ "cell_type": "code", "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/media/pc/data/lxw/ai/tvm\n" + ] + } + ], "source": [ + "import set_env\n", "import numpy as np\n", "import tvm\n", "from tvm import relay" @@ -22,12 +31,29 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "data": { + "text/html": [ + "
def @main(%x: Tensor[(1), float32], %y: Tensor[(1), float32]) {\n",
+       "  add(%x, %y)\n",
+       "}\n",
+       "
\n" + ], + "text/plain": [ + "" + ] + }, + "metadata": {}, + "output_type": "display_data" + } + ], "source": [ - "x = relay.var(\"x\", shape=(2,), dtype=\"float32\")\n", - "y = relay.var(\"y\", shape=(2,), dtype=\"float32\")\n", - "params = {\"y\": np.ones((2,), dtype=\"float32\")}\n", - "mod = tvm.IRModule.from_expr(relay.Function([x, y], x + y))" + "x = relay.var(\"x\", shape=(1,), dtype=\"float32\")\n", + "y = relay.var(\"y\", shape=(1,), dtype=\"float32\")\n", + "params = {\"y\": np.ones((1,), dtype=\"float32\")}\n", + "mod = tvm.IRModule.from_expr(relay.Function([x, y], x + y))\n", + "mod.show()" ] }, { @@ -79,7 +105,7 @@ " }\n", " if (!(tvmgen_default_fused_add_T_add_strides == NULL)) {\n", " }\n", - " *(float2*)(((float*)T_add_1) + 0) = (*(float2*)(((float*)p0_1) + 0) + *(float2*)(((float*)p1_1) + 0));\n", + " ((float*)T_add_1)[0] = (((float*)p0_1)[0] + ((float*)p1_1)[0]);\n", " return 0;\n", "}\n", "\n", diff --git a/doc/tutorials/basic/tests/python/tvm_ext/__init__.py b/doc/tutorials/basic/tests/python/tvm_ext/__init__.py index 0305da10..46feb50e 100755 --- a/doc/tutorials/basic/tests/python/tvm_ext/__init__.py +++ b/doc/tutorials/basic/tests/python/tvm_ext/__init__.py @@ -1,20 +1,3 @@ -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - """TVM 扩展包示例""" # 首先导入 TVM 以获取库 symbols from . import set_env diff --git a/doc/tutorials/basic/tests/src/tvm_ext.cc b/doc/tutorials/basic/tests/src/tvm_ext.cc index f351f8f3..ecec9b8b 100644 --- a/doc/tutorials/basic/tests/src/tvm_ext.cc +++ b/doc/tutorials/basic/tests/src/tvm_ext.cc @@ -3,7 +3,7 @@ using namespace tvm::runtime; -// 参考:https://daobook.github.io/tvm/docs/arch/runtime.html +// 参考:https://xinetzone.github.io/tvm/docs/arch/runtime.html void MyAdd(TVMArgs args, TVMRetValue* rv) { // 自动将参数转换为所需的类型。 int a = args[0]; @@ -19,4 +19,4 @@ TVM_REGISTER_GLOBAL("callhello") .set_body([](TVMArgs args, TVMRetValue* rv) { PackedFunc f = args[0]; f("hello world"); -}); \ No newline at end of file +}); diff --git a/doc/tutorials/index.md b/doc/tutorials/index.md index 1064f0ac..3da05665 100755 --- a/doc/tutorials/index.md +++ b/doc/tutorials/index.md @@ -11,7 +11,7 @@ frontend/index transform/index ../quantize/index relay/index -vta/index +../vta/index relax/index tedd/index datasets/index diff --git a/doc/tutorials/vta/ops/index.md b/doc/tutorials/vta/ops/index.md deleted file mode 100644 index 4f0a5e5e..00000000 --- a/doc/tutorials/vta/ops/index.md +++ /dev/null @@ -1,2 +0,0 @@ -# VTA 算子 - diff --git a/doc/tutorials/vta/config.ipynb b/doc/vta/config.ipynb similarity index 99% rename from doc/tutorials/vta/config.ipynb rename to doc/vta/config.ipynb index 04f42940..65cded35 100755 --- a/doc/tutorials/vta/config.ipynb +++ b/doc/vta/config.ipynb @@ -161,7 +161,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.11" + "version": "3.10.12" }, "orig_nbformat": 4, "vscode": { diff --git a/doc/tutorials/vta/index.md b/doc/vta/index.md similarity index 98% rename from doc/tutorials/vta/index.md rename to doc/vta/index.md index 7e96b8ac..5dd4fc0e 100644 --- a/doc/tutorials/vta/index.md +++ b/doc/vta/index.md @@ -15,6 +15,6 @@ VTA(发音为 vita,全称 versatile tensor accelerator,Open, Modular, Deep ```{toctree} config +insn/index ops/index -test ``` diff --git a/doc/vta/insn/alu.ipynb b/doc/vta/insn/alu.ipynb new file mode 100644 index 00000000..f3d755be --- /dev/null +++ b/doc/vta/insn/alu.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA ALU" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm\n", + "from tvm import te\n", + "import numpy as np\n", + "from tvm import topi\n", + "from tvm.contrib.utils import tempdir\n", + "\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:12:35.136 INFO load_module /tmp/tmp7oy9i8lt/load_act.o\n", + "2023-09-25 13:12:35.287 INFO load_module /tmp/tmp7oy9i8lt/load_act.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ALU SHL execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 4096\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 1024\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64\n", + "ALU MAX execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 4096\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 1024\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:12:35.458 INFO load_module /tmp/tmp7oy9i8lt/load_act.o\n", + "2023-09-25 13:12:35.609 INFO load_module /tmp/tmp7oy9i8lt/load_act.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ALU MAX execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 8192\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 1024\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64\n", + "ALU ADD execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 4096\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 1024\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:12:35.780 INFO load_module /tmp/tmp7oy9i8lt/load_act.o\n", + "2023-09-25 13:12:35.933 INFO load_module /tmp/tmp7oy9i8lt/load_act.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "ALU ADD execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 8192\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 1024\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64\n", + "ALU SHR execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 4096\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 1024\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " def check_alu(tvm_op, np_op=None, use_imm=False, test_name=None):\n", + " \"\"\"Test ALU\"\"\"\n", + " m = 8\n", + " n = 8\n", + " imm = np.random.randint(1, 5)\n", + " # compute\n", + " a = te.placeholder((m, n, env.BATCH, env.BLOCK_OUT), name=\"a\", dtype=env.acc_dtype)\n", + " a_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), \"a_buf\"\n", + " ) # DRAM->SRAM\n", + " if use_imm:\n", + " res_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: tvm_op(a_buf(*i), imm), \"res_buf\"\n", + " ) # compute\n", + " else:\n", + " b = te.placeholder((m, n, env.BATCH, env.BLOCK_OUT), name=\"b\", dtype=env.acc_dtype)\n", + " b_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: b(*i), \"b_buf\"\n", + " ) # DRAM->SRAM\n", + " res_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT),\n", + " lambda *i: tvm_op(a_buf(*i), b_buf(*i)),\n", + " \"res_buf\",\n", + " ) # compute5B\n", + " res = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT),\n", + " lambda *i: res_buf(*i).astype(env.inp_dtype),\n", + " \"res\",\n", + " ) # SRAM->DRAM\n", + " # schedule\n", + " s = te.create_schedule(res.op)\n", + " s[a_buf].set_scope(env.acc_scope) # SRAM\n", + " s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM\n", + " s[res_buf].set_scope(env.acc_scope) # SRAM\n", + " s[res_buf].pragma(res_buf.op.axis[0], env.alu) # compute\n", + " s[res].pragma(res.op.axis[0], env.dma_copy) # SRAM->DRAM\n", + " if not use_imm:\n", + " s[b_buf].set_scope(env.acc_scope) # SRAM\n", + " s[b_buf].pragma(b_buf.op.axis[0], env.dma_copy) # DRAM->SRAM\n", + "\n", + " if not remote:\n", + " return\n", + "\n", + " # build\n", + " with vta.build_config():\n", + " if use_imm:\n", + " mod = vta.build(s, [a, res], tvm.target.Target(\"ext_dev\", host=env.target_host))\n", + " else:\n", + " mod = vta.build(\n", + " s, [a, b, res], tvm.target.Target(\"ext_dev\", host=env.target_host)\n", + " )\n", + " temp = tempdir()\n", + " mod.save(temp.relpath(\"load_act.o\"))\n", + " remote.upload(temp.relpath(\"load_act.o\"))\n", + " f = remote.load_module(\"load_act.o\")\n", + " # verify\n", + " dev = remote.ext_dev(0)\n", + " a_np = np.random.randint(-16, 16, size=(m, n, env.BATCH, env.BLOCK_OUT)).astype(a.dtype)\n", + " if use_imm:\n", + " res_np = np_op(a_np, imm) if np_op else tvm_op(a_np, imm)\n", + " else:\n", + " b_np = np.random.randint(-16, 16, size=(m, n, env.BATCH, env.BLOCK_OUT)).astype(\n", + " b.dtype\n", + " )\n", + " res_np = np_op(a_np, b_np) if np_op else tvm_op(a_np, b_np)\n", + " res_np = res_np.astype(res.dtype)\n", + " a_nd = tvm.nd.array(a_np, dev)\n", + " res_nd = tvm.nd.array(np.zeros((m, n, env.BATCH, env.BLOCK_OUT)).astype(res.dtype), dev)\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " simulator.clear_stats()\n", + "\n", + " if use_imm:\n", + " f(a_nd, res_nd)\n", + " else:\n", + " b_nd = tvm.nd.array(b_np, dev)\n", + " f(a_nd, b_nd, res_nd)\n", + "\n", + " np.testing.assert_equal(res_np, res_nd.numpy())\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " sim_stats = simulator.stats()\n", + " print(\"ALU {} execution statistics:\".format(test_name))\n", + " for k, v in sim_stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + "\n", + " check_alu(lambda x, y: x << y, np.left_shift, use_imm=True, test_name=\"SHL\")\n", + " check_alu(tvm.te.max, np.maximum, use_imm=True, test_name=\"MAX\")\n", + " check_alu(tvm.te.max, np.maximum, test_name=\"MAX\")\n", + " check_alu(lambda x, y: x + y, use_imm=True, test_name=\"ADD\")\n", + " check_alu(lambda x, y: x + y, test_name=\"ADD\")\n", + " check_alu(lambda x, y: x >> y, np.right_shift, use_imm=True, test_name=\"SHR\")\n", + "\n", + "vta.testing.run(_run)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/insn/gemm.ipynb b/doc/vta/insn/gemm.ipynb new file mode 100644 index 00000000..12b1e821 --- /dev/null +++ b/doc/vta/insn/gemm.ipynb @@ -0,0 +1,256 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA GEMM" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm\n", + "from tvm import te\n", + "import numpy as np\n", + "from tvm import topi\n", + "from tvm.contrib.utils import tempdir\n", + "\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[13:11:21] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 13:11:21.765 INFO load_module /tmp/tmpmaomvjg0/gemm.o\n", + "[13:11:21] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GEMM schedule:default execution statistics:\n", + "\tinp_load_nbytes : 64\n", + "\twgt_load_nbytes : 1024\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 20\n", + "\tout_store_nbytes: 256\n", + "\tgemm_counter : 16\n", + "\talu_counter : 48\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:11:22.029 INFO load_module /tmp/tmpmaomvjg0/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GEMM schedule:smt execution statistics:\n", + "\tinp_load_nbytes : 64\n", + "\twgt_load_nbytes : 2048\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 40\n", + "\tout_store_nbytes: 256\n", + "\tgemm_counter : 16\n", + "\talu_counter : 48\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " # declare\n", + " o = 4\n", + " n = 1\n", + " m = 4\n", + " x = te.placeholder((o, n, env.BATCH, env.BLOCK_IN), name=\"x\", dtype=env.inp_dtype)\n", + " w = te.placeholder((m, n, env.BLOCK_OUT, env.BLOCK_IN), name=\"w\", dtype=env.wgt_dtype)\n", + " x_buf = te.compute((o, n, env.BATCH, env.BLOCK_IN), lambda *i: x(*i), \"x_buf\")\n", + " w_buf = te.compute((m, n, env.BLOCK_OUT, env.BLOCK_IN), lambda *i: w(*i), \"w_buf\")\n", + " ko = te.reduce_axis((0, n), name=\"ko\")\n", + " ki = te.reduce_axis((0, env.BLOCK_IN), name=\"ki\")\n", + " y_gem = te.compute(\n", + " (o, m, env.BATCH, env.BLOCK_OUT),\n", + " lambda bo, co, bi, ci: te.sum(\n", + " x_buf[bo, ko, bi, ki].astype(env.acc_dtype)\n", + " * w_buf[co, ko, ci, ki].astype(env.acc_dtype),\n", + " axis=[ko, ki],\n", + " ),\n", + " name=\"y_gem\",\n", + " )\n", + " y_shf = te.compute(\n", + " (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: y_gem(*i) >> 8, name=\"y_shf\"\n", + " )\n", + " y_max = te.compute(\n", + " (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: tvm.te.max(y_shf(*i), 0), \"y_max\"\n", + " ) # relu\n", + " y_min = te.compute(\n", + " (o, m, env.BATCH, env.BLOCK_OUT),\n", + " lambda *i: tvm.te.min(y_max(*i), (1 << (env.INP_WIDTH - 1)) - 1),\n", + " \"y_min\",\n", + " ) # relu\n", + " y = te.compute(\n", + " (o, m, env.BATCH, env.BLOCK_OUT), lambda *i: y_min(*i).astype(env.inp_dtype), name=\"y\"\n", + " )\n", + "\n", + " if not remote:\n", + " return\n", + "\n", + " def verify(s, name=None):\n", + " # Build with the CSE pass disabled as otherwise it would complicate the test\n", + " with vta.build_config(disabled_pass={\"tir.CommonSubexprElimTIR\"}):\n", + " mod = vta.build(s, [x, w, y], tvm.target.Target(\"ext_dev\", host=env.target_host))\n", + " temp = tempdir()\n", + " mod.save(temp.relpath(\"gemm.o\"))\n", + " remote.upload(temp.relpath(\"gemm.o\"))\n", + " f = remote.load_module(\"gemm.o\")\n", + " # verify\n", + " dev = remote.ext_dev(0)\n", + " x_np = np.random.randint(-128, 128, size=(o, n, env.BATCH, env.BLOCK_IN)).astype(\n", + " x.dtype\n", + " )\n", + " w_np = np.random.randint(-128, 128, size=(m, n, env.BLOCK_OUT, env.BLOCK_IN)).astype(\n", + " w.dtype\n", + " )\n", + " y_np = np.zeros((o, m, env.BATCH, env.BLOCK_OUT)).astype(y.dtype)\n", + " x_nd = tvm.nd.array(x_np, dev)\n", + " w_nd = tvm.nd.array(w_np, dev)\n", + " y_nd = tvm.nd.array(y_np, dev)\n", + " y_np = y_np.astype(env.acc_dtype)\n", + " for b in range(o):\n", + " for i in range(m):\n", + " for j in range(n):\n", + " y_np[b, i, :] += np.dot(\n", + " x_np[b, j, :].astype(env.acc_dtype), w_np[i, j].T.astype(env.acc_dtype)\n", + " )\n", + " y_np = np.right_shift(y_np, 8)\n", + " y_np = np.clip(y_np, 0, (1 << (env.INP_WIDTH - 1)) - 1).astype(y.dtype)\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " simulator.clear_stats()\n", + "\n", + " f(x_nd, w_nd, y_nd)\n", + "\n", + " np.testing.assert_equal(y_np, y_nd.numpy())\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " sim_stats = simulator.stats()\n", + " print(\"GEMM schedule:{} execution statistics:\".format(name))\n", + " for k, v in sim_stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + "\n", + " def test_schedule1():\n", + " # default schedule with no smt\n", + " s = te.create_schedule(y.op)\n", + " # set the scope of the SRAM buffers\n", + " s[x_buf].set_scope(env.inp_scope)\n", + " s[w_buf].set_scope(env.wgt_scope)\n", + " s[y_gem].set_scope(env.acc_scope)\n", + " s[y_shf].set_scope(env.acc_scope)\n", + " s[y_max].set_scope(env.acc_scope)\n", + " s[y_min].set_scope(env.acc_scope)\n", + " # set pragmas for DMA transfer and ALU ops\n", + " s[x_buf].compute_at(s[y_gem], ko)\n", + " s[x_buf].pragma(s[x_buf].op.axis[0], env.dma_copy)\n", + " s[w_buf].compute_at(s[y_gem], ko)\n", + " s[w_buf].pragma(s[w_buf].op.axis[0], env.dma_copy)\n", + " s[y_shf].pragma(s[y_shf].op.axis[0], env.alu)\n", + " s[y_max].pragma(s[y_max].op.axis[0], env.alu)\n", + " s[y_min].pragma(s[y_min].op.axis[0], env.alu)\n", + " s[y].pragma(s[y].op.axis[0], env.dma_copy)\n", + " # tensorization\n", + " s[y_gem].reorder(\n", + " ko,\n", + " s[y_gem].op.axis[0],\n", + " s[y_gem].op.axis[1],\n", + " s[y_gem].op.axis[2],\n", + " s[y_gem].op.axis[3],\n", + " ki,\n", + " )\n", + " s[y_gem].tensorize(s[y_gem].op.axis[2], env.gemm)\n", + " verify(s, name=\"default\")\n", + "\n", + " def test_smt():\n", + " # test smt schedule\n", + " s = te.create_schedule(y.op)\n", + " s[x_buf].set_scope(env.inp_scope)\n", + " s[w_buf].set_scope(env.wgt_scope)\n", + " s[y_gem].set_scope(env.acc_scope)\n", + " s[y_shf].set_scope(env.acc_scope)\n", + " s[y_max].set_scope(env.acc_scope)\n", + " s[y_min].set_scope(env.acc_scope)\n", + " abo, aco, abi, aci = s[y].op.axis\n", + " abo1, abo2 = s[y].split(abo, nparts=2)\n", + " s[y].bind(abo1, te.thread_axis(\"cthread\"))\n", + " s[y_gem].compute_at(s[y], abo1)\n", + " s[y_shf].compute_at(s[y], abo1)\n", + " s[y_max].compute_at(s[y], abo1)\n", + " s[y_min].compute_at(s[y], abo1)\n", + " s[y_gem].reorder(\n", + " ko,\n", + " s[y_gem].op.axis[0],\n", + " s[y_gem].op.axis[1],\n", + " s[y_gem].op.axis[2],\n", + " s[y_gem].op.axis[3],\n", + " ki,\n", + " )\n", + " s[y_gem].tensorize(s[y_gem].op.axis[2], env.gemm)\n", + " s[y_shf].pragma(s[y_shf].op.axis[0], env.alu)\n", + " s[y_max].pragma(s[y_max].op.axis[0], env.alu)\n", + " s[y_min].pragma(s[y_min].op.axis[0], env.alu)\n", + " s[x_buf].compute_at(s[y_gem], ko)\n", + " s[x_buf].pragma(s[x_buf].op.axis[0], env.dma_copy)\n", + " s[w_buf].compute_at(s[y_gem], ko)\n", + " s[w_buf].pragma(s[w_buf].op.axis[0], env.dma_copy)\n", + " s[y].pragma(abo2, env.dma_copy)\n", + " verify(s, name=\"smt\")\n", + "\n", + " test_schedule1()\n", + " test_smt()\n", + "\n", + "vta.testing.run(_run)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/insn/index.md b/doc/vta/insn/index.md new file mode 100644 index 00000000..acba172c --- /dev/null +++ b/doc/vta/insn/index.md @@ -0,0 +1,11 @@ +# VTA 指令集 + +```{toctree} +runtime-array +save-load-out +padded-load +gemm +alu +relu +shift-and-scale +``` diff --git a/doc/vta/insn/padded-load.ipynb b/doc/vta/insn/padded-load.ipynb new file mode 100644 index 00000000..f17e1c5e --- /dev/null +++ b/doc/vta/insn/padded-load.ipynb @@ -0,0 +1,246 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA padded load" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm\n", + "from tvm import te\n", + "import numpy as np\n", + "from tvm import topi\n", + "from tvm.contrib.utils import tempdir\n", + "\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:10:39.193 INFO load_module /tmp/tmpxhvhkw8k/padded_load.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Padded Y0 load execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 960\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 400\n", + "\tgemm_counter : 0\n", + "\talu_counter : 25\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:10:39.453 INFO load_module /tmp/tmpxhvhkw8k/padded_load.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Padded Y1 load execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 960\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 336\n", + "\tgemm_counter : 0\n", + "\talu_counter : 21\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:10:39.737 INFO load_module /tmp/tmpxhvhkw8k/padded_load.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Padded X0 load execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 960\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 400\n", + "\tgemm_counter : 0\n", + "\talu_counter : 25\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:10:39.993 INFO load_module /tmp/tmpxhvhkw8k/padded_load.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Padded X1 load execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 960\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 336\n", + "\tgemm_counter : 0\n", + "\talu_counter : 21\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:10:40.310 INFO load_module /tmp/tmpxhvhkw8k/padded_load.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Padded all load execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 960\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 560\n", + "\tgemm_counter : 0\n", + "\talu_counter : 35\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " def check_padded_load(pad_before, pad_after, test_name=None):\n", + " # declare\n", + " n = 3\n", + " m = 5\n", + " x = te.placeholder((n, m, env.BATCH, env.BLOCK_OUT), name=\"x\", dtype=env.acc_dtype)\n", + " x_buf = topi.nn.pad(x, pad_before, pad_after, name=\"y\")\n", + " # insert no-op that won't be optimized away\n", + " y_buf = te.compute(\n", + " (\n", + " n + pad_before[0] + pad_after[0],\n", + " m + pad_before[1] + pad_after[1],\n", + " env.BATCH,\n", + " env.BLOCK_OUT,\n", + " ),\n", + " lambda *i: x_buf(*i) >> 0,\n", + " \"y_buf\",\n", + " )\n", + " y = te.compute(\n", + " (\n", + " n + pad_before[0] + pad_after[0],\n", + " m + pad_before[1] + pad_after[1],\n", + " env.BATCH,\n", + " env.BLOCK_OUT,\n", + " ),\n", + " lambda *i: y_buf(*i).astype(env.inp_dtype),\n", + " \"y\",\n", + " )\n", + " # schedule\n", + " s = te.create_schedule(y.op)\n", + " s[x_buf].set_scope(env.acc_scope)\n", + " s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy)\n", + " s[y_buf].set_scope(env.acc_scope)\n", + " s[y_buf].pragma(y_buf.op.axis[0], env.alu)\n", + " s[y].pragma(y.op.axis[0], env.dma_copy)\n", + " # build\n", + " with vta.build_config():\n", + " mod = vta.build(s, [x, y], tvm.target.Target(\"ext_dev\", host=env.target_host))\n", + "\n", + " if not remote:\n", + " return\n", + " temp = tempdir()\n", + " mod.save(temp.relpath(\"padded_load.o\"))\n", + " remote.upload(temp.relpath(\"padded_load.o\"))\n", + " f = remote.load_module(\"padded_load.o\")\n", + " # verify\n", + " dev = remote.ext_dev(0)\n", + " x_np = np.random.randint(0, 10, size=(n, m, env.BATCH, env.BLOCK_OUT)).astype(x.dtype)\n", + " y_np = np.zeros(\n", + " (\n", + " n + pad_before[0] + pad_after[0],\n", + " m + pad_before[1] + pad_after[1],\n", + " env.BATCH,\n", + " env.BLOCK_OUT,\n", + " )\n", + " ).astype(y.dtype)\n", + " y_np[pad_before[0] : pad_before[0] + n, pad_before[1] : pad_before[1] + m, :] = x_np\n", + " x_nd = tvm.nd.array(x_np, dev)\n", + " y_nd = tvm.nd.empty(y_np.shape, device=dev, dtype=y_np.dtype)\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " simulator.clear_stats()\n", + "\n", + " f(x_nd, y_nd)\n", + "\n", + " np.testing.assert_equal(y_np, y_nd.numpy())\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " sim_stats = simulator.stats()\n", + " print(f\"Padded {test_name} load execution statistics:\")\n", + " for k, v in sim_stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + "\n", + " check_padded_load([2, 0, 0, 0], [0, 0, 0, 0], test_name=\"Y0\")\n", + " check_padded_load([0, 2, 0, 0], [0, 0, 0, 0], test_name=\"Y1\")\n", + " check_padded_load([0, 0, 0, 0], [2, 0, 0, 0], test_name=\"X0\")\n", + " check_padded_load([0, 0, 0, 0], [0, 2, 0, 0], test_name=\"X1\")\n", + " check_padded_load([1, 1, 0, 0], [1, 1, 0, 0], test_name=\"all\")\n", + "\n", + "vta.testing.run(_run)" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/insn/relu.ipynb b/doc/vta/insn/relu.ipynb new file mode 100644 index 00000000..747ece26 --- /dev/null +++ b/doc/vta/insn/relu.ipynb @@ -0,0 +1,142 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# RELU on ALU" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm\n", + "from tvm import te\n", + "import numpy as np\n", + "from tvm import topi\n", + "from tvm.contrib.utils import tempdir\n", + "\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:13:49.037 INFO load_module /tmp/tmppyrqqdcq/load_act.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Relu execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 5120\n", + "\tuop_load_nbytes : 8\n", + "\tout_store_nbytes: 1280\n", + "\tgemm_counter : 0\n", + "\talu_counter : 160\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " m = 8\n", + " n = 10\n", + " # compute\n", + " a = te.placeholder((m, n, env.BATCH, env.BLOCK_OUT), name=\"a\", dtype=env.acc_dtype)\n", + " a_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), \"a_buf\"\n", + " ) # DRAM->SRAM\n", + " max_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: tvm.te.max(a_buf(*i), 0), \"res_buf\"\n", + " ) # relu\n", + " min_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT),\n", + " lambda *i: tvm.te.min(max_buf(*i), (1 << (env.INP_WIDTH - 1)) - 1),\n", + " \"max_buf\",\n", + " ) # relu\n", + " res = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT),\n", + " lambda *i: min_buf(*i).astype(env.inp_dtype),\n", + " \"min_buf\",\n", + " ) # SRAM->DRAM\n", + " # schedule\n", + " s = te.create_schedule(res.op)\n", + " s[a_buf].set_scope(env.acc_scope) # SRAM\n", + " s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM\n", + " s[max_buf].set_scope(env.acc_scope) # SRAM\n", + " s[min_buf].set_scope(env.acc_scope) # SRAM\n", + " s[max_buf].pragma(max_buf.op.axis[0], env.alu) # compute\n", + " s[min_buf].pragma(min_buf.op.axis[0], env.alu) # compute\n", + " s[res].pragma(res.op.axis[0], env.dma_copy) # SRAM->DRAM\n", + " # build\n", + " with vta.build_config():\n", + " mod = vta.build(s, [a, res], tvm.target.Target(\"ext_dev\", host=env.target_host))\n", + " if not remote:\n", + " return\n", + " temp = tempdir()\n", + " mod.save(temp.relpath(\"load_act.o\"))\n", + " remote.upload(temp.relpath(\"load_act.o\"))\n", + " f = remote.load_module(\"load_act.o\")\n", + " # verify\n", + " dev = remote.ext_dev(0)\n", + " a_np = np.random.randint(-256, 256, size=(m, n, env.BATCH, env.BLOCK_OUT)).astype(a.dtype)\n", + " res_np = np.clip(a_np, 0, (1 << (env.INP_WIDTH - 1)) - 1).astype(res.dtype)\n", + " a_nd = tvm.nd.array(a_np, dev)\n", + " res_nd = tvm.nd.array(np.zeros((m, n, env.BATCH, env.BLOCK_OUT)).astype(res.dtype), dev)\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " simulator.clear_stats()\n", + "\n", + " f(a_nd, res_nd)\n", + "\n", + " np.testing.assert_equal(res_np, res_nd.numpy())\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " sim_stats = simulator.stats()\n", + " print(\"Relu execution statistics:\")\n", + " for k, v in sim_stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + "\n", + "vta.testing.run(_run)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/insn/runtime-array.ipynb b/doc/vta/insn/runtime-array.ipynb new file mode 100644 index 00000000..23048981 --- /dev/null +++ b/doc/vta/insn/runtime-array.ipynb @@ -0,0 +1,79 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA 运行时数组" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm\n", + "import numpy as np\n", + "\n", + "import vta.testing\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "remote[0]:ext_dev(0)\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " n = 100\n", + " dev = remote.ext_dev(0)\n", + " x_np = np.random.randint(1, 10, size=(n, n, env.BATCH, env.BLOCK_OUT)).astype(\"int8\")\n", + " x_nd = tvm.nd.array(x_np, dev)\n", + " print(x_nd.device)\n", + " np.testing.assert_equal(x_np, x_nd.numpy())\n", + "\n", + "vta.testing.run(_run)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/insn/save-load-out.ipynb b/doc/vta/insn/save-load-out.ipynb new file mode 100644 index 00000000..8b0fa52c --- /dev/null +++ b/doc/vta/insn/save-load-out.ipynb @@ -0,0 +1,154 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA save/store 输出命令" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tvm\n", + "from tvm import te\n", + "from tvm.contrib.utils import tempdir\n", + "\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 16, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 11:06:04.521 INFO load_module /tmp/tmp6wck92fv/load_act.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Save load execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 2304\n", + "\tuop_load_nbytes : 4\n", + "\tout_store_nbytes: 576\n", + "\tgemm_counter : 0\n", + "\talu_counter : 36\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " n = 6\n", + " x = te.placeholder((n, n, env.BATCH, env.BLOCK_OUT), name=\"x\", dtype=env.acc_dtype)\n", + " x_buf = te.compute((n, n, env.BATCH, env.BLOCK_OUT), lambda *i: x(*i), \"x_buf\")\n", + " # 插入不会被优化掉的 no-op\n", + " y_buf = te.compute((n, n, env.BATCH, env.BLOCK_OUT), lambda *i: x_buf(*i) >> 0, \"y_buf\")\n", + " y = te.compute(\n", + " (n, n, env.BATCH, env.BLOCK_OUT), lambda *i: y_buf(*i).astype(env.inp_dtype), \"y\"\n", + " )\n", + " # schedule\n", + " s = te.create_schedule(y.op)\n", + " s[x_buf].set_scope(env.acc_scope)\n", + " s[x_buf].pragma(x_buf.op.axis[0], env.dma_copy)\n", + " s[y_buf].set_scope(env.acc_scope)\n", + " s[y_buf].pragma(y_buf.op.axis[0], env.alu)\n", + " s[y].pragma(y.op.axis[0], env.dma_copy)\n", + "\n", + " # 构建库\n", + " with vta.build_config():\n", + " m = vta.build(s, [x, y], tvm.target.Target(\"ext_dev\", host=env.target_host))\n", + "\n", + " temp = tempdir()\n", + " m.save(temp.relpath(\"load_act.o\"))\n", + " remote.upload(temp.relpath(\"load_act.o\"))\n", + " f = remote.load_module(\"load_act.o\")\n", + " # 验证\n", + " dev = remote.ext_dev(0)\n", + " x_np = np.random.randint(1, 10, size=(n, n, env.BATCH, env.BLOCK_OUT)).astype(x.dtype)\n", + " y_np = x_np.astype(y.dtype)\n", + " x_nd = tvm.nd.array(x_np, dev)\n", + " y_nd = tvm.nd.empty(y_np.shape, device=dev, dtype=y_np.dtype)\n", + "\n", + " assert env.TARGET in [\"sim\", \"tsim\"]\n", + " simulator.clear_stats()\n", + "\n", + " f(x_nd, y_nd)\n", + "\n", + " np.testing.assert_equal(y_np, y_nd.numpy())\n", + "\n", + " sim_stats = simulator.stats()\n", + " print(\"Save load execution statistics:\")\n", + " for k, v in sim_stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + "\n", + "vta.testing.run(_run)" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(576, 2304)" + ] + }, + "execution_count": 19, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "out_store_nbytes = np.prod([n, n, env.BATCH, env.BLOCK_OUT])\n", + "out_store_nbytes, out_store_nbytes*4" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/insn/shift-and-scale.ipynb b/doc/vta/insn/shift-and-scale.ipynb new file mode 100644 index 00000000..59d27ff7 --- /dev/null +++ b/doc/vta/insn/shift-and-scale.ipynb @@ -0,0 +1,140 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# shift and scale on ALU" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm\n", + "from tvm import te\n", + "import numpy as np\n", + "from tvm import topi\n", + "from tvm.contrib.utils import tempdir\n", + "\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "np.random.seed(0xDEADB)" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:15:49.368 INFO load_module /tmp/tmpkcrs3ymc/load_act.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Shift and scale execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 1024\n", + "\tuop_load_nbytes : 8\n", + "\tout_store_nbytes: 256\n", + "\tgemm_counter : 0\n", + "\talu_counter : 32\n" + ] + } + ], + "source": [ + "def _run(env, remote):\n", + " m = 2\n", + " n = 8\n", + " imm_shift = np.random.randint(0, 8)\n", + " imm_scale = np.random.randint(1, 5)\n", + " # compute\n", + " a = te.placeholder((m, n, env.BATCH, env.BLOCK_OUT), name=\"a\", dtype=env.acc_dtype)\n", + " a_buf = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a(*i), \"a_buf\"\n", + " ) # DRAM->SRAM\n", + " res_shift = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: a_buf(*i) + imm_shift, \"res_shift\"\n", + " ) # compute\n", + " res_scale = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_shift(*i) >> imm_scale, \"res_scale\"\n", + " ) # compute\n", + " res = te.compute(\n", + " (m, n, env.BATCH, env.BLOCK_OUT), lambda *i: res_scale(*i).astype(env.inp_dtype), \"res\"\n", + " ) # SRAM->DRAM\n", + " # schedule\n", + " s = te.create_schedule(res.op)\n", + " s[a_buf].set_scope(env.acc_scope) # SRAM\n", + " s[res_shift].set_scope(env.acc_scope) # SRAM\n", + " s[res_scale].set_scope(env.acc_scope) # SRAM\n", + " s[a_buf].pragma(a_buf.op.axis[0], env.dma_copy) # DRAM->SRAM\n", + " s[res_shift].pragma(res_shift.op.axis[0], env.alu) # compute\n", + " s[res_scale].pragma(res_scale.op.axis[0], env.alu) # compute\n", + " s[res].pragma(res.op.axis[0], env.dma_copy) # SRAM->DRAM\n", + " # build\n", + " mod = vta.build(s, [a, res], tvm.target.Target(\"ext_dev\", host=env.target_host))\n", + " if not remote:\n", + " return\n", + " temp = tempdir()\n", + " mod.save(temp.relpath(\"load_act.o\"))\n", + " remote.upload(temp.relpath(\"load_act.o\"))\n", + " f = remote.load_module(\"load_act.o\")\n", + " # verify\n", + " dev = remote.ext_dev(0)\n", + " a_np = np.random.randint(-10, 10, size=(m, n, env.BATCH, env.BLOCK_OUT)).astype(a.dtype)\n", + " res_np = np.right_shift((a_np + imm_shift), imm_scale)\n", + " res_np = res_np.astype(res.dtype)\n", + " a_nd = tvm.nd.array(a_np, dev)\n", + " res_nd = tvm.nd.array(np.zeros((m, n, env.BATCH, env.BLOCK_OUT)).astype(res.dtype), dev)\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " simulator.clear_stats()\n", + "\n", + " f(a_nd, res_nd)\n", + "\n", + " np.testing.assert_equal(res_np, res_nd.numpy())\n", + "\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " sim_stats = simulator.stats()\n", + " print(\"Shift and scale execution statistics:\")\n", + " for k, v in sim_stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + "\n", + "vta.testing.run(_run)\n" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/ops/conv2d-transpose.ipynb b/doc/vta/ops/conv2d-transpose.ipynb new file mode 100644 index 00000000..95ec30a2 --- /dev/null +++ b/doc/vta/ops/conv2d-transpose.ipynb @@ -0,0 +1,19 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA conv2d_transpose" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/ops/conv2d.ipynb b/doc/vta/ops/conv2d.ipynb new file mode 100644 index 00000000..7821b5ce --- /dev/null +++ b/doc/vta/ops/conv2d.ipynb @@ -0,0 +1,19 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA topi.conv2d" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/ops/dense.ipynb b/doc/vta/ops/dense.ipynb new file mode 100644 index 00000000..d3f54fad --- /dev/null +++ b/doc/vta/ops/dense.ipynb @@ -0,0 +1,218 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA topi.dense" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "\n", + "import tvm\n", + "from tvm import te\n", + "from tvm import autotvm, rpc\n", + "from tvm.contrib.utils import tempdir\n", + "# from tvm.contrib.pickle_memoize import memoize\n", + "from tvm import topi\n", + "import tvm.topi.testing\n", + "import vta\n", + "import vta.testing\n", + "from vta.testing import simulator\n", + "\n", + "# FIXME: we need a custom clip operator to circumvent a pattern detection limitation\n", + "@tvm.te.tag_scope(tag=topi.tag.ELEMWISE)\n", + "def my_clip(x, a_min, a_max):\n", + " \"\"\"Unlike topi's current clip, put min and max into two stages.\"\"\"\n", + " const_min = tvm.tir.const(a_min, x.dtype)\n", + " const_max = tvm.tir.const(a_max, x.dtype)\n", + " x = te.compute(x.shape, lambda *i: tvm.te.min(x(*i), const_max), name=\"clipA\")\n", + " x = te.compute(x.shape, lambda *i: tvm.te.max(x(*i), const_min), name=\"clipB\")\n", + " return x" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "env = vta.get_env()\n", + "remote = rpc.LocalSession()\n", + "target = env.target\n", + "batch_size = 16\n", + "in_feat = 512\n", + "out_feat = 1008" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "Cannot find config for target=ext_dev -keys=vta,cpu -device=vta -model=sim_1x16_i8w8a32_15_15_18_17, workload=('dense_packed.vta', ('TENSOR', (16, 32, 1, 16), 'int8'), ('TENSOR', (63, 32, 16, 16), 'int8'), None, 'int32'). A fallback configuration is used, which may bring great performance regression.\n", + "[13:34:39] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n" + ] + } + ], + "source": [ + "a_shape = (batch_size, in_feat)\n", + "w_shape = (out_feat, in_feat)\n", + "# data pack\n", + "data_shape = (batch_size // env.BATCH, in_feat // env.BLOCK_IN, env.BATCH, env.BLOCK_IN)\n", + "kernel_shape = (\n", + " out_feat // env.BLOCK_OUT,\n", + " in_feat // env.BLOCK_IN,\n", + " env.BLOCK_OUT,\n", + " env.BLOCK_IN,\n", + ")\n", + "fcompute = vta.top.dense_packed\n", + "fschedule = vta.top.schedule_dense_packed\n", + "# 声明计算\n", + "data = te.placeholder(data_shape, name=\"data\", dtype=env.inp_dtype)\n", + "kernel = te.placeholder(kernel_shape, name=\"kernel\", dtype=env.wgt_dtype)\n", + "# 定义调度\n", + "with target:\n", + " res = fcompute(data, kernel, None, env.acc_dtype)\n", + " res = topi.right_shift(res, 8)\n", + " res = my_clip(res, 0, (1 << env.OUT_WIDTH - 1) - 1)\n", + " res = topi.cast(res, env.out_dtype)\n", + " # Derive base schedule\n", + " s = fschedule([res])\n", + " # print(vta.lower(s, [data, kernel, res], simple_mode=True))\n", + "\n", + "num_ops = 2 * batch_size * in_feat * out_feat # 算子数量\n", + "# @memoize(\"vta.tests.test_benchmark_topi.dense.verify\")\n", + "def get_ref_data():\n", + " # derive min max for act, wgt types (max non inclusive)\n", + " a_min, a_max = 0 - (1 << (env.INP_WIDTH - 1)), (1 << (env.INP_WIDTH - 1))\n", + " w_min, w_max = 0 - (1 << (env.WGT_WIDTH - 1)), (1 << (env.WGT_WIDTH - 1))\n", + " a_np = np.random.randint(a_min, a_max, size=a_shape).astype(data.dtype)\n", + " w_np = np.random.randint(w_min, w_max, size=w_shape).astype(kernel.dtype)\n", + "\n", + " r_np = np.dot(a_np.astype(env.acc_dtype), w_np.T.astype(env.acc_dtype)).astype(\n", + " env.acc_dtype\n", + " )\n", + " return a_np, w_np, r_np\n", + "\n", + "data_np, kernel_np, res_ref = get_ref_data() # 原始数据\n", + "# 数据打包\n", + "data_np = data_np.reshape(\n", + " batch_size // env.BATCH, env.BATCH, in_feat // env.BLOCK_IN, env.BLOCK_IN\n", + ").transpose((0, 2, 1, 3))\n", + "kernel_np = kernel_np.reshape(\n", + " out_feat // env.BLOCK_OUT, env.BLOCK_OUT, in_feat // env.BLOCK_IN, env.BLOCK_IN\n", + ").transpose((0, 2, 1, 3))\n", + "\n", + "# 构建库\n", + "mod = vta.build(\n", + " s,\n", + " [data, kernel, res],\n", + " target=tvm.target.Target(target, host=env.target_host),\n", + " name=\"dense\",\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "2023-09-25 13:35:10.013 INFO load_module /tmp/tmp67j7fl33/dense.o\n" + ] + } + ], + "source": [ + "temp = tempdir()\n", + "mod.save(temp.relpath(\"dense.o\"))\n", + "remote.upload(temp.relpath(\"dense.o\"))\n", + "f = remote.load_module(\"dense.o\")\n", + "dev = remote.device(str(target))\n", + "res_np = np.zeros(topi.utils.get_const_tuple(res.shape)).astype(res.dtype)\n", + "data_arr = tvm.nd.array(data_np, dev)\n", + "kernel_arr = tvm.nd.array(kernel_np, dev)\n", + "res_arr = tvm.nd.array(res_np, dev)\n", + "time_f = f.time_evaluator(\"dense\", dev, number=4)" + ] + }, + { + "cell_type": "code", + "execution_count": 6, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "VTA DENSE TEST: Time cost = 0.0442777 sec/op, 0.372989 GOPS\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[13:35:11] /media/pc/data/lxw/ai/tvm/src/runtime/profiling.cc:101: Warning: No timer implementation for ext_dev, using default timer instead. It may be inaccurate or have extra overhead.\n" + ] + } + ], + "source": [ + "# In vta sim mode, collect simulator runtime statistics\n", + "simulator.clear_stats()\n", + "cost = time_f(data_arr, kernel_arr, res_arr)\n", + "stats = simulator.stats()\n", + "# 验证正确性\n", + "res_orig = res_arr.numpy()\n", + "res_orig = res_orig.reshape(batch_size, out_feat) # 数据打包\n", + "res_ref = res_ref >> 8\n", + "res_ref = np.clip(res_ref, 0, (1 << env.OUT_WIDTH - 1) - 1)\n", + "res_ref = res_ref.astype(env.out_dtype)\n", + "correct = np.allclose(res_orig, res_ref)\n", + "gops = (num_ops / cost.mean) / float(10**9)\n", + "print(f\"VTA DENSE TEST: Time cost = {cost.mean:g} sec/op, {gops: g} GOPS\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/ops/gemm.ipynb b/doc/vta/ops/gemm.ipynb new file mode 100644 index 00000000..1a8dc12d --- /dev/null +++ b/doc/vta/ops/gemm.ipynb @@ -0,0 +1,531 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 通用矩阵乘法(VTA)" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [], + "source": [ + "import numpy as np\n", + "import tvm\n", + "from tvm import te\n", + "from tvm import rpc\n", + "from tvm.contrib.utils import tempdir\n", + "from vta.testing import simulator\n", + "import vta.testing" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "env = vta.get_env()\n", + "assert env.TARGET == \"sim\" and simulator.enabled()\n", + "remote = rpc.LocalSession()" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [], + "source": [ + "batch_size, channel, block = 128, 128, 128\n", + "data_shape = (batch_size // env.BATCH, channel // env.BLOCK_IN, env.BATCH, env.BLOCK_IN)\n", + "weight_shape = (\n", + " channel // env.BLOCK_OUT,\n", + " channel // env.BLOCK_IN,\n", + " env.BLOCK_OUT,\n", + " env.BLOCK_IN,\n", + ")\n", + "res_shape = (batch_size // env.BATCH, channel // env.BLOCK_OUT, env.BATCH, env.BLOCK_OUT)\n", + "# To compute number of ops, use a x2 factor for FMA\n", + "num_ops = 2 * channel * channel * batch_size" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "ko = te.reduce_axis((0, channel // env.BLOCK_IN), name=\"ko\")\n", + "ki = te.reduce_axis((0, env.BLOCK_IN), name=\"ki\")\n", + "\n", + "data = te.placeholder(data_shape, name=\"data\", dtype=env.inp_dtype)\n", + "weight = te.placeholder(weight_shape, name=\"weight\", dtype=env.wgt_dtype)\n", + "data_buf = te.compute(data_shape, lambda *i: data(*i), \"data_buf\")\n", + "weight_buf = te.compute(weight_shape, lambda *i: weight(*i), \"weight_buf\")\n", + "res_gem = te.compute(\n", + " res_shape,\n", + " lambda bo, co, bi, ci: te.sum(\n", + " data_buf[bo, ko, bi, ki].astype(env.acc_dtype)\n", + " * weight_buf[co, ko, ci, ki].astype(env.acc_dtype),\n", + " axis=[ko, ki],\n", + " ),\n", + " name=\"res_gem\",\n", + ")\n", + "res_shf = te.compute(res_shape, lambda *i: res_gem(*i) >> 8, name=\"res_shf\")\n", + "res_max = te.compute(res_shape, lambda *i: tvm.te.max(res_shf(*i), 0), \"res_max\") # relu\n", + "res_min = te.compute(\n", + " res_shape, lambda *i: tvm.te.min(res_max(*i), (1 << (env.INP_WIDTH - 1)) - 1), \"res_min\"\n", + ") # relu\n", + "res = te.compute(res_shape, lambda *i: res_min(*i).astype(env.inp_dtype), name=\"res\")" + ] + }, + { + "cell_type": "code", + "execution_count": 5, + "metadata": {}, + "outputs": [], + "source": [ + "def verify(s):\n", + " mod = vta.build(\n", + " s,\n", + " [data, weight, res],\n", + " tvm.target.Target(\"ext_dev\", host=env.target_host),\n", + " name=\"gemm\",\n", + " )\n", + " temp = tempdir()\n", + " mod.save(temp.relpath(\"gemm.o\"))\n", + " remote.upload(temp.relpath(\"gemm.o\"))\n", + " f = remote.load_module(\"gemm.o\")\n", + " # verify\n", + " dev = remote.ext_dev(0)\n", + " # Data in original format\n", + " data_orig = np.random.randint(-128, 128, size=(batch_size, channel)).astype(data.dtype)\n", + " weight_orig = np.random.randint(-128, 128, size=(channel, channel)).astype(weight.dtype)\n", + " data_packed = data_orig.reshape(\n", + " batch_size // env.BATCH, env.BATCH, channel // env.BLOCK_IN, env.BLOCK_IN\n", + " ).transpose((0, 2, 1, 3))\n", + " weight_packed = weight_orig.reshape(\n", + " channel // env.BLOCK_OUT, env.BLOCK_OUT, channel // env.BLOCK_IN, env.BLOCK_IN\n", + " ).transpose((0, 2, 1, 3))\n", + " res_np = np.zeros(res_shape).astype(res.dtype)\n", + " data_arr = tvm.nd.array(data_packed, dev)\n", + " weight_arr = tvm.nd.array(weight_packed, dev)\n", + " res_arr = tvm.nd.array(res_np, dev)\n", + " res_ref = np.zeros(res_shape).astype(env.acc_dtype)\n", + " for b in range(batch_size // env.BATCH):\n", + " for i in range(channel // env.BLOCK_OUT):\n", + " for j in range(channel // env.BLOCK_IN):\n", + " res_ref[b, i, :] += np.dot(\n", + " data_packed[b, j, :].astype(env.acc_dtype),\n", + " weight_packed[i, j].T.astype(env.acc_dtype),\n", + " )\n", + " res_ref = np.right_shift(res_ref, 8)\n", + " res_ref = np.clip(res_ref, 0, (1 << (env.INP_WIDTH - 1)) - 1).astype(res.dtype)\n", + " time_f = f.time_evaluator(\"gemm\", dev, number=20)\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " simulator.clear_stats()\n", + " cost = time_f(data_arr, weight_arr, res_arr)\n", + " if env.TARGET in [\"sim\", \"tsim\"]:\n", + " stats = simulator.stats()\n", + " print(\"Execution statistics:\")\n", + " for k, v in stats.items():\n", + " print(\"\\t{:<16}: {:>16}\".format(k, v))\n", + " res_unpack = res_arr.numpy().reshape(\n", + " batch_size // env.BATCH, channel // env.BLOCK_OUT, env.BATCH, env.BLOCK_OUT\n", + " )\n", + " return cost\n", + "\n", + "def run_schedule(load_inp, load_wgt, gemm, alu, store_out, print_ir, block):\n", + " s = te.create_schedule(res.op)\n", + " s[data_buf].set_scope(env.inp_scope)\n", + " s[weight_buf].set_scope(env.wgt_scope)\n", + " s[res_gem].set_scope(env.acc_scope)\n", + " s[res_shf].set_scope(env.acc_scope)\n", + " s[res_min].set_scope(env.acc_scope)\n", + " s[res_max].set_scope(env.acc_scope)\n", + "\n", + " if block:\n", + " bblock = block // env.BATCH\n", + " iblock = block // env.BLOCK_IN\n", + " oblock = block // env.BLOCK_OUT\n", + " xbo, xco, xbi, xci = s[res].op.axis\n", + " xb1, xco1, xb2, xco2 = s[res].tile(xbo, xco, bblock, oblock)\n", + " store_pt = xb2\n", + "\n", + " s[res_gem].compute_at(s[res], xco1)\n", + " s[res_shf].compute_at(s[res], xco1)\n", + " s[res_min].compute_at(s[res], xco1)\n", + " s[res_max].compute_at(s[res], xco1)\n", + "\n", + " xbo, xco, xbi, xci = s[res_gem].op.axis\n", + " # Compute one line at a time\n", + " ko1, ko2 = s[res_gem].split(ko, iblock)\n", + " s[res_gem].reorder(ko1, ko2, xbo, xco, xbi, xci, ki)\n", + " s[data_buf].compute_at(s[res_gem], ko1)\n", + " s[weight_buf].compute_at(s[res_gem], ko1)\n", + " # Use VTA instructions\n", + " s[data_buf].pragma(s[data_buf].op.axis[0], load_inp)\n", + " s[weight_buf].pragma(s[weight_buf].op.axis[0], load_wgt)\n", + " s[res_gem].tensorize(xbi, gemm)\n", + " s[res_shf].pragma(s[res_shf].op.axis[0], alu)\n", + " s[res_min].pragma(s[res_min].op.axis[0], alu)\n", + " s[res_max].pragma(s[res_max].op.axis[0], alu)\n", + " s[res].pragma(store_pt, store_out)\n", + " else:\n", + " xbo, xco, xbi, xci = s[res_gem].op.axis\n", + " s[res_gem].reorder(ko, xbo, xco, xbi, xci, ki)\n", + " # Use VTA instructions\n", + " s[data_buf].pragma(s[data_buf].op.axis[0], load_inp)\n", + " s[weight_buf].pragma(s[weight_buf].op.axis[0], load_wgt)\n", + " s[res_gem].tensorize(xbi, gemm)\n", + " s[res_shf].pragma(s[res_shf].op.axis[0], alu)\n", + " s[res_min].pragma(s[res_min].op.axis[0], alu)\n", + " s[res_max].pragma(s[res_max].op.axis[0], alu)\n", + " s[res].pragma(s[res].op.axis[0], store_out)\n", + "\n", + " if print_ir:\n", + " print(tvm.lower(s, [data, weight, res], simple_mode=True))\n", + " return verify(s)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "GEMM GOPS End-to-End Test:" + ] + }, + { + "cell_type": "code", + "execution_count": 7, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[08:32:41] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 08:32:42.101 INFO load_module /tmp/tmp8u11kql8/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution statistics:\n", + "\tinp_load_nbytes : 344064\n", + "\twgt_load_nbytes : 344064\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 1008\n", + "\tout_store_nbytes: 344064\n", + "\tgemm_counter : 172032\n", + "\talu_counter : 64512\n", + "\tTime cost = 0.00169099 sec/op, 2.48038 GOPS\n" + ] + } + ], + "source": [ + "mock = env.mock\n", + "with vta.build_config():\n", + " cost = run_schedule(\n", + " env.dma_copy,\n", + " env.dma_copy,\n", + " env.gemm,\n", + " env.alu,\n", + " env.dma_copy,\n", + " print_ir=False,\n", + " block=block\n", + " )\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " print(\"\\tTime cost = %g sec/op, %g GOPS\" % (cost.mean, gops))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "GEMM Unit Test:" + ] + }, + { + "cell_type": "code", + "execution_count": 9, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[08:34:29] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 08:34:29.973 INFO load_module /tmp/tmp8u11kql8/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 756\n", + "\tout_store_nbytes: 0\n", + "\tgemm_counter : 172032\n", + "\talu_counter : 0\n", + "\tTime cost = 0.00688763 sec/op, 0.608962 GOPS\n" + ] + } + ], + "source": [ + "mock = env.mock\n", + "with vta.build_config():\n", + " cost = run_schedule(\n", + " mock.dma_copy, mock.dma_copy, env.gemm, mock.alu, mock.dma_copy,\n", + " print_ir=False,\n", + " block=block\n", + " )\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " print(\"\\tTime cost = %g sec/op, %g GOPS\" % (cost.mean, gops))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "ALU 测试:" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[08:33:08] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 08:33:08.365 INFO load_module /tmp/tmp8u11kql8/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 252\n", + "\tout_store_nbytes: 0\n", + "\tgemm_counter : 0\n", + "\talu_counter : 64512\n", + "\tTime cost = 0.000132332 sec/op, 31.6953 GOPS\n" + ] + } + ], + "source": [ + "mock = env.mock\n", + "with vta.build_config():\n", + " cost = run_schedule(\n", + " mock.dma_copy, mock.dma_copy, mock.gemm, env.alu, mock.dma_copy,\n", + " print_ir=False,\n", + " block=block\n", + " )\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " print(\"\\tTime cost = %g sec/op, %g GOPS\" % (cost.mean, gops))" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "LoadInp Unit Test:" + ] + }, + { + "cell_type": "code", + "execution_count": 11, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[08:36:33] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 08:36:33.333 INFO load_module /tmp/tmp8u11kql8/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution statistics:\n", + "\tinp_load_nbytes : 344064\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 0\n", + "\tout_store_nbytes: 0\n", + "\tgemm_counter : 0\n", + "\talu_counter : 0\n", + "\tTime cost = 2.45895e-06 sec/op, 1705.73 GOPS, bandwidth=53.3041 Gbits\n" + ] + } + ], + "source": [ + "mock = env.mock\n", + "with vta.build_config():\n", + " cost = run_schedule(\n", + " env.dma_copy, mock.dma_copy, mock.gemm, mock.alu, mock.dma_copy,\n", + " print_ir=False,\n", + " block=block\n", + " )\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " bandwith = (batch_size * channel * env.INP_WIDTH / cost.mean) / float(10**9)\n", + " print(\n", + " \"\\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits\"\n", + " % (cost.mean, gops, bandwith)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "LoadWgt Unit Test:" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[08:37:20] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 08:37:20.333 INFO load_module /tmp/tmp8u11kql8/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 344064\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 0\n", + "\tout_store_nbytes: 0\n", + "\tgemm_counter : 0\n", + "\talu_counter : 0\n", + "\tTime cost = 2.4185e-06 sec/op, 1734.26 GOPS, bandwidth=54.1956 Gbits\n" + ] + } + ], + "source": [ + "mock = env.mock\n", + "with vta.build_config():\n", + " cost = run_schedule(\n", + " mock.dma_copy, env.dma_copy, mock.gemm, mock.alu, mock.dma_copy,\n", + " print_ir=False,\n", + " block=block\n", + " )\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " bandwith = (batch_size * channel * env.INP_WIDTH / cost.mean) / float(10**9)\n", + " print(\n", + " \"\\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits\"\n", + " % (cost.mean, gops, bandwith)\n", + " )" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "StoreOut Unit Test:" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "[08:38:14] /media/pc/data/lxw/ai/tvm/src/tir/transforms/arg_binder.cc:95: Warning: Trying to bind buffer to another one with lower alignment requirement required_alignment=256, provided_alignment=64\n", + "2023-09-25 08:38:14.909 INFO load_module /tmp/tmp8u11kql8/gemm.o\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Execution statistics:\n", + "\tinp_load_nbytes : 0\n", + "\twgt_load_nbytes : 0\n", + "\tacc_load_nbytes : 0\n", + "\tuop_load_nbytes : 0\n", + "\tout_store_nbytes: 344064\n", + "\tgemm_counter : 0\n", + "\talu_counter : 0\n", + "\tTime cost = 2.62682e-05 sec/op, 159.672 GOPS, bandwidth=4.98975 Gbits\n" + ] + } + ], + "source": [ + "mock = env.mock\n", + "with vta.build_config():\n", + " cost = run_schedule(\n", + " mock.dma_copy, mock.dma_copy, mock.gemm, mock.alu, env.dma_copy,\n", + " print_ir=False,\n", + " block=block\n", + " )\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " gops = (num_ops / cost.mean) / float(10**9)\n", + " bandwith = (batch_size * channel * env.INP_WIDTH / cost.mean) / float(10**9)\n", + " print(\n", + " \"\\tTime cost = %g sec/op, %g GOPS, bandwidth=%g Gbits\"\n", + " % (cost.mean, gops, bandwith)\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/ops/group-conv2d.ipynb b/doc/vta/ops/group-conv2d.ipynb new file mode 100644 index 00000000..30d0f601 --- /dev/null +++ b/doc/vta/ops/group-conv2d.ipynb @@ -0,0 +1,19 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# VTA group-conv2d" + ] + } + ], + "metadata": { + "language_info": { + "name": "python" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/vta/ops/index.md b/doc/vta/ops/index.md new file mode 100644 index 00000000..b218ba98 --- /dev/null +++ b/doc/vta/ops/index.md @@ -0,0 +1,9 @@ +# VTA 算子 + +```{toctree} +gemm +dense +conv2d +conv2d-transpose +group-conv2d +``` diff --git a/doc/vta/ops/test.py b/doc/vta/ops/test.py new file mode 100644 index 00000000..e69de29b diff --git a/tests/test.ipynb b/tests/test.ipynb new file mode 100644 index 00000000..5d23252b --- /dev/null +++ b/tests/test.ipynb @@ -0,0 +1,49 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# 测试" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import tvm" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "tvmz", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/doc/tutorials/vta/test.ipynb b/tests/tvmc.ipynb similarity index 100% rename from doc/tutorials/vta/test.ipynb rename to tests/tvmc.ipynb