diff --git a/doc/tutorials/msc/pipeline/runner.ipynb b/doc/tutorials/msc/pipeline/runner.ipynb index 3bdb7d4..0d750c1 100644 --- a/doc/tutorials/msc/pipeline/runner.ipynb +++ b/doc/tutorials/msc/pipeline/runner.ipynb @@ -36,24 +36,7 @@ "cell_type": "code", "execution_count": 2, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/media/pc/data/lxw/ai/tvm/python/tvm/contrib/msc/framework/torch/codegen/codegen.py:74: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", - " state_dict = torch.load(folder.relpath(graph.name + \".pth\"))\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "torch output matmul:[1;1000,float32] Max 42.0085, Min -38.0906, Avg -0.31818\n", - "tvm output matmul:[1;1000,float32] Max 42.0085, Min -38.0906, Avg -0.31818\n" - ] - } - ], + "outputs": [], "source": [ "import numpy as np\n", "\n", @@ -71,27 +54,79 @@ "datas = [np.random.rand(*i[0]).astype(i[1]) for i in input_info]\n", "graph_model = fx.symbolic_trace(model)\n", "with torch.no_grad():\n", - " mod = from_fx(graph_model, input_info)\n", + " mod = from_fx(graph_model, input_info)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 构建并运行 `TorchRunner` " + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/media/pc/data/lxw/ai/tvm/python/tvm/contrib/msc/framework/torch/codegen/codegen.py:74: FutureWarning: You are using `torch.load` with `weights_only=False` (the current default value), which uses the default pickle module implicitly. It is possible to construct malicious pickle data which will execute arbitrary code during unpickling (See https://github.com/pytorch/pytorch/blob/main/SECURITY.md#untrusted-models for more details). In a future release, the default value for `weights_only` will be flipped to `True`. This limits the functions that could be executed during unpickling. Arbitrary objects will no longer be allowed to be loaded via this mode unless they are explicitly allowlisted by the user via `torch.serialization.add_safe_globals`. We recommend you start setting `weights_only=True` for any use case where you don't have full control of the loaded file. 
Please open an issue on GitHub for any issues related to this experimental feature.\n", + " state_dict = torch.load(folder.relpath(graph.name + \".pth\"))\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "torch output matmul:[1;1000,float32] Max 24.3415, Min -29.508, Avg -0.119518\n" + ] + } + ], + "source": [ "# build and run by torch\n", - "workspace = msc_utils.set_workspace(msc_utils.msc_dir(\"torch_test\"))\n", + "workspace = msc_utils.set_workspace(msc_utils.msc_dir(\".temp/torch_test\"))\n", "log_path = workspace.relpath(\"MSC_LOG\", keep_history=False)\n", "msc_utils.set_global_logger(\"critical\", log_path)\n", "torch_runner = TorchRunner(mod)\n", "torch_runner.build()\n", "outputs = torch_runner.run(datas)\n", "for k, v in outputs.items():\n", - " print(\"torch output {}:{}\".format(k, msc_utils.inspect_array(v)))\n", - "workspace.destory()\n", - "\n", - "# build and run by tvm\n", - "workspace = msc_utils.set_workspace(msc_utils.msc_dir(\"tvm_test\"))\n", + " print(f\"torch output {k}:{msc_utils.inspect_array(v)}\")\n", + "workspace.destory()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 构建并运行 `TVMRunner`" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "tvm output matmul:[1;1000,float32] Max 24.3415, Min -29.508, Avg -0.119518\n" + ] + } + ], + "source": [ + "workspace = msc_utils.set_workspace(msc_utils.msc_dir(\".temp/tvm_test\"))\n", "log_path = workspace.relpath(\"MSC_LOG\", keep_history=False)\n", "msc_utils.set_global_logger(\"critical\", log_path)\n", "tvm_runner = TVMRunner(mod)\n", "tvm_runner.build()\n", "outputs = tvm_runner.run(datas)\n", "for k, v in outputs.items():\n", - " print(\"tvm output {}:{}\".format(k, msc_utils.inspect_array(v)))\n", + " print(f\"tvm output {k}:{msc_utils.inspect_array(v)}\")\n", "workspace.destory()" ] }, @@ -99,7 +134,7 @@ "cell_type": 
"markdown", "metadata": {}, "source": [ - "MSCRunner屏蔽了runtime类型的差异,可以让MSCManger专注于流程控制而不需要处理runtime的细节。一个MSCRunner中包含1到多个MSCGraph(BYOC的Runner可以有多个MSCGraph)以及MSCTools。核心方法是build(构建runnable对象)和 run(跑数据)\n", + "`MSCRunner` 屏蔽了 runtime 类型的差异,可以让 `MSCManager` 专注于流程控制而不需要处理 runtime 的细节。一个 `MSCRunner` 中包含 1 到多个 `MSCGraph` (BYOC 的 Runner 可以有多个 `MSCGraph`)以及 `MSCTools`。核心方法是 `build` (构建 runnable 对象)和 `run`(跑数据)\n", "\n", "![](../images/runner.jpg)" ] @@ -109,7 +144,8 @@ "metadata": {}, "source": [ "## {meth}`MSCRunner.build` 构建 runnable 对象\n", - "`build`方法用于构建runnable对象,runnable对象可以调用MSCTools对runtime过程进行控制(例如稀疏化过程的apply mask,量化过程的q/dq操作等),并且可以被runtime系统直接加载。(例如TorchRunner对应的runnable对象是torch.Module,TVMRunner对应的runnable是tvm.VirtualMachine)\n", + "\n", + "`build`方法用于构建 runnable 对象,runnable 对象可以调用 `MSCTools` 对 runtime 过程进行控制(例如稀疏化过程的 apply mask,量化过程的 q/dq 运算等),并且可以被 runtime 系统直接加载。(例如 `TorchRunner` 对应的 runnable 对象是 {class}`torch.nn.Module`,`TVMRunner` 对应的 runnable 是 `tvm.VirtualMachine`)\n", "\n", "build过程产生三个阶段的object,每个阶段都会尝试从cache中读取,以此减少构建时间,每个阶段的任务为:\n", "1. 
`IRModule -> MSCGraph`:通过translate模块的`from_relax`将传入的relax IRModule构建为MSCGraph,此过程参考[test_graph_build](https://github.com/apache/tvm/blob/main/tests/python/contrib/test_msc/test_graph_build.py)\n", 
diff --git a/src/tvm_book/utils.py b/src/tvm_book/utils.py
new file mode 100644
index 0000000..bd7c13b
--- /dev/null
+++ b/src/tvm_book/utils.py
@@ -0,0 +1,45 @@
+class Bunch(dict):
+    """A ``dict`` whose items can also be read and written as attributes.
+
+    Plain ``dict`` values passed to the constructor are converted to ``Bunch``
+    (recursively), so nested data can be reached with chained dot access.
+    """
+
+    def __init__(self, *args, **kwargs):
+        super().__init__(*args, **kwargs)
+        # Use the mapping itself as the attribute namespace: ``b.key`` and
+        # ``b["key"]`` then refer to the same entry.
+        self.__dict__ = self
+        for k, v in self.items():
+            if isinstance(v, dict):
+                # ``Bunch(v)`` rather than ``Bunch(**v)``: keyword expansion
+                # raises TypeError for keys that are not strings.
+                self[k] = Bunch(v)  # support nested structures
+
+    def merge(self, other):
+        """Recursively merge the mapping ``other`` into this object, in place.
+
+        Keys missing from ``self`` are added, non-mapping values are replaced
+        by the value from ``other``, and mappings present on both sides are
+        merged key by key at every depth.
+
+        Raises:
+            TypeError: if a key holds a mapping on one side and a non-mapping
+                value on the other.
+        """
+        for k, v in other.items():
+            if k not in self:
+                # Wrap incoming mappings so dot access also works on them.
+                self[k] = Bunch(v) if isinstance(v, dict) else v
+                continue
+            current = self[k]
+            if isinstance(current, dict) and isinstance(v, dict):
+                if not isinstance(current, Bunch):
+                    # Values assigned after construction may be plain dicts.
+                    current = self[k] = Bunch(current)
+                # Recurse instead of ``dict.update`` so sibling keys survive.
+                current.merge(v)
+            elif not isinstance(current, dict) and not isinstance(v, dict):
+                self[k] = v
+            else:
+                raise TypeError(f"{other}不支持合并")
\ No newline at end of file
diff --git a/tests/a.toml b/tests/a.toml new file mode 100644 index 0000000..bdfac22 --- /dev/null +++ b/tests/a.toml @@ -0,0 +1,2 @@ +[a] +a.b = "d" \ No newline at end of file diff --git a/tests/test.ipynb b/tests/test.ipynb index bdbd1f5..5d4a4f1 100644 --- a/tests/test.ipynb +++ b/tests/test.ipynb @@ -38,6 +38,121 @@ "trace = torch.jit.trace(model, torch.rand(shape).float()).eval()\n", "torch.jit.save(trace, \".temp/mobilenet_v2.pt\")" ] + }, + { + "cell_type": "code", + "execution_count": 114, + "metadata": {}, + "outputs": [], + "source": [ + "from pathlib import Path\n", + "from tomlkit import dumps, loads, load\n", + "\n", + "default_config = \"\"\"\n", + "[compile]\n", + "target = \"vta2.0\" # vta2.0, sim_vta2.0, vta1.0, vta2.0_lite, sim_vta2.0_lite\n", + "quantized_dtype = \"a8w8\" # 支持对称量化 a8w8,a8w4,a8w4_custom\n", + "quantized_method = \"channel\" # 量化方法 layer,channel,channel_custom\n", + "activation_mode = \"percentile\" # 针对激活的量化算法 max,percentile,kl_divergence,mse\n", + "weight_mode = \"max\" 
# 针对权重的量化算法 max,mse\n", + "channel_custom_layers = [] # 支持指定层(conv2d,conv2d_transpose,dense)的权重进行逐通道量化,在channel_custom\n", + "a8w4_custom_layers = [] # 支持指定层(conv2d,conv2d_transpose,dense)进行4bit量化,在a8w4_custom下有效\n", + "cle = false # cross-layer equalization 跨层均衡提高量化精度\n", + "output_format = \"float\" # 输出数据格式 float,fixpoint,fixpoint_6axis\n", + "weight_pad = true # 是否使用硬件对weight进行16对齐操作\n", + "fuse_conv = true # 是否进行卷积和卷积融合\n", + "\n", + "[inference]\n", + "priority = \"bandwidth_first\"\t\t# 优先级,可选参数perf_first_L0,perf_first_L1,perf_first_L2,bandwidth_first\n", + "device = \"XM_V500\"\t\t\t# 板端芯片型号, 在目标平台为vta2.0时有效, 可选设备XM_V500,XM680V200,FPGA_XM_V500,FPGA_XM680V200,SOC_XM_V500,SOC_XM680V200,XM210V200,FPGA_XM210V200,SOC_XM210V200\n", + "device_mount_dir = \"/tmp\"\t\t# 板端挂载目录,用于指明推理所需资源在板端的目录\n", + "runtime_ext = 0\t\t\t\t\t# 深度可分离卷积weight扩展功能,在推理时动态扩展或者预热阶段扩展,可选参数0,1。0:预热阶段扩展,占用内存多,性能高 1:推理时动态扩展,省内存,性能低\n", + "\n", + "[performance]\n", + "frequency = 37.125\t\t\t# NPU工作频率(MHz), SOC_XM_V500工作频率620MHz, SOC_XM210V200工作频率500MHz\n", + "ref_frequency = 37.125 # SOC 参考频率(MHz)\n", + "mac = 256\t\t\t\t # NPU MAC数\n", + "roofline.SOC_XM_V500 = [123.30401032, 0.96533333] # SOC_XM_V500 空载下Imax点,[Imax, MAC利用率]\n", + "roofline.SOC_XM210V200 = [122.13009695, 0.9465] # SOC_XM210V200 空载下Imax点,[Imax, MAC利用率]\n", + "bandwidth_percent = [100, 50, 20] # 空闲带宽,单位百分比\n", + "ratio_discount = 100 # 模型性能折扣,单位百分比\n", + "\"\"\"" + ] + }, + { + "cell_type": "code", + "execution_count": 115, + "metadata": {}, + "outputs": [], + "source": [ + "default_config = loads(default_config)\n", + "path = \"/media/pc/data/board/arria10/lxw/tasks/tools/npuusertools/models/pytorch/resnet18/config.toml\"\n", + "with open(path, \"rb\") as fp:\n", + " config = load(fp)\n", + "default_config.update(config)\n", + "config = default_config\n", + "# config[\"model_dir\"] = str(Path(path).resolve().parent)\n", + "# with open(\"test.toml\", \"w\") as fp:\n", + "# dump(config, fp)" + ] + }, + { + "cell_type": "code", 
+ "execution_count": 127, + "metadata": {}, + "outputs": [], + "source": [ + "import tomlkit" + ] + }, + { + "cell_type": "code", + "execution_count": 128, + "metadata": {}, + "outputs": [], + "source": [ + "with open(\"a.toml\") as fp:\n", + " w = tomlkit.load(fp)" + ] + }, + { + "cell_type": "code", + "execution_count": 131, + "metadata": {}, + "outputs": [], + "source": [ + "config = tomlkit.loads(\"\"\"\n", + "[a]\n", + "a.b = \"d\"\n", + "\"\"\")" + ] + }, + { + "cell_type": "code", + "execution_count": 132, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'a': {'a': {'b': 'd'}}}" + ] + }, + "execution_count": 132, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "config" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": {