From ffd186e5e360315bf2c077313175f27987a5a23c Mon Sep 17 00:00:00 2001 From: Naoya Maruyama Date: Tue, 17 Dec 2024 14:18:32 -0800 Subject: [PATCH] Add a dump option to dump a fusion IR graph (#3603) `NVFUSER_DUMP=fusion_ir_graph` saves the dot representation of a fusion before lowering to a file named like `__tmp_fusion_ir_graph_inner_persistent_f0_c1_r0_g0.dot`. Example visualization: https://github.com/NVIDIA/Fuser/issues/3498#issuecomment-2549421686 --- csrc/options.cpp | 9 +++++---- csrc/options.h | 1 + csrc/runtime/executor.cpp | 22 ++++++++++++++++++---- 3 files changed, 24 insertions(+), 8 deletions(-) diff --git a/csrc/options.cpp b/csrc/options.cpp index f53ef79893d..0ca177aff7d 100644 --- a/csrc/options.cpp +++ b/csrc/options.cpp @@ -112,12 +112,13 @@ std::unordered_map> Options< {"expr_sort_verbose", DebugDumpOption::ExprSortVerbose}, {"ftrace", DebugDumpOption::FunctionTrace}, {"fusion_args", DebugDumpOption::FusionArgs}, - {"fusion_ir_original", DebugDumpOption::FusionIrOriginal}, - {"fusion_ir_concretized", DebugDumpOption::FusionIrConcretized}, - {"fusion_ir_preseg", DebugDumpOption::FusionIrPreseg}, - {"fusion_ir_presched", DebugDumpOption::FusionIrPresched}, {"fusion_ir", DebugDumpOption::FusionIr}, + {"fusion_ir_concretized", DebugDumpOption::FusionIrConcretized}, + {"fusion_ir_graph", DebugDumpOption::FusionIrGraph}, {"fusion_ir_math", DebugDumpOption::FusionIrMath}, + {"fusion_ir_original", DebugDumpOption::FusionIrOriginal}, + {"fusion_ir_presched", DebugDumpOption::FusionIrPresched}, + {"fusion_ir_preseg", DebugDumpOption::FusionIrPreseg}, {"global_zeroed_memory", DebugDumpOption::GlobalZeroedMemory}, {"host_ir", DebugDumpOption::HostIr}, {"index_type", DebugDumpOption::IndexType}, diff --git a/csrc/options.h b/csrc/options.h index d10e739af60..6e313672a02 100644 --- a/csrc/options.h +++ b/csrc/options.h @@ -40,6 +40,7 @@ enum class DebugDumpOption { // TODO(wujingyue): name the following FusionIrSched FusionIr, //!< Dump the Fusion IR before lowering. This is the Fusion IR fed //!< to `KernelExecutor::compileFusion`. + FusionIrGraph, //!< Dump a GraphViz graph of the Fusion IR FusionIrMath, //!< Dump just the compute (math) part of the above `FusionIr` //!< for conciseness KernelIr, //!< Dump the compiler Kernel IR diff --git a/csrc/runtime/executor.cpp b/csrc/runtime/executor.cpp index 8becb528951..04f86b1edd0 100644 --- a/csrc/runtime/executor.cpp +++ b/csrc/runtime/executor.cpp @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include @@ -304,6 +305,9 @@ void KernelExecutor::compile( NVF_ERROR( !fusion->outputs().empty(), "No output found for this kernel, aborting."); + createKernelId( + scheduler_type, fusion_id_, concrete_id_, runtime_id_, group_id_); + // TODO: refactor the options_ passed through options_.device = c10::Device(c10::DeviceType::CUDA, args.getDeviceIndex()); @@ -346,10 +350,21 @@ void KernelExecutor::compile( } } + if (isDebugDumpEnabled(DebugDumpOption::FusionIrMath)) { + fusion->printMath(); + } + if (isDebugDumpEnabled(DebugDumpOption::FusionIr)) { fusion->print(); - } else if (isDebugDumpEnabled(DebugDumpOption::FusionIrMath)) { - fusion->printMath(); + } + + if (isDebugDumpEnabled(DebugDumpOption::FusionIrGraph)) { + std::stringstream file_name; + file_name << "__tmp_fusion_ir_graph_" << kernel_id_ << ".dot"; + IrGraphGenerator::print( + fusion, + file_name.str().c_str(), + IrGraphGenerator::DetailLevel::ComputeOnly); } //! Force index_type to int and disable magic zero if we detect that the @@ -418,8 +433,7 @@ void KernelExecutor::compile( for (const auto& hook : post_lowering_hooks_) { hook(kernel); } - createKernelId( - scheduler_type, fusion_id_, concrete_id_, runtime_id_, group_id_); + setUsedTVs(); if (isDebugDumpEnabled(DebugDumpOption::KernelIr)) {