From aef5c35a5523e95d5f7cbf13c3e72d21fa2c0dbc Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 7 Oct 2025 12:29:15 -0700 Subject: [PATCH 01/11] add capture and replay feature --- py/torch_tensorrt/dynamo/debug/_Debugger.py | 80 +++++++++++++++++++ .../dynamo/debug/_DebuggerConfig.py | 1 + 2 files changed, 81 insertions(+) diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index ec624ffc5a..83504dd621 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -2,6 +2,7 @@ import functools import logging import os +import sys import tempfile from logging.config import dictConfig from typing import Any, List, Optional @@ -9,6 +10,7 @@ import torch from torch_tensorrt._features import ENABLED_FEATURES +from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig from torch_tensorrt.dynamo.debug._supports_debugger import ( @@ -32,6 +34,7 @@ def __init__( capture_fx_graph_before: Optional[List[str]] = None, capture_fx_graph_after: Optional[List[str]] = None, save_engine_profile: bool = False, + capture_shim: bool = False, profile_format: str = "perfetto", engine_builder_monitor: bool = True, logging_dir: str = DEBUG_LOGGING_DIR, @@ -49,6 +52,8 @@ def __init__( after execution of a lowering pass. Defaults to None. save_engine_profile (bool): Whether to save TensorRT engine profiling information. Defaults to False. + capture_shim (bool): Whether to save shim information. The directory to the shim output file are the logging_dir/shim/ + Defaults to False. profile_format (str): Format for profiling data. Choose from 'perfetto', 'trex', 'cudagraph'. If you need to generate engine graph using the profiling files, set it to 'trex' and use the C++ runtime. If you need to generate cudagraph visualization, set it to 'cudagraph'. 
@@ -62,9 +67,11 @@ def __init__( """ os.makedirs(logging_dir, exist_ok=True) + self.cfg = DebuggerConfig( log_level=log_level, save_engine_profile=save_engine_profile, + capture_shim=capture_shim, engine_builder_monitor=engine_builder_monitor, logging_dir=logging_dir, profile_format=profile_format, @@ -92,6 +99,26 @@ def __init__( self.capture_fx_graph_before = capture_fx_graph_before self.capture_fx_graph_after = capture_fx_graph_after + if self.cfg.capture_shim: + if not sys.platform.startswith("linux"): + _LOGGER.warning( + "capture_shim featureis only supported on linux, will not be enabled" + ) + self.cfg.capture_shim = False + return + if ENABLED_FEATURES.tensorrt_rtx: + raise ValueError( + "capture_shim feature is not supported on TensorRT-RTX, will not be enabled" + ) + self.cfg.capture_shim = False + return + if not is_tensorrt_version_supported("10.13.0"): + _LOGGER.warning( + "capture_shim feature is only supported on TensorRT 10.13 and above, will not be enabled" + ) + self.cfg.capture_shim = False + return + def __enter__(self) -> None: self.original_lvl = _LOGGER.getEffectiveLevel() if ENABLED_FEATURES.torch_tensorrt_runtime: @@ -143,9 +170,62 @@ def __enter__(self) -> None: for c in _DEBUG_ENABLED_CLS ] + if self.cfg.capture_shim: + self.original_environ_dict = {} + shim_lib_name = "libtensorrt_shim.so" + nvinfer_lib_name = "libnvinfer.so" + + def validate_setting() -> bool: + is_valid = True + # LD_PRELOAD and TRT_SHIM_NVINFER_LIB_NAME only read at exec-time; setting it during a running process won’t interpose already-loaded libs. 
+ # so, must set them before the tensorrt is loaded, cannot set during the Debugger.__enter__ + if os.environ.get("LD_PRELOAD") is None: + _LOGGER.error( + f"LD_PRELOAD is not set, please add the {shim_lib_name} with full path to the LD_PRELOAD environment variable" + ) + is_valid = False + if os.environ.get("TRT_SHIM_NVINFER_LIB_NAME") is None: + _LOGGER.error( + f"TRT_SHIM_NVINFER_LIB_NAME is not set, please add the {nvinfer_lib_name} with full path to the TRT_SHIM_NVINFER_LIB_NAME environment variable" + ) + is_valid = False + return is_valid + + if not validate_setting(): + return + + self.original_environ_dict["TRT_SHIM_OUTPUT_JSON_FILE"] = os.environ.get( + "TRT_SHIM_OUTPUT_JSON_FILE" + ) + if os.environ.get("TRT_SHIM_OUTPUT_JSON_FILE") is None: + # TRT_SHIM_OUTPUT_JSON_FILE is not set, set it to the default shim json file path + shim_output_dir = os.path.join(self.cfg.logging_dir, "shim") + shim_output_json_file = os.path.join(shim_output_dir, "shim.json") + else: + shim_output_json_file = os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] + # validate the shim_output_dir + # split the path from the full path of shim_output_dir + shim_output_dir = os.path.dirname(shim_output_json_file) + if len(shim_output_dir) == 0: + shim_output_dir = os.path.join(self.cfg.logging_dir, "shim") + shim_output_json_file = os.path.join(shim_output_dir, "shim.json") + + if not os.path.exists(shim_output_dir): + os.makedirs(shim_output_dir, exist_ok=True) + # if file alaredy exists, delete it first, so that we can create a fresh new one + if os.path.exists(shim_output_json_file): + os.remove(shim_output_json_file) + os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = shim_output_json_file + def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None: dictConfig(self.get_logging_config(None)) + if self.cfg.capture_shim: + for k, v in self.original_environ_dict.items(): + if v is None: + os.environ.pop(k, None) + else: + os.environ[k] = v if ENABLED_FEATURES.torch_tensorrt_runtime: 
torch.ops.tensorrt.set_logging_level(self.rt_level) if self.capture_fx_graph_before or self.capture_fx_graph_after: diff --git a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py index 27a5025e8b..97a7ae4055 100644 --- a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py +++ b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py @@ -7,6 +7,7 @@ class DebuggerConfig: log_level: str = "debug" save_engine_profile: bool = False + capture_shim: bool = False engine_builder_monitor: bool = True logging_dir: str = DEBUG_LOGGING_DIR profile_format: str = "perfetto" From 30b97072e1f23681ef33dcd791576e814dff2dfc Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 7 Oct 2025 14:06:17 -0700 Subject: [PATCH 02/11] test --- py/torch_tensorrt/dynamo/debug/_Debugger.py | 45 ++++++++------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index 83504dd621..1fc9c4027b 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -171,7 +171,6 @@ def __enter__(self) -> None: ] if self.cfg.capture_shim: - self.original_environ_dict = {} shim_lib_name = "libtensorrt_shim.so" nvinfer_lib_name = "libnvinfer.so" @@ -189,43 +188,31 @@ def validate_setting() -> bool: f"TRT_SHIM_NVINFER_LIB_NAME is not set, please add the {nvinfer_lib_name} with full path to the TRT_SHIM_NVINFER_LIB_NAME environment variable" ) is_valid = False + if os.environ.get("TRT_SHIM_OUTPUT_JSON_FILE") is None: + _LOGGER.error( + "TRT_SHIM_OUTPUT_JSON_FILE is not set, please add the shim output json file name with full path to the TRT_SHIM_OUTPUT_JSON_FILE environment variable" + ) + is_valid = False + else: + shim_output_json_file = os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] + shim_output_dir = os.path.dirname(shim_output_json_file) + if len(shim_output_dir) > 0 and not os.path.exists(shim_output_dir): + 
_LOGGER.debug( + f"shim output directory {shim_output_dir} does not exist, creating it now" + ) + os.makedirs(shim_output_dir) return is_valid if not validate_setting(): return - - self.original_environ_dict["TRT_SHIM_OUTPUT_JSON_FILE"] = os.environ.get( - "TRT_SHIM_OUTPUT_JSON_FILE" + json_file_name = os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] + _LOGGER.info( + f"capture_shim feature is enabled, shim output file is set to {json_file_name}" ) - if os.environ.get("TRT_SHIM_OUTPUT_JSON_FILE") is None: - # TRT_SHIM_OUTPUT_JSON_FILE is not set, set it to the default shim json file path - shim_output_dir = os.path.join(self.cfg.logging_dir, "shim") - shim_output_json_file = os.path.join(shim_output_dir, "shim.json") - else: - shim_output_json_file = os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] - # validate the shim_output_dir - # split the path from the full path of shim_output_dir - shim_output_dir = os.path.dirname(shim_output_json_file) - if len(shim_output_dir) == 0: - shim_output_dir = os.path.join(self.cfg.logging_dir, "shim") - shim_output_json_file = os.path.join(shim_output_dir, "shim.json") - - if not os.path.exists(shim_output_dir): - os.makedirs(shim_output_dir, exist_ok=True) - # if file alaredy exists, delete it first, so that we can create a fresh new one - if os.path.exists(shim_output_json_file): - os.remove(shim_output_json_file) - os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = shim_output_json_file def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None: dictConfig(self.get_logging_config(None)) - if self.cfg.capture_shim: - for k, v in self.original_environ_dict.items(): - if v is None: - os.environ.pop(k, None) - else: - os.environ[k] = v if ENABLED_FEATURES.torch_tensorrt_runtime: torch.ops.tensorrt.set_logging_level(self.rt_level) if self.capture_fx_graph_before or self.capture_fx_graph_after: From 96d7816ea8fa46afc50a13282feb49aaedca4038 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 7 Oct 2025 14:20:21 -0700 Subject: [PATCH 03/11] add 
doc --- py/torch_tensorrt/dynamo/debug/_Debugger.py | 11 ++-- tools/debug/capture_replay/README.md | 66 +++++++++++++++++++++ 2 files changed, 71 insertions(+), 6 deletions(-) create mode 100644 tools/debug/capture_replay/README.md diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index 1fc9c4027b..01bc44c899 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -52,7 +52,7 @@ def __init__( after execution of a lowering pass. Defaults to None. save_engine_profile (bool): Whether to save TensorRT engine profiling information. Defaults to False. - capture_shim (bool): Whether to save shim information. The directory to the shim output file are the logging_dir/shim/ + capture_shim (bool): Whether to enable the capture shim feature. It is part of the TensorRT capture and replay feature, the captured output will be able to replay for debug purpose. Defaults to False. profile_format (str): Format for profiling data. Choose from 'perfetto', 'trex', 'cudagraph'. If you need to generate engine graph using the profiling files, set it to 'trex' and use the C++ runtime. 
@@ -67,7 +67,6 @@ def __init__( """ os.makedirs(logging_dir, exist_ok=True) - self.cfg = DebuggerConfig( log_level=log_level, save_engine_profile=save_engine_profile, @@ -102,19 +101,19 @@ def __init__( if self.cfg.capture_shim: if not sys.platform.startswith("linux"): _LOGGER.warning( - "capture_shim featureis only supported on linux, will not be enabled" + "capture_shim featureis only supported on linux, will disable it" ) self.cfg.capture_shim = False return if ENABLED_FEATURES.tensorrt_rtx: - raise ValueError( - "capture_shim feature is not supported on TensorRT-RTX, will not be enabled" + _LOGGER.warning( + "capture_shim feature is not supported on TensorRT-RTX, will disable it" ) self.cfg.capture_shim = False return if not is_tensorrt_version_supported("10.13.0"): _LOGGER.warning( - "capture_shim feature is only supported on TensorRT 10.13 and above, will not be enabled" + "capture_shim feature is only supported on TensorRT 10.13 and above, will disable it" ) self.cfg.capture_shim = False return diff --git a/tools/debug/capture_replay/README.md b/tools/debug/capture_replay/README.md new file mode 100644 index 0000000000..9c08e4ae4c --- /dev/null +++ b/tools/debug/capture_replay/README.md @@ -0,0 +1,66 @@ +## Introduction + +This toolchain captures TensorRT network creation and build parameters at runtime via a shim, then deterministically replays them to reproduce an engine build. Use it to debug or reproduce builds independent of the originating framework. 
+ +### Prerequisites +- TensorRT installed (ensure you know the absolute path to its `lib` and `bin` directories) +- `libtensorrt_shim.so` available in your TensorRT `lib` directory +- `tensorrt_player` available in your TensorRT `bin` directory + +### Quick start: Capture +1) Export environment for the shim and paths (adjust paths for your system): + +```bash +export TENSORRT_DIR=/path/to/TensorRT- +export LD_LIBRARY_PATH=$TENSORRT_DIR/lib:$TENSORRT_DIR/bin:$LD_LIBRARY_PATH +export PATH=$TENSORRT_DIR/bin:$PATH + +# Tell the shim which libnvinfer to interpose +export TRT_SHIM_NVINFER_LIB_NAME=$TENSORRT_DIR/lib/libnvinfer.so + +# Preload the shim so it intercepts TensorRT API calls +export LD_PRELOAD=$TENSORRT_DIR/lib/libtensorrt_shim.so + +# Where to write the capture (JSON metadata); the .bin payload will be co-located +export TRT_SHIM_OUTPUT_JSON_FILE=/absolute/path/to/shim_output.json +``` + +2) Run your program that builds TensorRT engines. For Torch-TensorRT Dynamo flows, wrap compilation with the debugger to trigger capture: + +```python +import torch +import torch_tensorrt as torchtrt + +model = ... # your model on CUDA, in eval() mode +compile_spec = { + "inputs": [torchtrt.Input(min_shape=(1, 3, 3), opt_shape=(2, 3, 3), max_shape=(3, 3, 3), dtype=torch.float32)], +} + +with torchtrt.dynamo.Debugger("graphs", logging_dir="debuglogs", capture_shim=True): + trt_mod = torchtrt.compile(model, **compile_spec) +``` + +3) After the run completes, verify the capture artifacts exist: +- JSON metadata: the path you set in `TRT_SHIM_OUTPUT_JSON_FILE` +- BIN payload: same directory as the JSON (e.g., `shim_output.bin`) + +### Replay: Build the engine from the capture +Use `tensorrt_player` to replay the captured build without the original framework: + +```bash +tensorrt_player -j /absolute/path/to/shim_output.json -o /absolute/path/to/output_engine +``` + +This produces a serialized TensorRT engine at `output_engine`. 
+ +### Validate the engine +Run the engine with `trtexec`: + +```bash +trtexec --loadEngine=/absolute/path/to/output_engine +``` + +### Notes +- Ensure the `libnvinfer.so` used by the shim matches the TensorRT version in your environment. +- If multiple TensorRT versions are installed, prefer absolute paths as shown above. +- The capture is best-effort; if your program builds multiple engines, multiple captures may be produced. From 3cb680c8074471b3818049c30222be51689ae94a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 7 Oct 2025 14:21:45 -0700 Subject: [PATCH 04/11] modify doc --- tools/debug/capture_replay/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/debug/capture_replay/README.md b/tools/debug/capture_replay/README.md index 9c08e4ae4c..e46a475877 100644 --- a/tools/debug/capture_replay/README.md +++ b/tools/debug/capture_replay/README.md @@ -36,7 +36,7 @@ compile_spec = { "inputs": [torchtrt.Input(min_shape=(1, 3, 3), opt_shape=(2, 3, 3), max_shape=(3, 3, 3), dtype=torch.float32)], } -with torchtrt.dynamo.Debugger("graphs", logging_dir="debuglogs", capture_shim=True): +with torchtrt.dynamo.Debugger(capture_shim=True): trt_mod = torchtrt.compile(model, **compile_spec) ``` From 9c8077851eb90fb5585e2a58dd49bfca60c6ee7a Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 7 Oct 2025 18:19:46 -0700 Subject: [PATCH 05/11] resolve comments --- py/torch_tensorrt/__init__.py | 65 ++++++++++++++++ py/torch_tensorrt/dynamo/debug/_Debugger.py | 74 +++---------------- .../dynamo/debug/_DebuggerConfig.py | 2 +- tools/debug/capture_replay/README.md | 35 +-------- 4 files changed, 78 insertions(+), 98 deletions(-) diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index d127f42690..830bb32ec5 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -3,6 +3,7 @@ import os import platform import sys +import tempfile from typing import Dict, List import torch @@ -24,6 +25,70 @@ import 
torch + +def is_capture_tensorrt_api_recording_enabled() -> bool: + if os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") == "1": + if not sys.platform.startswith("linux"): + _LOGGER.warning( + f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}" + ) + os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + return False + if os.environ.get("USE_TRT_RTX", "False").lower() == "true": + _LOGGER.warning( + "Capturing TensorRT API calls is only supported on TensorRT, therefore ignoring the capture_tensorrt_api_recording setting for TensorRT-RTX" + ) + os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + return False + return True + return False + + +if is_capture_tensorrt_api_recording_enabled(): + linux_lib_path = [] + if "LD_LIBRARY_PATH" in os.environ: + linux_lib_path.extend(os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep)) + + if platform.uname().processor == "x86_64": + linux_lib_path.append("/usr/lib/x86_64-linux-gnu") + elif platform.uname().processor == "aarch64": + linux_lib_path.append("/usr/lib/aarch64-linux-gnu") + + tensorrt_lib_path = None + for path in linux_lib_path: + try: + ctypes.CDLL( + os.path.join(path, "libtensorrt_shim.so"), mode=ctypes.RTLD_GLOBAL + ) + tensorrt_lib_path = path + break + except Exception as e: + continue + + if tensorrt_lib_path is None: + _LOGGER.error( + "Capturing TensorRT API calls is enabled, but libtensorrt_shim.so is not found, make sure TensorRT lib is in the LD_LIBRARY_PATH, therefore ignoring the capture_tensorrt_api_recording setting" + ) + os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + else: + os.environ["TRT_SHIM_NVINFER_LIB_NAME"] = os.path.join( + tensorrt_lib_path, "libnvinfer.so" + ) + + import pwd + + current_user = pwd.getpwuid(os.getuid())[0] + shim_temp_dir = os.path.join( + tempfile.gettempdir(), f"torch_tensorrt_{current_user}/shim" + ) + os.makedirs(shim_temp_dir, exist_ok=True) + 
os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = os.path.join( + shim_temp_dir, "shim.json" + ) + _LOGGER.debug("capture_shim feature is enabled") +else: + _LOGGER.info("capture_shim feature is disabled") + tensorrt_package_name = "" try: diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index 01bc44c899..acece43b93 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -2,7 +2,6 @@ import functools import logging import os -import sys import tempfile from logging.config import dictConfig from typing import Any, List, Optional @@ -10,7 +9,6 @@ import torch from torch_tensorrt._features import ENABLED_FEATURES -from torch_tensorrt._utils import is_tensorrt_version_supported from torch_tensorrt.dynamo._defaults import DEBUG_LOGGING_DIR from torch_tensorrt.dynamo.debug._DebuggerConfig import DebuggerConfig from torch_tensorrt.dynamo.debug._supports_debugger import ( @@ -34,7 +32,8 @@ def __init__( capture_fx_graph_before: Optional[List[str]] = None, capture_fx_graph_after: Optional[List[str]] = None, save_engine_profile: bool = False, - capture_shim: bool = False, + capture_tensorrt_api_recording: bool = False, + capture_tensorrt_api_recording_dir: Optional[str] = None, profile_format: str = "perfetto", engine_builder_monitor: bool = True, logging_dir: str = DEBUG_LOGGING_DIR, @@ -52,7 +51,8 @@ def __init__( after execution of a lowering pass. Defaults to None. save_engine_profile (bool): Whether to save TensorRT engine profiling information. Defaults to False. - capture_shim (bool): Whether to enable the capture shim feature. It is part of the TensorRT capture and replay feature, the captured output will be able to replay for debug purpose. 
+ capture_tensorrt_api_recording (bool): Whether to enable the capture TensorRT API recording feature, when this is enabled, it will output the catputure TensorRT API recording in the /tmp/torch_tensorrt_{current_user}/shim directory. + It is part of the TensorRT capture and replay feature, the captured output will be able to replay for debug purpose. Defaults to False. profile_format (str): Format for profiling data. Choose from 'perfetto', 'trex', 'cudagraph'. If you need to generate engine graph using the profiling files, set it to 'trex' and use the C++ runtime. @@ -70,7 +70,7 @@ def __init__( self.cfg = DebuggerConfig( log_level=log_level, save_engine_profile=save_engine_profile, - capture_shim=capture_shim, + capture_tensorrt_api_recording=capture_tensorrt_api_recording, engine_builder_monitor=engine_builder_monitor, logging_dir=logging_dir, profile_format=profile_format, @@ -97,26 +97,10 @@ def __init__( self.capture_fx_graph_before = capture_fx_graph_before self.capture_fx_graph_after = capture_fx_graph_after - - if self.cfg.capture_shim: - if not sys.platform.startswith("linux"): - _LOGGER.warning( - "capture_shim featureis only supported on linux, will disable it" - ) - self.cfg.capture_shim = False - return - if ENABLED_FEATURES.tensorrt_rtx: - _LOGGER.warning( - "capture_shim feature is not supported on TensorRT-RTX, will disable it" - ) - self.cfg.capture_shim = False - return - if not is_tensorrt_version_supported("10.13.0"): - _LOGGER.warning( - "capture_shim feature is only supported on TensorRT 10.13 and above, will disable it" - ) - self.cfg.capture_shim = False - return + if os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") == "1": + self.cfg.capture_tensorrt_api_recording = True + else: + self.cfg.capture_tensorrt_api_recording = False def __enter__(self) -> None: self.original_lvl = _LOGGER.getEffectiveLevel() @@ -169,46 +153,6 @@ def __enter__(self) -> None: for c in _DEBUG_ENABLED_CLS ] - if self.cfg.capture_shim: - shim_lib_name = 
"libtensorrt_shim.so" - nvinfer_lib_name = "libnvinfer.so" - - def validate_setting() -> bool: - is_valid = True - # LD_PRELOAD and TRT_SHIM_NVINFER_LIB_NAME only read at exec-time; setting it during a running process won’t interpose already-loaded libs. - # so, must set them before the tensorrt is loaded, cannot set during the Debugger.__enter__ - if os.environ.get("LD_PRELOAD") is None: - _LOGGER.error( - f"LD_PRELOAD is not set, please add the {shim_lib_name} with full path to the LD_PRELOAD environment variable" - ) - is_valid = False - if os.environ.get("TRT_SHIM_NVINFER_LIB_NAME") is None: - _LOGGER.error( - f"TRT_SHIM_NVINFER_LIB_NAME is not set, please add the {nvinfer_lib_name} with full path to the TRT_SHIM_NVINFER_LIB_NAME environment variable" - ) - is_valid = False - if os.environ.get("TRT_SHIM_OUTPUT_JSON_FILE") is None: - _LOGGER.error( - "TRT_SHIM_OUTPUT_JSON_FILE is not set, please add the shim output json file name with full path to the TRT_SHIM_OUTPUT_JSON_FILE environment variable" - ) - is_valid = False - else: - shim_output_json_file = os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] - shim_output_dir = os.path.dirname(shim_output_json_file) - if len(shim_output_dir) > 0 and not os.path.exists(shim_output_dir): - _LOGGER.debug( - f"shim output directory {shim_output_dir} does not exist, creating it now" - ) - os.makedirs(shim_output_dir) - return is_valid - - if not validate_setting(): - return - json_file_name = os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] - _LOGGER.info( - f"capture_shim feature is enabled, shim output file is set to {json_file_name}" - ) - def __exit__(self, exc_type: Any, exc_value: Any, exc_tb: Any) -> None: dictConfig(self.get_logging_config(None)) diff --git a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py index 97a7ae4055..82cd3ba83a 100644 --- a/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py +++ b/py/torch_tensorrt/dynamo/debug/_DebuggerConfig.py @@ -7,7 +7,7 @@ class 
DebuggerConfig: log_level: str = "debug" save_engine_profile: bool = False - capture_shim: bool = False + capture_tensorrt_api_recording: bool = False engine_builder_monitor: bool = True logging_dir: str = DEBUG_LOGGING_DIR profile_format: str = "perfetto" diff --git a/tools/debug/capture_replay/README.md b/tools/debug/capture_replay/README.md index e46a475877..d8079cf5b9 100644 --- a/tools/debug/capture_replay/README.md +++ b/tools/debug/capture_replay/README.md @@ -8,47 +8,18 @@ This toolchain captures TensorRT network creation and build parameters at runtim - `tensorrt_player` available in your TensorRT `bin` directory ### Quick start: Capture -1) Export environment for the shim and paths (adjust paths for your system): ```bash -export TENSORRT_DIR=/path/to/TensorRT- -export LD_LIBRARY_PATH=$TENSORRT_DIR/lib:$TENSORRT_DIR/bin:$LD_LIBRARY_PATH -export PATH=$TENSORRT_DIR/bin:$PATH - -# Tell the shim which libnvinfer to interpose -export TRT_SHIM_NVINFER_LIB_NAME=$TENSORRT_DIR/lib/libnvinfer.so - -# Preload the shim so it intercepts TensorRT API calls -export LD_PRELOAD=$TENSORRT_DIR/lib/libtensorrt_shim.so - -# Where to write the capture (JSON metadata); the .bin payload will be co-located -export TRT_SHIM_OUTPUT_JSON_FILE=/absolute/path/to/shim_output.json -``` - -2) Run your program that builds TensorRT engines. For Torch-TensorRT Dynamo flows, wrap compilation with the debugger to trigger capture: - -```python -import torch -import torch_tensorrt as torchtrt - -model = ... 
# your model on CUDA, in eval() mode -compile_spec = { - "inputs": [torchtrt.Input(min_shape=(1, 3, 3), opt_shape=(2, 3, 3), max_shape=(3, 3, 3), dtype=torch.float32)], -} - -with torchtrt.dynamo.Debugger(capture_shim=True): - trt_mod = torchtrt.compile(model, **compile_spec) +TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1 python test.py ``` +you should be able to see the shim.json shim.bin in being generated in /tmp/torch_tensorrt_{current_user}/shim directory -3) After the run completes, verify the capture artifacts exist: -- JSON metadata: the path you set in `TRT_SHIM_OUTPUT_JSON_FILE` -- BIN payload: same directory as the JSON (e.g., `shim_output.bin`) ### Replay: Build the engine from the capture Use `tensorrt_player` to replay the captured build without the original framework: ```bash -tensorrt_player -j /absolute/path/to/shim_output.json -o /absolute/path/to/output_engine +tensorrt_player -j /absolute/path/to/shim.json -o /absolute/path/to/output_engine ``` This produces a serialized TensorRT engine at `output_engine`. 
From e77b493bc98d95b0d2477c2d4da5d5933c31da8b Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Tue, 7 Oct 2025 18:24:35 -0700 Subject: [PATCH 06/11] resolve comments --- py/torch_tensorrt/__init__.py | 38 ++++++++++----------- py/torch_tensorrt/dynamo/debug/_Debugger.py | 1 - 2 files changed, 19 insertions(+), 20 deletions(-) diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index 830bb32ec5..c16c0d547f 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -2,6 +2,7 @@ import logging import os import platform +import pwd import sys import tempfile from typing import Dict, List @@ -27,21 +28,21 @@ def is_capture_tensorrt_api_recording_enabled() -> bool: - if os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") == "1": - if not sys.platform.startswith("linux"): - _LOGGER.warning( - f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}" - ) - os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") - return False - if os.environ.get("USE_TRT_RTX", "False").lower() == "true": - _LOGGER.warning( - "Capturing TensorRT API calls is only supported on TensorRT, therefore ignoring the capture_tensorrt_api_recording setting for TensorRT-RTX" - ) - os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") - return False - return True - return False + if os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") != "1": + return False + if not sys.platform.startswith("linux"): + _LOGGER.warning( + f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}" + ) + os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + return False + if os.environ.get("USE_TRT_RTX", "False").lower() == "true": + _LOGGER.warning( + "Capturing TensorRT API calls is only supported on TensorRT, therefore ignoring the capture_tensorrt_api_recording setting for TensorRT-RTX" + ) + 
os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + return False + return True if is_capture_tensorrt_api_recording_enabled(): @@ -74,9 +75,6 @@ def is_capture_tensorrt_api_recording_enabled() -> bool: os.environ["TRT_SHIM_NVINFER_LIB_NAME"] = os.path.join( tensorrt_lib_path, "libnvinfer.so" ) - - import pwd - current_user = pwd.getpwuid(os.getuid())[0] shim_temp_dir = os.path.join( tempfile.gettempdir(), f"torch_tensorrt_{current_user}/shim" @@ -85,7 +83,9 @@ def is_capture_tensorrt_api_recording_enabled() -> bool: os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = os.path.join( shim_temp_dir, "shim.json" ) - _LOGGER.debug("capture_shim feature is enabled") + _LOGGER.debug( + f"capture_shim feature is enabled and the captured output is in the {shim_temp_dir} directory" + ) else: _LOGGER.info("capture_shim feature is disabled") diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index acece43b93..499b95e346 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -33,7 +33,6 @@ def __init__( capture_fx_graph_after: Optional[List[str]] = None, save_engine_profile: bool = False, capture_tensorrt_api_recording: bool = False, - capture_tensorrt_api_recording_dir: Optional[str] = None, profile_format: str = "perfetto", engine_builder_monitor: bool = True, logging_dir: str = DEBUG_LOGGING_DIR, From 9391b629e8620e91d4cd95fed09647f365d15c54 Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 9 Oct 2025 11:53:25 -0700 Subject: [PATCH 07/11] resolve comments --- py/torch_tensorrt/_TensorRTProxyModule.py | 70 +++++++++++++++++++++ py/torch_tensorrt/__init__.py | 65 ------------------- py/torch_tensorrt/dynamo/debug/_Debugger.py | 17 +++-- 3 files changed, 83 insertions(+), 69 deletions(-) diff --git a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py index c5917a3ae0..794e6f7dcb 100644 --- a/py/torch_tensorrt/_TensorRTProxyModule.py +++ 
b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -1,12 +1,16 @@ import ctypes import importlib import importlib.util +import logging import os import platform +import pwd import sys +import tempfile from types import ModuleType from typing import Any, Dict, List +_LOGGER = logging.getLogger(__name__) package_imported = False package_name = "" @@ -28,10 +32,71 @@ def _find_lib(name: str, paths: List[str]) -> str: raise FileNotFoundError(f"Could not find {name}\n Search paths: {paths}") +def enable_capture_tensorrt_api_recording() -> None: + + os_env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None) + if os_env_flag is None or (os_env_flag != "1" and os_env_flag.lower() != "true"): + _LOGGER.debug("Capturing TensorRT API calls is not enabled") + return + if not sys.platform.startswith("linux"): + _LOGGER.warning( + f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}" + ) + os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + return + + linux_lib_path = [] + if "LD_LIBRARY_PATH" in os.environ: + linux_lib_path.extend(os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep)) + + if platform.uname().processor == "x86_64": + linux_lib_path.append("/usr/lib/x86_64-linux-gnu") + elif platform.uname().processor == "aarch64": + linux_lib_path.append("/usr/lib/aarch64-linux-gnu") + + for path in linux_lib_path: + if os.path.isfile(os.path.join(path, "libtensorrt_shim.so")): + try: + ctypes.CDLL( + os.path.join(path, "libtensorrt_shim.so"), mode=ctypes.RTLD_GLOBAL + ) + tensorrt_lib_path = path + break + except Exception as e: + continue + + if tensorrt_lib_path is None: + _LOGGER.error( + "Capturing TensorRT API calls is enabled, but libtensorrt_shim.so is not found, make sure TensorRT lib is in the LD_LIBRARY_PATH, therefore ignoring the capture_tensorrt_api_recording setting" + ) + os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") + else: + 
os.environ["TRT_SHIM_NVINFER_LIB_NAME"] = os.path.join( + tensorrt_lib_path, "libnvinfer.so" + ) + current_user = pwd.getpwuid(os.getuid())[0] + shim_temp_dir = os.path.join( + tempfile.gettempdir(), f"torch_tensorrt_{current_user}/shim" + ) + os.makedirs(shim_temp_dir, exist_ok=True) + json_file_name = os.path.join(shim_temp_dir, "shim.json") + os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = json_file_name + bin_file_name = os.path.join(shim_temp_dir, "shim.bin") + # if exists, delete the file, so that we can capture the new one + if os.path.exists(json_file_name): + os.remove(json_file_name) + if os.path.exists(bin_file_name): + os.remove(bin_file_name) + _LOGGER.debug( + f"capture_shim feature is enabled and the captured output is in the {shim_temp_dir} directory" + ) + + # TensorRTProxyModule is a proxy module that allows us to register the tensorrt or tensorrt-rtx package # since tensorrt-rtx is the drop-in replacement for tensorrt, we can use the same interface to use tensorrt-rtx class TensorRTProxyModule(ModuleType): def __init__(self, target_module: ModuleType) -> None: + breakpoint() spec = importlib.util.spec_from_loader("tensorrt", loader=None) self.__spec__ = spec self.__package__ = target_module.__package__ @@ -86,6 +151,11 @@ def alias_tensorrt() -> None: if use_rtx_env_var.lower() == "true": use_rtx = True package_name = "tensorrt_rtx" if use_rtx else "tensorrt" + + if not use_rtx: + # enable capture tensorrt api recording has to be done before importing the tensorrt library + enable_capture_tensorrt_api_recording() + # Import the appropriate package try: target_module = importlib.import_module(package_name) diff --git a/py/torch_tensorrt/__init__.py b/py/torch_tensorrt/__init__.py index c16c0d547f..d127f42690 100644 --- a/py/torch_tensorrt/__init__.py +++ b/py/torch_tensorrt/__init__.py @@ -2,9 +2,7 @@ import logging import os import platform -import pwd import sys -import tempfile from typing import Dict, List import torch @@ -26,69 +24,6 @@ import 
torch - -def is_capture_tensorrt_api_recording_enabled() -> bool: - if os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") != "1": - return False - if not sys.platform.startswith("linux"): - _LOGGER.warning( - f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}" - ) - os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") - return False - if os.environ.get("USE_TRT_RTX", "False").lower() == "true": - _LOGGER.warning( - "Capturing TensorRT API calls is only supported on TensorRT, therefore ignoring the capture_tensorrt_api_recording setting for TensorRT-RTX" - ) - os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") - return False - return True - - -if is_capture_tensorrt_api_recording_enabled(): - linux_lib_path = [] - if "LD_LIBRARY_PATH" in os.environ: - linux_lib_path.extend(os.environ["LD_LIBRARY_PATH"].split(os.path.pathsep)) - - if platform.uname().processor == "x86_64": - linux_lib_path.append("/usr/lib/x86_64-linux-gnu") - elif platform.uname().processor == "aarch64": - linux_lib_path.append("/usr/lib/aarch64-linux-gnu") - - tensorrt_lib_path = None - for path in linux_lib_path: - try: - ctypes.CDLL( - os.path.join(path, "libtensorrt_shim.so"), mode=ctypes.RTLD_GLOBAL - ) - tensorrt_lib_path = path - break - except Exception as e: - continue - - if tensorrt_lib_path is None: - _LOGGER.error( - "Capturing TensorRT API calls is enabled, but libtensorrt_shim.so is not found, make sure TensorRT lib is in the LD_LIBRARY_PATH, therefore ignoring the capture_tensorrt_api_recording setting" - ) - os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") - else: - os.environ["TRT_SHIM_NVINFER_LIB_NAME"] = os.path.join( - tensorrt_lib_path, "libnvinfer.so" - ) - current_user = pwd.getpwuid(os.getuid())[0] - shim_temp_dir = os.path.join( - tempfile.gettempdir(), f"torch_tensorrt_{current_user}/shim" - ) - os.makedirs(shim_temp_dir, exist_ok=True) - 
os.environ["TRT_SHIM_OUTPUT_JSON_FILE"] = os.path.join( - shim_temp_dir, "shim.json" - ) - _LOGGER.debug( - f"capture_shim feature is enabled and the captured output is in the {shim_temp_dir} directory" - ) -else: - _LOGGER.info("capture_shim feature is disabled") - tensorrt_package_name = "" try: diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index 499b95e346..fc8fa8dc02 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -2,6 +2,7 @@ import functools import logging import os +import sys import tempfile from logging.config import dictConfig from typing import Any, List, Optional @@ -96,10 +97,18 @@ def __init__( self.capture_fx_graph_before = capture_fx_graph_before self.capture_fx_graph_after = capture_fx_graph_after - if os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") == "1": - self.cfg.capture_tensorrt_api_recording = True - else: - self.cfg.capture_tensorrt_api_recording = False + + if self.cfg.capture_tensorrt_api_recording: + env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None) + if env_flag is None or (env_flag != "1" and env_flag.lower() != "true"): + # currently this feature is only supported for TensorRT on Linux platform + if ( + sys.platform.startswith("linux") + and not ENABLED_FEATURES.tensorrt_rtx + ): + _LOGGER.warning( + "In order to capture TensorRT API calls, please invoke the script with environment variable TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1" + ) def __enter__(self) -> None: self.original_lvl = _LOGGER.getEffectiveLevel() From 6104bc0ba2fd813389df7cf4d2e39df8f5bf48cb Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 9 Oct 2025 11:55:07 -0700 Subject: [PATCH 08/11] resolve comments --- py/torch_tensorrt/_TensorRTProxyModule.py | 1 - 1 file changed, 1 deletion(-) diff --git a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py index 794e6f7dcb..8bb972290d 100644 
--- a/py/torch_tensorrt/_TensorRTProxyModule.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -96,7 +96,6 @@ def enable_capture_tensorrt_api_recording() -> None: # since tensorrt-rtx is the drop-in replacement for tensorrt, we can use the same interface to use tensorrt-rtx class TensorRTProxyModule(ModuleType): def __init__(self, target_module: ModuleType) -> None: - breakpoint() spec = importlib.util.spec_from_loader("tensorrt", loader=None) self.__spec__ = spec self.__package__ = target_module.__package__ From ba3259f9b77ecf8918ce60bc7d1e6cd63e4809df Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 9 Oct 2025 13:52:40 -0700 Subject: [PATCH 09/11] resolve comments --- py/torch_tensorrt/_TensorRTProxyModule.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py index 8bb972290d..80388b193a 100644 --- a/py/torch_tensorrt/_TensorRTProxyModule.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ -87,8 +87,8 @@ def enable_capture_tensorrt_api_recording() -> None: os.remove(json_file_name) if os.path.exists(bin_file_name): os.remove(bin_file_name) - _LOGGER.debug( - f"capture_shim feature is enabled and the captured output is in the {shim_temp_dir} directory" + _LOGGER.info( + f"Capturing TensorRT API calls feature is enabled and the captured output is in the {shim_temp_dir} directory" ) From 6d8c1ec042d21f5591e0b5b0138d435281814f4c Mon Sep 17 00:00:00 2001 From: lanluo-nvidia Date: Thu, 9 Oct 2025 14:57:43 -0700 Subject: [PATCH 10/11] resolve comments --- py/torch_tensorrt/_TensorRTProxyModule.py | 2 +- py/torch_tensorrt/dynamo/debug/_Debugger.py | 19 ++++++++++++------- 2 files changed, 13 insertions(+), 8 deletions(-) diff --git a/py/torch_tensorrt/_TensorRTProxyModule.py b/py/torch_tensorrt/_TensorRTProxyModule.py index 80388b193a..84d2c50f5f 100644 --- a/py/torch_tensorrt/_TensorRTProxyModule.py +++ b/py/torch_tensorrt/_TensorRTProxyModule.py @@ 
-66,7 +66,7 @@ def enable_capture_tensorrt_api_recording() -> None: continue if tensorrt_lib_path is None: - _LOGGER.error( + _LOGGER.warning( "Capturing TensorRT API calls is enabled, but libtensorrt_shim.so is not found, make sure TensorRT lib is in the LD_LIBRARY_PATH, therefore ignoring the capture_tensorrt_api_recording setting" ) os.environ.pop("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE") diff --git a/py/torch_tensorrt/dynamo/debug/_Debugger.py b/py/torch_tensorrt/dynamo/debug/_Debugger.py index fc8fa8dc02..39e4217f73 100644 --- a/py/torch_tensorrt/dynamo/debug/_Debugger.py +++ b/py/torch_tensorrt/dynamo/debug/_Debugger.py @@ -99,16 +99,21 @@ def __init__( self.capture_fx_graph_after = capture_fx_graph_after if self.cfg.capture_tensorrt_api_recording: - env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None) - if env_flag is None or (env_flag != "1" and env_flag.lower() != "true"): - # currently this feature is only supported for TensorRT on Linux platform - if ( - sys.platform.startswith("linux") - and not ENABLED_FEATURES.tensorrt_rtx - ): + if not sys.platform.startswith("linux"): + _LOGGER.warning( + f"Capturing TensorRT API calls is only supported on Linux, therefore ignoring the capture_tensorrt_api_recording setting for {sys.platform}" + ) + elif ENABLED_FEATURES.tensorrt_rtx: + _LOGGER.warning( + "Capturing TensorRT API calls is not supported for TensorRT-RTX, therefore ignoring the capture_tensorrt_api_recording setting" + ) + else: + env_flag = os.environ.get("TORCHTRT_ENABLE_TENSORRT_API_CAPTURE", None) + if env_flag is None or (env_flag != "1" and env_flag.lower() != "true"): _LOGGER.warning( "In order to capture TensorRT API calls, please invoke the script with environment variable TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1" ) + _LOGGER.info("Capturing TensorRT API calls feature is enabled") def __enter__(self) -> None: self.original_lvl = _LOGGER.getEffectiveLevel() From 86a3cf248bbf9e8d02c269d7fa131c085052e001 Mon Sep 17 00:00:00 2001 
From: lanluo-nvidia Date: Sun, 12 Oct 2025 18:31:55 -0700 Subject: [PATCH 11/11] change doc to rst format --- docsrc/getting_started/capture_and_replay.rst | 49 +++++++++++++++++++ docsrc/index.rst | 1 + tools/debug/capture_replay/README.md | 37 -------------- 3 files changed, 50 insertions(+), 37 deletions(-) create mode 100644 docsrc/getting_started/capture_and_replay.rst delete mode 100644 tools/debug/capture_replay/README.md diff --git a/docsrc/getting_started/capture_and_replay.rst b/docsrc/getting_started/capture_and_replay.rst new file mode 100644 index 0000000000..e04e1899c7 --- /dev/null +++ b/docsrc/getting_started/capture_and_replay.rst @@ -0,0 +1,49 @@ +Introduction +============ + +This toolchain captures TensorRT network creation and build parameters at runtime via a shim, then deterministically replays them to reproduce an engine build. Use it to debug or reproduce builds independent of the originating framework. + +Prerequisites +------------- + +- TensorRT installed (ensure you know the absolute path to its ``lib`` and ``bin`` directories) +- ``libtensorrt_shim.so`` available in your TensorRT ``lib`` directory +- ``tensorrt_player`` available in your TensorRT ``bin`` directory + +Quick start: Capture +-------------------- + +.. code-block:: bash + + TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1 python test.py + +You should see ``shim.json`` and ``shim.bin`` generated in ``/tmp/torch_tensorrt_{current_user}/shim``. + +Replay: Build the engine from the capture +----------------------------------------- + +Use ``tensorrt_player`` to replay the captured build without the original framework: + +.. code-block:: bash + + tensorrt_player -j /absolute/path/to/shim.json -o /absolute/path/to/output_engine + +This produces a serialized TensorRT engine at ``output_engine``. + +Validate the engine +------------------- + +Run the engine with ``trtexec``: + +.. 
code-block:: bash

+   trtexec --loadEngine=/absolute/path/to/output_engine
+
+Notes
+-----
+
+- Ensure the ``libnvinfer.so`` used by the shim matches the TensorRT version in your environment.
+- If multiple TensorRT versions are installed, prefer absolute paths as shown above.
+- Currently, capturing multiple engines is not supported; in case of a graph break, only the first engine will be captured.
+
+
diff --git a/docsrc/index.rst b/docsrc/index.rst
index 68e1ba5259..671379d004 100644
--- a/docsrc/index.rst
+++ b/docsrc/index.rst
@@ -29,6 +29,7 @@ Getting Started
    getting_started/jetpack
    getting_started/quick_start
    getting_started/tensorrt_rtx
+   getting_started/capture_and_replay
 
 User Guide
 ------------
diff --git a/tools/debug/capture_replay/README.md b/tools/debug/capture_replay/README.md
deleted file mode 100644
index d8079cf5b9..0000000000
--- a/tools/debug/capture_replay/README.md
+++ /dev/null
@@ -1,37 +0,0 @@
-## Introduction
-
-This toolchain captures TensorRT network creation and build parameters at runtime via a shim, then deterministically replays them to reproduce an engine build. Use it to debug or reproduce builds independent of the originating framework.
-
-### Prerequisites
-- TensorRT installed (ensure you know the absolute path to its `lib` and `bin` directories)
-- `libtensorrt_shim.so` available in your TensorRT `lib` directory
-- `tensorrt_player` available in your TensorRT `bin` directory
-
-### Quick start: Capture
-
-```bash
-TORCHTRT_ENABLE_TENSORRT_API_CAPTURE=1 python test.py
-```
-you should be able to see the shim.json shim.bin in being generated in /tmp/torch_tensorrt_{current_user}/shim directory
-
-
-### Replay: Build the engine from the capture
-Use `tensorrt_player` to replay the captured build without the original framework:
-
-```bash
-tensorrt_player -j /absolute/path/to/shim.json -o /absolute/path/to/output_engine
-```
-
-This produces a serialized TensorRT engine at `output_engine`.
- -### Validate the engine -Run the engine with `trtexec`: - -```bash -trtexec --loadEngine=/absolute/path/to/output_engine -``` - -### Notes -- Ensure the `libnvinfer.so` used by the shim matches the TensorRT version in your environment. -- If multiple TensorRT versions are installed, prefer absolute paths as shown above. -- The capture is best-effort; if your program builds multiple engines, multiple captures may be produced.