feat: Add toggle for fallback to Inductor #2301

Status: Draft · wants to merge 1 commit into base: main
1 change: 1 addition & 0 deletions py/torch_tensorrt/dynamo/_defaults.py
@@ -15,6 +15,7 @@
 USE_FAST_PARTITIONER = True
 ENABLE_EXPERIMENTAL_DECOMPOSITIONS = False
 REQUIRE_FULL_COMPILATION = False
+FALLBACK_TO_INDUCTOR = True
 
 
 def default_device() -> Device:
4 changes: 4 additions & 0 deletions py/torch_tensorrt/dynamo/_settings.py
@@ -6,6 +6,7 @@
 from torch_tensorrt.dynamo._defaults import (
     DEBUG,
     ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
+    FALLBACK_TO_INDUCTOR,
     MAX_AUX_STREAMS,
     MIN_BLOCK_SIZE,
     OPTIMIZATION_LEVEL,
@@ -42,6 +43,8 @@ class CompilationSettings:
         truncate_long_and_double (bool): Truncate int64/float64 TRT engine inputs or weights to int32/float32
         enable_experimental_decompositions (bool): Whether to enable all core aten decompositions
             or only a selected subset of them
+        fallback_to_inductor (bool): Whether to fall back to Inductor on Torch-TRT compilation errors.
+            Overridden by pass_through_build_failures.
     """
 
     precision: torch.dtype = PRECISION
@@ -59,3 +62,4 @@
     enable_experimental_decompositions: bool = ENABLE_EXPERIMENTAL_DECOMPOSITIONS
     device: Device = field(default_factory=default_device)
     require_full_compilation: bool = REQUIRE_FULL_COMPILATION
+    fallback_to_inductor: bool = FALLBACK_TO_INDUCTOR
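
With this field in place, fallback_to_inductor travels through the same dataclass as every other compilation option. A minimal sketch of constructing the settings object directly, assuming the remaining fields keep their defaults (the values shown are illustrative, not recommendations):

    from torch_tensorrt.dynamo._settings import CompilationSettings

    # Illustrative only: surface TRT failures gracefully and retry with Inductor.
    settings = CompilationSettings(
        pass_through_build_failures=False,  # do not hard-fail on TRT build errors
        fallback_to_inductor=True,          # retry the subgraph with Inductor first
    )
    print(settings.fallback_to_inductor)  # True
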
22 changes: 20 additions & 2 deletions py/torch_tensorrt/dynamo/backend/backends.py
@@ -83,14 +83,17 @@ def _pretraced_backend(
             settings=settings,
         )
         return trt_compiled
-    except AssertionError:
+    except (AssertionError, RuntimeError):
         if not settings.pass_through_build_failures:
             logger.warning(
                 "TRT conversion failed on the subgraph. See trace above. "
                 + "Returning GraphModule forward instead.",
                 exc_info=True,
             )
-            return gm.forward
+            if settings.fallback_to_inductor:
+                pass
+            else:
+                return gm
         else:
             logger.critical(
                 "Halting compilation on build failure since "
@@ -100,3 +103,18 @@
                 + "specify pass_through_build_failures=False."
             )
             raise
+
+    # If Inductor fallback is desired, attempt model compilation with Inductor
+    try:
+        inductor_compiled = torch._inductor.compile(
+            gm,
+            sample_inputs,
+        )
+        return inductor_compiled
+    except (AssertionError, RuntimeError):
+        logger.warning(
+            "Inductor compilation failed on the subgraph. See trace above. "
+            + "Returning GraphModule forward instead.",
+            exc_info=True,
+        )
+        return gm
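
Taken together, the change gives _pretraced_backend a three-tier ladder: TRT first, then Inductor, then the unmodified eager GraphModule, with pass_through_build_failures short-circuiting everything by re-raising. A minimal standalone sketch of that control flow, where compile_trt is a placeholder for the TRT conversion path rather than a real torch_tensorrt API:

    import logging

    import torch

    logger = logging.getLogger(__name__)

    def compile_with_fallback(gm: torch.fx.GraphModule, sample_inputs, settings):
        try:
            return compile_trt(gm, sample_inputs, settings)  # placeholder TRT path
        except (AssertionError, RuntimeError):
            if settings.pass_through_build_failures:
                raise  # surface the TRT failure to the caller
            if not settings.fallback_to_inductor:
                return gm  # skip straight to eager execution of the GraphModule
        # Second tier: retry the failed subgraph with Inductor before giving up.
        try:
            return torch._inductor.compile(gm, sample_inputs)
        except (AssertionError, RuntimeError):
            logger.warning("Inductor compilation also failed.", exc_info=True)
            return gm  # last resort: run the GraphModule eagerly
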
5 changes: 4 additions & 1 deletion py/torch_tensorrt/dynamo/compile.py
@@ -15,6 +15,7 @@
     DEBUG,
     DEVICE,
     ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
+    FALLBACK_TO_INDUCTOR,
     MAX_AUX_STREAMS,
     MIN_BLOCK_SIZE,
     OPTIMIZATION_LEVEL,
@@ -69,6 +70,7 @@ def compile(
     use_python_runtime: bool = USE_PYTHON_RUNTIME,
     use_fast_partitioner: bool = USE_FAST_PARTITIONER,
     enable_experimental_decompositions: bool = ENABLE_EXPERIMENTAL_DECOMPOSITIONS,
+    fallback_to_inductor: bool = FALLBACK_TO_INDUCTOR,
     **kwargs: Any,
 ) -> torch.fx.GraphModule:
     if debug:
@@ -84,7 +86,7 @@
             "max_aux_streams, version_compatible, optimization_level, "
             "torch_executed_ops, pass_through_build_failures, "
             "use_fast_partitioner, enable_experimental_decompositions, "
-            "require_full_compilation}"
+            "require_full_compilation, fallback_to_inductor}"
         )
 
     if not isinstance(inputs, collections.abc.Sequence):
@@ -130,6 +132,7 @@
         "use_fast_partitioner": use_fast_partitioner,
         "enable_experimental_decompositions": enable_experimental_decompositions,
         "require_full_compilation": require_full_compilation,
+        "fallback_to_inductor": fallback_to_inductor,
     }
 
     settings = CompilationSettings(**compilation_options)
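
From the user's side, the flag is just another keyword argument to the dynamo compile entry point. A hypothetical end-to-end call, assuming the usual leading (module, inputs) arguments that the truncated signature above does not show; MyModel is a placeholder:

    import torch
    import torch_tensorrt

    model = MyModel().eval().cuda()                # placeholder user model
    inputs = [torch.randn(1, 3, 224, 224).cuda()]  # placeholder sample inputs

    trt_gm = torch_tensorrt.dynamo.compile(
        model,
        inputs,
        pass_through_build_failures=False,  # allow graceful handling of TRT errors
        fallback_to_inductor=True,          # retry failed compilation with Inductor
    )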