From 70ce5e7851577f357dcdd0b6d3f38d228c6cb931 Mon Sep 17 00:00:00 2001
From: IvanKobzarev
Date: Mon, 9 Feb 2026 08:45:23 -0800
Subject: [PATCH] [ci] Add DSv3 SimpleFSDP auto_bucketing to h100 ci jobs

This CI flow is experimental and non-blocking - PRs can land if only this flow fails.
---
 tests/integration_tests/h100.py | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tests/integration_tests/h100.py b/tests/integration_tests/h100.py
index cd75548e56..ed5281d078 100755
--- a/tests/integration_tests/h100.py
+++ b/tests/integration_tests/h100.py
@@ -78,5 +78,24 @@ def build_h100_tests_list() -> list[OverrideDefinitions]:
             "hsdp+cp+compile+float8",
             ngpu=8,
         ),
+        # Experimental, non-blocking: PRs can land if only this test fails
+        OverrideDefinitions(
+            [
+                [
+                    "--job.config_file ./torchtitan/models/deepseek_v3/train_configs/debug_model.toml",
+                    "--model.name simple_fsdp.deepseek_v3",
+                    "--parallelism.tensor_parallel_degree 1",
+                    "--parallelism.expert_parallel_degree 8",
+                    "--job.custom_config_module=torchtitan.experiments.simple_fsdp.job_config",
+                    "--compile.graph_passes auto_bucketing",
+                    "--activation_checkpoint.mode none",
+                    "--compile.backend inductor",
+                    "--compile.enable",
+                ]
+            ],
+            "[Experimental, non-blocking landing if fails] SimpleFSDP DeepSeekV3 auto_bucketing",
+            "simplefsdp_deepseekv3_auto_bucketing",
+            ngpu=8,
+        ),
     ]
     return integration_tests_flavors