torchtitan/config/job_config.py (14 additions, 0 deletions)
@@ -987,6 +987,20 @@ class Debug:
moe_force_load_balance: bool = False
"""If True, we force each experts to get the same amount of tokens via round-robin. This option is for debugging usage only."""

log_sharding_info: bool = False
"""
If True, logs DTensor sharding/mesh info for module inputs, params,
outputs during one fwd/bwd pass. Only the first step will recorded and
this flag should only be used for debugging purpose and should not
be enabled during the real training.
"""

collapse_identical_layers: bool = False
"""
If True, and log_sharding_info is True, collapse repeated layer modules with
with identical sharding patterns in the sharding log.
"""


@dataclass
class JobConfig:
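
As a rough illustration of what `log_sharding_info` could surface, here is a minimal sketch (not torchtitan's actual implementation) of collecting DTensor placement/mesh info for module inputs and outputs with forward hooks that are removed after the first step. It assumes a recent PyTorch where `DTensor` is exposed under `torch.distributed.tensor`; the helper names are hypothetical.

```python
# Minimal sketch, NOT torchtitan's implementation: log DTensor
# sharding/mesh info for module inputs and outputs via forward hooks.
import torch.nn as nn
from torch.distributed.tensor import DTensor  # assumes PyTorch >= 2.4

def describe(t):
    # For DTensors, report placements (Shard/Replicate) and the device mesh;
    # for anything else, just report the type name.
    if isinstance(t, DTensor):
        return f"placements={t.placements}, mesh={t.device_mesh}"
    return type(t).__name__

def attach_sharding_hooks(model: nn.Module):
    # Hypothetical helper: attach a forward hook to every submodule.
    handles = []

    def hook(module, inputs, output):
        print(f"{module.__class__.__name__}: "
              f"inputs={[describe(x) for x in inputs]}, "
              f"output={describe(output)}")

    for m in model.modules():
        handles.append(m.register_forward_hook(hook))
    return handles

# After the first fwd/bwd step, detach the hooks so the logging does not
# affect the rest of training:
#   for h in handles: h.remove()
```

Assuming torchtitan's usual `--<section>.<field>` CLI mapping for config dataclasses, the new flags would be enabled with `--debug.log_sharding_info` and optionally `--debug.collapse_identical_layers`; these spellings are inferred from the `Debug` dataclass fields, not confirmed by this diff.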