Bug description
Training stops and exits when it encounters a file with no text. For example, when training on phonemes, the filelist entry for LJ045-0058 looks like this:
LJ045-0058|eng|speaker_0|1963.|.
It has no phoneme 'text' because G2P produced no phonemes for "1963.". Other files in LJ Speech have the same problem, but I don't have a complete list of them.
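For anyone who wants to scan their own filelist for such entries, here is a minimal sketch (the filelist path and helper name are hypothetical; the column layout follows the example row above):

```python
from pathlib import Path

FILELIST = Path("preprocessed/filelist.psv")  # hypothetical path

def find_empty_phone_rows(filelist: Path) -> list[str]:
    """Return basenames whose final (phones) column is empty or punctuation-only."""
    bad = []
    for line in filelist.read_text(encoding="utf8").splitlines():
        fields = line.split("|")
        basename, phones = fields[0], fields[-1]
        # 'LJ045-0058|eng|speaker_0|1963.|.' lands here: its phones
        # column is just '.', i.e. no usable phonemes.
        if not phones.strip(" .,;:!?-"):
            bad.append(basename)
    return bad

print(find_empty_phone_rows(FILELIST))  # e.g. ['LJ045-0058', ...]
```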
How to reproduce the bug
If you create an EveryVoice project based on phonemized LJ Speech and begin training (make sure to set target_text_representation_level: phones), you'll see the error described below.
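To see where the empty entry comes from, here's a hedged sketch using the g2p library that EveryVoice relies on. The mapping names are my assumption; the point is only that a purely numeric token like "1963." yields nothing usable:

```python
from g2p import make_g2p

# English → IPA transducer (mapping names assumed; adjust to your setup).
transducer = make_g2p("eng", "eng-ipa")
result = transducer("1963.")
print(repr(result.output_string))
# In my run the phones column for this row ended up as just '.',
# consistent with G2P producing no phonemes for the digits.
```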
Error messages and logs
[rank0]: ╭───────────────────── Traceback (most recent call last) ──────────────────────╮
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/everyvoice/model/feature_prediction/Fast │
[rank0]: │ Speech2_lightning/fs2/variance_adaptor.py:293 in forward │
[rank0]: │ │
[rank0]: │ 290 │ │ │ │ equal_dur_targets = torch.eq( │
[rank0]: │ 291 │ │ │ │ │ duration_target.sum(dim=1), batch["mel_lens"] │
[rank0]: │ 292 │ │ │ │ ) │
[rank0]: │ ❱ 293 │ │ │ │ assert torch.all(equal_dur_targets) │
[rank0]: │ 294 │ │ │ except AssertionError as e: │
[rank0]: │ 295 │ │ │ │ from itertools import compress │
[rank0]: │ 296 │
[rank0]: ╰──────────────────────────────────────────────────────────────────────────────╯
[rank0]: AssertionError
[rank0]: The above exception was the direct cause of the following exception:
[rank0]: ╭───────────────────── Traceback (most recent call last) ──────────────────────╮
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/everyvoice/model/feature_prediction/Fast │
[rank0]: │ Speech2_lightning/fs2/cli/train.py:33 in train │
[rank0]: │ │
[rank0]: │ 30 │ │
[rank0]: │ 31 │ model_kwargs = {"lang2id": lang2id, "speaker2id": speaker2id, "stat │
[rank0]: │ 32 │ │
[rank0]: │ ❱ 33 │ train_base_command( │
[rank0]: │ 34 │ │ model_config=FastSpeech2Config, │
[rank0]: │ 35 │ │ model=FastSpeech2, │
[rank0]: │ 36 │ │ data_module=FastSpeech2DataModule, │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/everyvoice/base_cli/helpers.py:278 in │
[rank0]: │ train_base_command │
[rank0]: │ │
[rank0]: │ 275 │ │ model_obj = model(config, **model_kwargs) │
[rank0]: │ 276 │ │ logger.info(f"Model's architecture\n{model_obj}") │
[rank0]: │ 277 │ │ tensorboard_logger.log_hyperparams(config.model_dump()) │
[rank0]: │ ❱ 278 │ │ trainer.fit(model_obj, data) │
[rank0]: │ 279 │ else: │
[rank0]: │ 280 │ │ try: │
[rank0]: │ 281 │ │ │ model_obj = model.load_from_checkpoint(last_ckpt) │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/trainer.py:561 in fit │
[rank0]: │ │
[rank0]: │ 558 │ │ self.state.status = TrainerStatus.RUNNING │
[rank0]: │ 559 │ │ self.training = True │
[rank0]: │ 560 │ │ self.should_stop = False │
[rank0]: │ ❱ 561 │ │ call._call_and_handle_interrupt( │
[rank0]: │ 562 │ │ │ self, self._fit_impl, model, train_dataloaders, val_datal │
[rank0]: │ 563 │ │ ) │
[rank0]: │ 564 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/call.py:47 in _call_and_handle_interrupt │
[rank0]: │ │
[rank0]: │ 44 │ """ │
[rank0]: │ 45 │ try: │
[rank0]: │ 46 │ │ if trainer.strategy.launcher is not None: │
[rank0]: │ ❱ 47 │ │ │ return trainer.strategy.launcher.launch(trainer_fn, *args, │
[rank0]: │ 48 │ │ return trainer_fn(*args, **kwargs) │
[rank0]: │ 49 │ │
[rank0]: │ 50 │ except _TunerExitException: │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/strategies/launchers/subprocess_script.py:105 in launch │
[rank0]: │ │
[rank0]: │ 102 │ │ │ _launch_process_observer(self.procs) │
[rank0]: │ 103 │ │ │
[rank0]: │ 104 │ │ _set_num_threads_if_needed(num_processes=self.num_processes) │
[rank0]: │ ❱ 105 │ │ return function(*args, **kwargs) │
[rank0]: │ 106 │ │
[rank0]: │ 107 │ @override │
[rank0]: │ 108 │ def kill(self, signum: _SIGNUM) -> None: │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/trainer.py:599 in _fit_impl │
[rank0]: │ │
[rank0]: │ 596 │ │ │ model_provided=True, │
[rank0]: │ 597 │ │ │ model_connected=self.lightning_module is not None, │
[rank0]: │ 598 │ │ ) │
[rank0]: │ ❱ 599 │ │ self._run(model, ckpt_path=ckpt_path) │
[rank0]: │ 600 │ │ │
[rank0]: │ 601 │ │ assert self.state.stopped │
[rank0]: │ 602 │ │ self.training = False │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/trainer.py:1012 in _run │
[rank0]: │ │
[rank0]: │ 1009 │ │ # ---------------------------- │
[rank0]: │ 1010 │ │ # RUN THE TRAINER │
[rank0]: │ 1011 │ │ # ---------------------------- │
[rank0]: │ ❱ 1012 │ │ results = self._run_stage() │
[rank0]: │ 1013 │ │ │
[rank0]: │ 1014 │ │ # ---------------------------- │
[rank0]: │ 1015 │ │ # POST-Training CLEAN UP │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/trainer.py:1056 in _run_stage │
[rank0]: │ │
[rank0]: │ 1053 │ │ │ with isolate_rng(): │
[rank0]: │ 1054 │ │ │ │ self._run_sanity_check() │
[rank0]: │ 1055 │ │ │ with torch.autograd.set_detect_anomaly(self._detect_anoma │
[rank0]: │ ❱ 1056 │ │ │ │ self.fit_loop.run() │
[rank0]: │ 1057 │ │ │ return None │
[rank0]: │ 1058 │ │ raise RuntimeError(f"Unexpected state {self.state}") │
[rank0]: │ 1059 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/fit_loop.py:216 in run │
[rank0]: │ │
[rank0]: │ 213 │ │ while not self.done: │
[rank0]: │ 214 │ │ │ try: │
[rank0]: │ 215 │ │ │ │ self.on_advance_start() │
[rank0]: │ ❱ 216 │ │ │ │ self.advance() │
[rank0]: │ 217 │ │ │ │ self.on_advance_end() │
[rank0]: │ 218 │ │ │ except StopIteration: │
[rank0]: │ 219 │ │ │ │ break │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/fit_loop.py:455 in advance │
[rank0]: │ │
[rank0]: │ 452 │ │ │ ) │
[rank0]: │ 453 │ │ with self.trainer.profiler.profile("run_training_epoch"): │
[rank0]: │ 454 │ │ │ assert self._data_fetcher is not None │
[rank0]: │ ❱ 455 │ │ │ self.epoch_loop.run(self._data_fetcher) │
[rank0]: │ 456 │ │
[rank0]: │ 457 │ def on_advance_end(self) -> None: │
[rank0]: │ 458 │ │ trainer = self.trainer │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/training_epoch_loop.py:152 in run │
[rank0]: │ │
[rank0]: │ 149 │ │ self.on_run_start(data_fetcher) │
[rank0]: │ 150 │ │ while not self.done: │
[rank0]: │ 151 │ │ │ try: │
[rank0]: │ ❱ 152 │ │ │ │ self.advance(data_fetcher) │
[rank0]: │ 153 │ │ │ │ self.on_advance_end(data_fetcher) │
[rank0]: │ 154 │ │ │ except StopIteration: │
[rank0]: │ 155 │ │ │ │ break │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/training_epoch_loop.py:344 in advance │
[rank0]: │ │
[rank0]: │ 341 │ │ │ with trainer.profiler.profile("run_training_batch"): │
[rank0]: │ 342 │ │ │ │ if trainer.lightning_module.automatic_optimization: │
[rank0]: │ 343 │ │ │ │ │ # in automatic optimization, there can only be one │
[rank0]: │ ❱ 344 │ │ │ │ │ batch_output = self.automatic_optimization.run(tra │
[rank0]: │ 345 │ │ │ │ else: │
[rank0]: │ 346 │ │ │ │ │ batch_output = self.manual_optimization.run(kwargs │
[rank0]: │ 347 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/optimization/automatic.py:192 in run │
[rank0]: │ │
[rank0]: │ 189 │ │ # ------------------------------ │
[rank0]: │ 190 │ │ # gradient update with accumulated gradients │
[rank0]: │ 191 │ │ else: │
[rank0]: │ ❱ 192 │ │ │ self._optimizer_step(batch_idx, closure) │
[rank0]: │ 193 │ │ │
[rank0]: │ 194 │ │ result = closure.consume_result() │
[rank0]: │ 195 │ │ if result.loss is None: │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/optimization/automatic.py:270 in _optimizer_step │
[rank0]: │ │
[rank0]: │ 267 │ │ │ self.optim_progress.optimizer.step.increment_ready() │
[rank0]: │ 268 │ │ │
[rank0]: │ 269 │ │ # model hook │
[rank0]: │ ❱ 270 │ │ call._call_lightning_module_hook( │
[rank0]: │ 271 │ │ │ trainer, │
[rank0]: │ 272 │ │ │ "optimizer_step", │
[rank0]: │ 273 │ │ │ trainer.current_epoch, │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/call.py:176 in _call_lightning_module_hook │
[rank0]: │ │
[rank0]: │ 173 │ pl_module._current_fx_name = hook_name │
[rank0]: │ 174 │ │
[rank0]: │ 175 │ with trainer.profiler.profile(f"[LightningModule]{pl_module.__clas │
[rank0]: │ ❱ 176 │ │ output = fn(*args, **kwargs) │
[rank0]: │ 177 │ │
[rank0]: │ 178 │ # restore current_fx when nested context │
[rank0]: │ 179 │ pl_module._current_fx_name = prev_fx_name │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/core/module.py:1328 in optimizer_step │
[rank0]: │ │
[rank0]: │ 1325 │ │ │ │ # Add your custom logic to run directly after optimi │
[rank0]: │ 1326 │ │
[rank0]: │ 1327 │ │ """ │
[rank0]: │ ❱ 1328 │ │ optimizer.step(closure=optimizer_closure) │
[rank0]: │ 1329 │ │
[rank0]: │ 1330 │ def optimizer_zero_grad(self, epoch: int, batch_idx: int, optimiz │
[rank0]: │ 1331 │ │ """Override this method to change the default behaviour of `` │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/core/optimizer.py:154 in step │
[rank0]: │ │
[rank0]: │ 151 │ │ │ raise MisconfigurationException("When optimizer.step(clos │
[rank0]: │ 152 │ │ │
[rank0]: │ 153 │ │ assert self._strategy is not None │
[rank0]: │ ❱ 154 │ │ step_output = self._strategy.optimizer_step(self._optimizer, c │
[rank0]: │ 155 │ │ │
[rank0]: │ 156 │ │ self._on_after_step() │
[rank0]: │ 157 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/strategies/ddp.py:270 in optimizer_step │
[rank0]: │ │
[rank0]: │ 267 │ │ │ **kwargs: Any extra arguments to optimizer.step │
[rank0]: │ 268 │ │ │
[rank0]: │ 269 │ │ """ │
[rank0]: │ ❱ 270 │ │ optimizer_output = super().optimizer_step(optimizer, closure, │
[rank0]: │ 271 │ │ │
[rank0]: │ 272 │ │ if self._model_averager is None: │
[rank0]: │ 273 │ │ │ return optimizer_output │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/strategies/strategy.py:239 in optimizer_step │
[rank0]: │ │
[rank0]: │ 236 │ │ model = model or self.lightning_module │
[rank0]: │ 237 │ │ # TODO(fabric): remove assertion once strategy's optimizer_ste │
[rank0]: │ 238 │ │ assert isinstance(model, pl.LightningModule) │
[rank0]: │ ❱ 239 │ │ return self.precision_plugin.optimizer_step(optimizer, model=m │
[rank0]: │ 240 │ │
[rank0]: │ 241 │ def _setup_model_and_optimizers(self, model: Module, optimizers: l │
[rank0]: │ 242 │ │ """Setup a model and multiple optimizers together. │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/plugins/precision/precision.py:123 in optimizer_step │
[rank0]: │ │
[rank0]: │ 120 │ ) -> Any: │
[rank0]: │ 121 │ │ """Hook to run the optimizer step.""" │
[rank0]: │ 122 │ │ closure = partial(self._wrap_closure, model, optimizer, closur │
[rank0]: │ ❱ 123 │ │ return optimizer.step(closure=closure, **kwargs) │
[rank0]: │ 124 │ │
[rank0]: │ 125 │ def _clip_gradients( │
[rank0]: │ 126 │ │ self, │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /optim/lr_scheduler.py:124 in wrapper │
[rank0]: │ │
[rank0]: │ 121 │ │ │ │ def wrapper(*args, **kwargs): │
[rank0]: │ 122 │ │ │ │ │ opt = opt_ref() │
[rank0]: │ 123 │ │ │ │ │ opt._opt_called = True # type: ignore[union-attr │
[rank0]: │ ❱ 124 │ │ │ │ │ return func.__get__(opt, opt.__class__)(*args, ** │
[rank0]: │ 125 │ │ │ │ │
[rank0]: │ 126 │ │ │ │ wrapper._wrapped_by_lr_sched = True # type: ignore[a │
[rank0]: │ 127 │ │ │ │ return wrapper │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /optim/optimizer.py:485 in wrapper │
[rank0]: │ │
[rank0]: │ 482 │ │ │ │ │ │ │ │ f"{func} must return None or a tuple │
[rank0]: │ 483 │ │ │ │ │ │ │ ) │
[rank0]: │ 484 │ │ │ │ │
[rank0]: │ ❱ 485 │ │ │ │ out = func(*args, **kwargs) │
[rank0]: │ 486 │ │ │ │ self._optimizer_step_code() │
[rank0]: │ 487 │ │ │ │ │
[rank0]: │ 488 │ │ │ │ # call optimizer step post hooks │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /optim/optimizer.py:79 in _use_grad │
[rank0]: │ │
[rank0]: │ 76 │ │ │ # see https://github.com/pytorch/pytorch/issues/104053 │
[rank0]: │ 77 │ │ │ torch.set_grad_enabled(self.defaults["differentiable"]) │
[rank0]: │ 78 │ │ │ torch._dynamo.graph_break() │
[rank0]: │ ❱ 79 │ │ │ ret = func(self, *args, **kwargs) │
[rank0]: │ 80 │ │ finally: │
[rank0]: │ 81 │ │ │ torch._dynamo.graph_break() │
[rank0]: │ 82 │ │ │ torch.set_grad_enabled(prev_grad) │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /optim/adam.py:225 in step │
[rank0]: │ │
[rank0]: │ 222 │ │ loss = None │
[rank0]: │ 223 │ │ if closure is not None: │
[rank0]: │ 224 │ │ │ with torch.enable_grad(): │
[rank0]: │ ❱ 225 │ │ │ │ loss = closure() │
[rank0]: │ 226 │ │ │
[rank0]: │ 227 │ │ for group in self.param_groups: │
[rank0]: │ 228 │ │ │ params_with_grad: list[Tensor] = [] │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/plugins/precision/precision.py:109 in _wrap_closure │
[rank0]: │ │
[rank0]: │ 106 │ │ consistent with the Precision subclasses that cannot pass │
[rank0]: │ 107 │ │ │
[rank0]: │ 108 │ │ """ │
[rank0]: │ ❱ 109 │ │ closure_result = closure() │
[rank0]: │ 110 │ │ self._after_closure(model, optimizer) │
[rank0]: │ 111 │ │ return closure_result │
[rank0]: │ 112 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/optimization/automatic.py:146 in __call__ │
[rank0]: │ │
[rank0]: │ 143 │ │
[rank0]: │ 144 │ @override │
[rank0]: │ 145 │ def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]: │
[rank0]: │ ❱ 146 │ │ self._result = self.closure(*args, **kwargs) │
[rank0]: │ 147 │ │ return self._result.loss │
[rank0]: │ 148 │
[rank0]: │ 149 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /utils/_contextlib.py:116 in decorate_context │
[rank0]: │ │
[rank0]: │ 113 │ @functools.wraps(func) │
[rank0]: │ 114 │ def decorate_context(*args, **kwargs): │
[rank0]: │ 115 │ │ with ctx_factory(): │
[rank0]: │ ❱ 116 │ │ │ return func(*args, **kwargs) │
[rank0]: │ 117 │ │
[rank0]: │ 118 │ return decorate_context │
[rank0]: │ 119 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/optimization/automatic.py:131 in closure │
[rank0]: │ │
[rank0]: │ 128 │ @override │
[rank0]: │ 129 │ @torch.enable_grad() │
[rank0]: │ 130 │ def closure(self, *args: Any, **kwargs: Any) -> ClosureResult: │
[rank0]: │ ❱ 131 │ │ step_output = self._step_fn() │
[rank0]: │ 132 │ │ │
[rank0]: │ 133 │ │ if step_output.closure_loss is None: │
[rank0]: │ 134 │ │ │ self.warning_cache.warn("training_step returned None. │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/loops/optimization/automatic.py:319 in _training_step │
[rank0]: │ │
[rank0]: │ 316 │ │ """ │
[rank0]: │ 317 │ │ trainer = self.trainer │
[rank0]: │ 318 │ │ │
[rank0]: │ ❱ 319 │ │ training_step_output = call._call_strategy_hook(trainer, "trai │
[rank0]: │ 320 │ │ self.trainer.strategy.post_training_step() # unused hook - ca │
[rank0]: │ 321 │ │ │
[rank0]: │ 322 │ │ if training_step_output is None and trainer.world_size > 1: │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/trainer/call.py:328 in _call_strategy_hook │
[rank0]: │ │
[rank0]: │ 325 │ │ return None │
[rank0]: │ 326 │ │
[rank0]: │ 327 │ with trainer.profiler.profile(f"[Strategy]{trainer.strategy.__clas │
[rank0]: │ ❱ 328 │ │ output = fn(*args, **kwargs) │
[rank0]: │ 329 │ │
[rank0]: │ 330 │ # restore current_fx when nested context │
[rank0]: │ 331 │ pl_module._current_fx_name = prev_fx_name │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/strategies/strategy.py:390 in training_step │
[rank0]: │ │
[rank0]: │ 387 │ │ assert self.model is not None │
[rank0]: │ 388 │ │ with self.precision_plugin.train_step_context(): │
[rank0]: │ 389 │ │ │ if self.model != self.lightning_module: │
[rank0]: │ ❱ 390 │ │ │ │ return self._forward_redirection(self.model, self.ligh │
[rank0]: │ 391 │ │ │ return self.lightning_module.training_step(*args, **kwargs │
[rank0]: │ 392 │ │
[rank0]: │ 393 │ def post_training_step(self) -> None: │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/strategies/strategy.py:641 in __call__ │
[rank0]: │ │
[rank0]: │ 638 │ │ # Patch the original_module's forward so we can redirect the a │
[rank0]: │ 639 │ │ original_module.forward = wrapped_forward # type: ignore[meth │
[rank0]: │ 640 │ │ │
[rank0]: │ ❱ 641 │ │ wrapper_output = wrapper_module(*args, **kwargs) │
[rank0]: │ 642 │ │ self.on_after_outer_forward(wrapper_module, original_module) │
[rank0]: │ 643 │ │ return wrapper_output │
[rank0]: │ 644 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1751 in _wrapped_call_impl │
[rank0]: │ │
[rank0]: │ 1748 │ │ if self._compiled_call_impl is not None: │
[rank0]: │ 1749 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: │
[rank0]: │ 1750 │ │ else: │
[rank0]: │ ❱ 1751 │ │ │ return self._call_impl(*args, **kwargs) │
[rank0]: │ 1752 │ │
[rank0]: │ 1753 │ # torchrec tests the code consistency with the following code │
[rank0]: │ 1754 │ # fmt: off │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1762 in _call_impl │
[rank0]: │ │
[rank0]: │ 1759 │ │ if not (self._backward_hooks or self._backward_pre_hooks or s │
[rank0]: │ 1760 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hoo │
[rank0]: │ 1761 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks │
[rank0]: │ ❱ 1762 │ │ │ return forward_call(*args, **kwargs) │
[rank0]: │ 1763 │ │ │
[rank0]: │ 1764 │ │ result = None │
[rank0]: │ 1765 │ │ called_always_called_hooks = set() │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/parallel/distributed.py:1637 in forward │
[rank0]: │ │
[rank0]: │ 1634 │ │ │ output = ( │
[rank0]: │ 1635 │ │ │ │ self.module.forward(*inputs, **kwargs) │
[rank0]: │ 1636 │ │ │ │ if self._delay_all_reduce_all_params │
[rank0]: │ ❱ 1637 │ │ │ │ else self._run_ddp_forward(*inputs, **kwargs) │
[rank0]: │ 1638 │ │ │ ) │
[rank0]: │ 1639 │ │ │ return self._post_forward(output) │
[rank0]: │ 1640 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/parallel/distributed.py:1464 in _run_ddp_forward │
[rank0]: │ │
[rank0]: │ 1461 │ │ │ return self.module(*inputs, **kwargs) # type: ignore[ind │
[rank0]: │ 1462 │ │ else: │
[rank0]: │ 1463 │ │ │ with self._inside_ddp_forward(): │
[rank0]: │ ❱ 1464 │ │ │ │ return self.module(*inputs, **kwargs) # type: ignore │
[rank0]: │ 1465 │ │
[rank0]: │ 1466 │ def _clear_grad_buffer(self): │
[rank0]: │ 1467 │ │ # Making param.grad points to the grad buffers before backwar │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1751 in _wrapped_call_impl │
[rank0]: │ │
[rank0]: │ 1748 │ │ if self._compiled_call_impl is not None: │
[rank0]: │ 1749 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: │
[rank0]: │ 1750 │ │ else: │
[rank0]: │ ❱ 1751 │ │ │ return self._call_impl(*args, **kwargs) │
[rank0]: │ 1752 │ │
[rank0]: │ 1753 │ # torchrec tests the code consistency with the following code │
[rank0]: │ 1754 │ # fmt: off │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1762 in _call_impl │
[rank0]: │ │
[rank0]: │ 1759 │ │ if not (self._backward_hooks or self._backward_pre_hooks or s │
[rank0]: │ 1760 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hoo │
[rank0]: │ 1761 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks │
[rank0]: │ ❱ 1762 │ │ │ return forward_call(*args, **kwargs) │
[rank0]: │ 1763 │ │ │
[rank0]: │ 1764 │ │ result = None │
[rank0]: │ 1765 │ │ called_always_called_hooks = set() │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/pytor │
[rank0]: │ ch_lightning/strategies/strategy.py:634 in wrapped_forward │
[rank0]: │ │
[rank0]: │ 631 │ │ │ original_module.forward = original_forward # type: ignore │
[rank0]: │ 632 │ │ │ # Call the actual method e.g. .training_step(...) │
[rank0]: │ 633 │ │ │ method = getattr(original_module, method_name) │
[rank0]: │ ❱ 634 │ │ │ out = method(*_args, **_kwargs) │
[rank0]: │ 635 │ │ │ self.on_after_inner_forward(wrapper_module, original_modul │
[rank0]: │ 636 │ │ │ return out │
[rank0]: │ 637 │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/everyvoice/model/feature_prediction/Fast │
[rank0]: │ Speech2_lightning/fs2/model.py:387 in training_step │
[rank0]: │ │
[rank0]: │ 384 │ │ │ return self(batch, inference=True) │
[rank0]: │ 385 │ │
[rank0]: │ 386 │ def training_step(self, batch, batch_idx): │
[rank0]: │ ❱ 387 │ │ output = self(batch) │
[rank0]: │ 388 │ │ losses = self.loss(output, batch, self.current_epoch) │
[rank0]: │ 389 │ │ self.log_dict( │
[rank0]: │ 390 │ │ │ {f"training/{k}_loss": v.item() for k, v in losses.items() │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1751 in _wrapped_call_impl │
[rank0]: │ │
[rank0]: │ 1748 │ │ if self._compiled_call_impl is not None: │
[rank0]: │ 1749 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: │
[rank0]: │ 1750 │ │ else: │
[rank0]: │ ❱ 1751 │ │ │ return self._call_impl(*args, **kwargs) │
[rank0]: │ 1752 │ │
[rank0]: │ 1753 │ # torchrec tests the code consistency with the following code │
[rank0]: │ 1754 │ # fmt: off │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1762 in _call_impl │
[rank0]: │ │
[rank0]: │ 1759 │ │ if not (self._backward_hooks or self._backward_pre_hooks or s │
[rank0]: │ 1760 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hoo │
[rank0]: │ 1761 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks │
[rank0]: │ ❱ 1762 │ │ │ return forward_call(*args, **kwargs) │
[rank0]: │ 1763 │ │ │
[rank0]: │ 1764 │ │ result = None │
[rank0]: │ 1765 │ │ called_always_called_hooks = set() │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/everyvoice/model/feature_prediction/Fast │
[rank0]: │ Speech2_lightning/fs2/model.py:218 in forward │
[rank0]: │ │
[rank0]: │ 215 │ │ │ x = x + lang_emb.unsqueeze(1) │
[rank0]: │ 216 │ │ │
[rank0]: │ 217 │ │ # VarianceAdaptor out │
[rank0]: │ ❱ 218 │ │ variance_adaptor_out = self.variance_adaptor( │
[rank0]: │ 219 │ │ │ inputs, │
[rank0]: │ 220 │ │ │ x, │
[rank0]: │ 221 │ │ │ batch, │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1751 in _wrapped_call_impl │
[rank0]: │ │
[rank0]: │ 1748 │ │ if self._compiled_call_impl is not None: │
[rank0]: │ 1749 │ │ │ return self._compiled_call_impl(*args, **kwargs) # type: │
[rank0]: │ 1750 │ │ else: │
[rank0]: │ ❱ 1751 │ │ │ return self._call_impl(*args, **kwargs) │
[rank0]: │ 1752 │ │
[rank0]: │ 1753 │ # torchrec tests the code consistency with the following code │
[rank0]: │ 1754 │ # fmt: off │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/.venv/lib/python3.12/site-packages/torch │
[rank0]: │ /nn/modules/module.py:1762 in _call_impl │
[rank0]: │ │
[rank0]: │ 1759 │ │ if not (self._backward_hooks or self._backward_pre_hooks or s │
[rank0]: │ 1760 │ │ │ │ or _global_backward_pre_hooks or _global_backward_hoo │
[rank0]: │ 1761 │ │ │ │ or _global_forward_hooks or _global_forward_pre_hooks │
[rank0]: │ ❱ 1762 │ │ │ return forward_call(*args, **kwargs) │
[rank0]: │ 1763 │ │ │
[rank0]: │ 1764 │ │ result = None │
[rank0]: │ 1765 │ │ called_always_called_hooks = set() │
[rank0]: │ │
[rank0]: │ /gpfs/fs3c/nrc/dt/jol004/EveryVoice/everyvoice/model/feature_prediction/Fast │
[rank0]: │ Speech2_lightning/fs2/variance_adaptor.py:302 in forward │
[rank0]: │ │
[rank0]: │ 299 │ │ │ │ │ │ batch["basename"], [not x for x in equal_dur_t │
[rank0]: │ 300 │ │ │ │ │ ) │
[rank0]: │ 301 │ │ │ │ ) │
[rank0]: │ ❱ 302 │ │ │ │ raise BadDataError( │
[rank0]: │ 303 │ │ │ │ │ f"Something failed with the following items, pleas │
[rank0]: │ 304 │ │ │ │ ) from e │
[rank0]: │ 305 │ │ │ │ sys.exit(1) │
[rank0]: ╰──────────────────────────────────────────────────────────────────────────────╯
[rank0]: BadDataError: Something failed with the following items, please check them for
[rank0]: errors: ['LJ045-0058']
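For context, the assertion at variance_adaptor.py:293 checks that the summed per-phone duration targets match the mel lengths; with zero phones the sum is 0 and the check can never pass. A standalone sketch of that logic, with made-up tensor values:

```python
import torch
from itertools import compress

# Padded per-phone duration targets for a batch of two items:
# a normal utterance and an empty-text one (no phones → all zeros).
duration_target = torch.tensor([[3, 5, 4], [0, 0, 0]])
mel_lens = torch.tensor([12, 37])  # actual mel-frame counts

equal_dur_targets = torch.eq(duration_target.sum(dim=1), mel_lens)
print(equal_dur_targets)  # tensor([ True, False]) → the assert fails

# The except branch then reports the offending basenames:
basenames = ["LJ050-0001", "LJ045-0058"]
print(list(compress(basenames, [not x for x in equal_dur_targets])))
# ['LJ045-0058']
```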
Environment
Current environment
# Please paste the output of `everyvoice --diagnostic` here
# EveryVoice Diagnostic information
More info
No response