From bd4dd6b716c3d6544d6da6238cc5f6ca01c3eda8 Mon Sep 17 00:00:00 2001 From: Qingwen Zhang <35365764+Kin-Zhang@users.noreply.github.com> Date: Wed, 9 Jul 2025 10:45:12 +0100 Subject: [PATCH 1/3] hotfix(dataloader): need specific len otherwise it may stuck because of out of index. --- train_recammaster.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/train_recammaster.py b/train_recammaster.py index d687f21..a529934 100644 --- a/train_recammaster.py +++ b/train_recammaster.py @@ -48,7 +48,9 @@ def crop_and_resize(self, image): ) return image - + def __len__(self): + return len(self.path) + def load_frames_using_imageio(self, file_path, max_num_frames, start_frame_id, interval, num_frames, frame_process): reader = imageio.get_reader(file_path) if reader.count_frames() < max_num_frames or reader.count_frames() - 1 < start_frame_id + (num_frames - 1) * interval: @@ -115,8 +117,9 @@ def __getitem__(self, data_id): else: data = {"text": text, "video": video, "path": path} break - except: - data_id += 1 + except Exception as e: + print(f"ERROR WHEN LOADING: {e}") + self.__getitem__(data_id+1 if data_id+1 < len(self.path) else 0) return data @@ -202,7 +205,9 @@ def get_relative_pose(self, cam_params): ret_poses = [target_cam_c2w, ] + [abs2rel @ abs_c2w for abs_c2w in abs_c2ws[1:]] ret_poses = np.array(ret_poses, dtype=np.float32) return ret_poses - + + def __len__(self): + return len(self.path) def __getitem__(self, index): # Return: From b9dcc6341c0c9cb658d093a160d1fd53d87d0d41 Mon Sep 17 00:00:00 2001 From: Qingwen Zhang <35365764+Kin-Zhang@users.noreply.github.com> Date: Wed, 9 Jul 2025 17:40:23 +0100 Subject: [PATCH 2/3] fix: tensordataset, should be min between file num itself and step_pre_epoch. 
--- train_recammaster.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/train_recammaster.py b/train_recammaster.py index a529934..ac60ef7 100644 --- a/train_recammaster.py +++ b/train_recammaster.py @@ -205,9 +205,6 @@ def get_relative_pose(self, cam_params): ret_poses = [target_cam_c2w, ] + [abs2rel @ abs_c2w for abs_c2w in abs_c2ws[1:]] ret_poses = np.array(ret_poses, dtype=np.float32) return ret_poses - - def __len__(self): - return len(self.path) def __getitem__(self, index): # Return: @@ -268,7 +265,7 @@ def __getitem__(self, index): def __len__(self): - return self.steps_per_epoch + return min(len(self.path), self.steps_per_epoch) From c84c05e2c68afd746495a8924f32fa07b1367b59 Mon Sep 17 00:00:00 2001 From: Qingwen Zhang <35365764+Kin-Zhang@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:21:46 +0100 Subject: [PATCH 3/3] docs: Update README.md on batch size. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c07aa9b..c81dc92 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ Step 3: Training ```shell CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" python train_recammaster.py --task train --dataset_path recam_train_data --output_path ./models/train --dit_path "models/Wan-AI/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors" --steps_per_epoch 8000 --max_epochs 100 --learning_rate 1e-4 --accumulate_grad_batches 1 --use_gradient_checkpointing --dataloader_num_workers 4 ``` -We do not explore the optimal set of hyper-parameters and train with a batch size of 1 on each GPU. You may achieve better model performance by adjusting hyper-parameters such as the learning rate and increasing the batch size. +We do not explore the optimal set of hyper-parameters and train with a batch size of 1 on each GPU. You may achieve better model performance by adjusting hyper-parameters such as the learning rate ~~and increasing the batch size~~. 
We only support batch size = 1; see more discussion here: [Wan2.1 finetuning script seems to only support bs = 1](https://github.com/modelscope/DiffSynth-Studio/issues/600) Step 4: Test the model