From bd4dd6b716c3d6544d6da6238cc5f6ca01c3eda8 Mon Sep 17 00:00:00 2001 From: Qingwen Zhang <35365764+Kin-Zhang@users.noreply.github.com> Date: Wed, 9 Jul 2025 10:45:12 +0100 Subject: [PATCH 1/3] hotfix(dataloader): need specific len otherwise it may stuck because of out of index. --- train_recammaster.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/train_recammaster.py b/train_recammaster.py index d687f21..a529934 100644 --- a/train_recammaster.py +++ b/train_recammaster.py @@ -48,7 +48,9 @@ def crop_and_resize(self, image): ) return image - + def __len__(self): + return len(self.path) + def load_frames_using_imageio(self, file_path, max_num_frames, start_frame_id, interval, num_frames, frame_process): reader = imageio.get_reader(file_path) if reader.count_frames() < max_num_frames or reader.count_frames() - 1 < start_frame_id + (num_frames - 1) * interval: @@ -115,8 +117,9 @@ def __getitem__(self, data_id): else: data = {"text": text, "video": video, "path": path} break - except: - data_id += 1 + except Exception as e: + print(f"ERROR WHEN LOADING: {e}") + self.__getitem__(data_id+1 if data_id+1 < len(self.path) else 0) return data @@ -202,7 +205,9 @@ def get_relative_pose(self, cam_params): ret_poses = [target_cam_c2w, ] + [abs2rel @ abs_c2w for abs_c2w in abs_c2ws[1:]] ret_poses = np.array(ret_poses, dtype=np.float32) return ret_poses - + + def __len__(self): + return len(self.path) def __getitem__(self, index): # Return: From b9dcc6341c0c9cb658d093a160d1fd53d87d0d41 Mon Sep 17 00:00:00 2001 From: Qingwen Zhang <35365764+Kin-Zhang@users.noreply.github.com> Date: Wed, 9 Jul 2025 17:40:23 +0100 Subject: [PATCH 2/3] fix: tensordataset, should be min between file num itself and step_pre_epoch. 
--- train_recammaster.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/train_recammaster.py b/train_recammaster.py index a529934..ac60ef7 100644 --- a/train_recammaster.py +++ b/train_recammaster.py @@ -205,9 +205,6 @@ def get_relative_pose(self, cam_params): ret_poses = [target_cam_c2w, ] + [abs2rel @ abs_c2w for abs_c2w in abs_c2ws[1:]] ret_poses = np.array(ret_poses, dtype=np.float32) return ret_poses - - def __len__(self): - return len(self.path) def __getitem__(self, index): # Return: @@ -268,7 +265,7 @@ def __getitem__(self, index): def __len__(self): - return self.steps_per_epoch + return min(len(self.path), self.steps_per_epoch) From c84c05e2c68afd746495a8924f32fa07b1367b59 Mon Sep 17 00:00:00 2001 From: Qingwen Zhang <35365764+Kin-Zhang@users.noreply.github.com> Date: Mon, 14 Jul 2025 11:21:46 +0100 Subject: [PATCH 3/3] docs: Update README.md on batch size. --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c07aa9b..c81dc92 100644 --- a/README.md +++ b/README.md @@ -136,7 +136,7 @@ Step 3: Training ```shell CUDA_VISIBLE_DEVICES="0,1,2,3,4,5,6,7" python train_recammaster.py --task train --dataset_path recam_train_data --output_path ./models/train --dit_path "models/Wan-AI/Wan2.1-T2V-1.3B/diffusion_pytorch_model.safetensors" --steps_per_epoch 8000 --max_epochs 100 --learning_rate 1e-4 --accumulate_grad_batches 1 --use_gradient_checkpointing --dataloader_num_workers 4 ``` -We do not explore the optimal set of hyper-parameters and train with a batch size of 1 on each GPU. You may achieve better model performance by adjusting hyper-parameters such as the learning rate and increasing the batch size. +We do not explore the optimal set of hyper-parameters and train with a batch size of 1 on each GPU. You may achieve better model performance by adjusting hyper-parameters such as the learning rate ~~and increasing the batch size~~. 
We only support batch size = 1; see more discussion here: [Wan2.1 finetuning script seems to only support bs = 1](https://github.com/modelscope/DiffSynth-Studio/issues/600) Step 4: Test the model