From 53b06ff7f290a3c4d3a97fc277cf1e5fcce4caea Mon Sep 17 00:00:00 2001
From: Mohamed Sameh
Date: Fri, 26 Apr 2024 11:19:18 +0200
Subject: [PATCH 1/2] feat: allow changing the LR_scheduler

---
 peptdeep/model/model_interface.py | 198 ++++++++++++++++++++++--------
 1 file changed, 144 insertions(+), 54 deletions(-)

diff --git a/peptdeep/model/model_interface.py b/peptdeep/model/model_interface.py
index 2aa8ef5d..3b048b0f 100644
--- a/peptdeep/model/model_interface.py
+++ b/peptdeep/model/model_interface.py
@@ -32,56 +32,125 @@
     get_batch_mod_feature
 )
 
-# `transformers.optimization.get_cosine_schedule_with_warmup` will import tensorflow,
-# resulting in some package version issues.
-# Here we copy the code from transformers.optimization
-def _get_cosine_schedule_with_warmup_lr_lambda(
-    current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float
-):
-    if current_step < num_warmup_steps:
-        return float(current_step+1) / float(max(1, num_warmup_steps))
-    progress = float(current_step - num_warmup_steps) / float(num_training_steps - num_warmup_steps)
-    return (
-        max(1e-10, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
-    )
-    # if current_step < num_warmup_steps:
-    #     return float(current_step) / float(max(1, num_warmup_steps))
-    # progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
-    # return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
-
-
-def get_cosine_schedule_with_warmup(
-    optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1
-):
+class LR_SchedulerInterface(object):
+    def __init__(self, optimizer:torch.optim.Optimizer, **kwargs):
+        raise NotImplementedError
+
+    def step(self, epoch:int, loss:float):
+        """
+        This method must be implemented in the sub-class. It will be called to get the learning rate for the next epoch.
+        While the one we are using here does not need the loss value, this is left incase of using something like the ReduceLROnPlateau scheduler.
+
+        Parameters
+        ----------
+        epoch : int
+            The current epoch number.
+        loss : float
+            The loss value of the current epoch.
+        """
+        raise NotImplementedError
+
+    def get_last_lr(self)->list:
+        """
+        Get the last learning rate.
+
+        Returns
+        -------
+        list
+            The last learning rate of each parameter group.
+        """
+        raise NotImplementedError
+
+class WarmupLR_Scheduler(LR_SchedulerInterface):
     """
-    Create a schedule with a learning rate that decreases following the values of the cosine function between the
-    initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the
-    initial lr set in the optimizer.
-
-    Args:
-        optimizer ([`~torch.optim.Optimizer`]):
-            The optimizer for which to schedule the learning rate.
-        num_warmup_steps (`int`):
-            The number of steps for the warmup phase.
-        num_training_steps (`int`):
-            The total number of training steps.
-        num_cycles (`float`, *optional*, defaults to 0.5):
-            The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0
-            following a half-cosine).
-        last_epoch (`int`, *optional*, defaults to -1):
-            The index of the last epoch when resuming training.
-
-    Return:
-        `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
+    A learning rate scheduler that includes a warmup phase and then a cosine annealing phase.
""" - lr_lambda = functools.partial( - _get_cosine_schedule_with_warmup_lr_lambda, - num_warmup_steps=num_warmup_steps, - num_training_steps=num_training_steps, - num_cycles=num_cycles, - ) - return LambdaLR(optimizer, lr_lambda, last_epoch) + def __init__(self, + optimizer:torch.optim.Optimizer, + num_warmup_steps:int, + num_training_steps:int, + num_cycles:float=0.5, + last_epoch:int=-1 + ): + self.optimizer = optimizer + self.lambda_lr = self.get_cosine_schedule_with_warmup( + optimizer, num_warmup_steps, num_training_steps, num_cycles, last_epoch + ) + + def step(self, epoch:int, loss:float): + """ + Get the learning rate for the next epoch. + + Parameters + ---------- + epoch : int + The current epoch number. + loss : float + The loss value of the current epoch. + + """ + return self.lambda_lr.step(epoch) + + def get_last_lr(self)->float: + """ + Get the last learning rate. + + Returns + ------- + float + The last learning rate. + """ + return self.lambda_lr.get_last_lr() + + + # `transformers.optimization.get_cosine_schedule_with_warmup` will import tensorflow, + # resulting in some package version issues. + # Here we copy the code from transformers.optimization + def _get_cosine_schedule_with_warmup_lr_lambda(self, + current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float + ): + if current_step < num_warmup_steps: + return float(current_step+1) / float(max(1, num_warmup_steps)) + + progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps)) + return max(1e-10, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) + + + + def get_cosine_schedule_with_warmup( self, + optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1 + ): + """ + Create a schedule with a learning rate that decreases following the values of the cosine function between the + initial lr set in the optimizer to 0, after a warmup period during which it increases linearly between 0 and the + initial lr set in the optimizer. + + Args: + optimizer ([`~torch.optim.Optimizer`]): + The optimizer for which to schedule the learning rate. + num_warmup_steps (`int`): + The number of steps for the warmup phase. + num_training_steps (`int`): + The total number of training steps. + num_cycles (`float`, *optional*, defaults to 0.5): + The number of waves in the cosine schedule (the defaults is to just decrease from the max value to 0 + following a half-cosine). + last_epoch (`int`, *optional*, defaults to -1): + The index of the last epoch when resuming training. + + Return: + `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule. + """ + + lr_lambda = functools.partial( + self._get_cosine_schedule_with_warmup_lr_lambda, + num_warmup_steps=num_warmup_steps, + num_training_steps=num_training_steps, + num_cycles=num_cycles, + ) + return LambdaLR(optimizer, lr_lambda, last_epoch) + def append_nAA_column_if_missing(precursor_df): """ @@ -125,6 +194,7 @@ def __init__(self, self.set_device(device) self.fixed_sequence_len = fixed_sequence_len self.min_pred_value = min_pred_value + self.lr_scheduler_class = WarmupLR_Scheduler @property def fixed_sequence_len(self)->int: @@ -185,6 +255,24 @@ def target_column_to_train(self)->str: def target_column_to_train(self, column:str): self._target_column_to_train = column + def set_lr_scheduler_class(self, lr_scheduler_class:LR_SchedulerInterface) -> None: + """ + Set the learning rate scheduler class. 
+        We require the user to pass a class that is a subclass of
+        LR_SchedulerInterface because the current implementation will create an instance of it within this class.
+
+        Parameters
+        ----------
+        lr_scheduler_class : LR_SchedulerInterface
+            The learning rate scheduler class. Since we create an instance of it within this class,
+            the ModelInterface needs the class to take the arguments `optimizer`, `num_warmup_steps`, and `num_training_steps`.
+
+        """
+        if not issubclass(lr_scheduler_class, LR_SchedulerInterface):
+            raise ValueError(
+                "The lr_scheduler_class must be a subclass of LR_SchedulerInterface"
+            )
+        else:
+            self.lr_scheduler_class = lr_scheduler_class
 
     def set_device(self,
         device_type:str = 'gpu',
         device_ids:list = []
@@ -304,8 +392,8 @@ def train_with_warmup(self,
                 batch_size, verbose_each_epoch,
                 **kwargs
             )
-
-            lr_scheduler.step()
+
+            lr_scheduler.step(epoch=epoch, loss=np.mean(batch_cost))
             if verbose: print(
                 f'[Training] Epoch={epoch+1}, lr={lr_scheduler.get_last_lr()[0]}, loss={np.mean(batch_cost)}'
             )
@@ -635,8 +723,8 @@ def _train_one_epoch_by_padding_zeros(self,
             else:
                 batch_cost.append(
                     self._train_one_batch(targets, features)
-                )
-
+                )
+
             if verbose_each_epoch:
                 batch_tqdm.set_description(
                     f'Epoch={epoch+1}, batch={len(batch_cost)}, loss={batch_cost[-1]:.4f}'
@@ -896,8 +984,10 @@ def set_lr(self, lr:float):
     def _get_lr_schedule_with_warmup(self, warmup_epoch, epoch):
         if warmup_epoch > epoch:
             warmup_epoch = epoch//2
-        return get_cosine_schedule_with_warmup(
-            self.optimizer, warmup_epoch, epoch
+        return self.lr_scheduler_class(
+            self.optimizer,
+            num_warmup_steps=warmup_epoch,
+            num_training_steps=epoch
         )
 
     def _pad_zeros_if_fixed_len(self, precursor_df:pd.DataFrame):
@@ -928,4 +1018,4 @@ def _check_predict_in_order(self, precursor_df:pd.DataFrame):
         if is_precursor_sorted(precursor_df):
             self._predict_in_order = True
         else:
-            self._predict_in_order = False
+            self._predict_in_order = False
\ No newline at end of file

From 333f8dead15b160ee93f37826f7b292f6182a045 Mon Sep 17 00:00:00 2001
From: Mohamed Sameh
Date: Sat, 27 Apr 2024 13:45:25 +0200
Subject: [PATCH 2/2] fix: typo

---
 peptdeep/model/model_interface.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/peptdeep/model/model_interface.py b/peptdeep/model/model_interface.py
index 3b048b0f..7efc6332 100644
--- a/peptdeep/model/model_interface.py
+++ b/peptdeep/model/model_interface.py
@@ -39,7 +39,7 @@ def __init__(self, optimizer:torch.optim.Optimizer, **kwargs):
 
     def step(self, epoch:int, loss:float):
         """
         This method must be implemented in the sub-class. It will be called to get the learning rate for the next epoch.
-        While the one we are using here does not need the loss value, this is left incase of using something like the ReduceLROnPlateau scheduler.
+        While the one we are using here does not need the loss value, this is left in case of using something like the ReduceLROnPlateau scheduler.
 
         Parameters
         ----------
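
Usage sketch: with these patches applied, a custom scheduler can replace the default `WarmupLR_Scheduler` by subclassing `LR_SchedulerInterface` and registering it via `set_lr_scheduler_class`. Below is a minimal sketch adapting torch's `ReduceLROnPlateau` (the scheduler the interface docstring mentions). The class name `PlateauScheduler`, its hyperparameters (`factor=0.5`, `patience=3`), and the `model` object are illustrative assumptions, not part of the patch; the requirements taken from the patch are that the class subclass `LR_SchedulerInterface`, accept `optimizer`, `num_warmup_steps`, and `num_training_steps` at construction, and implement `step(epoch, loss)` and `get_last_lr()`.

    import torch
    from peptdeep.model.model_interface import LR_SchedulerInterface

    class PlateauScheduler(LR_SchedulerInterface):
        """Hypothetical adapter: drive ReduceLROnPlateau through the new interface."""

        def __init__(self, optimizer: torch.optim.Optimizer,
                     num_warmup_steps: int = 0, num_training_steps: int = 0,
                     **kwargs):
            # The warmup/step counts are accepted but ignored here, because
            # ModelInterface._get_lr_schedule_with_warmup passes them when it
            # instantiates the registered class.
            self.optimizer = optimizer
            self._scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
                optimizer, mode="min", factor=0.5, patience=3
            )

        def step(self, epoch: int, loss: float):
            # ReduceLROnPlateau reacts to the epoch loss, not the epoch index;
            # this is why the interface's step() also receives the loss.
            self._scheduler.step(loss)

        def get_last_lr(self) -> list:
            # Read the lr from the optimizer itself so this also works on torch
            # versions where ReduceLROnPlateau provides no get_last_lr().
            return [group["lr"] for group in self.optimizer.param_groups]

    # model = SomeModelInterfaceSubclass(...)        # hypothetical model instance
    # model.set_lr_scheduler_class(PlateauScheduler) # validated via issubclass()
    # model.train_with_warmup(...)                   # training now drives PlateauScheduler

Because `step()` receives both the epoch index and the mean batch loss, step-count-driven schedulers (the default warmup/cosine one) and loss-driven schedulers (ReduceLROnPlateau-style) can be swapped behind the same interface.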