Update interface to allow different learning rate schedulers #155

Merged
merged 3 commits on May 1, 2024

Changes from all commits
198 changes: 144 additions & 54 deletions peptdeep/model/model_interface.py
@@ -32,56 +32,125 @@
get_batch_mod_feature
)

# `transformers.optimization.get_cosine_schedule_with_warmup` will import tensorflow,
# resulting in some package version issues.
# Here we copy the code from transformers.optimization
def _get_cosine_schedule_with_warmup_lr_lambda(
current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float
):
if current_step < num_warmup_steps:
return float(current_step+1) / float(max(1, num_warmup_steps))
progress = float(current_step - num_warmup_steps) / float(num_training_steps - num_warmup_steps)
return (
max(1e-10, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
)
# if current_step < num_warmup_steps:
# return float(current_step) / float(max(1, num_warmup_steps))
# progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
# return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))


def get_cosine_schedule_with_warmup(
optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1
):
class LR_SchedulerInterface(object):
def __init__(self, optimizer:torch.optim.Optimizer, **kwargs):
raise NotImplementedError

def step(self, epoch:int, loss:float):
"""
This method must be implemented in the sub-class. It is called once per epoch to update the learning rate for the next epoch.
While the default scheduler does not need the loss value, it is kept in the signature so that schedulers such as ReduceLROnPlateau can use it.

Parameters
----------
epoch : int
The current epoch number.
loss : float
The loss value of the current epoch.
"""
raise NotImplementedError

def get_last_lr(self)->float:
"""
Get the last learning rate.

Returns
-------
float
The last learning rate.
"""
raise NotImplementedError

class WarmupLR_Scheduler(LR_SchedulerInterface):
"""
Create a schedule with a learning rate that decreases following the values of the cosine function from the
initial lr set in the optimizer to 0, after a warmup period during which it increases linearly from 0 to the
initial lr set in the optimizer.

Args:
optimizer ([`~torch.optim.Optimizer`]):
The optimizer for which to schedule the learning rate.
num_warmup_steps (`int`):
The number of steps for the warmup phase.
num_training_steps (`int`):
The total number of training steps.
num_cycles (`float`, *optional*, defaults to 0.5):
The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
following a half-cosine).
last_epoch (`int`, *optional*, defaults to -1):
The index of the last epoch when resuming training.

Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
A learning rate scheduler that includes a warmup phase and then a cosine annealing phase.
"""

lr_lambda = functools.partial(
_get_cosine_schedule_with_warmup_lr_lambda,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_cycles=num_cycles,
)
return LambdaLR(optimizer, lr_lambda, last_epoch)
def __init__(self,
optimizer:torch.optim.Optimizer,
num_warmup_steps:int,
num_training_steps:int,
num_cycles:float=0.5,
last_epoch:int=-1
):
self.optimizer = optimizer
self.lambda_lr = self.get_cosine_schedule_with_warmup(
optimizer, num_warmup_steps, num_training_steps, num_cycles, last_epoch
)

def step(self, epoch:int, loss:float):
"""
Update the learning rate for the next epoch.

Parameters
----------
epoch : int
The current epoch number.
loss : float
The loss value of the current epoch.

"""
return self.lambda_lr.step(epoch)

def get_last_lr(self)->float:
"""
Get the last learning rate.

Returns
-------
float
The last learning rate.
"""
return self.lambda_lr.get_last_lr()


# `transformers.optimization.get_cosine_schedule_with_warmup` will import tensorflow,
# resulting in some package version issues.
# Here we copy the code from transformers.optimization
def _get_cosine_schedule_with_warmup_lr_lambda(self,
current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_cycles: float
):
if current_step < num_warmup_steps:
return float(current_step+1) / float(max(1, num_warmup_steps))

progress = float(current_step - num_warmup_steps) / float(max(1, num_training_steps - num_warmup_steps))
return max(1e-10, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
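# Illustrative numbers (not part of this diff): with num_warmup_steps=5,
# num_training_steps=20 and the default num_cycles=0.5, the returned multiplier
# rises linearly from 0.2 at step 0 to 1.0 at step 4, stays at 1.0 at step 5,
# then decays along a half-cosine to roughly 0.011 at step 19; LambdaLR scales
# the optimizer's base lr by this factor.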



def get_cosine_schedule_with_warmup(self,
optimizer, num_warmup_steps: int, num_training_steps: int, num_cycles: float = 0.5, last_epoch: int = -1
):
"""
Create a schedule with a learning rate that decreases following the values of the cosine function from the
initial lr set in the optimizer to 0, after a warmup period during which it increases linearly from 0 to the
initial lr set in the optimizer.

Args:
optimizer ([`~torch.optim.Optimizer`]):
The optimizer for which to schedule the learning rate.
num_warmup_steps (`int`):
The number of steps for the warmup phase.
num_training_steps (`int`):
The total number of training steps.
num_cycles (`float`, *optional*, defaults to 0.5):
The number of waves in the cosine schedule (the default is to just decrease from the max value to 0
following a half-cosine).
last_epoch (`int`, *optional*, defaults to -1):
The index of the last epoch when resuming training.

Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""

lr_lambda = functools.partial(
self._get_cosine_schedule_with_warmup_lr_lambda,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_cycles=num_cycles,
)
return LambdaLR(optimizer, lr_lambda, last_epoch)
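For context, a minimal usage sketch of the new class (the toy model, optimizer, and step counts below are illustrative assumptions, not taken from this PR):

import torch
from peptdeep.model.model_interface import WarmupLR_Scheduler

# toy model and optimizer, purely for illustration
model = torch.nn.Linear(10, 1)
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# warm up for 5 epochs, 20 training epochs in total
scheduler = WarmupLR_Scheduler(optimizer, num_warmup_steps=5, num_training_steps=20)

for epoch in range(20):
    loss = 0.0  # placeholder for the real epoch loss
    scheduler.step(epoch=epoch, loss=loss)  # mirrors the call in train_with_warmup
    print(epoch, scheduler.get_last_lr())   # returns a list, one lr per param group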


def append_nAA_column_if_missing(precursor_df):
"""
@@ -125,6 +194,7 @@ def __init__(self,
self.set_device(device)
self.fixed_sequence_len = fixed_sequence_len
self.min_pred_value = min_pred_value
self.lr_scheduler_class = WarmupLR_Scheduler

@property
def fixed_sequence_len(self)->int:
@@ -185,6 +255,24 @@ def target_column_to_train(self)->str:
def target_column_to_train(self, column:str):
self._target_column_to_train = column

def set_lr_scheduler_class(self, lr_scheduler_class:LR_SchedulerInterface) -> None:
"""
Set the learning rate scheduler class. The user must pass a class (not an instance) that is a subclass of
LR_SchedulerInterface, because the current implementation creates an instance of it within this class.

Parameters
----------
lr_scheduler_class : LR_SchedulerInterface
The learning rate scheduler class. Since an instance of it is created within this class,
the ModelInterface requires the class to accept the arguments `optimizer`, `num_warmup_steps`, and `num_training_steps`.

"""
if not issubclass(lr_scheduler_class, LR_SchedulerInterface):
raise ValueError(
"The lr_scheduler_class must be a subclass of LR_SchedulerInterface"
)
else:
self.lr_scheduler_class = lr_scheduler_class
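As a sketch of what the new hook enables (the wrapper below is hypothetical and not part of this PR), a loss-driven scheduler such as torch's ReduceLROnPlateau could be adapted to the interface and registered on a model:

import torch
from peptdeep.model.model_interface import LR_SchedulerInterface

class PlateauLR_Scheduler(LR_SchedulerInterface):
    """Hypothetical adapter: halve the lr when the epoch loss stops improving."""
    def __init__(self, optimizer:torch.optim.Optimizer,
                 num_warmup_steps:int, num_training_steps:int, **kwargs):
        # the step counts are accepted only to satisfy the expected signature;
        # ReduceLROnPlateau does not use them
        self.optimizer = optimizer
        self.scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer, mode='min', factor=0.5, patience=2
        )

    def step(self, epoch:int, loss:float):
        # decisions are driven by the monitored loss, not the epoch number
        self.scheduler.step(loss)

    def get_last_lr(self):
        # one lr per param group, matching the [0] indexing in train_with_warmup
        return [group['lr'] for group in self.optimizer.param_groups]

# assuming `model` is an instance of a ModelInterface sub-class:
# model.set_lr_scheduler_class(PlateauLR_Scheduler)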
def set_device(self,
device_type:str = 'gpu',
device_ids:list = []
@@ -304,8 +392,8 @@ def train_with_warmup(self,
batch_size, verbose_each_epoch,
**kwargs
)

lr_scheduler.step()
lr_scheduler.step(epoch=epoch, loss=np.mean(batch_cost))
if verbose: print(
f'[Training] Epoch={epoch+1}, lr={lr_scheduler.get_last_lr()[0]}, loss={np.mean(batch_cost)}'
)
@@ -635,8 +723,8 @@ def _train_one_epoch_by_padding_zeros(self,
else:
batch_cost.append(
self._train_one_batch(targets, features)
)
)

if verbose_each_epoch:
batch_tqdm.set_description(
f'Epoch={epoch+1}, batch={len(batch_cost)}, loss={batch_cost[-1]:.4f}'
@@ -896,8 +984,10 @@ def set_lr(self, lr:float):
def _get_lr_schedule_with_warmup(self, warmup_epoch, epoch):
if warmup_epoch > epoch:
warmup_epoch = epoch//2
return get_cosine_schedule_with_warmup(
self.optimizer, warmup_epoch, epoch
return self.lr_scheduler_class(
self.optimizer,
num_warmup_steps=warmup_epoch,
num_training_steps=epoch
)

def _pad_zeros_if_fixed_len(self, precursor_df:pd.DataFrame):
@@ -928,4 +1018,4 @@ def _check_predict_in_order(self, precursor_df:pd.DataFrame):
if is_precursor_sorted(precursor_df):
self._predict_in_order = True
else:
self._predict_in_order = False
self._predict_in_order = False