# activation.py
import numpy as np
import pandas as pd
import os
import cv2
import timm
import albumentations
from albumentations.pytorch.transforms import ToTensorV2
import torch
import torch.nn.functional as F
from torch import nn
from torch.optim import Adam
import math
from configuration import *
# credit: https://github.com/tyunist/memory_efficient_mish_swish/blob/master/mish.py
'''I just wanted to understand and implement a custom backward activation in PyTorch, so I chose this.
You can also simply use the equivalent module below instead:

class Mish(nn.Module):
    def __init__(self):
        super(Mish, self).__init__()

    def forward(self, input):
        return input * torch.tanh(F.softplus(input))
'''

class Mish_func(torch.autograd.Function):
    """Mish activation, f(x) = x * tanh(softplus(x)), with a hand-written backward pass."""

    @staticmethod
    def forward(ctx, i):
        result = i * torch.tanh(F.softplus(i))
        ctx.save_for_backward(i)
        return result

    @staticmethod
    def backward(ctx, grad_output):
        i = ctx.saved_tensors[0]

        v = 1. + i.exp()                 # v = 1 + e^x
        h = v.log()                      # h = softplus(x) = log(1 + e^x)
        grad_gh = 1. / h.cosh().pow_(2)  # d tanh(h)/dh = sech^2(h)

        # Note that grad_hv * grad_vx = sigmoid(x)
        # grad_hv = 1./v
        # grad_vx = i.exp()
        grad_hx = i.sigmoid()            # d softplus(x)/dx = sigmoid(x)
        grad_gx = grad_gh * grad_hx      # grad_hv * grad_vx

        # f'(x) = tanh(softplus(x)) + x * d[tanh(softplus(x))]/dx
        grad_f = torch.tanh(F.softplus(i)) + i * grad_gx
        return grad_output * grad_f


class Mish(nn.Module):
    """nn.Module wrapper around Mish_func so it can be dropped into any model."""

    def __init__(self, **kwargs):
        super().__init__()
        print("Mish initialized")

    def forward(self, input_tensor):
        return Mish_func.apply(input_tensor)
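

# A minimal sanity-check sketch (not in the original file): torch.autograd.gradcheck
# compares the hand-written backward of Mish_func against numerically estimated
# gradients. It expects double-precision inputs with requires_grad=True.
def _gradcheck_mish():
    x = torch.randn(8, dtype=torch.double, requires_grad=True)
    return torch.autograd.gradcheck(Mish_func.apply, (x,), eps=1e-6, atol=1e-4)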


def replace_activations(model, existing_layer, new_layer):
    """Recursively replace every `existing_layer` module in `model` with `new_layer`.

    The same `new_layer` instance is reused for every match, which is fine for
    parameter-free activations such as Mish.
    """
    for name, module in reversed(list(model._modules.items())):
        if len(list(module.children())) > 0:
            # Recurse into child containers first.
            model._modules[name] = replace_activations(module, existing_layer, new_layer)

        if isinstance(module, existing_layer):
            model._modules[name] = new_layer

    return model
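

# A hedged usage sketch (not in the original file): swap every nn.SiLU in a timm
# backbone for the Mish module above. 'tf_efficientnet_b0' is only an illustration;
# this assumes the chosen timm model builds its activations as nn.SiLU instances
# (older timm versions may use their own Swish class instead).
def _mish_backbone_example():
    backbone = timm.create_model('tf_efficientnet_b0', pretrained=False)
    backbone = replace_activations(backbone, nn.SiLU, Mish())
    dummy = torch.randn(1, 3, 224, 224)
    return backbone(dummy).shape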