import tensorflow as tf

def smooth_L1(inputs, targets):
    """Element-wise smooth-L1 (Huber) loss: 0.5 * d^2 if |d| < 1, else |d| - 0.5."""
    inside = tf.subtract(inputs, targets)
    smooth_l1_sign = tf.cast(tf.less(tf.abs(inside), 1.0), tf.float32)
    smooth_l1_option1 = tf.multiply(tf.multiply(inside, inside), 0.5)
    smooth_l1_option2 = tf.subtract(tf.abs(inside), 0.5)
    smooth_l1_result = tf.add(tf.multiply(smooth_l1_option1, smooth_l1_sign),
                              tf.multiply(smooth_l1_option2, tf.abs(tf.subtract(smooth_l1_sign, 1.0))))
    return smooth_l1_result
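
# Sanity-check sketch (not from the original repo): the Huber transition sits
# at |d| = 1, so d = 0.5 gives 0.5 * 0.25 = 0.125 and d = 2.0 gives 2.0 - 0.5 = 1.5.
# preds = tf.constant([[0.5, 2.0]], dtype=tf.float32)
# gts = tf.zeros_like(preds)
# with tf.Session() as sess:
#     print(sess.run(smooth_L1(preds, gts)))  # ~[[0.125, 1.5]]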

def L2_loss(inputs, targets, reduction=True):
    """Element-wise squared-error loss, optionally summed over the last axis."""
    with tf.name_scope("l2_regress_loss"):
        inside = tf.pow(tf.subtract(inputs, targets), 2)  # N * T * 4
        if reduction:
            l2_loss = 0.5 * tf.reduce_sum(inside, axis=-1)  # N * T
        else:
            l2_loss = 0.5 * inside
    return l2_loss
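
# Usage sketch (illustrative shapes): with reduction=True the last axis is
# summed, so two unit errors per step give 0.5 * (1 + 1) = 1.0.
# p = tf.ones([1, 1, 2], dtype=tf.float32)
# g = tf.zeros([1, 1, 2], dtype=tf.float32)
# with tf.Session() as sess:
#     print(sess.run(L2_loss(p, g)))  # [[1.0]]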

def params_regress_loss(pred_params, gt_params, img_size=None, char_size=None, type='l1'):
    """
    :param pred_params: N * T * 4
    :param gt_params: N * T * 4
    :param img_size: [H, W]
    :param char_size: N * T * 2
    :param type: 'l1' for smooth-L1, 'l2' for squared error
    :return: loss of shape N * T
    """
    with tf.name_scope("smooth_l1_regress_loss"):
        N, T, C = pred_params.shape.as_list()
        if img_size is not None:
            # 4: [H, W, (0.5*H)^2, (0.5*W)^2]
            img_size_ = tf.concat([img_size, 0.25 * tf.pow(img_size, 2)], axis=0)
            img_size_ = tf.cast(img_size_, tf.float32)
            img_size_ = tf.expand_dims(tf.expand_dims(img_size_, axis=0), axis=0)
            img_size_ = tf.tile(img_size_, [N, T, 1])  # N * T * 4
            pred_params = pred_params / img_size_
            gt_params = gt_params / img_size_
        if char_size is not None:
            # Normalize. N * T * 4: [0.5*W, 0.5*H, (0.5*W)^2, (0.5*H)^2]
            char_size_ = tf.concat([0.5 * char_size, 0.25 * tf.pow(char_size, 2)], axis=2)
            pred_params = pred_params / char_size_
            gt_params = gt_params / char_size_
        if type == 'l1':
            l1_loss = smooth_L1(tf.reshape(pred_params, [-1, C]), tf.reshape(gt_params, [-1, C]))
            l1_loss = tf.reduce_sum(l1_loss, axis=1)
            loss = tf.reshape(l1_loss, shape=[N, T])  # N * T
        elif type == 'l2':
            loss = L2_loss(pred_params, gt_params)  # N * T
        else:
            raise ValueError("Unknown regression loss type: %s" % type)
    return loss
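
# Usage sketch (batch/step sizes here are illustrative, not from the repo):
# passing img_size divides the four params by [H, W, (0.5*H)^2, (0.5*W)^2]
# before the smooth-L1 is applied.
# pred = tf.random_normal([2, 5, 4])
# gt = tf.random_normal([2, 5, 4])
# img_size = tf.constant([32.0, 100.0])  # [H, W]
# loss = params_regress_loss(pred, gt, img_size=img_size, type='l1')  # 2 * 5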

def attention_regress_loss(x, y, type="l1"):
    """
    Regress attention maps against the ground truth.
    :param x: N * T * (H * W)
    :param y: N * T * (H * W)
    :return: loss of shape N * T
    """
    N, T, C = x.shape.as_list()
    x = tf.reshape(x, [-1, C])
    y = tf.reshape(y, [-1, C])
    if type == 'l1':
        l1_loss = smooth_L1(x, y)
        l1_loss = tf.reduce_sum(l1_loss, axis=1)
        loss = tf.reshape(l1_loss, shape=[N, T])  # N * T
    else:
        raise NotImplementedError("Only type='l1' is implemented")
    return loss
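
# Usage sketch: x and y are per-step attention maps flattened to H*W; the
# softmax inputs below are made up for illustration.
# att_pred = tf.nn.softmax(tf.random_normal([2, 5, 64]), axis=-1)
# att_gt = tf.nn.softmax(tf.random_normal([2, 5, 64]), axis=-1)
# loss = attention_regress_loss(att_pred, att_gt)  # 2 * 5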

def CrossEntropyLoss(x, y, axis=-1, reduction=True):
    """Cross-entropy via the official op; x must be logits (pre-softmax)."""
    with tf.name_scope("cross_entropy_loss"):
        # Manual version (x as probabilities):
        # x = x + 1e-5
        # ce = -1. * tf.multiply(y, tf.log(x))
        # if reduction:
        #     ce = tf.reduce_sum(ce, axis=axis)
        # Here we use the official implementation, so pass logits (before softmax).
        print("Official implementation of cross-entropy; be careful: pass logits, not softmax outputs")
        ce = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=x, dim=-1)
    return ce
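
# Example (sketch): because the op applies softmax internally, feeding softmax
# outputs would double-apply softmax; the tensors below are illustrative.
# logits = tf.random_normal([2, 5, 10])
# labels = tf.one_hot(tf.zeros([2, 5], dtype=tf.int32), depth=10)
# ce = CrossEntropyLoss(logits, labels)  # 2 * 5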

def KLDivLoss(x, y, axis=-1, reduction=True):
    """KL(y || x) for probability tensors; a small epsilon guards against log(0)."""
    with tf.name_scope("kl_div_loss"):
        x = tf.add(x, 1e-5)  # avoid nan or inf
        y = tf.add(y, 1e-5)
        kl = tf.multiply(y, tf.log(y) - tf.log(x))
        # kl = tf.where(tf.is_inf(kl), tf.zeros_like(kl), kl)
        # kl = tf.where(tf.is_nan(kl), tf.zeros_like(kl), kl)
        if reduction:
            kl = tf.reduce_sum(kl, axis=axis)
    return kl
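
# Example (sketch): KL(y || x) is 0 when the distributions match (up to the
# 1e-5 smoothing), e.g.:
# p = tf.constant([[0.5, 0.5]], dtype=tf.float32)
# with tf.Session() as sess:
#     print(sess.run(KLDivLoss(p, p)))  # ~[0.0]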

def KLDivLossContrib(x, y):
    """Symmetrized KL divergence between two categorical distributions via tf.distributions."""
    def kl_double_loss(x, y):
        with tf.name_scope("kl_loss"):
            x = x + 1e-5
            y = y + 1e-5
            x = tf.distributions.Categorical(probs=x)
            y = tf.distributions.Categorical(probs=y)
            return 0.5 * (tf.distributions.kl_divergence(x, y) + tf.distributions.kl_divergence(y, x))
    return kl_double_loss(x, y)
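
# Example (sketch): the symmetric form 0.5 * (KL(x||y) + KL(y||x)) is half the
# Jeffreys divergence; inputs are probability vectors over the last axis.
# a = tf.constant([0.7, 0.3], dtype=tf.float32)
# b = tf.constant([0.4, 0.6], dtype=tf.float32)
# d = KLDivLossContrib(a, b)  # scalar, symmetric in a and b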

def gaussian_kl(p, q, reduction=True):
    """Computes the KL divergence KL(q || p) between two isotropic Gaussians.

    Each distribution is represented as (mu, sigma^2) stacked along the last
    axis, i.e. a tensor of shape N * T * 2; the sigma entries are variances.
    Args:
        p: N * T * 2 source distribution (mu, sigma^2).
        q: N * T * 2 target distribution (mu, sigma^2).
    Returns:
        A tensor representing KL(q, p): N if reduction is True, else N * T.
    """
    mu2, sigma_2 = tf.unstack(p, axis=2)  # sigma_2 is a variance
    mu1, sigma_1 = tf.unstack(q, axis=2)  # sigma_1 is a variance
    kl = 0.5 * (tf.log(sigma_2) - tf.log(sigma_1) + sigma_1 / sigma_2
                + tf.square(mu1 - mu2) / sigma_2 - 1)  # N * T
    if reduction:
        return tf.reduce_sum(kl, axis=-1)  # N
    return kl
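
# Worked check (sketch): for q = N(0, 1) and p = N(1, 1) per element,
# KL(q || p) = 0.5 * (0 + 1 + 1 - 1) = 0.5; reduction then sums over T = 3.
# q = tf.stack([tf.zeros([1, 3]), tf.ones([1, 3])], axis=2)  # mu=0, sigma^2=1
# p = tf.stack([tf.ones([1, 3]), tf.ones([1, 3])], axis=2)   # mu=1, sigma^2=1
# with tf.Session() as sess:
#     print(sess.run(gaussian_kl(p, q)))  # ~[1.5]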

def two_d_gaussian_kl_div_loss(pred_params, gt_params):
    """KL divergence between two axis-aligned 2-D Gaussians, averaged over x and y."""
    with tf.name_scope("gauss_kl_div_loss"):
        p_mu_x, p_mu_y, p_sigma_x, p_sigma_y = tf.unstack(pred_params, axis=2)
        q_mu_x, q_mu_y, q_sigma_x, q_sigma_y = tf.unstack(gt_params, axis=2)
        p_x = tf.stack([p_mu_x, p_sigma_x], axis=2)
        q_x = tf.stack([q_mu_x, q_sigma_x], axis=2)
        p_y = tf.stack([p_mu_y, p_sigma_y], axis=2)
        q_y = tf.stack([q_mu_y, q_sigma_y], axis=2)
        x_gauss_kl = gaussian_kl(p_x, q_x, False)
        y_gauss_kl = gaussian_kl(p_y, q_y, False)
        return 0.5 * (x_gauss_kl + y_gauss_kl)

if __name__ == '__main__':
    import numpy as np
    import os
    os.environ['CUDA_VISIBLE_DEVICES'] = '6'
    # demo = np.random.randn(4, 30, 240)
    # demo = tf.constant(dtype=tf.float32, value=demo)
    # label = np.random.randn(4, 30, 240)
    # label = tf.constant(dtype=tf.float32, value=label)
    #
    # loss1 = tf.nn.softmax_cross_entropy_with_logits_v2(labels=label, logits=demo, dim=-1)
    # loss2 = CrossEntropyLoss(tf.nn.softmax(demo, axis=-1), label)
    #
    # with tf.Session() as sess:
    #     print("Official:", loss1.eval(session=sess))
    #     print("Mine:", loss2.eval(session=sess))
    # demo = np.random.randn(4, 25, 4)
    # demo = tf.constant(dtype=tf.float32, value=demo)
    # label = np.random.randn(4, 25, 4)
    # label = tf.constant(dtype=tf.float32, value=label)
    # NOTE: the label below has zero variance, so tf.log(0) evaluates to -inf
    # and the printed KL is inf; use a small positive sigma for a finite check.
    demo = tf.concat([tf.zeros(dtype=tf.float32, shape=[4, 25, 2]), tf.ones(dtype=tf.float32, shape=[4, 25, 2])], axis=-1)
    label = tf.concat([tf.zeros(dtype=tf.float32, shape=[4, 25, 2]), tf.zeros(dtype=tf.float32, shape=[4, 25, 2])], axis=-1)
    loss = two_d_gaussian_kl_div_loss(demo, label)
    with tf.Session() as sess:
        print(loss.eval())
    print("pause")