diff --git a/nets/classification_net.py b/nets/classification_net.py
index a60bbe2..c20036e 100644
--- a/nets/classification_net.py
+++ b/nets/classification_net.py
@@ -10,7 +10,9 @@ class ClassificationNet(keras.Model):
     def __init__(self, num_class, **kwargs):
         super().__init__(self, **kwargs)
         # classification net
-        self.conv1 = DeformableConvLayer(32, [5, 5], num_deformable_group=1, activation='relu')  # out 24
+        self.conv1 = DeformableConvLayer(32, [5, 5],
+                                         num_deformable_group=1,
+                                         activation='relu')  # out 24
         # self.conv1 = Conv2D(32, [5, 5], activation='relu')
         self.conv2 = Conv2D(32, [5, 5], activation='relu')  # out 20
         self.max_pool1 = MaxPool2D(2, [2, 2])  # out 10
@@ -34,12 +36,31 @@ def call(self, inputs, training=None, mask=None):
     def train(self, optimizer, x, y):
+        """Run one optimisation step on a batch and report the result.
+
+        :param optimizer: optimizer whose ``apply_gradients`` is called.
+        :param x: batch of inputs fed to the model.
+        :param y: labels for ``x``, passed to the softmax cross-entropy loss.
+        :return: tuple ``(mean loss, softmax of the logits)``.
+        """
         with tf.GradientTape() as tape:
-            logits = self.__call__(x)
-            loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits)
+            logits = self(x)
+            loss = tf.nn.softmax_cross_entropy_with_logits(labels=y,
+                                                           logits=logits)
             loss = tf.reduce_mean(loss)
-        grads = tape.gradient(loss, self.variables)
-        optimizer.apply_gradients(zip(grads, self.variables))
+        # Differentiate only w.r.t. trainable weights: non-trainable variables
+        # are not watched by the tape and would yield ``None`` gradients.
+        trainable = self.trainable_variables
+        grads = tape.gradient(loss, trainable)
+        optimizer.apply_gradients(zip(grads, trainable))
         return loss, tf.nn.softmax(logits)
 
     def accuracy(self, prediction, y):
+        """Return the fraction of samples whose arg-max class matches ``y``.
+
+        :param prediction: per-class scores; arg-max is taken over the last axis.
+        :param y: labels in the same layout; arg-max is taken over the last axis.
+        :return: ``float32`` mean of the per-sample matches.
+        """
-        eq = tf.to_float(tf.equal(tf.argmax(prediction, axis=-1), tf.argmax(y, axis=-1)))
+        eq = tf.cast(
+            tf.equal(tf.argmax(prediction, axis=-1), tf.argmax(y, axis=-1)),
+            tf.float32)
         return tf.reduce_mean(eq)
diff --git a/nets/deformable_conv_layer.py b/nets/deformable_conv_layer.py
index 06b2128..172180c 100644
--- a/nets/deformable_conv_layer.py
+++ b/nets/deformable_conv_layer.py
@@ -33,23 +33,22 @@ def __init__(self,
         :param num_deformable_group: split output channels into groups, offset shared in each group. If
         this parameter is None, then set  num_deformable_group=filters.
         """
-        super().__init__(
-            filters=filters,
-            kernel_size=kernel_size,
-            strides=strides,
-            padding=padding,
-            data_format=data_format,
-            dilation_rate=dilation_rate,
-            activation=activation,
-            use_bias=use_bias,
-            kernel_initializer=kernel_initializer,
-            bias_initializer=bias_initializer,
-            kernel_regularizer=kernel_regularizer,
-            bias_regularizer=bias_regularizer,
-            activity_regularizer=activity_regularizer,
-            kernel_constraint=kernel_constraint,
-            bias_constraint=bias_constraint,
-            **kwargs)
+        super().__init__(filters=filters,
+                         kernel_size=kernel_size,
+                         strides=strides,
+                         padding=padding,
+                         data_format=data_format,
+                         dilation_rate=dilation_rate,
+                         activation=activation,
+                         use_bias=use_bias,
+                         kernel_initializer=kernel_initializer,
+                         bias_initializer=bias_initializer,
+                         kernel_regularizer=kernel_regularizer,
+                         bias_regularizer=bias_regularizer,
+                         activity_regularizer=activity_regularizer,
+                         kernel_constraint=kernel_constraint,
+                         bias_constraint=bias_constraint,
+                         **kwargs)
         self.kernel = None
         self.bias = None
         self.offset_layer_kernel = None
@@ -57,7 +56,8 @@ def __init__(self,
         if num_deformable_group is None:
             num_deformable_group = filters
         if filters % num_deformable_group != 0:
-            raise ValueError('"filters" mod "num_deformable_group" must be zero')
+            raise ValueError(
+                '"filters" mod "num_deformable_group" must be zero')
         self.num_deformable_group = num_deformable_group
 
     def build(self, input_shape):
@@ -65,36 +65,36 @@ def build(self, input_shape):
         # kernel_shape = self.kernel_size + (input_dim, self.filters)
         # we want to use depth-wise conv
         kernel_shape = self.kernel_size + (self.filters * input_dim, 1)
-        self.kernel = self.add_weight(
-            name='kernel',
-            shape=kernel_shape,
-            initializer=self.kernel_initializer,
-            regularizer=self.kernel_regularizer,
-            constraint=self.kernel_constraint,
-            trainable=True,
-            dtype=self.dtype)
+        self.kernel = self.add_weight(name='kernel',
+                                      shape=kernel_shape,
+                                      initializer=self.kernel_initializer,
+                                      regularizer=self.kernel_regularizer,
+                                      constraint=self.kernel_constraint,
+                                      trainable=True,
+                                      dtype=self.dtype)
         if self.use_bias:
-            self.bias = self.add_weight(
-                name='bias',
-                shape=(self.filters,),
-                initializer=self.bias_initializer,
-                regularizer=self.bias_regularizer,
-                constraint=self.bias_constraint,
-                trainable=True,
-                dtype=self.dtype)
+            self.bias = self.add_weight(name='bias',
+                                        shape=(self.filters, ),
+                                        initializer=self.bias_initializer,
+                                        regularizer=self.bias_regularizer,
+                                        constraint=self.bias_constraint,
+                                        trainable=True,
+                                        dtype=self.dtype)
 
         # create offset conv layer
-        offset_num = self.kernel_size[0] * self.kernel_size[1] * self.num_deformable_group
+        offset_num = self.kernel_size[0] * self.kernel_size[
+            1] * self.num_deformable_group
         self.offset_layer_kernel = self.add_weight(
             name='offset_layer_kernel',
-            shape=self.kernel_size + (input_dim, offset_num * 2),  # 2 means x and y axis
+            shape=self.kernel_size +
+            (input_dim, offset_num * 2),  # 2 means x and y axis
             initializer=tf.zeros_initializer(),
             regularizer=self.kernel_regularizer,
             trainable=True,
             dtype=self.dtype)
         self.offset_layer_bias = self.add_weight(
             name='offset_layer_bias',
-            shape=(offset_num * 2,),
+            shape=(offset_num * 2, ),
             initializer=tf.zeros_initializer(),
             # initializer=tf.random_uniform_initializer(-5, 5),
             regularizer=self.bias_regularizer,
@@ -105,7 +105,7 @@ def build(self, input_shape):
     def call(self, inputs, training=None, **kwargs):
         # get offset, shape [batch_size, out_h, out_w, filter_h, * filter_w * channel_out * 2]
         offset = tf.nn.conv2d(inputs,
-                              filter=self.offset_layer_kernel,
+                              filters=self.offset_layer_kernel,
                               strides=[1, *self.strides, 1],
                               padding=self.padding.upper(),
                               dilations=[1, *self.dilation_rate, 1])
@@ -117,8 +117,10 @@ def call(self, inputs, training=None, **kwargs):
         # some length
         batch_size = int(inputs.get_shape()[0])
         channel_in = int(inputs.get_shape()[-1])
-        in_h, in_w = [int(i) for i in inputs.get_shape()[1: 3]]  # input feature map size
-        out_h, out_w = [int(i) for i in offset.get_shape()[1: 3]]  # output feature map size
+        in_h, in_w = [int(i) for i in inputs.get_shape()[1:3]
+                      ]  # input feature map size
+        out_h, out_w = [int(i) for i in offset.get_shape()[1:3]
+                        ]  # output feature map size
         filter_h, filter_w = self.kernel_size
 
         # get x, y axis offset
@@ -128,9 +130,12 @@ def call(self, inputs, training=None, **kwargs):
         # input feature map gird coordinates
         y, x = self._get_conv_indices([in_h, in_w])
         y, x = [tf.expand_dims(i, axis=-1) for i in [y, x]]
-        y, x = [tf.tile(i, [batch_size, 1, 1, 1, self.num_deformable_group]) for i in [y, x]]
-        y, x = [tf.reshape(i, [*i.shape[0: 3], -1]) for i in [y, x]]
-        y, x = [tf.to_float(i) for i in [y, x]]
+        y, x = [
+            tf.tile(i, [batch_size, 1, 1, 1, self.num_deformable_group])
+            for i in [y, x]
+        ]
+        y, x = [tf.reshape(i, [*i.shape[0:3], -1]) for i in [y, x]]
+        y, x = [tf.cast(i, tf.float32) for i in [y, x]]
 
         # add offset
         y, x = y + y_off, x + x_off
@@ -138,7 +143,7 @@ def call(self, inputs, training=None, **kwargs):
         x = tf.clip_by_value(x, 0, in_w - 1)
 
         # get four coordinates of points around (x, y)
-        y0, x0 = [tf.to_int32(tf.floor(i)) for i in [y, x]]
+        y0, x0 = [tf.cast(tf.floor(i), tf.int32) for i in [y, x]]
         y1, x1 = y0 + 1, x0 + 1
         # clip
         y0, y1 = [tf.clip_by_value(i, 0, in_h - 1) for i in [y0, y1]]
@@ -146,10 +151,13 @@ def call(self, inputs, training=None, **kwargs):
 
         # get pixel values
         indices = [[y0, x0], [y0, x1], [y1, x0], [y1, x1]]
-        p0, p1, p2, p3 = [DeformableConvLayer._get_pixel_values_at_point(inputs, i) for i in indices]
+        p0, p1, p2, p3 = [
+            DeformableConvLayer._get_pixel_values_at_point(inputs, i)
+            for i in indices
+        ]
 
         # cast to float
-        x0, x1, y0, y1 = [tf.to_float(i) for i in [x0, x1, y0, y1]]
+        x0, x1, y0, y1 = [tf.cast(i, tf.float32) for i in [x0, x1, y0, y1]]
         # weights
         w0 = (y1 - y) * (x1 - x)
         w1 = (y1 - y) * (x - x0)
@@ -161,19 +169,28 @@ def call(self, inputs, training=None, **kwargs):
         pixels = tf.add_n([w0 * p0, w1 * p1, w2 * p2, w3 * p3])
 
         # reshape the "big" feature map
-        pixels = tf.reshape(pixels, [batch_size, out_h, out_w, filter_h, filter_w, self.num_deformable_group, channel_in])
+        pixels = tf.reshape(pixels, [
+            batch_size, out_h, out_w, filter_h, filter_w,
+            self.num_deformable_group, channel_in
+        ])
         pixels = tf.transpose(pixels, [0, 1, 3, 2, 4, 5, 6])
-        pixels = tf.reshape(pixels, [batch_size, out_h * filter_h, out_w * filter_w, self.num_deformable_group, channel_in])
+        pixels = tf.reshape(pixels, [
+            batch_size, out_h * filter_h, out_w * filter_w,
+            self.num_deformable_group, channel_in
+        ])
 
         # copy channels to same group
         feat_in_group = self.filters // self.num_deformable_group
         pixels = tf.tile(pixels, [1, 1, 1, 1, feat_in_group])
-        pixels = tf.reshape(pixels, [batch_size, out_h * filter_h, out_w * filter_w, -1])
+        pixels = tf.reshape(
+            pixels, [batch_size, out_h * filter_h, out_w * filter_w, -1])
 
         # depth-wise conv
-        out = tf.nn.depthwise_conv2d(pixels, self.kernel, [1, filter_h, filter_w, 1], 'VALID')
+        out = tf.nn.depthwise_conv2d(pixels, self.kernel,
+                                     [1, filter_h, filter_w, 1], 'VALID')
         # add the output feature maps in the same group
-        out = tf.reshape(out, [batch_size, out_h, out_w, self.filters, channel_in])
+        out = tf.reshape(out,
+                         [batch_size, out_h, out_w, self.filters, channel_in])
         out = tf.reduce_sum(out, axis=-1)
         if self.use_bias:
             out += self.bias
@@ -188,14 +205,17 @@ def _pad_input(self, inputs):
         # When padding is 'same', we should pad the feature map.
         # if padding == 'same', output size should be `ceil(input / stride)`
         if self.padding == 'same':
-            in_shape = inputs.get_shape().as_list()[1: 3]
+            in_shape = inputs.get_shape().as_list()[1:3]
             padding_list = []
             for i in range(2):
                 filter_size = self.kernel_size[i]
                 dilation = self.dilation_rate[i]
-                dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
-                same_output = (in_shape[i] + self.strides[i] - 1) // self.strides[i]
-                valid_output = (in_shape[i] - dilated_filter_size + self.strides[i]) // self.strides[i]
+                dilated_filter_size = filter_size + (filter_size -
+                                                     1) * (dilation - 1)
+                same_output = (in_shape[i] + self.strides[i] -
+                               1) // self.strides[i]
+                valid_output = (in_shape[i] - dilated_filter_size +
+                                self.strides[i]) // self.strides[i]
                 if same_output == valid_output:
                     padding_list += [0, 0]
                 else:
@@ -203,10 +223,12 @@ def _pad_input(self, inputs):
                     p_0 = p // 2
                     padding_list += [p_0, p - p_0]
             if sum(padding_list) != 0:
-                padding = [[0, 0],
-                           [padding_list[0], padding_list[1]],  # top, bottom padding
-                           [padding_list[2], padding_list[3]],  # left, right padding
-                           [0, 0]]
+                padding = [
+                    [0, 0],
+                    [padding_list[0], padding_list[1]],  # top, bottom padding
+                    [padding_list[2], padding_list[3]],  # left, right padding
+                    [0, 0]
+                ]
                 inputs = tf.pad(inputs, padding)
         return inputs
 
@@ -216,16 +238,17 @@ def _get_conv_indices(self, feature_map_size):
         :param feature_map_size:
         :return: y, x with shape [1, out_h, out_w, filter_h * filter_w]
         """
-        feat_h, feat_w = [int(i) for i in feature_map_size[0: 2]]
+        feat_h, feat_w = [int(i) for i in feature_map_size[0:2]]
 
         x, y = tf.meshgrid(tf.range(feat_w), tf.range(feat_h))
-        x, y = [tf.reshape(i, [1, *i.get_shape(), 1]) for i in [x, y]]  # shape [1, h, w, 1]
-        x, y = [tf.image.extract_image_patches(i,
-                                               [1, *self.kernel_size, 1],
-                                               [1, *self.strides, 1],
-                                               [1, *self.dilation_rate, 1],
-                                               'VALID')
-                for i in [x, y]]  # shape [1, out_h, out_w, filter_h * filter_w]
+        x, y = [tf.reshape(i, [1, *i.get_shape(), 1])
+                for i in [x, y]]  # shape [1, h, w, 1]
+        x, y = [
+            tf.image.extract_patches(i, [1, *self.kernel_size, 1],
+                                     [1, *self.strides, 1],
+                                     [1, *self.dilation_rate, 1], 'VALID')
+            for i in [x, y]
+        ]  # shape [1, out_h, out_w, filter_h * filter_w]
         return y, x
 
     @staticmethod
@@ -237,10 +260,9 @@ def _get_pixel_values_at_point(inputs, indices):
         :return:
         """
         y, x = indices
-        batch, h, w, n = y.get_shape().as_list()[0: 4]
+        batch, h, w, n = y.get_shape().as_list()[0:4]
 
         batch_idx = tf.reshape(tf.range(0, batch), (batch, 1, 1, 1))
         b = tf.tile(batch_idx, (1, h, w, n))
         pixel_idx = tf.stack([b, y, x], axis=-1)
         return tf.gather_nd(inputs, pixel_idx)
-
diff --git a/train.py b/train.py
index d920954..904d420 100644
--- a/train.py
+++ b/train.py
@@ -4,71 +4,84 @@
 import tensorflow as tf
 from nets.classification_net import ClassificationNet
 
-conf = tf.ConfigProto()
-conf.gpu_options.allow_growth = True
-tf.enable_eager_execution(conf)
+# conf = tf.config.experimental.ConfigProto()
+# conf.gpu_options.allow_growth = True
+# tf.enable_eager_execution(conf)
 
 SEED = 1234
-tf.set_random_seed(SEED)
-
+tf.random.set_seed(SEED)
 
 NUM_CLASS = 10
 IMG_SHAPE = [28, 28]
-(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data('/data/ajy/datasets/MNIST/mnist.npz')
+(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()
 # scale to (-1, 1), shape is (28, 28, 1)
-x_train, x_test = [(np.expand_dims(i / 127.5 - 1, axis=-1)).astype(np.float32) for i in [x_train, x_test]]
-y_train, y_test = tf.one_hot(y_train, depth=NUM_CLASS), tf.one_hot(y_test, depth=NUM_CLASS)
+x_train, x_test = [(np.expand_dims(i / 127.5 - 1, axis=-1)).astype(np.float32)
+                   for i in [x_train, x_test]]
+y_train, y_test = tf.one_hot(y_train,
+                             depth=NUM_CLASS), tf.one_hot(y_test,
+                                                          depth=NUM_CLASS)
 
 
 def get_dataset(batch_size, x, y, map_fn, repeat=False):
     dataset = tf.data.Dataset.from_tensor_slices((x, y))
     if repeat:
         dataset = dataset.repeat()
-    dataset = dataset.shuffle(batch_size * 10).map(map_fn, num_parallel_calls=2).batch(batch_size).prefetch(1)
+    dataset = dataset.shuffle(batch_size * 10).map(  # shuffle buffer holds 10 batches' worth of samples
+        map_fn, num_parallel_calls=2).batch(batch_size).prefetch(1)  # map_fn runs per sample, before batching
     return dataset
 
 
 def distorted_image_fn(image, label):
-    # random rotate
-    # 80% ->(-30°, 30°), 20%->(-90°,-30°)&(30°,90°)
-    tf.set_random_seed(SEED)
-    small_angle = tf.cast(tf.random_uniform([1], maxval=1.) <= 0.8, tf.int32)
-    angle = tf.random_uniform([1], minval=0, maxval=30, dtype=tf.int32) * small_angle + \
-            tf.random_uniform([1], minval=30, maxval=90, dtype=tf.int32) * (1 - small_angle)
-    negative = -1 + 2 * tf.random_uniform([1], minval=0, maxval=2, dtype=tf.int32)
-    angle = tf.to_float(negative * angle)
-    rotated_image = tf.contrib.image.rotate(image, angle * 3.1415926 / 180)
-    return rotated_image, label
+    """Training-set map function; currently returns its inputs unchanged.
+
+    The previous random rotation (80% of samples within +/-30 degrees, the
+    rest between 30 and 90 degrees either way) used
+    ``tf.contrib.image.rotate``. That call is not available in this port, so
+    the augmentation is switched off rather than left as unreachable code.
+
+    :param image: a single input image from the dataset.
+    :param label: the label paired with ``image``.
+    :return: the unmodified ``(image, label)`` pair.
+    """
+    return image, label
 
 
 def distorted_image_test_fn(image, label):
-    # random rotate
-    # (-135°, 135°)
-    tf.set_random_seed(SEED)
-    angle = tf.random_uniform([1], minval=0, maxval=135, dtype=tf.int32)
-    negative = -1 + 2 * tf.random_uniform([1], minval=0, maxval=2, dtype=tf.int32)
-    angle = tf.to_float(negative * angle)
-    rotated_image = tf.contrib.image.rotate(image, angle * 3.1415926 / 180)
-    return rotated_image, label
+    """Test-set map function; currently returns its inputs unchanged.
+
+    The former random rotation within +/-135 degrees relied on
+    ``tf.contrib.image.rotate``, which this port no longer calls.
+
+    :param image: a single input image from the dataset.
+    :param label: the label paired with ``image``.
+    :return: the unmodified ``(image, label)`` pair.
+    """
+    return image, label
 
 
 def main():
     batch_size = 16
 
-    dataset = get_dataset(batch_size, x_train, y_train, distorted_image_fn, repeat=True)
+    dataset = get_dataset(batch_size,
+                          x_train,
+                          y_train,
+                          distorted_image_fn,
+                          repeat=True)
     model = ClassificationNet(num_class=NUM_CLASS)
-    optimizer = tf.train.AdamOptimizer(learning_rate=0.001)
-    global_step = tf.train.get_or_create_global_step()
+    optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
+    global_step = tf.compat.v1.train.get_or_create_global_step()
 
     for i, (rotated_image, label) in enumerate(dataset, start=1):
         global_step.assign_add(1)
-        loss, prediction= model.train(optimizer, rotated_image, label)
+        loss, prediction = model.train(optimizer, rotated_image, label)
         acc = model.accuracy(prediction, label)
 
         # test
         if i % 1000 == 0:
             total_acc = 0
-            dataset_test = get_dataset(1000, x_test, y_test, distorted_image_test_fn).make_one_shot_iterator()
+            dataset_test = iter(  # TF2 Dataset has no make_one_shot_iterator()
+                get_dataset(1000, x_test, y_test,
+                            distorted_image_test_fn))
             split = 10000 // 1000
             for _ in range(split):
                 rotated_image_test, label_test = dataset_test.get_next()
@@ -79,9 +92,9 @@ def main():
             print('test accuracy: {}'.format(total_acc / split))
 
         if i % 10 == 0:
-            print("step: {}, loss: {}, train accuracy: {}".format(int(global_step), float(loss), float(acc)))
+            print("step: {}, loss: {}, train accuracy: {}".format(
+                int(global_step), float(loss), float(acc)))
 
 
 if __name__ == '__main__':
     main()
-