diff --git a/nets/classification_net.py b/nets/classification_net.py index a60bbe2..c20036e 100644 --- a/nets/classification_net.py +++ b/nets/classification_net.py @@ -10,7 +10,9 @@ class ClassificationNet(keras.Model): def __init__(self, num_class, **kwargs): super().__init__(self, **kwargs) # classification net - self.conv1 = DeformableConvLayer(32, [5, 5], num_deformable_group=1, activation='relu') # out 24 + self.conv1 = DeformableConvLayer(32, [5, 5], + num_deformable_group=1, + activation='relu') # out 24 # self.conv1 = Conv2D(32, [5, 5], activation='relu') self.conv2 = Conv2D(32, [5, 5], activation='relu') # out 20 self.max_pool1 = MaxPool2D(2, [2, 2]) # out 10 @@ -34,12 +36,15 @@ def call(self, inputs, training=None, mask=None): def train(self, optimizer, x, y): with tf.GradientTape() as tape: logits = self.__call__(x) - loss = tf.nn.softmax_cross_entropy_with_logits_v2(labels=y, logits=logits) + loss = tf.nn.softmax_cross_entropy_with_logits(labels=y, + logits=logits) loss = tf.reduce_mean(loss) grads = tape.gradient(loss, self.variables) optimizer.apply_gradients(zip(grads, self.variables)) return loss, tf.nn.softmax(logits) def accuracy(self, prediction, y): - eq = tf.to_float(tf.equal(tf.argmax(prediction, axis=-1), tf.argmax(y, axis=-1))) + eq = tf.cast( + tf.equal(tf.argmax(prediction, axis=-1), tf.argmax(y, axis=-1)), + tf.float32) return tf.reduce_mean(eq) diff --git a/nets/deformable_conv_layer.py b/nets/deformable_conv_layer.py index 06b2128..172180c 100644 --- a/nets/deformable_conv_layer.py +++ b/nets/deformable_conv_layer.py @@ -33,23 +33,22 @@ def __init__(self, :param num_deformable_group: split output channels into groups, offset shared in each group. If this parameter is None, then set num_deformable_group=filters. """ - super().__init__( - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) + super().__init__(filters=filters, + kernel_size=kernel_size, + strides=strides, + padding=padding, + data_format=data_format, + dilation_rate=dilation_rate, + activation=activation, + use_bias=use_bias, + kernel_initializer=kernel_initializer, + bias_initializer=bias_initializer, + kernel_regularizer=kernel_regularizer, + bias_regularizer=bias_regularizer, + activity_regularizer=activity_regularizer, + kernel_constraint=kernel_constraint, + bias_constraint=bias_constraint, + **kwargs) self.kernel = None self.bias = None self.offset_layer_kernel = None @@ -57,7 +56,8 @@ def __init__(self, if num_deformable_group is None: num_deformable_group = filters if filters % num_deformable_group != 0: - raise ValueError('"filters" mod "num_deformable_group" must be zero') + raise ValueError( + '"filters" mod "num_deformable_group" must be zero') self.num_deformable_group = num_deformable_group def build(self, input_shape): @@ -65,36 +65,36 @@ def build(self, input_shape): # kernel_shape = self.kernel_size + (input_dim, self.filters) # we want to use depth-wise conv kernel_shape = self.kernel_size + (self.filters * input_dim, 1) - self.kernel = self.add_weight( - name='kernel', - shape=kernel_shape, - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint, - trainable=True, - dtype=self.dtype) + self.kernel = self.add_weight(name='kernel', + shape=kernel_shape, + initializer=self.kernel_initializer, + regularizer=self.kernel_regularizer, + constraint=self.kernel_constraint, + trainable=True, + dtype=self.dtype) if self.use_bias: - self.bias = self.add_weight( - name='bias', - shape=(self.filters,), - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint, - trainable=True, - dtype=self.dtype) + self.bias = self.add_weight(name='bias', + shape=(self.filters, ), + initializer=self.bias_initializer, + regularizer=self.bias_regularizer, + constraint=self.bias_constraint, + trainable=True, + dtype=self.dtype) # create offset conv layer - offset_num = self.kernel_size[0] * self.kernel_size[1] * self.num_deformable_group + offset_num = self.kernel_size[0] * self.kernel_size[ + 1] * self.num_deformable_group self.offset_layer_kernel = self.add_weight( name='offset_layer_kernel', - shape=self.kernel_size + (input_dim, offset_num * 2), # 2 means x and y axis + shape=self.kernel_size + + (input_dim, offset_num * 2), # 2 means x and y axis initializer=tf.zeros_initializer(), regularizer=self.kernel_regularizer, trainable=True, dtype=self.dtype) self.offset_layer_bias = self.add_weight( name='offset_layer_bias', - shape=(offset_num * 2,), + shape=(offset_num * 2, ), initializer=tf.zeros_initializer(), # initializer=tf.random_uniform_initializer(-5, 5), regularizer=self.bias_regularizer, @@ -105,7 +105,7 @@ def build(self, input_shape): def call(self, inputs, training=None, **kwargs): # get offset, shape [batch_size, out_h, out_w, filter_h, * filter_w * channel_out * 2] offset = tf.nn.conv2d(inputs, - filter=self.offset_layer_kernel, + filters=self.offset_layer_kernel, strides=[1, *self.strides, 1], padding=self.padding.upper(), dilations=[1, *self.dilation_rate, 1]) @@ -117,8 +117,10 @@ def call(self, inputs, training=None, **kwargs): # some length batch_size = int(inputs.get_shape()[0]) channel_in = int(inputs.get_shape()[-1]) - in_h, in_w = [int(i) for i in inputs.get_shape()[1: 3]] # input feature map size - out_h, out_w = [int(i) for i in offset.get_shape()[1: 3]] # output feature map size + in_h, in_w = [int(i) for i in inputs.get_shape()[1:3] + ] # input feature map size + out_h, out_w = [int(i) for i in offset.get_shape()[1:3] + ] # output feature map size filter_h, filter_w = self.kernel_size # get x, y axis offset @@ -128,9 +130,12 @@ def call(self, inputs, training=None, **kwargs): # input feature map gird coordinates y, x = self._get_conv_indices([in_h, in_w]) y, x = [tf.expand_dims(i, axis=-1) for i in [y, x]] - y, x = [tf.tile(i, [batch_size, 1, 1, 1, self.num_deformable_group]) for i in [y, x]] - y, x = [tf.reshape(i, [*i.shape[0: 3], -1]) for i in [y, x]] - y, x = [tf.to_float(i) for i in [y, x]] + y, x = [ + tf.tile(i, [batch_size, 1, 1, 1, self.num_deformable_group]) + for i in [y, x] + ] + y, x = [tf.reshape(i, [*i.shape[0:3], -1]) for i in [y, x]] + y, x = [tf.cast(i, tf.float32) for i in [y, x]] # add offset y, x = y + y_off, x + x_off @@ -138,7 +143,7 @@ def call(self, inputs, training=None, **kwargs): x = tf.clip_by_value(x, 0, in_w - 1) # get four coordinates of points around (x, y) - y0, x0 = [tf.to_int32(tf.floor(i)) for i in [y, x]] + y0, x0 = [tf.cast(tf.floor(i), tf.int32) for i in [y, x]] y1, x1 = y0 + 1, x0 + 1 # clip y0, y1 = [tf.clip_by_value(i, 0, in_h - 1) for i in [y0, y1]] @@ -146,10 +151,13 @@ def call(self, inputs, training=None, **kwargs): # get pixel values indices = [[y0, x0], [y0, x1], [y1, x0], [y1, x1]] - p0, p1, p2, p3 = [DeformableConvLayer._get_pixel_values_at_point(inputs, i) for i in indices] + p0, p1, p2, p3 = [ + DeformableConvLayer._get_pixel_values_at_point(inputs, i) + for i in indices + ] # cast to float - x0, x1, y0, y1 = [tf.to_float(i) for i in [x0, x1, y0, y1]] + x0, x1, y0, y1 = [tf.cast(i, tf.float32) for i in [x0, x1, y0, y1]] # weights w0 = (y1 - y) * (x1 - x) w1 = (y1 - y) * (x - x0) @@ -161,19 +169,28 @@ def call(self, inputs, training=None, **kwargs): pixels = tf.add_n([w0 * p0, w1 * p1, w2 * p2, w3 * p3]) # reshape the "big" feature map - pixels = tf.reshape(pixels, [batch_size, out_h, out_w, filter_h, filter_w, self.num_deformable_group, channel_in]) + pixels = tf.reshape(pixels, [ + batch_size, out_h, out_w, filter_h, filter_w, + self.num_deformable_group, channel_in + ]) pixels = tf.transpose(pixels, [0, 1, 3, 2, 4, 5, 6]) - pixels = tf.reshape(pixels, [batch_size, out_h * filter_h, out_w * filter_w, self.num_deformable_group, channel_in]) + pixels = tf.reshape(pixels, [ + batch_size, out_h * filter_h, out_w * filter_w, + self.num_deformable_group, channel_in + ]) # copy channels to same group feat_in_group = self.filters // self.num_deformable_group pixels = tf.tile(pixels, [1, 1, 1, 1, feat_in_group]) - pixels = tf.reshape(pixels, [batch_size, out_h * filter_h, out_w * filter_w, -1]) + pixels = tf.reshape( + pixels, [batch_size, out_h * filter_h, out_w * filter_w, -1]) # depth-wise conv - out = tf.nn.depthwise_conv2d(pixels, self.kernel, [1, filter_h, filter_w, 1], 'VALID') + out = tf.nn.depthwise_conv2d(pixels, self.kernel, + [1, filter_h, filter_w, 1], 'VALID') # add the output feature maps in the same group - out = tf.reshape(out, [batch_size, out_h, out_w, self.filters, channel_in]) + out = tf.reshape(out, + [batch_size, out_h, out_w, self.filters, channel_in]) out = tf.reduce_sum(out, axis=-1) if self.use_bias: out += self.bias @@ -188,14 +205,17 @@ def _pad_input(self, inputs): # When padding is 'same', we should pad the feature map. # if padding == 'same', output size should be `ceil(input / stride)` if self.padding == 'same': - in_shape = inputs.get_shape().as_list()[1: 3] + in_shape = inputs.get_shape().as_list()[1:3] padding_list = [] for i in range(2): filter_size = self.kernel_size[i] dilation = self.dilation_rate[i] - dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1) - same_output = (in_shape[i] + self.strides[i] - 1) // self.strides[i] - valid_output = (in_shape[i] - dilated_filter_size + self.strides[i]) // self.strides[i] + dilated_filter_size = filter_size + (filter_size - + 1) * (dilation - 1) + same_output = (in_shape[i] + self.strides[i] - + 1) // self.strides[i] + valid_output = (in_shape[i] - dilated_filter_size + + self.strides[i]) // self.strides[i] if same_output == valid_output: padding_list += [0, 0] else: @@ -203,10 +223,12 @@ def _pad_input(self, inputs): p_0 = p // 2 padding_list += [p_0, p - p_0] if sum(padding_list) != 0: - padding = [[0, 0], - [padding_list[0], padding_list[1]], # top, bottom padding - [padding_list[2], padding_list[3]], # left, right padding - [0, 0]] + padding = [ + [0, 0], + [padding_list[0], padding_list[1]], # top, bottom padding + [padding_list[2], padding_list[3]], # left, right padding + [0, 0] + ] inputs = tf.pad(inputs, padding) return inputs @@ -216,16 +238,17 @@ def _get_conv_indices(self, feature_map_size): :param feature_map_size: :return: y, x with shape [1, out_h, out_w, filter_h * filter_w] """ - feat_h, feat_w = [int(i) for i in feature_map_size[0: 2]] + feat_h, feat_w = [int(i) for i in feature_map_size[0:2]] x, y = tf.meshgrid(tf.range(feat_w), tf.range(feat_h)) - x, y = [tf.reshape(i, [1, *i.get_shape(), 1]) for i in [x, y]] # shape [1, h, w, 1] - x, y = [tf.image.extract_image_patches(i, - [1, *self.kernel_size, 1], - [1, *self.strides, 1], - [1, *self.dilation_rate, 1], - 'VALID') - for i in [x, y]] # shape [1, out_h, out_w, filter_h * filter_w] + x, y = [tf.reshape(i, [1, *i.get_shape(), 1]) + for i in [x, y]] # shape [1, h, w, 1] + x, y = [ + tf.image.extract_patches(i, [1, *self.kernel_size, 1], + [1, *self.strides, 1], + [1, *self.dilation_rate, 1], 'VALID') + for i in [x, y] + ] # shape [1, out_h, out_w, filter_h * filter_w] return y, x @staticmethod @@ -237,10 +260,9 @@ def _get_pixel_values_at_point(inputs, indices): :return: """ y, x = indices - batch, h, w, n = y.get_shape().as_list()[0: 4] + batch, h, w, n = y.get_shape().as_list()[0:4] batch_idx = tf.reshape(tf.range(0, batch), (batch, 1, 1, 1)) b = tf.tile(batch_idx, (1, h, w, n)) pixel_idx = tf.stack([b, y, x], axis=-1) return tf.gather_nd(inputs, pixel_idx) - diff --git a/train.py b/train.py index d920954..904d420 100644 --- a/train.py +++ b/train.py @@ -4,71 +4,84 @@ import tensorflow as tf from nets.classification_net import ClassificationNet -conf = tf.ConfigProto() -conf.gpu_options.allow_growth = True -tf.enable_eager_execution(conf) +# conf = tf.config.experimental.ConfigProto() +# conf.gpu_options.allow_growth = True +# tf.enable_eager_execution(conf) SEED = 1234 -tf.set_random_seed(SEED) - +tf.random.set_seed(SEED) NUM_CLASS = 10 IMG_SHAPE = [28, 28] -(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data('/data/ajy/datasets/MNIST/mnist.npz') +(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data() # scale to (-1, 1), shape is (28, 28, 1) -x_train, x_test = [(np.expand_dims(i / 127.5 - 1, axis=-1)).astype(np.float32) for i in [x_train, x_test]] -y_train, y_test = tf.one_hot(y_train, depth=NUM_CLASS), tf.one_hot(y_test, depth=NUM_CLASS) +x_train, x_test = [(np.expand_dims(i / 127.5 - 1, axis=-1)).astype(np.float32) + for i in [x_train, x_test]] +y_train, y_test = tf.one_hot(y_train, + depth=NUM_CLASS), tf.one_hot(y_test, + depth=NUM_CLASS) def get_dataset(batch_size, x, y, map_fn, repeat=False): dataset = tf.data.Dataset.from_tensor_slices((x, y)) if repeat: dataset = dataset.repeat() - dataset = dataset.shuffle(batch_size * 10).map(map_fn, num_parallel_calls=2).batch(batch_size).prefetch(1) + dataset = dataset.shuffle(batch_size * 10).map( + map_fn, num_parallel_calls=2).batch(batch_size).prefetch(1) return dataset def distorted_image_fn(image, label): + return image, label # random rotate # 80% ->(-30°, 30°), 20%->(-90°,-30°)&(30°,90°) - tf.set_random_seed(SEED) - small_angle = tf.cast(tf.random_uniform([1], maxval=1.) <= 0.8, tf.int32) - angle = tf.random_uniform([1], minval=0, maxval=30, dtype=tf.int32) * small_angle + \ - tf.random_uniform([1], minval=30, maxval=90, dtype=tf.int32) * (1 - small_angle) - negative = -1 + 2 * tf.random_uniform([1], minval=0, maxval=2, dtype=tf.int32) - angle = tf.to_float(negative * angle) - rotated_image = tf.contrib.image.rotate(image, angle * 3.1415926 / 180) + tf.random.set_seed(SEED) + small_angle = tf.cast(tf.random.uniform([1], maxval=1.) <= 0.8, tf.int32) + angle = tf.random.uniform([1], minval=0, maxval=30, dtype=tf.int32) * small_angle + \ + tf.random.uniform([1], minval=30, maxval=90, dtype=tf.int32) * (1 - small_angle) + negative = -1 + 2 * tf.random.uniform( + [1], minval=0, maxval=2, dtype=tf.int32) + angle = tf.cast(negative * angle, tf.float32) + # rotated_image = tf.contrib.image.rotate(image, angle * 3.1415926 / 180) return rotated_image, label def distorted_image_test_fn(image, label): + return image, label # random rotate # (-135°, 135°) - tf.set_random_seed(SEED) - angle = tf.random_uniform([1], minval=0, maxval=135, dtype=tf.int32) - negative = -1 + 2 * tf.random_uniform([1], minval=0, maxval=2, dtype=tf.int32) - angle = tf.to_float(negative * angle) - rotated_image = tf.contrib.image.rotate(image, angle * 3.1415926 / 180) + tf.random.set_seed(SEED) + angle = tf.random.uniform([1], minval=0, maxval=135, dtype=tf.int32) + negative = -1 + 2 * tf.random.uniform( + [1], minval=0, maxval=2, dtype=tf.int32) + angle = tf.cast(negative * angle, tf.float32) + # rotated_image = tf.contrib.image.rotate(image, angle * 3.1415926 / 180) return rotated_image, label def main(): batch_size = 16 - dataset = get_dataset(batch_size, x_train, y_train, distorted_image_fn, repeat=True) + dataset = get_dataset(batch_size, + x_train, + y_train, + distorted_image_fn, + repeat=True) model = ClassificationNet(num_class=NUM_CLASS) - optimizer = tf.train.AdamOptimizer(learning_rate=0.001) - global_step = tf.train.get_or_create_global_step() + optimizer = tf.keras.optimizers.Adam(learning_rate=0.001) + global_step = tf.compat.v1.train.get_or_create_global_step() for i, (rotated_image, label) in enumerate(dataset, start=1): global_step.assign_add(1) - loss, prediction= model.train(optimizer, rotated_image, label) + loss, prediction = model.train(optimizer, rotated_image, label) acc = model.accuracy(prediction, label) # test if i % 1000 == 0: total_acc = 0 - dataset_test = get_dataset(1000, x_test, y_test, distorted_image_test_fn).make_one_shot_iterator() + dataset_test = get_dataset( + 1000, x_test, y_test, + distorted_image_test_fn).make_one_shot_iterator() split = 10000 // 1000 for _ in range(split): rotated_image_test, label_test = dataset_test.get_next() @@ -79,9 +92,9 @@ def main(): print('test accuracy: {}'.format(total_acc / split)) if i % 10 == 0: - print("step: {}, loss: {}, train accuracy: {}".format(int(global_step), float(loss), float(acc))) + print("step: {}, loss: {}, train accuracy: {}".format( + int(global_step), float(loss), float(acc))) if __name__ == '__main__': main() -