video_retinanet.py
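"""Video RetinaNet model.

Runs a SqueezeNext backbone over every frame of a video sequence, fuses the
per-frame features with a bottleneck ConvLSTM, and applies a detection head,
all wrapped in a tf.estimator model function.
"""
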
from __future__ import absolute_import

import os

import tensorflow as tf

import model_heads
import squeezenext_architecture as squeezenext
import tensorflow_extentions as tfe
import tools
from dataloader import ReadTFRecords
from loss import detection_loss
from optimizer import PolyOptimizer

slim = tf.contrib.slim
metrics = tf.contrib.metrics


class Model(object):
    def __init__(self, config):
        self.image_size = config["image_size"]
        self.num_classes = config["num_classes"]
        self._config = config

    def define_batch_size(self, features, labels, batch_size):
        """
        Set a static batch size on the first dimension of the feature and
        label dictionaries.

        :param features:
            Feature dict
        :param labels:
            Labels dict
        :param batch_size:
            Static batch size to set as the first dimension
        :return:
            (features, labels)
        """
        features = tools.define_first_dim(features, batch_size)
        labels = tools.define_first_dim(labels, batch_size)
        return (features, labels)

    def input_fn(self, file_pattern, training, batch_size, sequence_length):
        """
        Input fn of the model

        :param file_pattern:
            Glob file pattern
        :param training:
            Whether or not the model is training
        :param batch_size:
            Number of sequences per batch
        :param sequence_length:
            Number of frames per sequence
        :return:
            Input generator
        """
        read_tf_records = ReadTFRecords(batch_size, self._config)
        return self.define_batch_size(
            *read_tf_records(file_pattern, sequence_length, training=training),
            batch_size=batch_size)

    def model_fn(self, features, labels, mode, params):
        """
        Function to create the squeezenext video detection model and set up
        the training environment.

        :param features:
            Feature dict from the estimator's input fn
        :param labels:
            Label dict from the estimator's input fn
        :param mode:
            What mode the model is in, tf.estimator.ModeKeys
        :param params:
            Dictionary of parameters used to configure the network
        :return:
            Train op, predictions, or eval op depending on mode
        """
        training = mode == tf.estimator.ModeKeys.TRAIN
        batch_size, sequence_length = tuple(labels["loss_masks"].get_shape().as_list())
        unpadded_features = features["example_length"][:, 0]
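
        # Apply the SqueezeNext backbone to every frame through a
        # time-distributed wrapper, collecting intermediate endpoints at
        # three spatial resolutions for the detection head.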
        classifier = squeezenext.SqueezeNext(self.num_classes, params["block_defs"],
                                             params["input_def"], params["groups"],
                                             params["seperate_relus"])
        with slim.arg_scope(classifier.model_arg_scope(training)):
            with tf.variable_scope("classifier"):
                classifier_endpoints = tfe.time_distributed(
                    features["images"], unpadded_features, classifier,
                    [[28, 28, 64], [14, 14, 128], [7, 7, 256]],
                    endpoints=["block_1/unit_0", "block_2/unit_0", "block_3/unit_0"])
        conv_lstm = tfe.BottleneckLSTM(classifier_endpoints, num_filters=32)
        with tf.variable_scope("bottleneck_lstm"):
            initializer = tfe.initial_state.make_gaussian_state_initializer(
                tfe.initial_state.make_variable_state_initializer(),
                tf.constant(False))
            init_state = tfe.initial_state.get_initial_cell_state(
                conv_lstm, initializer, batch_size, tf.float32)
            predictions, last_states = tf.nn.dynamic_rnn(
                cell=conv_lstm,
                dtype=tf.float32,
                sequence_length=unpadded_features,
                inputs=classifier_endpoints,
                initial_state=init_state)
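
        # Run the detection head on the LSTM outputs, again time-distributed
        # over the sequence dimension.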
        model_head = model_heads.ModelHead(params)
        with slim.arg_scope(model_head.model_arg_scope(training)):
            with tf.variable_scope("model_head"):
                predictions = tfe.time_distributed(predictions, unpadded_features,
                                                   model_head,
                                                   model_head.output_size(predictions))
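
        # Detection loss: total loss plus its classification and
        # box-regression components.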
        loss, cls_loss, box_loss = detection_loss(predictions, labels, params)

        # create histograms of the class spread and sequence lengths
        tf.summary.histogram("classes", labels["cls_targets"][params["min_level"]])
        tf.summary.histogram("sequence_length", unpadded_features)

        if training:
            tf.summary.scalar("box_loss", box_loss)
            tf.summary.scalar("cls_loss", cls_loss)
            # init poly optimizer
            optimizer = PolyOptimizer(params)
            # define train op
            train_op = optimizer.optimize(loss, training, params["total_steps"])
            # if params["output_train_images"] is true, output images during training
            if params["output_train_images"]:
                tools.draw_box_predictions(features["images"], predictions, labels,
                                           params, sequence_length)
            stats_hook = tools.stats.ModelStats(params["model_dir"],
                                                batch_size * sequence_length)
            # setup fine tune scaffold, restoring backbone weights from
            # params["fine_tune_ckpt"]
            scaffold = tf.train.Scaffold(
                init_op=None,
                init_fn=tools.fine_tune.init_weights(
                    "classifier/rnn/TimeDistributedWrapper", params["fine_tune_ckpt"],
                    ignore_strings=["/squeezenext/fully_connected/weights", "RMSProp"]))
            # create the estimator training spec, which also outputs the model
            # stats of the model to params["model_dir"]
            return tf.estimator.EstimatorSpec(mode, loss=loss, train_op=train_op,
                                              scaffold=scaffold,
                                              training_hooks=[stats_hook])

        if mode == tf.estimator.ModeKeys.EVAL:
            # COCO metrics and validation image summaries are currently
            # disabled; when re-enabled, pass coco_metrics via eval_metric_ops
            # and eval_summary_hook via evaluation_hooks on the spec below.
            # eval_metric = tools.coco_metrics.EvaluationMetric()
            # coco_metrics = eval_metric.estimator_metric_fn(
            #     tools.eval_predictions(predictions, params),
            #     tools.eval_labels(labels))
            # eval_summary_hook = tf.train.SummarySaverHook(
            #     save_steps=100,
            #     output_dir=os.path.join(params["model_dir"], "eval"),
            #     summary_op=tf.summary.image("validation",
            #                                 features["images"][0, :, :, :, :]))

            # return eval spec
            return tf.estimator.EstimatorSpec(mode, loss=loss)
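

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): one plausible way to wire
# this Model into tf.estimator. Every config value below is an illustrative
# placeholder; the real keys and values come from the project's config files.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    config = {
        "image_size": 224,                    # assumed input resolution
        "num_classes": 90,                    # assumed number of classes
        "model_dir": "/tmp/video_retinanet",  # assumed output directory
        # block_defs, input_def, groups, seperate_relus, total_steps,
        # output_train_images, fine_tune_ckpt, min_level, ... must be filled
        # in from the project's config schema before this will run.
    }
    model = Model(config)
    estimator = tf.estimator.Estimator(model_fn=model.model_fn,
                                       model_dir=config["model_dir"],
                                       params=config)
    estimator.train(input_fn=lambda: model.input_fn("train-*.tfrecord",
                                                    training=True,
                                                    batch_size=4,
                                                    sequence_length=8))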