Batchnorm+sru+swish #9

Open · wants to merge 24 commits into base: master
174 changes: 0 additions & 174 deletions Chiron+weight_visual/README.md

This file was deleted.

21 changes: 16 additions & 5 deletions README.md
@@ -29,18 +29,29 @@ we modified the docker from `https://github.com/anurag/fastai-course-1.git`


```
- nvidia-docker run -it \
+ DATADIR=/data/nanopore
+
+ nvidia-docker run \
+ --rm -it \
--entrypoint /bin/zsh \
- -v /data/nanopore/new/fast5Dir/:/data \
- -p 8889:8888 \
+ -v $DATADIR:/data \
+ -p 8890:8888 \
--name haruhi \
-w /home/docker \
etheleon/chiron
```
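This starts a throwaway container (`--rm`) named `haruhi` with zsh as its entrypoint, mounts `$DATADIR` at `/data` inside the container, and maps container port 8888 (the notebook port in the base image) to host port 8890.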

To train Deepore, run `chiron_rcnn_train.py`:

```
- cd $HOME
- python Chiron/chiron/chiron_rcnn_train.py
+ export CUDA_VISIBLE_DEVICES="1"
+ python deepore_SRU+Swish/chiron/chiron_rcnn_train.py
```
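Setting `CUDA_VISIBLE_DEVICES="1"` pins training to the second GPU on the host; change the index to suit your machine.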

If you want to log the weights and gradients, use the `--logs` flag.

```
python deepore_SRU+Swish/chiron/chiron_rcnn_train.py --logs 1
```
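The weight and gradient summaries are presumably written under the configured `log_dir` (see the `Flags` class in the diff below); point TensorBoard at that directory to view them.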

# Reference
File renamed without changes.
File renamed without changes.
2 changes: 2 additions & 0 deletions deepore_SRU+Swish/README.md
@@ -0,0 +1,2 @@
# Deepore
This modified version of Chiron lets us visualise both the weights and the gradients, to diagnose training problems we have observed in Chiron, such as local minima and bottlenecks during training.
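The diffs below call a `summary.variable_summaries` helper whose source is not included in this view. For orientation, a minimal TF1-style helper of this kind usually looks like the sketch below (the PR's actual implementation may differ):

```
import tensorflow as tf

def variable_summaries(var):
    """Attach mean/stddev/min/max scalars and a histogram to a tensor
    so its weights (or gradients) can be tracked in TensorBoard."""
    with tf.name_scope('summaries'):
        mean = tf.reduce_mean(var)
        tf.summary.scalar('mean', mean)
        stddev = tf.sqrt(tf.reduce_mean(tf.square(var - mean)))
        tf.summary.scalar('stddev', stddev)
        tf.summary.scalar('max', tf.reduce_max(var))
        tf.summary.scalar('min', tf.reduce_min(var))
        tf.summary.histogram('histogram', var)
```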
Binary file added deepore_SRU+Swish/chiron/chiron_input.pyc
Binary file not shown.
deepore_SRU+Swish/chiron/chiron_rcnn_train.py
@@ -5,23 +5,25 @@

@author: haotianteng
"""
+ import time,os
+ import argparse
import tensorflow as tf
from distutils.dir_util import copy_tree
from chiron_input import read_raw_data_sets
from cnn import getcnnfeature
#from cnn import getcnnlogit
#from rnn import rnn_layers
from rnn import rnn_layers_one_direction
- import time,os
+ from summary import variable_summaries

def save_model():
    copy_tree(os.path.dirname(os.path.abspath(__file__)),FLAGS.log_dir+FLAGS.model_name+'/model')
- def inference(x,seq_length,training):
-     cnn_feature = getcnnfeature(x,training = training)
+ def inference(x,seq_length,training,verbose=False):
+     cnn_feature = getcnnfeature(x,training = training,verbose=verbose)
    feashape = cnn_feature.get_shape().as_list()
    ratio = FLAGS.sequence_len/feashape[1]
#    logits = rnn_layers(cnn_feature,seq_length/ratio,training,class_n = 4**FLAGS.k_mer+1 )
-     logits = rnn_layers_one_direction(cnn_feature,seq_length/ratio,training,class_n = 4**FLAGS.k_mer+1 )
+     logits = rnn_layers_one_direction(cnn_feature,seq_length/ratio,training,class_n = 4**FLAGS.k_mer+1,verbose=verbose )
#    logits = getcnnlogit(cnn_feature)
    return logits,ratio

@@ -31,11 +33,15 @@ def loss(logits,seq_len,label):
    tf.summary.scalar('loss',loss)
    return loss

- def train_step(loss,global_step = None):
-     opt = tf.train.AdamOptimizer(FLAGS.step_rate).minimize(loss,global_step=global_step)
+ def train_step(loss,global_step = None,verbose=False):
+     opt = tf.train.AdamOptimizer(FLAGS.step_rate)
#    opt = tf.train.GradientDescentOptimizer(FLAGS.step_rate).minimize(loss)
#    opt = tf.train.RMSPropOptimizer(FLAGS.step_rate).minimize(loss)
#    opt = tf.train.MomentumOptimizer(FLAGS.step_rate,0.9).minimize(loss)
+     if verbose:
+         grad = opt.compute_gradients(loss)
+         tf.summary.scalar('grad',tf.reduce_mean(grad[0][0]))
+     opt = opt.minimize(loss,global_step=global_step)
    return opt
def prediction(logits,seq_length,label,top_paths=1):
    """
@@ -58,6 +64,16 @@ def prediction(logits,seq_length,label,top_paths=1):
    return error

def train():
+
+     parser = argparse.ArgumentParser()
+     parser.add_argument("--logs", help="logs")
+     args = parser.parse_args()
+     if args.logs:
+         print("logs turned on")
+         verbose = True
+     else:
+         print("logs turned off")
+         verbose = False
    training = tf.placeholder(tf.bool)
    global_step=tf.get_variable('global_step',trainable=False,shape=(),dtype = tf.int32,initializer = tf.zeros_initializer())
    x = tf.placeholder(tf.float32,shape = [FLAGS.batch_size,FLAGS.sequence_len])
@@ -66,9 +82,9 @@ def train():
    y_values = tf.placeholder(tf.int32)
    y_shape = tf.placeholder(tf.int64)
    y = tf.SparseTensor(y_indexs,y_values,y_shape)
-     logits,ratio = inference(x,seq_length,training)
+     logits,ratio = inference(x,seq_length,training,verbose)
    ctc_loss = loss(logits,seq_length,y)
-     opt = train_step(ctc_loss,global_step = global_step)
+     opt = train_step(ctc_loss,global_step = global_step,verbose=verbose)
    error = prediction(logits,seq_length,y)
    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
@@ -119,13 +135,14 @@ def run(args):
if __name__ == "__main__":
    class Flags():
        def __init__(self):
-             self.data_dir = '/media/haotianteng/Linux_ex/Nanopore_data/Lambda_R9.4/raw'
-             self.cache_dir = '/media/haotianteng/Linux_ex/Nanopore_data/Lambda_R9.4/cache'
-             self.log_dir = '/media/haotianteng/Linux_ex/GVM_model'
+             self.data_dir = '/home/docker/raw' #human
+             #self.data_dir = '/home/docker/ecoli' #ecoli
+             self.cache_dir = '/home/docker/out/cache'
+             self.log_dir = '/home/docker/out/logs'
            self.sequence_len = 300
-             self.batch_size = 750
+             self.batch_size = 64
            self.step_rate = 1e-3
-             self.max_steps = 20000
+             self.max_steps = 10000
            self.k_mer = 1
            self.model_name = 'test'
            self.retrain = False
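One caveat on the gradient logging in `train_step` above: `opt.compute_gradients(loss)` returns a list of `(gradient, variable)` pairs, so `tf.reduce_mean(grad[0][0])` summarises only the gradient of the first variable in that list. A sketch of a variant that logs one summary per variable (a hypothetical alternative, not what this PR does):

```
import tensorflow as tf

def train_step(loss, global_step=None, verbose=False):
    # FLAGS.step_rate is assumed to exist, as in the surrounding file.
    opt = tf.train.AdamOptimizer(FLAGS.step_rate)
    grads_and_vars = opt.compute_gradients(loss)
    if verbose:
        # One scalar per trainable variable instead of only the first pair.
        for grad, var in grads_and_vars:
            if grad is not None:
                tf.summary.scalar('grad/' + var.op.name,
                                  tf.reduce_mean(tf.abs(grad)))
    return opt.apply_gradients(grads_and_vars, global_step=global_step)
```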
deepore_SRU+Swish/chiron/cnn.py
@@ -10,14 +10,15 @@
from tensorflow.contrib.layers import batch_norm
from summary import variable_summaries

- def conv_layer(indata,ksize,padding,training,name,dilate = 1,strides=[1,1,1,1],bias_term = False,active = True,BN= True):
+ def conv_layer(indata,ksize,padding,training,name,dilate = 1,strides=[1,1,1,1],bias_term = False,active = True,BN= True,verbose=False):
    """A standard convolutional layer"""
    with tf.variable_scope(name):
        W = tf.get_variable("weights", dtype = tf.float32, shape=ksize,initializer=tf.contrib.layers.xavier_initializer())
-         variable_summaries(W)
+         beta = tf.get_variable("beta",dtype=tf.float32,shape=[1],initializer = tf.contrib.layers.xavier_initializer())
+         if verbose: variable_summaries(W)
        if bias_term:
            b = tf.get_variable("bias", dtype=tf.float32,shape=[ksize[-1]])
-             variable_summaries(b)
+             if verbose: variable_summaries(b)
        if dilate>1:
            if bias_term:
                conv_out = b + tf.nn.atrous_conv2d(indata,W,rate = dilate,padding=padding,name=name)
@@ -33,8 +34,8 @@ def conv_layer(indata,ksize,padding,training,name,dilate = 1,strides=[1,1,1,1],b
#            conv_out = batchnorm(conv_out,scope=scope,training = training)
            conv_out = simple_global_bn(conv_out,name = name+'_bn')
        if active:
-             with tf.variable_scope(name+'_relu'):
-                 conv_out = tf.nn.relu(conv_out,name='relu')
+             with tf.variable_scope(name+'_swish'):
+                 conv_out = conv_out*tf.nn.sigmoid(beta*conv_out,name='swish')
    return conv_out
def batchnorm(inp,scope,training,decay = 0.99,epsilon = 1e-5):
    with tf.variable_scope(scope):
@@ -86,19 +87,19 @@ def inception_layer(indata,training,times=16):
    conv0f = conv_layer(indata,ksize=[1,1,in_channel,times*2],padding = 'SAME',training = training,name = 'conv0f_1x1')
    conv1f = conv_layer(conv0f,ksize=[1,3,times*2,times*3],padding = 'SAME',training = training,name = 'conv1f_1x3_d3',dilate = 3)
    return(tf.concat([conv1a,conv0b,conv1c,conv1d,conv1e,conv1f],axis = -1,name = 'concat'))
- def residual_layer(indata,out_channel,training,i_bn = False):
+ def residual_layer(indata,out_channel,training,i_bn = False,verbose=False):
    fea_shape = indata.get_shape().as_list()
    in_channel = fea_shape[-1]
    with tf.variable_scope('branch1'):
-         indata_cp = conv_layer(indata,ksize = [1,1,in_channel,out_channel],padding = 'SAME',training = training,name = 'conv1',BN = i_bn,active = False)
+         indata_cp = conv_layer(indata,ksize = [1,1,in_channel,out_channel],padding = 'SAME',training = training,name = 'conv1',BN = i_bn,active = False,verbose=verbose)
    with tf.variable_scope('branch2'):
-         conv_out1 = conv_layer(indata,ksize = [1,1,in_channel,out_channel],padding = 'SAME',training = training,name = 'conv2a',bias_term = False)
-         conv_out2 = conv_layer(conv_out1,ksize = [1,3,out_channel,out_channel],padding = 'SAME',training=training,name = 'conv2b',bias_term = False)
-         conv_out3 = conv_layer(conv_out2,ksize = [1,1,out_channel,out_channel],padding = 'SAME',training=training,name = 'conv2c',bias_term = False,active = False)
+         conv_out1 = conv_layer(indata,ksize = [1,1,in_channel,out_channel],padding = 'SAME',training = training,name = 'conv2a',bias_term = False,verbose=verbose)
+         conv_out2 = conv_layer(conv_out1,ksize = [1,3,out_channel,out_channel],padding = 'SAME',training=training,name = 'conv2b',bias_term = False,verbose=verbose)
+         conv_out3 = conv_layer(conv_out2,ksize = [1,1,out_channel,out_channel],padding = 'SAME',training=training,name = 'conv2c',bias_term = False,active = False,verbose=verbose)
    with tf.variable_scope('plus'):
        relu_out = tf.nn.relu(indata_cp+conv_out3,name = 'final_relu')
    return relu_out
- def getcnnfeature(signal,training):
+ def getcnnfeature(signal,training,verbose=False):
    signal_shape = signal.get_shape().as_list()
    signal = tf.reshape(signal,[signal_shape[0],1,signal_shape[1],1])
    print(signal.get_shape())
@@ -147,11 +148,11 @@ def getcnnfeature(signal,training):

    # Residual Layer x 5
    with tf.variable_scope('res_layer1'):
-         res1 = residual_layer(signal,out_channel = 256,training = training,i_bn = True)
+         res1 = residual_layer(signal,out_channel = 256,training = training,i_bn = True,verbose=verbose)
    with tf.variable_scope('res_layer2'):
-         res2 = residual_layer(res1,out_channel = 256,training = training)
+         res2 = residual_layer(res1,out_channel = 256,training = training,verbose=verbose)
    with tf.variable_scope('res_layer3'):
-         res3 = residual_layer(res2,out_channel = 256,training = training)
+         res3 = residual_layer(res2,out_channel = 256,training = training,verbose=verbose)
#    with tf.variable_scope('res_layer4'):
#        res4 = residual_layer(res3,out_channel = 512,training = training)
#    with tf.variable_scope('res_layer5'):
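For reference, the activation change in `conv_layer` above swaps ReLU for swish, `f(x) = x * sigmoid(beta * x)`, with `beta` a learned scalar created once per layer. A minimal standalone version of that activation (the initialiser choice here is illustrative; the diff uses `xavier_initializer`):

```
import tensorflow as tf

def swish(x, name='swish'):
    # f(x) = x * sigmoid(beta * x) with a trainable scalar beta.
    # beta = 1 gives SiLU; large beta approaches ReLU.
    with tf.variable_scope(name):
        beta = tf.get_variable('beta', shape=[1], dtype=tf.float32,
                               initializer=tf.ones_initializer())
        return x * tf.nn.sigmoid(beta * x)
```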
Binary file added deepore_SRU+Swish/chiron/cnn.pyc
Binary file not shown.
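The PR title also mentions SRU; the SRU layers live in `rnn.py` (`rnn_layers_one_direction`), which this view does not display. For orientation only, a rough TF1-style sketch of a single SRU layer is below; everything in it (names, the use of `x_tilde` in the highway term, static shapes) is an assumption, not this PR's code:

```
import tensorflow as tf

def sru_layer(x, hidden, name='sru'):
    # Simple Recurrent Unit (Lei et al., 2017): the matrix multiplies are
    # batched over all timesteps; only the elementwise recurrence is sequential.
    # Assumes x has a fully static shape [batch, steps, in_dim].
    with tf.variable_scope(name):
        batch, steps, in_dim = x.get_shape().as_list()
        W = tf.get_variable('W', [in_dim, 3 * hidden])
        b = tf.get_variable('b', [2 * hidden], initializer=tf.zeros_initializer())
        proj = tf.reshape(tf.matmul(tf.reshape(x, [-1, in_dim]), W),
                          [batch, steps, 3 * hidden])
        x_tilde, f_in, r_in = tf.split(proj, 3, axis=2)
        f = tf.sigmoid(f_in + b[:hidden])   # forget gate
        r = tf.sigmoid(r_in + b[hidden:])   # reset (highway) gate
        # c_t = f_t * c_{t-1} + (1 - f_t) * x~_t, scanned over time.
        def step(c_prev, elems):
            f_t, xt_t = elems
            return f_t * c_prev + (1.0 - f_t) * xt_t
        c = tf.scan(step,
                    (tf.transpose(f, [1, 0, 2]), tf.transpose(x_tilde, [1, 0, 2])),
                    initializer=tf.zeros([batch, hidden]))
        c = tf.transpose(c, [1, 0, 2])
        # h_t = r_t * tanh(c_t) + (1 - r_t) * x~_t (the paper's highway term
        # uses the raw input; x~ is used here to keep the dimensions aligned).
        return r * tf.tanh(c) + (1.0 - r) * x_tilde
```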
File renamed without changes.