From 8abe2c79654e5ec773adf1180ab34000b0007258 Mon Sep 17 00:00:00 2001 From: Olivares Date: Thu, 18 Jan 2018 22:17:52 -0600 Subject: [PATCH] updates ~80%acc --- prototypes/v0.ipynb | 206 ++++++++++++++++++++++---------------------- prototypes/v0.py | 52 +++++++---- 2 files changed, 139 insertions(+), 119 deletions(-) diff --git a/prototypes/v0.ipynb b/prototypes/v0.ipynb index 961ae4a..63feb67 100644 --- a/prototypes/v0.ipynb +++ b/prototypes/v0.ipynb @@ -23,7 +23,6 @@ "source": [ "# reset everything to rerun in jupyter\n", "tf.reset_default_graph()\n", - "\n", "print(\"TensorFlow v{}\".format(tf.__version__))" ] }, @@ -109,24 +108,6 @@ "na_vectores = utils.get_vectors(na,index_na)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing\n", - "La función get_top10_closest obtiene los 10 vectores más cercanos al vector de referencia." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_10 = utils.get_top10_closest(es_vectores[0],na_vectores[100:])\n", - "top_10" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -142,25 +123,22 @@ }, "outputs": [], "source": [ - "LEARNING_RATE = 0.5\n", - "\n", - "# Dimensión de vectores de entrada (número de neuronas en capa de entrada).\n", - "NODES_INPUT = es_vectores[0].size \n", - "\n", - "# Número de neuronas en capas ocultas.\n", - "NODES_H1 = 70 - 20 \n", - "NODES_H2 = 42 - 20\n", - "NODES_H3 = 70 - 20\n", - "\n", - "# (número de neuronas en capa de entrada).\n", + "LEARNING_RATE = 0.46\n", + "NODES_INPUT = es_vectores[0].size\n", + "NODES_H1 = 300\n", + "#NODES_H2 = 1\n", + "#NODES_H3 = 1\n", "NODES_OUPUT = na_vectores[0].size\n", - "\n", - "\n", "EPOCHS = 100000\n", "\n", + "# Inicializar pesos con método xavier_init\n", + "XAVIER_INIT = False\n", + "\n", "# Ruta donde se guarda el grafo para visualizar en TensorBoard.\n", - "LOGPATH = utils.make_hparam_string(\"MSE\",\"RELU\",\"Adagrad\",\"H\",NODES_H1,\n", - " NODES_H2,NODES_H3,\"LR\",LEARNING_RATE)" + "LOGPATH = utils.make_hparam_string(\"80ACC_Adagrad\", \"H\", NODES_H1, \"LR\", LEARNING_RATE)\n", + "\n", + "# Ruta para guardar el modelo generado.\n", + "SAVE_PATH = \"./models/Adagrad_H_305_LR_0.433.ckpt\"" ] }, { @@ -197,21 +175,33 @@ "metadata": {}, "source": [ "# Función para crear las capas de la red.\n", + " \n", + "Arguments:\n", + "* input {Tensor} -- Tensor de entrada a la capa.\n", + "* size_in {int}, size_out {int} -- Dimensiones de entrada y salida de la capa.\n", + "* name {str} -- Nombre de la capa. Default: fc.\n", + "Keyword Arguments:\n", + "* xavier_init {bool} -- Inicializar pesos empleando el método Xavier.\n", + "* stddev {float} -- Desviación estándar con la que se inicializan los pesos de la capa. (default: {0})\n", + "* dtype {function} -- Floating-point representation. (default: {tf.float64})\n", "\n", + "Returns:\n", + "* Tensor -- Salida de la capa: (input * Weights) + bias\n", "\n", - "Función para crear capas.\n", - "\n", - "Args:\n", - "* input (Tensor): Tensor de entrada a la capa.\n", - "* size_in, size_out (int): Dimensiones de entrada y salida de la capa.\n", - "* name (str): Nombre de la capa. 
Default: fc.\n", - "* stddev (float): Desviación estándar con la que se inicializan los pesos de la capa.\n", - "* dtype: Floating-point representation.\n", + "# Inicialización de pesos.\n", + "Si la bandera `XAVIER_INIT` es `True` se emplea el método Xavier, en caso contrario los pesos se inicializan con valores siguiendo una distribución normal.\n", + "## Xavier Initialization\n", + ">This initializer is designed to keep the scale of the gradients roughly the same in all layers. In uniform distribution this ends up being the \n", + "range: x = sqrt(6. / (in + out)); [-x, x] and for normal distribution a standard deviation of sqrt(2. / (in + out)) is used.\n", + "[Xavier Glorot and Yoshua Bengio (2010)](http://www.jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)\n", "\n", - "Returns:\n", - "* act (Tensor): $(input * weights) + bias $\n", + ">Why’s Xavier initialization important?\n", + "In short, it helps signals reach deep into the network.\n", + "* If the weights in a network start too small, then the signal shrinks as it passes through each layer until it’s too tiny to be useful.\n", + "* If the weights in a network start too large, then the signal grows as it passes through each layer until it’s too massive to be useful.\n", "\n", - "\n" + ">Xavier initialization makes sure the weights are ‘just right’, keeping the signal in a reasonable range of values through many layers.\n", + "[Ver liga](http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization)" ] }, { @@ -222,25 +212,26 @@ }, "outputs": [], "source": [ - "def fully_connected_layer(input, size_in, size_out, name, stddev=0.1,\n", - " dtype = tf.float64):\n", + "def fully_connected_layer(input, size_in, size_out, name,xavier_init=True, stddev=0.1, dtype=tf.float64):\n", " with tf.name_scope(name):\n", - " # Tensor de pesos.\n", - " W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev,\n", - " dtype=dtype), name=\"W\")\n", - " # Bias.\n", - " b = tf.Variable(tf.constant(0.1, shape=[size_out], dtype = dtype), name=\"b\")\n", - " \n", - " # Realiza la operación input * + b (tf.nn.xw_plus_b)\n", - " act = tf.add(tf.matmul(input,W), b)\n", + " # Inicializar pesos\n", + " if xavier_init:\n", + " W = tf.get_variable(name=\"W_\" + name, shape=[size_in, size_out], dtype=dtype,\n", + " initializer=tf.contrib.layers.xavier_initializer(dtype=dtype),\n", + " use_resource=True)\n", + " else:\n", + " W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev,dtype=dtype), name=\"W\")\n", + " # Bias\n", + " b = tf.Variable(tf.constant(0.1, shape=[size_out], dtype=dtype), name=\"b\")\n", + "\n", + " # h(x) = (input * weights) + bias\n", + " output = tf.nn.xw_plus_b(input, W, b)\n", " \n", - " # Se generan histogramas de los pesos y la salida de la capa para poder\n", - " # visualizarlos en TensorBoard.\n", + " # visualizarlos pesos en TensorBoard.\n", " tf.summary.histogram(\"weights\", W)\n", - " #tf.summary.histogram(\"biases\", b)\n", - " tf.summary.histogram(\"activations\", act)\n", - " \n", - " return act" + " tf.summary.histogram(\"xw_plus_b\", output)\n", + "\n", + " return output" ] }, { @@ -248,16 +239,20 @@ "metadata": {}, "source": [ "# Activación de capas.\n", - "Función para activar la salida de las capas.\n", + "Esta función aplica la activación a la capa neuronal.\n", "\n", - "Args:\n", - "* layer (Tensor): Capa que será activada.\n", - "* name (string): Nombre de la capa para mostrar en `TensorBoard`.\n", - "* act (string): Función de activación. 
Default: [ReLU](https://www.tensorflow.org/api_docs/python/tf/nn/relu). También se pueden utilizar [Leaky ReLU](https://www.tensorflow.org/api_docs/python/tf/nn/leaky_relu) con un parámetro `alpha = 0.2` por defecto y [Softmax](https://www.tensorflow.org/api_docs/python/tf/nn/softmax) para la capa de salida.\n", + "Arguments:\n", + "* layer {Tensor} -- Capa a activar.\n", + "* act {tf.function} -- Función de activación (default: {tf.nn.relu}).\n", + "* name {str} -- Nombre para visualización de activación en TensorBoard.\n", + "\n", + "Keyword Arguments:\n", + "* alpha {tf.constant} -- Constante que se usa como argumento para leaky_relu (default: {tf.constant(0.2)})\n", + "* dtype {tf.function} -- Floating-point representation. (default: {tf.float64})\n", "\n", "Returns:\n", - " Capa con función de activación aplicada.\n", - " \n", + "* Tensor -- Capa con función de activación aplicada.\n", + "\n", "**NOTA:**\n", ">3.4 Why do we use a leaky ReLU and not a ReLU as an activation function?\n", "We want gradients to flow while we backpropagate through the network. \n", @@ -279,15 +274,17 @@ }, "outputs": [], "source": [ - "def activation_function(layer, act, name, alpha = tf.constant(0.2, dtype=tf.float64)):\n", + "def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float64)):\n", + " # Funciones de activación.\n", " if act == \"leaky_relu\":\n", - " #print(\"leaky_relu\")\n", - " return tf.nn.leaky_relu(layer, alpha, name = name)\n", + " return tf.nn.leaky_relu(layer, alpha, name=name)\n", " elif act == \"softmax\":\n", - " #print(\"softmax\")\n", - " return tf.nn.softmax(layer, name = name)\n", - " #print(\"relu\")\n", - " return tf.nn.relu(layer, name = name)" + " return tf.nn.softmax(layer, name=name)\n", + " elif act == \"sigmoid\":\n", + " return tf.nn.sigmoid(layer, name=name)\n", + " elif act == \"tanh\":\n", + " return tf.nn.tanh(layer, name=name)\n", + " return tf.nn.relu(layer, name=name)" ] }, { @@ -304,35 +301,27 @@ "outputs": [], "source": [ "# Se calcula la salida de la capa.\n", - "fc1 = fully_connected_layer(X,NODES_INPUT,NODES_H1,\"fc1\")\n", + "fc1 = fully_connected_layer(X, NODES_INPUT, NODES_H1, \"fc1\",xavier_init=XAVIER_INIT)\n", "\n", "# Activación de la capa.\n", "fc1 = activation_function(fc1, \"relu\", \"fc1\")\n", "\n", - "# Se añade histograma de activación de la capa para visualizar en\n", - "# TensorBoard.\n", + "# Se añade histograma de activación de la capa para visualizar en TensorBoard.\n", "tf.summary.histogram(\"fc1/relu\", fc1)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "raw", "metadata": {}, - "outputs": [], "source": [ - "fc2 = fully_connected_layer(fc1,NODES_H1,NODES_H2,\"fc2\")\n", - "fc2 = activation_function(fc2,\"relu\",\"fc2\")\n", - "tf.summary.histogram(\"fc2/relu\", fc2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fc3 = fully_connected_layer(fc2,NODES_H2,NODES_H3,\"fc3\")\n", - "fc3 = activation_function(fc3,\"relu\",\"fc3\")\n", + "fc2 = fully_connected_layer(fc1, NODES_H1, NODES_H2, \"fc2\")\n", + "fc2 = activation_function(fc2, \"relu\", \"fc2\")\n", + "tf.summary.histogram(\"fc2/relu\", fc2)\n", + "#fc2 = tf.nn.dropout(fc2, pkeep)\n", + "# In[ ]:\n", + "#fc2 = tf.nn.dropout(fc2, pkeep)\n", + "fc3 = fully_connected_layer(fc2, NODES_H2, NODES_H3, \"fc3\")\n", + "fc3 = activation_function(fc3, \"relu\", \"fc3\")\n", "tf.summary.histogram(\"fc2/relu\", fc3)" ] }, @@ -342,9 +331,9 @@ "metadata": {}, "outputs": [], "source": 
[
-    "output = fully_connected_layer(fc3, NODES_H3, NODES_OUPUT,\"output\")\n",
-    "nah_predicted = activation_function(output, \"softmax\",\"output\")\n",
-    "tf.summary.histogram(\"output/softmax\", output)"
+    "output = fully_connected_layer(fc1, NODES_H1, NODES_OUPUT, \"output\", xavier_init=XAVIER_INIT)\n",
+    "nah_predicted = activation_function(output, \"sigmoid\", \"output\")\n",
+    "tf.summary.histogram(\"output/sigmoid\", output)"
   ]
  },
  {
@@ -484,6 +473,9 @@
    "# Se inicializan los valores de los tensores.\n",
    "init = tf.global_variables_initializer()\n",
    "\n",
+    "# Add ops to save and restore all the variables.\n",
+    "saver = tf.train.Saver()\n",
+    "\n",
    "# Ejecutando sesión\n",
    "sess.run(init)"
   ]
  },
@@ -496,7 +488,7 @@
   },
   "outputs": [],
   "source": [
-    "def feed_dict(*placeholders, memUsage = False): \n",
+    "def feed_dict(*placeholders):\n",
     "    return {X: placeholders[0],\n",
     "            y: placeholders[1]}"
   ]
 },
@@ -508,18 +500,24 @@
   "outputs": [],
   "source": [
     "for i in range(EPOCHS):\n",
-    "    \n",
+    "    # learning rate decay\n",
+    "    # https://github.com/martin-gorner/tensorflow-mnist-tutorial/blob/master/mnist_2.1_five_layers_relu_lrdecay.py\n",
     "    # Se corre la sesión y se pasan como argumentos la función de error (loss),\n",
     "    # el optimizador de backpropagation (train_op) y los histogramas (summaryMerged)\n",
-    "    _loss, _, sumOut = sess.run([loss, train_op, summaryMerged],\n",
-    "                                feed_dict=feed_dict(es_vectores,na_vectores))\n",
+    "\n",
+    "    _loss, _, sumOut = sess.run([loss, train_op, summaryMerged], feed_dict={X: es_vectores, y: na_vectores})\n",
     "    # Actualiza los histogramas.\n",
     "    writer.add_summary(sumOut, i)\n",
-    "    \n",
+    "\n",
     "    # Muestra el valor del error cada 500 pasos de entrenamiento.\n",
     "    if (i % 500) == 0:\n",
-    "        print(\"Epoch:\",i,\"/\",EPOCHS, \"\\tLoss:\", _loss)\n",
-    "\n",
+    "        train_accuracy = accuracy.eval(session=sess, feed_dict={X: es_vectores, y: na_vectores})\n",
+    "        print(\"Epoch:\", i, \"/\", EPOCHS, \"\\tLoss:\", _loss, \"\\tAccuracy:\", train_accuracy)\n",
+    "\n",
+    "save_model = saver.save(sess, SAVE_PATH)\n",
+    "print(\"Model saved in file: %s\" % save_model)\n",
     "#print(\"\\nAccuracy:\", accuracy.eval(feed_dict=feed_dict(es_vectores, na_vectores)))\n",
     "writer.close()\n"
   ]
  },
diff --git a/prototypes/v0.py b/prototypes/v0.py
index 3331347..81033ff 100644
--- a/prototypes/v0.py
+++ b/prototypes/v0.py
@@ -5,7 +5,7 @@
 import tensorflow as tf
 import utils
 
-from math import exp
+# from math import exp
 
 __author__ = "Olivares Castillo José Luis"
 
 # reset everything to rerun in jupyter
@@ -69,8 +69,10 @@
 NODES_H2 = 79  # 42 - 20
 NODES_OUPUT = na_vectores[0].size
 
+# Inicializar pesos usando xavier_init
+XAVIER_INIT = False
 
-EPOCHS = 350000
+EPOCHS = 370000
 
 
 # Ruta donde se guarda el grafo para visualizar en TensorBoard.
@@ -125,7 +127,8 @@
 # In[ ]:
 
 
-def fully_connected_layer(input, size_in, size_out, name, stddev=0.1, dtype=tf.float64):
+def fully_connected_layer(input, size_in, size_out, name,
+                          xavier_init=True, stddev=0.1, dtype=tf.float64):
     """Función para crear capas.
 
     Arguments:
@@ -133,6 +136,8 @@ def fully_connected_layer(input, size_in, size_out, name, stddev=0.1, dtype=tf.f
         size_in {int}, size_out {int} -- Dimensiones de entrada y salida de la capa.
         name {str} -- Nombre de la capa. Default: fc.
     Keyword Arguments:
+        xavier_init {bool} -- Inicializar pesos usando el método Xavier.
+                              Xavier Glorot and Yoshua Bengio (2010)
         stddev {float} -- Desviación estándar con la que se inicializan
                           los pesos de la capa.
(default: {0}) dtype {function} -- Floating-point representation. (default: {tf.float64}) @@ -142,15 +147,27 @@ def fully_connected_layer(input, size_in, size_out, name, stddev=0.1, dtype=tf.f with tf.name_scope(name): # Tensor de pesos. - W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev, - dtype=dtype), name="W") + """This initializer is designed to keep the scale of the gradients + roughly the same in all layers. In uniform distribution this ends up + being the range: x = sqrt(6. / (in + out)); [-x, x] and for normal + distribution a standard deviation of sqrt(2. / (in + out)) is used. + Xavier Glorot and Yoshua Bengio (2010) + http://www.jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf + """ + + if xavier_init: + W = tf.get_variable(name="W" + name, shape=[size_in, size_out], dtype=dtype, + initializer=tf.contrib.layers.xavier_initializer( + dtype=dtype), + use_resource=True) + else: + W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev, + dtype=dtype), name="W") # Bias. b = tf.Variable(tf.constant( 0.1, shape=[size_out], dtype=dtype), name="b") - # Realiza la operación input * + b (tf.nn.xw_plus_b) - #output = tf.add(tf.matmul(input, W), b) - + # h(x) = (input * weights) + bias output = tf.nn.xw_plus_b(input, W, b) # visualizarlos en TensorBoard. tf.summary.histogram("weights", W) @@ -223,7 +240,8 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # Se calcula la salida de la capa. -fc1 = fully_connected_layer(X, NODES_INPUT, NODES_H1, "fc1") +fc1 = fully_connected_layer(X, NODES_INPUT, NODES_H1, "fc1", + xavier_init=XAVIER_INIT) # Activación de la capa. fc1 = activation_function(fc1, "relu", "fc1") @@ -255,7 +273,8 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # In[ ]: -output = fully_connected_layer(fc1, NODES_H1, NODES_OUPUT, "output") +output = fully_connected_layer(fc1, NODES_H1, NODES_OUPUT, "output", + xavier_init=XAVIER_INIT) nah_predicted = activation_function(output, "sigmoid", "output") tf.summary.histogram("output/sigmoid", output) @@ -336,8 +355,7 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # In[ ]: -LOGPATH = utils.make_hparam_string( - "Adagrad", "H", NODES_H1, "LR", LEARNING_RATE) +LOGPATH = utils.make_hparam_string("80ACC_Adagrad", "H", NODES_H1, "LR", LEARNING_RATE) print("logpath:", LOGPATH) @@ -368,11 +386,13 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # Se inicializan los valores de los tensores. init = tf.global_variables_initializer() +# Add ops to save and restore all the variables. 
+saver = tf.train.Saver()
+
 # Ejecutando sesión
 sess.run(init)
 
-
 # In[ ]:
@@ -383,7 +403,7 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6
 min_learning_rate = 0.03
 decay_speed = 5000
 learning_rate = min_learning_rate + \
-    (max_learning_rate - min_learning_rate) * exp(-i / decay_speed)"""
+    # (max_learning_rate - min_learning_rate) * exp(-i / decay_speed)"""
 
 # Se corre la sesión y se pasan como argumentos la función de error (loss),
 # el optimizador de backpropagation (train_op) y los histogramas (summaryMerged)
@@ -400,6 +420,8 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6
                                        y: na_vectores})
         print("Epoch:", i, "/", EPOCHS, "\tLoss:",
               _loss, "\tAccuracy:", train_accuracy)
-
+SAVE_PATH = "./models/Adagrad_H_305_LR_0.433.ckpt"
+save_model = saver.save(sess, SAVE_PATH)
+print("Model saved in file: %s" % save_model)
 #print("\nAccuracy:", accuracy.eval(feed_dict=feed_dict(es_vectores, na_vectores)))
 writer.close()
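
The two pieces this patch introduces — the `xavier_init` switch inside `fully_connected_layer` and checkpointing with `tf.train.Saver` — are easier to review in isolation than inside the notebook JSON. Below is a minimal, self-contained sketch under stated assumptions: TensorFlow 1.x APIs, random toy data, placeholder dimensions, learning rate, and `SAVE_PATH`, and `tf.nn.relu`/`tf.nn.sigmoid` called directly instead of the repo's `activation_function` helper. It is not the project's actual es→na pipeline.

```python
# Minimal sketch, TensorFlow 1.x only (tf.contrib is removed in 2.x).
# Dimensions, the learning rate, the toy data, and SAVE_PATH are placeholders.
import numpy as np
import tensorflow as tf

NODES_INPUT, NODES_H1, NODES_OUTPUT = 300, 300, 300
XAVIER_INIT = True          # the patch itself ships with XAVIER_INIT = False
SAVE_PATH = "./example_model.ckpt"


def fully_connected_layer(x, size_in, size_out, name,
                          xavier_init=True, stddev=0.1, dtype=tf.float64):
    """(x * W) + b, with the weight initializer chosen by `xavier_init`."""
    with tf.name_scope(name):
        if xavier_init:
            # Xavier/Glorot keeps activation and gradient variance roughly
            # constant across layers (normal variant: stddev = sqrt(2/(in+out))).
            W = tf.get_variable("W_" + name, shape=[size_in, size_out], dtype=dtype,
                                initializer=tf.contrib.layers.xavier_initializer(dtype=dtype))
        else:
            W = tf.Variable(tf.truncated_normal([size_in, size_out],
                                                stddev=stddev, dtype=dtype), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out], dtype=dtype), name="b")
        return tf.nn.xw_plus_b(x, W, b)


tf.reset_default_graph()
X = tf.placeholder(tf.float64, [None, NODES_INPUT], name="X")
y = tf.placeholder(tf.float64, [None, NODES_OUTPUT], name="y")

fc1 = tf.nn.relu(fully_connected_layer(X, NODES_INPUT, NODES_H1, "fc1",
                                       xavier_init=XAVIER_INIT))
output = tf.nn.sigmoid(fully_connected_layer(fc1, NODES_H1, NODES_OUTPUT, "output",
                                             xavier_init=XAVIER_INIT))

loss = tf.reduce_mean(tf.square(y - output))
train_op = tf.train.AdagradOptimizer(0.1).minimize(loss)
saver = tf.train.Saver()    # created after the variables it should track

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    xs, ys = np.random.rand(8, NODES_INPUT), np.random.rand(8, NODES_OUTPUT)
    for i in range(100):
        _, _loss = sess.run([train_op, loss], feed_dict={X: xs, y: ys})
    # saver.save returns the checkpoint path it wrote; print that value
    # rather than an undefined lowercase save_path.
    save_model = saver.save(sess, SAVE_PATH)
    print("Model saved in file: %s" % save_model)

# Restoring later (new session, same graph definition):
# with tf.Session() as sess:
#     saver.restore(sess, SAVE_PATH)
```

Note that `tf.train.Saver()` with no arguments only captures variables that already exist when it is constructed, which is why the patch adds it right before `sess.run(init)` rather than earlier in the notebook.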