From 8abe2c79654e5ec773adf1180ab34000b0007258 Mon Sep 17 00:00:00 2001 From: Olivares Date: Thu, 18 Jan 2018 22:17:52 -0600 Subject: [PATCH] updates ~80%acc --- prototypes/v0.ipynb | 206 ++++++++++++++++++++++---------------------- prototypes/v0.py | 52 +++++++---- 2 files changed, 139 insertions(+), 119 deletions(-) diff --git a/prototypes/v0.ipynb b/prototypes/v0.ipynb index 961ae4a..63feb67 100644 --- a/prototypes/v0.ipynb +++ b/prototypes/v0.ipynb @@ -23,7 +23,6 @@ "source": [ "# reset everything to rerun in jupyter\n", "tf.reset_default_graph()\n", - "\n", "print(\"TensorFlow v{}\".format(tf.__version__))" ] }, @@ -109,24 +108,6 @@ "na_vectores = utils.get_vectors(na,index_na)" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Testing\n", - "La función get_top10_closest obtiene los 10 vectores más cercanos al vector de referencia." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "top_10 = utils.get_top10_closest(es_vectores[0],na_vectores[100:])\n", - "top_10" - ] - }, { "cell_type": "markdown", "metadata": {}, @@ -142,25 +123,22 @@ }, "outputs": [], "source": [ - "LEARNING_RATE = 0.5\n", - "\n", - "# Dimensión de vectores de entrada (número de neuronas en capa de entrada).\n", - "NODES_INPUT = es_vectores[0].size \n", - "\n", - "# Número de neuronas en capas ocultas.\n", - "NODES_H1 = 70 - 20 \n", - "NODES_H2 = 42 - 20\n", - "NODES_H3 = 70 - 20\n", - "\n", - "# (número de neuronas en capa de entrada).\n", + "LEARNING_RATE = 0.46\n", + "NODES_INPUT = es_vectores[0].size\n", + "NODES_H1 = 300\n", + "#NODES_H2 = 1\n", + "#NODES_H3 = 1\n", "NODES_OUPUT = na_vectores[0].size\n", - "\n", - "\n", "EPOCHS = 100000\n", "\n", + "# Inicializar pesos con método xavier_init\n", + "XAVIER_INIT = False\n", + "\n", "# Ruta donde se guarda el grafo para visualizar en TensorBoard.\n", - "LOGPATH = utils.make_hparam_string(\"MSE\",\"RELU\",\"Adagrad\",\"H\",NODES_H1,\n", - " NODES_H2,NODES_H3,\"LR\",LEARNING_RATE)" + "LOGPATH = utils.make_hparam_string(\"80ACC_Adagrad\", \"H\", NODES_H1, \"LR\", LEARNING_RATE)\n", + "\n", + "# Ruta para guardar el modelo generado.\n", + "SAVE_PATH = \"./models/Adagrad_H_305_LR_0.433.ckpt\"" ] }, { @@ -197,21 +175,33 @@ "metadata": {}, "source": [ "# Función para crear las capas de la red.\n", + " \n", + "Arguments:\n", + "* input {Tensor} -- Tensor de entrada a la capa.\n", + "* size_in {int}, size_out {int} -- Dimensiones de entrada y salida de la capa.\n", + "* name {str} -- Nombre de la capa. Default: fc.\n", + "Keyword Arguments:\n", + "* xavier_init {bool} -- Inicializar pesos empleando el método Xavier.\n", + "* stddev {float} -- Desviación estándar con la que se inicializan los pesos de la capa. (default: {0})\n", + "* dtype {function} -- Floating-point representation. (default: {tf.float64})\n", "\n", + "Returns:\n", + "* Tensor -- Salida de la capa: (input * Weights) + bias\n", "\n", - "Función para crear capas.\n", - "\n", - "Args:\n", - "* input (Tensor): Tensor de entrada a la capa.\n", - "* size_in, size_out (int): Dimensiones de entrada y salida de la capa.\n", - "* name (str): Nombre de la capa. 
Default: fc.\n", - "* stddev (float): Desviación estándar con la que se inicializan los pesos de la capa.\n", - "* dtype: Floating-point representation.\n", + "# Inicialización de pesos.\n", + "Si la bandera `XAVIER_INIT` es `True` se emplea el método Xavier, en caso contrario los pesos se inicializan con valores siguiendo una distribución normal.\n", + "## Xavier Initialization\n", + ">This initializer is designed to keep the scale of the gradients roughly the same in all layers. In uniform distribution this ends up being the \n", + "range: x = sqrt(6. / (in + out)); [-x, x] and for normal distribution a standard deviation of sqrt(2. / (in + out)) is used.\n", + "[Xavier Glorot and Yoshua Bengio (2010)](http://www.jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf)\n", "\n", - "Returns:\n", - "* act (Tensor): $(input * weights) + bias $\n", + ">Why’s Xavier initialization important?\n", + "In short, it helps signals reach deep into the network.\n", + "* If the weights in a network start too small, then the signal shrinks as it passes through each layer until it’s too tiny to be useful.\n", + "* If the weights in a network start too large, then the signal grows as it passes through each layer until it’s too massive to be useful.\n", "\n", - "\n" + ">Xavier initialization makes sure the weights are ‘just right’, keeping the signal in a reasonable range of values through many layers.\n", + "[Ver liga](http://andyljones.tumblr.com/post/110998971763/an-explanation-of-xavier-initialization)" ] }, { @@ -222,25 +212,26 @@ }, "outputs": [], "source": [ - "def fully_connected_layer(input, size_in, size_out, name, stddev=0.1,\n", - " dtype = tf.float64):\n", + "def fully_connected_layer(input, size_in, size_out, name,xavier_init=True, stddev=0.1, dtype=tf.float64):\n", " with tf.name_scope(name):\n", - " # Tensor de pesos.\n", - " W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev,\n", - " dtype=dtype), name=\"W\")\n", - " # Bias.\n", - " b = tf.Variable(tf.constant(0.1, shape=[size_out], dtype = dtype), name=\"b\")\n", - " \n", - " # Realiza la operación input * + b (tf.nn.xw_plus_b)\n", - " act = tf.add(tf.matmul(input,W), b)\n", + " # Inicializar pesos\n", + " if xavier_init:\n", + " W = tf.get_variable(name=\"W_\" + name, shape=[size_in, size_out], dtype=dtype,\n", + " initializer=tf.contrib.layers.xavier_initializer(dtype=dtype),\n", + " use_resource=True)\n", + " else:\n", + " W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev,dtype=dtype), name=\"W\")\n", + " # Bias\n", + " b = tf.Variable(tf.constant(0.1, shape=[size_out], dtype=dtype), name=\"b\")\n", + "\n", + " # h(x) = (input * weights) + bias\n", + " output = tf.nn.xw_plus_b(input, W, b)\n", " \n", - " # Se generan histogramas de los pesos y la salida de la capa para poder\n", - " # visualizarlos en TensorBoard.\n", + " # visualizarlos pesos en TensorBoard.\n", " tf.summary.histogram(\"weights\", W)\n", - " #tf.summary.histogram(\"biases\", b)\n", - " tf.summary.histogram(\"activations\", act)\n", - " \n", - " return act" + " tf.summary.histogram(\"xw_plus_b\", output)\n", + "\n", + " return output" ] }, { @@ -248,16 +239,20 @@ "metadata": {}, "source": [ "# Activación de capas.\n", - "Función para activar la salida de las capas.\n", + "Esta función aplica la activación a la capa neuronal.\n", "\n", - "Args:\n", - "* layer (Tensor): Capa que será activada.\n", - "* name (string): Nombre de la capa para mostrar en `TensorBoard`.\n", - "* act (string): Función de activación. 
Default: [ReLU](https://www.tensorflow.org/api_docs/python/tf/nn/relu). También se pueden utilizar [Leaky ReLU](https://www.tensorflow.org/api_docs/python/tf/nn/leaky_relu) con un parámetro `alpha = 0.2` por defecto y [Softmax](https://www.tensorflow.org/api_docs/python/tf/nn/softmax) para la capa de salida.\n", + "Arguments:\n", + "* layer {Tensor} -- Capa a activar.\n", + "* act {tf.function} -- Función de activación (default: {tf.nn.relu}).\n", + "* name {str} -- Nombre para visualización de activación en TensorBoard.\n", + "\n", + "Keyword Arguments:\n", + "* alpha {tf.constant} -- Constante que se usa como argumento para leaky_relu (default: {tf.constant(0.2)})\n", + "* dtype {tf.function} -- Floating-point representation. (default: {tf.float64})\n", "\n", "Returns:\n", - " Capa con función de activación aplicada.\n", - " \n", + "* Tensor -- Capa con función de activación aplicada.\n", + "\n", "**NOTA:**\n", ">3.4 Why do we use a leaky ReLU and not a ReLU as an activation function?\n", "We want gradients to flow while we backpropagate through the network. \n", @@ -279,15 +274,17 @@ }, "outputs": [], "source": [ - "def activation_function(layer, act, name, alpha = tf.constant(0.2, dtype=tf.float64)):\n", + "def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float64)):\n", + " # Funciones de activación.\n", " if act == \"leaky_relu\":\n", - " #print(\"leaky_relu\")\n", - " return tf.nn.leaky_relu(layer, alpha, name = name)\n", + " return tf.nn.leaky_relu(layer, alpha, name=name)\n", " elif act == \"softmax\":\n", - " #print(\"softmax\")\n", - " return tf.nn.softmax(layer, name = name)\n", - " #print(\"relu\")\n", - " return tf.nn.relu(layer, name = name)" + " return tf.nn.softmax(layer, name=name)\n", + " elif act == \"sigmoid\":\n", + " return tf.nn.sigmoid(layer, name=name)\n", + " elif act == \"tanh\":\n", + " return tf.nn.tanh(layer, name=name)\n", + " return tf.nn.relu(layer, name=name)" ] }, { @@ -304,35 +301,27 @@ "outputs": [], "source": [ "# Se calcula la salida de la capa.\n", - "fc1 = fully_connected_layer(X,NODES_INPUT,NODES_H1,\"fc1\")\n", + "fc1 = fully_connected_layer(X, NODES_INPUT, NODES_H1, \"fc1\",xavier_init=XAVIER_INIT)\n", "\n", "# Activación de la capa.\n", "fc1 = activation_function(fc1, \"relu\", \"fc1\")\n", "\n", - "# Se añade histograma de activación de la capa para visualizar en\n", - "# TensorBoard.\n", + "# Se añade histograma de activación de la capa para visualizar en TensorBoard.\n", "tf.summary.histogram(\"fc1/relu\", fc1)" ] }, { - "cell_type": "code", - "execution_count": null, + "cell_type": "raw", "metadata": {}, - "outputs": [], "source": [ - "fc2 = fully_connected_layer(fc1,NODES_H1,NODES_H2,\"fc2\")\n", - "fc2 = activation_function(fc2,\"relu\",\"fc2\")\n", - "tf.summary.histogram(\"fc2/relu\", fc2)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "fc3 = fully_connected_layer(fc2,NODES_H2,NODES_H3,\"fc3\")\n", - "fc3 = activation_function(fc3,\"relu\",\"fc3\")\n", + "fc2 = fully_connected_layer(fc1, NODES_H1, NODES_H2, \"fc2\")\n", + "fc2 = activation_function(fc2, \"relu\", \"fc2\")\n", + "tf.summary.histogram(\"fc2/relu\", fc2)\n", + "#fc2 = tf.nn.dropout(fc2, pkeep)\n", + "# In[ ]:\n", + "#fc2 = tf.nn.dropout(fc2, pkeep)\n", + "fc3 = fully_connected_layer(fc2, NODES_H2, NODES_H3, \"fc3\")\n", + "fc3 = activation_function(fc3, \"relu\", \"fc3\")\n", "tf.summary.histogram(\"fc2/relu\", fc3)" ] }, @@ -342,9 +331,9 @@ "metadata": {}, "outputs": [], "source": 
[
-    "output = fully_connected_layer(fc3, NODES_H3, NODES_OUPUT,\"output\")\n",
-    "nah_predicted = activation_function(output, \"softmax\",\"output\")\n",
-    "tf.summary.histogram(\"output/softmax\", output)"
+    "output = fully_connected_layer(fc1, NODES_H1, NODES_OUPUT, \"output\", xavier_init=XAVIER_INIT)\n",
+    "nah_predicted = activation_function(output, \"sigmoid\", \"output\")\n",
+    "tf.summary.histogram(\"output/sigmoid\", output)"
   ]
  },
  {
@@ -484,6 +473,9 @@
    "# Se inicializan los valores de los tensores.\n",
    "init = tf.global_variables_initializer()\n",
    "\n",
+    "# Add ops to save and restore all the variables.\n",
+    "saver = tf.train.Saver()\n",
+    "\n",
    "# Ejecutando sesión\n",
    "sess.run(init)"
   ]
  },
@@ -496,7 +488,7 @@
   },
   "outputs": [],
   "source": [
-    "def feed_dict(*placeholders, memUsage = False): \n",
+    "def feed_dict(*placeholders):\n",
     "    return {X: placeholders[0],\n",
     "            y: placeholders[1]}"
   ]
 },
@@ -508,18 +500,24 @@
   "outputs": [],
   "source": [
     "for i in range(EPOCHS):\n",
-    "    \n",
+    "    # learning rate decay\n",
+    "    # https://github.com/martin-gorner/tensorflow-mnist-tutorial/blob/master/mnist_2.1_five_layers_relu_lrdecay.py\n",
     "    # Se corre la sesión y se pasan como argumentos la función de error (loss),\n",
     "    # el optimizador de backpropagation (train_op) y los histogramas (summaryMerged)\n",
-    "    _loss, _, sumOut = sess.run([loss, train_op, summaryMerged],\n",
-    "                                feed_dict=feed_dict(es_vectores,na_vectores))\n",
+    "\n",
+    "    _loss, _, sumOut = sess.run([loss, train_op, summaryMerged], feed_dict={X: es_vectores, y: na_vectores})\n",
     "    # Actualiza los histogramas.\n",
     "    writer.add_summary(sumOut, i)\n",
-    "    \n",
+    "\n",
     "    # Muestra el valor del error cada 500 pasos de entrenamiento.\n",
     "    if (i % 500) == 0:\n",
-    "        print(\"Epoch:\",i,\"/\",EPOCHS, \"\\tLoss:\", _loss)\n",
-    "\n",
+    "        train_accuracy = accuracy.eval(session=sess, feed_dict={X: es_vectores, y: na_vectores})\n",
+    "        print(\"Epoch:\", i, \"/\", EPOCHS, \"\\tLoss:\", _loss, \"\\tAccuracy:\", train_accuracy)\n",
+    "\n",
+    "save_model = saver.save(sess, SAVE_PATH)\n",
+    "print(\"Model saved in file: %s\" % save_model)\n",
     "#print(\"\\nAccuracy:\", accuracy.eval(feed_dict=feed_dict(es_vectores, na_vectores)))\n",
     "writer.close()\n"
   ]
  },
diff --git a/prototypes/v0.py b/prototypes/v0.py
index 3331347..81033ff 100644
--- a/prototypes/v0.py
+++ b/prototypes/v0.py
@@ -5,7 +5,7 @@
 import tensorflow as tf
 import utils
 
-from math import exp
+# from math import exp
 
 __author__ = "Olivares Castillo José Luis"
 
 # reset everything to rerun in jupyter
@@ -69,8 +69,10 @@
 NODES_H2 = 79  # 42 - 20
 NODES_OUPUT = na_vectores[0].size
 
+# Inicializar pesos usando xavier_init
+XAVIER_INIT = False
 
-EPOCHS = 350000
+EPOCHS = 370000
 
 
 # Ruta donde se guarda el grafo para visualizar en TensorBoard.
@@ -125,7 +127,8 @@
 # In[ ]:
 
 
-def fully_connected_layer(input, size_in, size_out, name, stddev=0.1, dtype=tf.float64):
+def fully_connected_layer(input, size_in, size_out, name,
+                          xavier_init=True, stddev=0.1, dtype=tf.float64):
     """Función para crear capas.
 
     Arguments:
@@ -133,6 +136,8 @@ def fully_connected_layer(input, size_in, size_out, name, stddev=0.1, dtype=tf.f
         size_in {int}, size_out {int} -- Dimensiones de entrada y salida de la capa.
         name {str} -- Nombre de la capa. Default: fc.
     Keyword Arguments:
+        xavier_init {bool} -- Inicializar pesos usando el método Xavier.
+                              Xavier Glorot and Yoshua Bengio (2010)
         stddev {float} -- Desviación estándar con la que se inicializan
                           los pesos de la capa.
(default: {0}) dtype {function} -- Floating-point representation. (default: {tf.float64}) @@ -142,15 +147,27 @@ def fully_connected_layer(input, size_in, size_out, name, stddev=0.1, dtype=tf.f with tf.name_scope(name): # Tensor de pesos. - W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev, - dtype=dtype), name="W") + """This initializer is designed to keep the scale of the gradients + roughly the same in all layers. In uniform distribution this ends up + being the range: x = sqrt(6. / (in + out)); [-x, x] and for normal + distribution a standard deviation of sqrt(2. / (in + out)) is used. + Xavier Glorot and Yoshua Bengio (2010) + http://www.jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf + """ + + if xavier_init: + W = tf.get_variable(name="W" + name, shape=[size_in, size_out], dtype=dtype, + initializer=tf.contrib.layers.xavier_initializer( + dtype=dtype), + use_resource=True) + else: + W = tf.Variable(tf.truncated_normal([size_in, size_out], stddev=stddev, + dtype=dtype), name="W") # Bias. b = tf.Variable(tf.constant( 0.1, shape=[size_out], dtype=dtype), name="b") - # Realiza la operación input * + b (tf.nn.xw_plus_b) - #output = tf.add(tf.matmul(input, W), b) - + # h(x) = (input * weights) + bias output = tf.nn.xw_plus_b(input, W, b) # visualizarlos en TensorBoard. tf.summary.histogram("weights", W) @@ -223,7 +240,8 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # Se calcula la salida de la capa. -fc1 = fully_connected_layer(X, NODES_INPUT, NODES_H1, "fc1") +fc1 = fully_connected_layer(X, NODES_INPUT, NODES_H1, "fc1", + xavier_init=XAVIER_INIT) # Activación de la capa. fc1 = activation_function(fc1, "relu", "fc1") @@ -255,7 +273,8 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # In[ ]: -output = fully_connected_layer(fc1, NODES_H1, NODES_OUPUT, "output") +output = fully_connected_layer(fc1, NODES_H1, NODES_OUPUT, "output", + xavier_init=XAVIER_INIT) nah_predicted = activation_function(output, "sigmoid", "output") tf.summary.histogram("output/sigmoid", output) @@ -336,8 +355,7 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # In[ ]: -LOGPATH = utils.make_hparam_string( - "Adagrad", "H", NODES_H1, "LR", LEARNING_RATE) +LOGPATH = utils.make_hparam_string("80ACC_Adagrad", "H", NODES_H1, "LR", LEARNING_RATE) print("logpath:", LOGPATH) @@ -368,11 +386,13 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6 # Se inicializan los valores de los tensores. init = tf.global_variables_initializer() +# Add ops to save and restore all the variables. 
+saver = tf.train.Saver()
+
 # Ejecutando sesión
 sess.run(init)
 
-
 # In[ ]:
@@ -383,7 +403,7 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6
 min_learning_rate = 0.03
 decay_speed = 5000
 learning_rate = min_learning_rate + \
-    (max_learning_rate - min_learning_rate) * exp(-i / decay_speed)"""
+    # (max_learning_rate - min_learning_rate) * exp(-i / decay_speed)"""
 
 # Se corre la sesión y se pasan como argumentos la función de error (loss),
 # el optimizador de backpropagation (train_op) y los histogramas (summaryMerged)
@@ -400,6 +420,8 @@ def activation_function(layer, act, name, alpha=tf.constant(0.2, dtype=tf.float6
                                        y: na_vectores})
         print("Epoch:", i, "/", EPOCHS, "\tLoss:",
               _loss, "\tAccuracy:", train_accuracy)
-
+SAVE_PATH = "./models/Adagrad_H_305_LR_0.433.ckpt"
+save_model = saver.save(sess, SAVE_PATH)
+print("Model saved in file: %s" % save_model)
 #print("\nAccuracy:", accuracy.eval(feed_dict=feed_dict(es_vectores, na_vectores)))
 writer.close()
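
The two pieces this patch introduces — the `xavier_init` switch inside `fully_connected_layer` and checkpointing with `tf.train.Saver` — are easier to review in isolation than inside the notebook JSON. Below is a minimal, self-contained sketch under stated assumptions: TensorFlow 1.x APIs, random toy data, placeholder dimensions, learning rate, and `SAVE_PATH`, and `tf.nn.relu`/`tf.nn.sigmoid` called directly instead of the repo's `activation_function` helper. It is not the project's actual es→na pipeline.

```python
# Minimal sketch, TensorFlow 1.x only (tf.contrib is removed in 2.x).
# Dimensions, the learning rate, the toy data, and SAVE_PATH are placeholders.
import numpy as np
import tensorflow as tf

NODES_INPUT, NODES_H1, NODES_OUTPUT = 300, 300, 300
XAVIER_INIT = True          # the patch itself ships with XAVIER_INIT = False
SAVE_PATH = "./example_model.ckpt"


def fully_connected_layer(x, size_in, size_out, name,
                          xavier_init=True, stddev=0.1, dtype=tf.float64):
    """(x * W) + b, with the weight initializer chosen by `xavier_init`."""
    with tf.name_scope(name):
        if xavier_init:
            # Xavier/Glorot keeps activation and gradient variance roughly
            # constant across layers (normal variant: stddev = sqrt(2/(in+out))).
            W = tf.get_variable("W_" + name, shape=[size_in, size_out], dtype=dtype,
                                initializer=tf.contrib.layers.xavier_initializer(dtype=dtype))
        else:
            W = tf.Variable(tf.truncated_normal([size_in, size_out],
                                                stddev=stddev, dtype=dtype), name="W")
        b = tf.Variable(tf.constant(0.1, shape=[size_out], dtype=dtype), name="b")
        return tf.nn.xw_plus_b(x, W, b)


tf.reset_default_graph()
X = tf.placeholder(tf.float64, [None, NODES_INPUT], name="X")
y = tf.placeholder(tf.float64, [None, NODES_OUTPUT], name="y")

fc1 = tf.nn.relu(fully_connected_layer(X, NODES_INPUT, NODES_H1, "fc1",
                                       xavier_init=XAVIER_INIT))
output = tf.nn.sigmoid(fully_connected_layer(fc1, NODES_H1, NODES_OUTPUT, "output",
                                             xavier_init=XAVIER_INIT))

loss = tf.reduce_mean(tf.square(y - output))
train_op = tf.train.AdagradOptimizer(0.1).minimize(loss)
saver = tf.train.Saver()    # created after the variables it should track

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    xs, ys = np.random.rand(8, NODES_INPUT), np.random.rand(8, NODES_OUTPUT)
    for i in range(100):
        _, _loss = sess.run([train_op, loss], feed_dict={X: xs, y: ys})
    # saver.save returns the checkpoint path it wrote; print that value
    # rather than an undefined lowercase save_path.
    save_model = saver.save(sess, SAVE_PATH)
    print("Model saved in file: %s" % save_model)

# Restoring later (new session, same graph definition):
# with tf.Session() as sess:
#     saver.restore(sess, SAVE_PATH)
```

Note that `tf.train.Saver()` with no arguments only captures variables that already exist when it is constructed, which is why the patch adds it right before `sess.run(init)` rather than earlier in the notebook.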