imad24
diff --git a/‎models/7SEF/7S_p2_clusters_Autumn.csv
Lines changed: 1047 additions & 0 deletions b/‎models/7SEF/7S_p2_clusters_Autumn.csv
Lines changed: 1047 additions & 0 deletions
diff --git a/‎models/7SEF/7S_p2_clusters_Autumn_v99.csv
Lines changed: 262 additions & 262 deletions b/‎models/7SEF/7S_p2_clusters_Autumn_v99.csv
Lines changed: 262 additions & 262 deletions
diff --git a/‎notebooks/7SEF/1.0-imad-automatic-script.ipynb
Lines changed: 4 additions & 4 deletions b/‎notebooks/7SEF/1.0-imad-automatic-script.ipynb
Lines changed: 4 additions & 4 deletions
diff --git a/‎notebooks/7SEF/Untitled.ipynb
Lines changed: 343 additions & 0 deletions b/‎notebooks/7SEF/Untitled.ipynb
Lines changed: 343 additions & 0 deletions
diff --git a/‎src/app.py
Lines changed: 5 additions & 3 deletions b/‎src/app.py
Lines changed: 5 additions & 3 deletions
@@ -214,7 +214,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 25,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -224,9 +224,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python (dev_py34)",
+   "display_name": "Python (dev_py36)",
    "language": "python",
-   "name": "dev_py34"
+   "name": "dev_py36"
   },
   "language_info": {
    "codemirror_mode": {
@@ -238,7 +238,7 @@
    "name": "python",
    "nbconvert_exporter": "python",
    "pygments_lexer": "ipython3",
-   "version": "3.4.5"
+   "version": "3.6.6"
   }
  },
  "nbformat": 4,
 
@@ -0,0 +1,343 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 52,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy as np\n",
+    "import pandas as pd\n",
+    "import math\n",
+    "import statsmodels.api as sm\n",
+    "from scipy.stats import chisquare\n",
+    "\n",
+    "\n",
+    "import settings\n",
+    "import itertools\n",
+    "from sklearn.preprocessing import (LabelBinarizer, LabelEncoder, MinMaxScaler,\n",
+    "                                   OneHotEncoder, StandardScaler, RobustScaler)\n",
+    "\n",
+    "\n",
+    "\n",
+    "\n",
+    "def get_encoders(le_name,ohe_name,scaler_name):\n",
+    "    le_encoder = np.load(settings.models_path + le_name + '.npy').item()\n",
+    "    ohe_encoder = np.load(settings.models_path + ohe_name + '.npy').item()\n",
+    "    scaler = np.load(settings.models_path + scaler_name + '.npy').item()\n",
+    "\n",
+    "    return le_encoder,ohe_encoder,scaler\n",
+    "\n",
+    "\n",
+    "def create_encoder(df, le_name = None, ohe_name = None, scaler_name=None, categorical_features=None, numeric_features=None):\n",
+    "    \"\"\"Creates the label, one-hot and scaling encoders of a given dataframe (saving them is currently disabled)\n",
+    "    \n",
+    "    Arguments:\n",
+    "        df {Dataframe} -- The Pandas Dataframe to encode\n",
+    "    \n",
+    "    Keyword Arguments:\n",
+    "        categorical_features {list} -- The list of categorical features to consider (default: {None})\n",
+    "        numeric_features {list} -- The list of numeric features, skipped by the categorical encoding and each fitted with a MinMaxScaler (default: {None})\n",
+    "    \n",
+    "    Returns:\n",
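+    "        tuple(DataFrame,dict,OneHotEncoder,dict,list,list) -- The label-encoded dataframe, the per-column label encoders as a dictionary, the one-hot encoder fitted on it, the per-feature scalers as a dictionary, and the categorical and numeric feature lists\n",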
+    "    \"\"\"\n",
+    "\n",
+    "\n",
+    "    if (categorical_features is None):\n",
+    "        categorical_features = sorted(df.drop(numeric_features,axis=1).columns)\n",
+    "    le_dict = {}\n",
+    "    ohe_dict = {}\n",
+    "    scalers = {}\n",
+    "    for index, col in df[categorical_features].sort_index(axis=1).iteritems():\n",
+    "        if (numeric_features is not None) and (index in numeric_features):\n",
+    "            continue\n",
+    "        if index not in categorical_features:\n",
+    "            continue\n",
+    "        le = LabelEncoder().fit(col)\n",
+    "        le_dict[index] = le\n",
+    "        ohe = OneHotEncoder(categories=\"auto\").fit(le.transform(col).reshape((-1, 1)))\n",
+    "        ohe_dict[index] = ohe\n",
+    "\n",
+    "    labeled_df = df[categorical_features].sort_index(axis=1).apply(lambda x: le_dict[x.name].transform(x))\n",
+    "    ohe_encoder = OneHotEncoder(categories=\"auto\").fit(labeled_df)\n",
+    "\n",
+    "    # add numeric features\n",
+    "    if len(numeric_features)==0:\n",
+    "        numeric_features = (list(df.columns.to_series().groupby(df.dtypes).groups[np.dtype('float64')]))\n",
+    "    for f in numeric_features:\n",
+    "        values = df[[f]].values\n",
+    "        scaler = MinMaxScaler().fit(values)\n",
+    "        scalers[f] = scaler\n",
+    "\n",
+    "\n",
+    "    # if le_name is not None:\n",
+    "    #     np.save(settings.models_path + le_name + '.npy', le_dict)\n",
+    "    # if ohe_name is not None:\n",
+    "    #     np.save(settings.models_path + ohe_name + '.npy', ohe_encoder)\n",
+    "    # if scaler_name is not None:\n",
+    "    #     np.save(settings.models_path + scaler_name + '.npy', scalers)\n",
+    "    \n",
+    "    return labeled_df, le_dict, ohe_encoder, scalers, categorical_features, numeric_features\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 66,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<div>\n",
+       "<style scoped>\n",
+       "    .dataframe tbody tr th:only-of-type {\n",
+       "        vertical-align: middle;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe tbody tr th {\n",
+       "        vertical-align: top;\n",
+       "    }\n",
+       "\n",
+       "    .dataframe thead th {\n",
+       "        text-align: right;\n",
+       "    }\n",
+       "</style>\n",
+       "<table border=\"1\" class=\"dataframe\">\n",
+       "  <thead>\n",
+       "    <tr style=\"text-align: right;\">\n",
+       "      <th></th>\n",
+       "      <th>Color</th>\n",
+       "      <th>Size</th>\n",
+       "      <th>Ldate</th>\n",
+       "      <th>Age Group</th>\n",
+       "      <th>Person</th>\n",
+       "      <th>Pname</th>\n",
+       "      <th>Ptype</th>\n",
+       "      <th>Tprice</th>\n",
+       "      <th>Currency</th>\n",
+       "      <th>Sales Season</th>\n",
+       "      <th>s1</th>\n",
+       "      <th>s2</th>\n",
+       "      <th>s3</th>\n",
+       "      <th>s4</th>\n",
+       "      <th>s5</th>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>Product</th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "      <th></th>\n",
+       "    </tr>\n",
+       "  </thead>\n",
+       "  <tbody>\n",
+       "    <tr>\n",
+       "      <th>3E+101_2</th>\n",
+       "      <td>Blue</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>45</td>\n",
+       "      <td>4-6</td>\n",
+       "      <td>Girls</td>\n",
+       "      <td>One Internal Pants</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>39.0</td>\n",
+       "      <td>$</td>\n",
+       "      <td>Winter</td>\n",
+       "      <td>101.0</td>\n",
+       "      <td>261.0</td>\n",
+       "      <td>309.0</td>\n",
+       "      <td>297.0</td>\n",
+       "      <td>323.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3E+201_2</th>\n",
+       "      <td>Red</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>45</td>\n",
+       "      <td>4-6</td>\n",
+       "      <td>Girls</td>\n",
+       "      <td>One Internal Pants</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>39.0</td>\n",
+       "      <td>$</td>\n",
+       "      <td>Winter</td>\n",
+       "      <td>81.0</td>\n",
+       "      <td>266.0</td>\n",
+       "      <td>297.0</td>\n",
+       "      <td>270.0</td>\n",
+       "      <td>257.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>3E+301_2</th>\n",
+       "      <td>Blue</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>45</td>\n",
+       "      <td>4-6</td>\n",
+       "      <td>Girls</td>\n",
+       "      <td>One Internal Pants</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>39.0</td>\n",
+       "      <td>$</td>\n",
+       "      <td>Winter</td>\n",
+       "      <td>49.0</td>\n",
+       "      <td>179.0</td>\n",
+       "      <td>190.0</td>\n",
+       "      <td>192.0</td>\n",
+       "      <td>179.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30E000400_2</th>\n",
+       "      <td>Black</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>45</td>\n",
+       "      <td>4-6</td>\n",
+       "      <td>Girls</td>\n",
+       "      <td>One Internal Pants</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>39.0</td>\n",
+       "      <td>$</td>\n",
+       "      <td>Winter</td>\n",
+       "      <td>55.0</td>\n",
+       "      <td>222.0</td>\n",
+       "      <td>261.0</td>\n",
+       "      <td>275.0</td>\n",
+       "      <td>279.0</td>\n",
+       "    </tr>\n",
+       "    <tr>\n",
+       "      <th>30E823101_2</th>\n",
+       "      <td>Grey</td>\n",
+       "      <td>No Size</td>\n",
+       "      <td>39</td>\n",
+       "      <td>4-6</td>\n",
+       "      <td>Girls</td>\n",
+       "      <td>One Internal Pants</td>\n",
+       "      <td>Thick</td>\n",
+       "      <td>39.0</td>\n",
+       "      <td>$</td>\n",
+       "      <td>Winter</td>\n",
+       "      <td>3.0</td>\n",
+       "      <td>15.0</td>\n",
+       "      <td>18.0</td>\n",
+       "      <td>30.0</td>\n",
+       "      <td>30.0</td>\n",
+       "    </tr>\n",
+       "  </tbody>\n",
+       "</table>\n",
+       "</div>"
+      ],
+      "text/plain": [
+       "             Color     Size  Ldate Age Group Person               Pname  \\\n",
+       "Product                                                                   \n",
+       "3E+101_2      Blue    Thick     45       4-6  Girls  One Internal Pants   \n",
+       "3E+201_2       Red    Thick     45       4-6  Girls  One Internal Pants   \n",
+       "3E+301_2      Blue    Thick     45       4-6  Girls  One Internal Pants   \n",
+       "30E000400_2  Black    Thick     45       4-6  Girls  One Internal Pants   \n",
+       "30E823101_2   Grey  No Size     39       4-6  Girls  One Internal Pants   \n",
+       "\n",
+       "             Ptype  Tprice Currency Sales Season     s1     s2     s3     s4  \\\n",
+       "Product                                                                        \n",
+       "3E+101_2     Thick    39.0        $       Winter  101.0  261.0  309.0  297.0   \n",
+       "3E+201_2     Thick    39.0        $       Winter   81.0  266.0  297.0  270.0   \n",
+       "3E+301_2     Thick    39.0        $       Winter   49.0  179.0  190.0  192.0   \n",
+       "30E000400_2  Thick    39.0        $       Winter   55.0  222.0  261.0  275.0   \n",
+       "30E823101_2  Thick    39.0        $       Winter    3.0   15.0   18.0   30.0   \n",
+       "\n",
+       "                s5  \n",
+       "Product             \n",
+       "3E+101_2     323.0  \n",
+       "3E+201_2     257.0  \n",
+       "3E+301_2     179.0  \n",
+       "30E000400_2  279.0  \n",
+       "30E823101_2   30.0  "
+      ]
+     },
+     "execution_count": 66,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "from data.preprocessing import load_file\n",
+    "\n",
+    "df = load_file(\"clf_features\", type_=\"P\", index = [\"Product\"])\n",
+    "\n",
+    "categorical_features = [\"Color\",\"Size\",\"Age Group\",\"Ldate\",\"Person\",\"Pname\",\"Ptype\",\"Currency\",\"Sales Season\"]\n",
+    "numeric_features = [\"Tprice\",\"s1\",\"s2\",\"s3\",\"s4\",\"s5\"]\n",
+    "df.head()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 68,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "23.3 ms ± 3.11 ms per loop (mean ± std. dev. of 7 runs, 10 loops each)\n"
+     ]
+    }
+   ],
+   "source": [
+    "%timeit labeled_df, le_dict, ohe_encoder, scalers, categorical_features, num_features = create_encoder(df, numeric_features=numeric_features)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 76,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "numpy.ndarray"
+      ]
+     },
+     "execution_count": 76,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "t= np.zeros((1,1))\n",
+    "\n",
+    "type(t)"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python (dev_py36)",
+   "language": "python",
+   "name": "dev_py36"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.6"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
@@ -1,4 +1,5 @@
 from flask import Flask, jsonify
+from flask_api import status
 import random
 
 from data import import_data
@@ -7,9 +8,10 @@
 
 @app.route("/api/train")
 def train_model():
-    import_data.import_data() 
-    return jsonify("model training"), 404
-
+    if (import_data.import_data()): 
+        return jsonify("model training"), status.HTTP_200_OK
+    else:
+        return "The files are not ready to launch the training", status.HTTP_204_NO_CONTENT