From 8c493dd838e8edae229fcbf915902d012f7de766 Mon Sep 17 00:00:00 2001 From: Quentin Fortier Date: Sat, 18 Nov 2023 14:09:04 +0000 Subject: [PATCH 1/2] Add prog_dyn.ipynb --- files/dl/algo/prog_dyn/cours/prog_dyn.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/files/dl/algo/prog_dyn/cours/prog_dyn.ipynb b/files/dl/algo/prog_dyn/cours/prog_dyn.ipynb index 7a98136d..9cb41927 100644 --- a/files/dl/algo/prog_dyn/cours/prog_dyn.ipynb +++ b/files/dl/algo/prog_dyn/cours/prog_dyn.ipynb @@ -1 +1 @@ -{"cells": [{"cell_type": "markdown", "metadata": {}, "source": ["# Programmation dynamique\n\n", "\n", "\n", ""]}, {"cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": ["def knapsack(c, w, v):\n", " \"\"\" \n", " Renvoie la valeur maximum que l'on peut mettre\n", " dans un sac \u00e0 dos de capacit\u00e9 c.\n", " Le i\u00e8me objet a pour poids w[i] et valeur v[i].\n", " \"\"\"\n", " n = len(w) # nombre d'objets\n", " dp = [[0]*(n + 1) for i in range(c + 1)]\n", " # dp[i][j] = valeur max dans un sac de capacit\u00e9 i\n", " # o\u00f9 j est le nombre d'objets autoris\u00e9s\n", " for i in range(1, c + 1):\n", " for j in range(1, n + 1):\n", " if w[j - 1] <= i:\n", " x = v[j - 1] + dp[i - w[j - 1]][j - 1]\n", " dp[i][j] = max(dp[i][j - 1], x)\n", " else:\n", " dp[i][j] = dp[i][j - 1]\n", " return dp[c][n]"]}, {"cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [{"data": {"text/plain": ["19"]}, "execution_count": 6, "metadata": {}, "output_type": "execute_result"}], "source": ["w = [2, 2, 2, 3, 5, 6, 8]\n", "v = [1, 1, 1, 7, 10, 10, 18]\n", "knapsack(10, w, v)"]}, {"cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [{"data": {"text/plain": ["19"]}, "execution_count": 38, "metadata": {}, "output_type": "execute_result"}], "source": ["def knapsack2(c, w, v):\n", " n = len(w)\n", " dp = [0]*(c + 1)\n", " for j in range(n):\n", " dp_ = dp[:] # copie de dp\n", " for i in range(c + 1):\n", " if 
def knapsack_memo(c, w, v):
    """Top-down (memoized) 0/1 knapsack.

    Returns the maximum total value that fits in a knapsack of capacity c,
    where object number k has weight w[k] and value v[k].
    """
    memo = {}  # memo[(cap, count)] = best value with capacity cap, first `count` objects allowed

    def best(cap, count):
        # Nothing fits in an empty knapsack, and nothing can be taken from no objects.
        if cap == 0 or count == 0:
            return 0
        key = (cap, count)
        if key in memo:
            return memo[key]
        # Option 1: leave the last allowed object out.
        memo[key] = best(cap, count - 1)
        # Option 2: take it, when it fits.
        if w[count - 1] <= cap:
            taken = v[count - 1] + best(cap - w[count - 1], count - 1)
            memo[key] = max(memo[key], taken)
        return memo[key]

    return best(c, len(w))
def bellman(g, r):
    """Single-source shortest paths with the Bellman-Ford algorithm.

    Parameters:
        g: adjacency matrix; g[u][v] is the weight of the edge u -> v and
           float("inf") means "no edge". Entries equal to 0 are skipped,
           so a zero-weight edge cannot be represented with this encoding.
        r: index of the source vertex.

    Returns:
        A list dist where dist[v] is the weight of a shortest path from r
        to v (float("inf") when v is unreachable).

    Negative cycles are not detected.
    """
    n = len(g)
    dist = [float("inf")] * n
    dist[r] = 0
    # A shortest path uses at most n - 1 edges, so n - 1 relaxation passes
    # are required in the worst case (the previous n - 2 was one short).
    for _ in range(n - 1):
        changed = False
        for u in range(n):
            for v in range(len(g[u])):
                if g[u][v] != 0 and dist[u] + g[u][v] < dist[v]:
                    dist[v] = dist[u] + g[u][v]
                    changed = True
        # No distance improved during this pass: the result is final.
        if not changed:
            break
    return dist
def knapsack(c, w, v):
    """Bottom-up 0/1 knapsack.

    Returns the maximum total value that can be put in a knapsack of
    capacity c. Object number k has weight w[k] and value v[k].
    """
    n = len(w)  # number of objects
    # best[cap][count] = maximum value for a knapsack of capacity cap
    # when only the first `count` objects are allowed.
    best = [[0] * (n + 1) for _ in range(c + 1)]
    for cap in range(1, c + 1):
        row = best[cap]
        for count, weight in enumerate(w, start=1):
            without = row[count - 1]
            if weight > cap:
                # The object does not fit: same value as without it.
                row[count] = without
            else:
                with_object = v[count - 1] + best[cap - weight][count - 1]
                row[count] = max(without, with_object)
    return best[c][n]
"output_type": "execute_result"}], "source": ["w = [2, 2, 2, 3, 5, 6, 8]\n", "v = [1, 1, 1, 7, 10, 10, 18]\n", "knapsack(10, w, v)"]}, {"cell_type": "code", "execution_count": 38, "metadata": {}, "outputs": [{"data": {"text/plain": ["19"]}, "execution_count": 38, "metadata": {}, "output_type": "execute_result"}], "source": ["def knapsack2(c, w, v):\n", " n = len(w)\n", " dp = [0]*(c + 1)\n", " for j in range(n):\n", " dp_ = dp[:] # copie de dp\n", " for i in range(c + 1):\n", " if w[j] <= i:\n", " dp[i] = max(dp_[i], v[j] + dp_[i - w[j]])\n", " return dp[-1]\n", "\n", "knapsack2(10, w, v)"]}, {"cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [{"ename": "NameError", "evalue": "name 'w' is not defined", "output_type": "error", "traceback": ["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn [2], line 13\u001b[0m\n\u001b[1;32m 10\u001b[0m \u001b[39mreturn\u001b[39;00m dp[(i, j)]\n\u001b[1;32m 11\u001b[0m \u001b[39mreturn\u001b[39;00m aux(c, \u001b[39mlen\u001b[39m(w))\n\u001b[0;32m---> 13\u001b[0m knapsack_memo(\u001b[39m10\u001b[39m, w, v)\n", "\u001b[0;31mNameError\u001b[0m: name 'w' is not defined"]}], "source": ["def knapsack_memo(c, w, v):\n", " dp = {}\n", " def aux(i, j):\n", " if i == 0 or j == 0:\n", " return 0\n", " if (i, j) not in dp:\n", " dp[(i, j)] = aux(i, j - 1)\n", " if w[j - 1] <= i:\n", " dp[(i, j)] = max(dp[(i, j)], v[j - 1] + aux(i - w[j - 1], j - 1))\n", " return dp[(i, j)]\n", " return aux(c, len(w))\n", "\n", "knapsack_memo(10, w, v)"]}, {"cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [{"ename": "NameError", "evalue": "name 'w' is not defined", "output_type": "error", "traceback": ["\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", "Cell 
from functools import cache


def knapsack_memo2(c, w, v):
    """Top-down 0/1 knapsack memoized with functools.cache.

    Parameters:
        c: capacity of the knapsack.
        w: w[k] is the weight of object k.
        v: v[k] is the value of object k.

    Returns:
        The maximum total value that fits in the knapsack.
    """
    @cache
    def aux(i, j):
        # aux(i, j) = best value for capacity i using the first j objects.
        if i == 0 or j == 0:
            return 0
        # The running maximum must not be called `v`: that name would shadow
        # the list of values and make v[j - 1] index an integer.
        best = aux(i, j - 1)
        if w[j - 1] <= i:
            best = max(best, v[j - 1] + aux(i - w[j - 1], j - 1))
        return best

    return aux(c, len(w))
import copy


def floydwarshall(g):
    """All-pairs shortest paths with the Floyd-Warshall algorithm.

    g is an adjacency matrix (g[u][v] = weight of the edge u -> v).
    Returns a new matrix dist where dist[u][v] is the weight of a shortest
    path from u to v; g itself is left untouched.
    """
    dist = copy.deepcopy(g)
    vertices = range(len(g))
    for k in vertices:  # k: intermediate vertex allowed from this round on
        for u in vertices:
            for v in vertices:
                via_k = dist[u][k] + dist[k][v]
                if via_k < dist[u][v]:
                    dist[u][v] = via_k
    return dist
def d(x, y):
    """Euclidean distance between the points x and y.

    Only the first len(x) coordinates of y are read.
    """
    return sum((x[i] - y[i]) ** 2 for i in range(len(x))) ** 0.5


def voisins(x, X, k):
    """Return the indices in X of the k nearest neighbours of x, closest first.

    Ties are broken by increasing index. When k exceeds len(X), all the
    indices are returned.
    """
    import heapq

    # nsmallest is documented as equivalent to sorted(...)[:k] but does not
    # need to fully sort the n candidates when k is small.
    return heapq.nsmallest(k, range(len(X)), key=lambda i: d(x, X[i]))


def maj(L):
    """Return the most frequent element of L.

    On a tie, the element that appears first in L wins.
    Raises ValueError when L is empty.
    """
    compte = {}  # compte[e] = number of occurrences of e in L
    for e in L:
        compte[e] = compte.get(e, 0) + 1
    return max(compte, key=compte.get)


def knn(x, X, Y, k):
    """Classify x by majority vote among its k nearest neighbours.

    Parameters:
        x: point to classify.
        X: training points.
        Y: Y[i] is the class of X[i].
        k: number of neighbours taking part in the vote (k >= 1).

    Returns:
        The majority class among the k nearest neighbours of x.

    Raises:
        ValueError: if k < 1 (there would be no neighbour to vote).
    """
    if k < 1:
        raise ValueError("k must be at least 1")
    V = voisins(x, X, k)
    return maj([Y[i] for i in V])
def predict(x, k):
    """Predict the class of x with the k-nearest-neighbours rule.

    The vote uses the module-level training set X_train / Y_train.
    """
    return knn(x=x, X=X_train, Y=Y_train, k=k)
def separer(X, Y, p, shuffle=True, seed=None):
    """Split a labelled data set into a training part and a test part.

    Parameters:
        X: sequence of samples.
        Y: Y[i] is the label of X[i].
        p: fraction of the samples that goes to the training set.
        shuffle: when True (default), samples are drawn in random order so
            that data stored class by class does not end up with a single
            class in the test set. Pass False for a sequential split.
        seed: optional seed making the shuffle reproducible.

    Returns:
        (X_train, X_test, Y_train, Y_test) as lists; the training lists
        hold round(p * len(X)) samples, clamped to [0, len(X)].
    """
    import random

    n = len(X)
    indices = list(range(n))
    if shuffle:
        random.Random(seed).shuffle(indices)
    # The previous test `i <= p * n` always put one extra sample in the
    # training set; compute the exact training size instead.
    n_train = min(n, max(0, round(p * n)))
    X_train, X_test, Y_train, Y_test = [], [], [], []
    for rank, i in enumerate(indices):
        if rank < n_train:
            X_train.append(X[i])
            Y_train.append(Y[i])
        else:
            X_test.append(X[i])
            Y_test.append(Y[i])
    return X_train, X_test, Y_train, Y_test
def precision(k):
    """Fraction of the test samples that predict(., k) classifies correctly.

    Reads the module-level test set X_test / Y_test.
    """
    total = len(X_test)
    hits = sum(1 for i in range(total) if predict(X_test[i], k) == Y_test[i])
    return hits / total