Add files via upload

arpita8 · Sep 21, 2021 · 9139bb8 · 9139bb8
commit 9139bb8
Show file tree

Hide file tree

Showing 2 changed files with 557 additions and 0 deletions.
diff --git a/RL-1.ipynb b/RL-1.ipynb
@@ -0,0 +1,396 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import gym"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 4,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+\n",
+      "|R: | : :\u001b[35mG\u001b[0m|\n",
+      "| : | : : |\n",
+      "| : : : : |\n",
+      "| | : | : |\n",
+      "|Y| : |\u001b[34;1mB\u001b[0m:\u001b[43m \u001b[0m|\n",
+      "+---------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "env = gym.make(\"Taxi-v3\").env\n",
+    "env.render()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 5,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "7"
+      ]
+     },
+     "execution_count": 5,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env.reset()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 7,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+\n",
+      "|\u001b[43mR\u001b[0m: | : :\u001b[34;1mG\u001b[0m|\n",
+      "| : | : : |\n",
+      "| : : : : |\n",
+      "| | : | : |\n",
+      "|Y| : |\u001b[35mB\u001b[0m: |\n",
+      "+---------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "env.render()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 9,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "+---------+\n",
+      "|R: | : :\u001b[35mG\u001b[0m|\n",
+      "| : |\u001b[43m \u001b[0m: : |\n",
+      "| : : : : |\n",
+      "| | : | : |\n",
+      "|\u001b[34;1mY\u001b[0m| : |B: |\n",
+      "+---------+\n",
+      "\n",
+      "Action Space: Discrete(6)\n",
+      "State Space: Discrete(500)\n"
+     ]
+    }
+   ],
+   "source": [
+    "env.reset()\n",
+    "env.render()\n",
+    "print(\"Action Space: {}\".format(env.action_space))\n",
+    "print(\"State Space: {}\".format(env.observation_space))"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 12,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "State : 328\n",
+      "+---------+\n",
+      "|R: | : :\u001b[35mG\u001b[0m|\n",
+      "| : |\u001b[43m \u001b[0m: : |\n",
+      "| : : : : |\n",
+      "| | : | : |\n",
+      "|\u001b[34;1mY\u001b[0m| : |B: |\n",
+      "+---------+\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "state = env.encode(3,1,2,0)\n",
+    "print(\"State :\", state)\n",
+    "env.render()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 13,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "{0: [(1.0, 428, -1, False)],\n",
+       " 1: [(1.0, 228, -1, False)],\n",
+       " 2: [(1.0, 348, -1, False)],\n",
+       " 3: [(1.0, 328, -1, False)],\n",
+       " 4: [(1.0, 328, -10, False)],\n",
+       " 5: [(1.0, 328, -10, False)]}"
+      ]
+     },
+     "execution_count": 13,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "env.P[328]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 17,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Timesteps taken: 1079\n",
+      "Penalties incurred: 354\n"
+     ]
+    }
+   ],
+   "source": [
+    "env.s = 328\n",
+    "epochs = 0\n",
+    "penalties, reward = 0,0\n",
+    "frames= []\n",
+    "done = False\n",
+    "while not done:\n",
+    "    action = env.action_space.sample()\n",
+    "    state, reward, done, info = env.step(action)\n",
+    "    if reward == -10:\n",
+    "        penalties += 1\n",
+    "        \n",
+    "    frames.append({   \n",
+    "    'frame': env.render(mode='ansi'),\n",
+    "        'state': state,\n",
+    "        'action': action,\n",
+    "        'reward': reward\n",
+    "    }\n",
+    "    )\n",
+    "    epochs +=1\n",
+    "print(\"Timesteps taken: {}\".format(epochs))\n",
+    "print(\"Penalties incurred: {}\".format(penalties))\n",
+    "    \n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 25,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Timestep: 1079\n",
+      "State: 0\n",
+      "Action: 5\n",
+      "Reward: 20\n"
+     ]
+    }
+   ],
+   "source": [
+    "from IPython.display import clear_output\n",
+    "from time import sleep\n",
+    "\n",
+    "def print_frames(frames):\n",
+    "    for i, frame in enumerate(frames):\n",
+    "        clear_output(wait=True)\n",
+    "        # Board rendering is left disabled; only the per-step metadata below is printed.\n",
+    "        # print(frame['frame'].getvalue())\n",
+    "        print(f\"Timestep: {i + 1}\")\n",
+    "        print(f\"State: {frame['state']}\")\n",
+    "        print(f\"Action: {frame['action']}\")\n",
+    "        print(f\"Reward: {frame['reward']}\")\n",
+    "        sleep(.1)\n",
+    "        \n",
+    "print_frames(frames)"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 27,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "import numpy  as np\n",
+    "q_table = np.zeros([env.observation_space.n, env.action_space.n])"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 29,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Episode: 100000\n",
+      "Training finished.\n",
+      "\n"
+     ]
+    }
+   ],
+   "source": [
+    "\"\"\" Training The Agent \"\"\"\n",
+    "import random\n",
+    "from IPython.display import clear_output\n",
+    "\n",
+    "alpha = 0.1\n",
+    "gamma = 0.6\n",
+    "epsilon = 0.1\n",
+    "all_epochs = []\n",
+    "all_penalties = []\n",
+    "for i in range(1, 100001):\n",
+    "    state = env.reset()\n",
+    "    epochs, penalties, reward = 0, 0, 0\n",
+    "    done = False\n",
+    "    \n",
+    "    while not done:\n",
+    "        if random.uniform(0,1) < epsilon:\n",
+    "            action = env.action_space.sample()  # Explore: random action with probability epsilon\n",
+    "        else:\n",
+    "            action = np.argmax(q_table[state])  # Exploit: action with the highest learned Q-value\n",
+    "        \n",
+    "        next_state, reward, done, info = env.step(action) \n",
+    "        \n",
+    "        old_value = q_table[state, action]\n",
+    "        next_max = np.max(q_table[next_state])\n",
+    "        \n",
+    "        new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)\n",
+    "        q_table[state, action] = new_value\n",
+    "\n",
+    "        if reward == -10:\n",
+    "            penalties += 1\n",
+    "\n",
+    "        state = next_state\n",
+    "        epochs += 1\n",
+    "        \n",
+    "    if i % 100 == 0:\n",
+    "        clear_output(wait=True)\n",
+    "        print(f\"Episode: {i}\")\n",
+    "\n",
+    "print(\"Training finished.\\n\")\n",
+    "\n"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 30,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "array([ -2.40943541,  -2.27325184,  -2.41396927,  -2.36299859,\n",
+       "       -10.52639717, -10.68579624])"
+      ]
+     },
+     "execution_count": 30,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "q_table[328]"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 32,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Results after 500 episodes:\n",
+      "Average timesteps per episode: 12.906\n",
+      "Average penalties per episode: 0.0\n"
+     ]
+    }
+   ],
+   "source": [
+    "\"\"\"Evaluate agent's performance after Q-learning\"\"\"\n",
+    "\n",
+    "total_epochs, total_penalties = 0, 0\n",
+    "episodes = 500\n",
+    "\n",
+    "for _ in range(episodes):\n",
+    "    state = env.reset()\n",
+    "    epochs, penalties, reward = 0, 0, 0\n",
+    "    \n",
+    "    done = False\n",
+    "    \n",
+    "    while not done:\n",
+    "        action = np.argmax(q_table[state])\n",
+    "        state, reward, done, info = env.step(action)\n",
+    "\n",
+    "        if reward == -10:\n",
+    "            penalties += 1\n",
+    "\n",
+    "        epochs += 1\n",
+    "\n",
+    "    total_penalties += penalties\n",
+    "    total_epochs += epochs\n",
+    "\n",
+    "print(f\"Results after {episodes} episodes:\")\n",
+    "print(f\"Average timesteps per episode: {total_epochs / episodes}\")\n",
+    "print(f\"Average penalties per episode: {total_penalties / episodes}\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": []
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.7.3"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}