RL-1.ipynb
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"import gym"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |\u001b[34;1mB\u001b[0m:\u001b[43m \u001b[0m|\n",
"+---------+\n",
"\n"
]
}
],
"source": [
"env = gym.make(\"Taxi-v3\").env\n",
"env.render()"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"7"
]
},
"execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.reset()"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+\n",
"|\u001b[43mR\u001b[0m: | : :\u001b[34;1mG\u001b[0m|\n",
"| : | : : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|Y| : |\u001b[35mB\u001b[0m: |\n",
"+---------+\n",
"\n"
]
}
],
"source": [
"env.render()"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : |\u001b[43m \u001b[0m: : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|\u001b[34;1mY\u001b[0m| : |B: |\n",
"+---------+\n",
"\n",
"Action SpaceDiscrete(6)\n",
"Action SpaceDiscrete(500)\n"
]
}
],
"source": [
"env.reset()\n",
"env.render()\n",
"print(\"Action Space{}\".format(env.action_space))\n",
"print(\"Action Space{}\".format(env.observation_space))"
]
},
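{
"cell_type": "markdown",
"metadata": {},
"source": [
"The action space has 6 discrete actions (south, north, east, west, pickup, dropoff). The 500 states come from 25 taxi positions x 5 passenger locations (the four depots R, G, Y, B, plus in-taxi) x 4 destinations. A quick sanity check of that factorization:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# 25 taxi cells * 5 passenger locations * 4 destinations = 500\n",
"assert env.observation_space.n == 5 * 5 * 5 * 4\n",
"assert env.action_space.n == 6"
]
},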
{
"cell_type": "code",
"execution_count": 12,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"State : 328\n",
"+---------+\n",
"|R: | : :\u001b[35mG\u001b[0m|\n",
"| : |\u001b[43m \u001b[0m: : |\n",
"| : : : : |\n",
"| | : | : |\n",
"|\u001b[34;1mY\u001b[0m| : |B: |\n",
"+---------+\n",
"\n"
]
}
],
"source": [
"state = env.encode(3,1,2,0)\n",
"print(\"State :\", state)\n",
"env.render()"
]
},
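{
"cell_type": "markdown",
"metadata": {},
"source": [
"A quick round-trip check: `env.decode` is the inverse of `env.encode` in Taxi-v3, so decoding 328 should recover the (taxi row, taxi column, passenger index, destination index) tuple we started from."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# decode returns an iterator over (taxi_row, taxi_col, passenger_index, destination_index)\n",
"list(env.decode(328))  # expected: [3, 1, 2, 0]"
]
},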
{
"cell_type": "code",
"execution_count": 13,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"{0: [(1.0, 428, -1, False)],\n",
" 1: [(1.0, 228, -1, False)],\n",
" 2: [(1.0, 348, -1, False)],\n",
" 3: [(1.0, 328, -1, False)],\n",
" 4: [(1.0, 328, -10, False)],\n",
" 5: [(1.0, 328, -10, False)]}"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"env.P[328]"
]
},
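{
"cell_type": "markdown",
"metadata": {},
"source": [
"Each entry of `env.P[state]` maps an action to a list of `(probability, next_state, reward, done)` tuples. Taxi-v3 is deterministic, so every list holds a single tuple with probability 1.0. A small sketch that unpacks the table into readable form:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for action, transitions in env.P[328].items():\n",
"    for prob, next_state, reward, done in transitions:\n",
"        print(f\"action={action} prob={prob} next_state={next_state} reward={reward} done={done}\")"
]
},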
{
"cell_type": "code",
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Timesteps taken: 1079\n",
"Penalties incurred: 354\n"
]
}
],
"source": [
"env.s = 328\n",
"epochs = 0\n",
"penalties, reward = 0,0\n",
"frames= []\n",
"done = False\n",
"while not done:\n",
" action = env.action_space.sample()\n",
" state, reward, done, info = env.step(action)\n",
" if reward == -10:\n",
" penalties += 1\n",
" \n",
" frames.append({ \n",
" 'frame': env.render(mode='ansi'),\n",
" 'state': state,\n",
" 'action': action,\n",
" 'reward': reward\n",
" }\n",
" )\n",
" epochs +=1\n",
"print(\"Timesteps taken: {}\".format(epochs))\n",
"print(\"Penalties incurred: {}\".format(penalties))\n",
" \n"
]
},
{
"cell_type": "code",
"execution_count": 25,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Timestep: 1079\n",
"State: 0\n",
"Action: 5\n",
"Reward: 20\n"
]
}
],
"source": [
"from IPython.display import clear_output\n",
"from time import sleep\n",
"\n",
"def print_frames(frames):\n",
" for i, frame in enumerate(frames):\n",
" clear_output(wait=True)\n",
"# print(frame['frame'].getvalue())\n",
" print(f\"Timestep: {i + 1}\")\n",
" print(f\"State: {frame['state']}\")\n",
" print(f\"Action: {frame['action']}\")\n",
" print(f\"Reward: {frame['reward']}\")\n",
" sleep(.1)\n",
" \n",
"print_frames(frames)"
]
},
{
"cell_type": "code",
"execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"q_table = np.zeros([env.observation_space.n, env.action_space.n])"
]
},
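{
"cell_type": "markdown",
"metadata": {},
"source": [
"The Q-table has one row per state and one column per action (500 x 6 here), initialized to zeros. Training below fills it in with the standard Q-learning update:\n",
"\n",
"$$Q(s, a) \\leftarrow (1 - \\alpha)\\, Q(s, a) + \\alpha \\left( r + \\gamma \\max_{a'} Q(s', a') \\right)$$\n",
"\n",
"where $\\alpha$ is the learning rate, $\\gamma$ the discount factor, $r$ the reward, and $s'$ the next state."
]
},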
{
"cell_type": "code",
"execution_count": 29,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Episode: 100000\n",
"Training finished.\n",
"\n"
]
}
],
"source": [
"\"\"\" Training The Agent \"\"\"\n",
"import random\n",
"from IPython.display import clear_output\n",
"\n",
"alpha = 0.1\n",
"gamma = 0.6\n",
"epsilon = 0.1\n",
"all_epochs = []\n",
"all_penalties = []\n",
"for i in range(1, 100001):\n",
" state = env.reset()\n",
" epochs, penalties, reward = 0, 0, 0\n",
" done = False\n",
" \n",
" while not done:\n",
" if random.uniform(0,1) < epsilon:\n",
" action = env.action_space.sample() #Explore action space\n",
" else:\n",
" action = np.argmax(q_table[state]) #Exploit learned values\n",
" \n",
" next_state, reward, done, info = env.step(action) \n",
" \n",
" old_value = q_table[state, action]\n",
" next_max = np.max(q_table[next_state])\n",
" \n",
" new_value = (1 - alpha) * old_value + alpha * (reward + gamma * next_max)\n",
" q_table[state, action] = new_value\n",
"\n",
" if reward == -10:\n",
" penalties += 1\n",
"\n",
" state = next_state\n",
" epochs += 1\n",
" \n",
" if i % 100 == 0:\n",
" clear_output(wait=True)\n",
" print(f\"Episode: {i}\")\n",
"\n",
"print(\"Training finished.\\n\")\n",
"\n"
]
},
{
"cell_type": "code",
"execution_count": 30,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"array([ -2.40943541, -2.27325184, -2.41396927, -2.36299859,\n",
" -10.52639717, -10.68579624])"
]
},
"execution_count": 30,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"q_table[328]"
]
},
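{
"cell_type": "markdown",
"metadata": {},
"source": [
"The learned values at state 328 now clearly favor one action. `np.argmax` picks it out; the maximum sits at index 1, which is \"move north\" in Taxi-v3's action encoding (0=south, 1=north, 2=east, 3=west, 4=pickup, 5=dropoff)."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"np.argmax(q_table[328])  # greedy action at state 328; expected 1 (north) given the values above"
]
},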
{
"cell_type": "code",
"execution_count": 32,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Results after 500 episodes:\n",
"Average timesteps per episode: 12.906\n",
"Average penalties per episode: 0.0\n"
]
}
],
"source": [
"\"\"\"Evaluate agent's performance after Q-learning\"\"\"\n",
"\n",
"total_epochs, total_penalties = 0, 0\n",
"episodes = 500\n",
"\n",
"for _ in range(episodes):\n",
" state = env.reset()\n",
" epochs, penalties, reward = 0, 0, 0\n",
" \n",
" done = False\n",
" \n",
" while not done:\n",
" action = np.argmax(q_table[state])\n",
" state, reward, done, info = env.step(action)\n",
"\n",
" if reward == -10:\n",
" penalties += 1\n",
"\n",
" epochs += 1\n",
"\n",
" total_penalties += penalties\n",
" total_epochs += epochs\n",
"\n",
"print(f\"Results after {episodes} episodes:\")\n",
"print(f\"Average timesteps per episode: {total_epochs / episodes}\")\n",
"print(f\"Average penalties per episode: {total_penalties / episodes}\")"
]
},
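{
"cell_type": "markdown",
"metadata": {},
"source": [
"As a final check, one greedy episode can be replayed with the same frame-collection idea used for the random agent above, reusing `print_frames`. A minimal sketch; the episode length will vary with the random reset:"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"state = env.reset()\n",
"frames = []\n",
"done = False\n",
"\n",
"while not done:\n",
"    action = np.argmax(q_table[state])  # follow the learned policy\n",
"    state, reward, done, info = env.step(action)\n",
"    frames.append({\n",
"        'frame': env.render(mode='ansi'),\n",
"        'state': state,\n",
"        'action': action,\n",
"        'reward': reward\n",
"    })\n",
"\n",
"print_frames(frames)"
]
},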
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}