work in progress

hallvardnmbu · Feb 10, 2024 · 3130cca · 3130cca
1 parent 032e3bd
commit 3130cca
Show file tree

Hide file tree

Showing 9 changed files with 2,176 additions and 23 deletions.
diff --git a/README.txt → README.md b/README.txt → README.md
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -0,0 +1,17 @@
+[tool.poetry]
+name = "special-syllabus"
+version = "0.1.0"
+description = "Leo and Hallvard's Special Syllabus about RL and transformers"
+authors = ["Your Name <you@example.com>"]
+license = "n"
+readme = "README.md"
+packages = [{include = "reinforcement_learning", from = "src"}]
+
+[tool.poetry.dependencies]
+python = "^3.10"
+jupyter = "^1.0.0"
+
+
+[build-system]
+requires = ["poetry-core"]
+build-backend = "poetry.core.masonry.api"
diff --git a/src/reinforcement_learning/__init__.py b/src/reinforcement_learning/__init__.py
@@ -1 +1 @@
-# to make python treat this as package
+"Reinforcement learning package by Hallvard and Leo"
diff --git a/src/reinforcement_learning/agents/__init__.py b/src/reinforcement_learning/agents/__init__.py
@@ -1,8 +1,8 @@
 """Defines the base, policy gradient, and tabular Q agents for reinforcement learning applications."""
 
 
-from base_agent import Agent
-from policy_gradient_agent import PolicyGradientAgent
-from tabular_q_agent import TabularQAgent
+from .base_agent import Agent
+from .policy_gradient_agent import PolicyGradientAgent
+from .tabular_q_agent import TabularQAgent
 
 __all__ = ['Agent', 'PolicyGradientAgent', 'TabularQAgent']
diff --git a/src/reinforcement_learning/agents/value_deep_q_agent.py b/src/reinforcement_learning/agents/value_deep_q_agent.py
@@ -2,7 +2,7 @@
 import random
 import numpy as np
 import torch
-from base_agent import Agent
+from reinforcement_learning.agents.base_agent import Agent
 
 
 class ValueDeepQAgent(Agent):

diff --git a/src/reinforcement_learning/applications/policy_based_cartpole.ipynb b/src/reinforcement_learning/applications/policy_based_cartpole.ipynb
@@ -21,7 +21,19 @@
     },
     "collapsed": true
    },
-   "outputs": [],
+   "outputs": [
+    {
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'agents'",
+     "output_type": "error",
+     "traceback": [
+      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgymnasium\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgym\u001b[39;00m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01magents\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m PolicyGradientAgent\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'agents'"
+     ]
+    }
+   ],
    "source": [
     "import time\n",
     "import torch\n",
@@ -61,7 +73,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 2,
+   "execution_count": null,
    "id": "82fb48c186edca6",
    "metadata": {
     "ExecuteTime": {
@@ -87,7 +99,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "id": "2476ac8633268c19",
    "metadata": {
     "ExecuteTime": {
@@ -104,7 +116,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": null,
    "id": "4efdbf0dbc4e327f",
    "metadata": {
     "ExecuteTime": {
@@ -121,7 +133,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "id": "b4abfe2d67240d1f",
    "metadata": {
     "ExecuteTime": {
@@ -239,7 +251,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": null,
    "id": "510380cc5f3c5685",
    "metadata": {
     "ExecuteTime": {
@@ -301,7 +313,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 7,
+   "execution_count": null,
    "id": "4183f64c47eb5090",
    "metadata": {
     "ExecuteTime": {
@@ -339,7 +351,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 8,
+   "execution_count": null,
    "id": "2f63ff4b67fe2c7",
    "metadata": {
     "ExecuteTime": {
@@ -363,14 +375,14 @@
   "language_info": {
    "codemirror_mode": {
     "name": "ipython",
-    "version": 2
+    "version": 3
    },
    "file_extension": ".py",
    "mimetype": "text/x-python",
    "name": "python",
    "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython2",
-   "version": "2.7.6"
+   "pygments_lexer": "ipython3",
+   "version": "3.10.4"
   }
  },
  "nbformat": 4,

diff --git a/src/reinforcement_learning/applications/tabular_q_frozen_lake.ipynb b/src/reinforcement_learning/applications/tabular_q_frozen_lake.ipynb
@@ -2,18 +2,18 @@
  "cells": [
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [
     {
-     "ename": "ImportError",
-     "evalue": "attempted relative import with no known parent package",
+     "ename": "ModuleNotFoundError",
+     "evalue": "No module named 'src'",
      "output_type": "error",
      "traceback": [
       "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
-      "\u001b[0;31mImportError\u001b[0m                               Traceback (most recent call last)",
-      "Cell \u001b[0;32mIn[3], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgymnasium\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgym\u001b[39;00m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01magents\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularQAgent\n",
-      "\u001b[0;31mImportError\u001b[0m: attempted relative import with no known parent package"
+      "\u001b[0;31mModuleNotFoundError\u001b[0m                       Traceback (most recent call last)",
+      "Cell \u001b[0;32mIn[1], line 7\u001b[0m\n\u001b[1;32m      4\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mgymnasium\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mgym\u001b[39;00m\n\u001b[1;32m      5\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mmatplotlib\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mpyplot\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mplt\u001b[39;00m\n\u001b[0;32m----> 7\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01msrc\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01mreinforcement_learning\u001b[39;00m\u001b[38;5;21;01m.\u001b[39;00m\u001b[38;5;21;01magents\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TabularQAgent\n",
+      "\u001b[0;31mModuleNotFoundError\u001b[0m: No module named 'src'"
      ]
     }
    ],
@@ -24,7 +24,7 @@
     "import gymnasium as gym\n",
     "import matplotlib.pyplot as plt\n",
     "\n",
-    "from ..agents import TabularQAgent"
+    "from src.reinforcement_learning.agents import TabularQAgent"
    ]
   },
   {

diff --git a/src/reinforcement_learning/applications/tabular_q_frozen_lake.py b/src/reinforcement_learning/applications/tabular_q_frozen_lake.py
@@ -1 +1,3 @@
+import os
+print(os.getcwd())
 from reinforcement_learning.agents import TabularQAgent
Original file line number	Diff line number	Diff line change
		@@ -1 +1 @@
		# to make python treat this as package
		"Reinforcement learning package by Hallvard and Leo"