From 702637874baae9981267f242f5e836be1c6ff66a Mon Sep 17 00:00:00 2001
From: Shubham Jha
Date: Thu, 11 Jun 2020 20:19:24 +0530
Subject: [PATCH] Add basic training iteration

---
 requirements.txt                          |  2 +-
 rlkit/__init__.py                         |  4 --
 rlkit/__main__.py                         | 23 ++++++++++++
 rlkit/agents/__init__.py                  |  1 +
 rlkit/agents/random_agent.py              |  9 +++++
 rlkit/core/__init__.py                    |  3 ++
 rlkit/core/base_action.py                 |  1 +
 rlkit/core/base_agent.py                  | 10 +++++
 rlkit/core/base_environment.py            | 16 ++++++++
 rlkit/core/base_trainer.py                |  8 ++--
 rlkit/environments/gym_environment.py     | 45 +++++++++++++++++++++++
 rlkit/environments/vizdoom_environment.py | 13 ++++++
 rlkit/trainers/__init__.py                |  1 +
 rlkit/trainers/basic_trainer.py           | 40 ++++++++++++++++++--
 14 files changed, 165 insertions(+), 11 deletions(-)
 create mode 100644 rlkit/agents/random_agent.py
 create mode 100644 rlkit/environments/vizdoom_environment.py

diff --git a/requirements.txt b/requirements.txt
index 660403b..96bf383 100755
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,3 +1,3 @@
 tensorflow==1.11.0
-gym==0.10.8
+gym==0.17.2
 numpy==1.15.4
\ No newline at end of file
diff --git a/rlkit/__init__.py b/rlkit/__init__.py
index 1d5f453..e69de29 100755
--- a/rlkit/__init__.py
+++ b/rlkit/__init__.py
@@ -1,4 +0,0 @@
-from .algorithms.random_agent import RandomAgent
-from .algorithms.dqn import DQN
-from .algorithms.policy_gradients import REINFORCE
-from .algorithms.agent import Agent
\ No newline at end of file
diff --git a/rlkit/__main__.py b/rlkit/__main__.py
index e69de29..272df3f 100644
--- a/rlkit/__main__.py
+++ b/rlkit/__main__.py
@@ -0,0 +1,23 @@
+from rlkit.agents import RandomAgent
+from rlkit.environments.gym_environment import GymEnvironment
+from rlkit.trainers import BasicTrainer
+
+params = {
+    "environment_params": {
+        "env_name": "SpaceInvaders-v0",
+    },
+    "agent_params": {
+
+    },
+    "training_params": {
+        "run_name": "test_run",
+        "train_interval": 10,
+        "episodes": 5,
+        "steps": 500,
+    },
+}
+
+env = GymEnvironment(params["environment_params"])
+agent = RandomAgent(params["agent_params"], env.get_action_space())
+trainer = BasicTrainer(params["training_params"], agent, env)
+trainer.train()
diff --git a/rlkit/agents/__init__.py b/rlkit/agents/__init__.py
index e69de29..fe0737e 100644
--- a/rlkit/agents/__init__.py
+++ b/rlkit/agents/__init__.py
@@ -0,0 +1 @@
+from .random_agent import RandomAgent
\ No newline at end of file
diff --git a/rlkit/agents/random_agent.py b/rlkit/agents/random_agent.py
new file mode 100644
index 0000000..615b1a9
--- /dev/null
+++ b/rlkit/agents/random_agent.py
@@ -0,0 +1,9 @@
+from rlkit.core.base_agent import BaseAgent
+
+
+class RandomAgent(BaseAgent):
+    def __init__(self, params, action_space):
+        super(RandomAgent, self).__init__(params, action_space)
+
+    def get_action(self, state):
+        return self.action_space.sample()
\ No newline at end of file
diff --git a/rlkit/core/__init__.py b/rlkit/core/__init__.py
index e69de29..8c7f768 100644
--- a/rlkit/core/__init__.py
+++ b/rlkit/core/__init__.py
@@ -0,0 +1,3 @@
+from .base_agent import BaseAgent
+from .base_environment import BaseEnvironment
+from .base_trainer import BaseTrainer
\ No newline at end of file
diff --git a/rlkit/core/base_action.py b/rlkit/core/base_action.py
index e69de29..11109cb 100644
--- a/rlkit/core/base_action.py
+++ b/rlkit/core/base_action.py
@@ -0,0 +1 @@
+class BaseAction: pass
\ No newline at end of file
diff --git a/rlkit/core/base_agent.py b/rlkit/core/base_agent.py
index e69de29..93c99cf 100644
--- a/rlkit/core/base_agent.py
+++ b/rlkit/core/base_agent.py
@@ -0,0 +1,10 @@
+class BaseAgent:
+    def __init__(self, params, action_space):
+        self.params = params
+        self.action_space = action_space
+
+    def train(self):
+        pass
+
+    def get_action(self, state):
+        pass
\ No newline at end of file
diff --git a/rlkit/core/base_environment.py b/rlkit/core/base_environment.py
index e69de29..318519e 100644
--- a/rlkit/core/base_environment.py
+++ b/rlkit/core/base_environment.py
@@ -0,0 +1,16 @@
+class BaseEnvironment:
+    def __init__(self):
+        self.to_render = False
+        self.reset()
+
+    def execute_action(self, action):
+        pass
+
+    def reset(self):
+        pass
+
+    def render(self):
+        pass
+
+    def setRender(self, to_render):
+        self.to_render = to_render
diff --git a/rlkit/core/base_trainer.py b/rlkit/core/base_trainer.py
index 0046369..f88eaca 100644
--- a/rlkit/core/base_trainer.py
+++ b/rlkit/core/base_trainer.py
@@ -1,8 +1,10 @@
 class BaseTrainer:
-    def __init__(self):
-        pass
+    def __init__(self, params):
+        self.global_step = 0
+        self.episodes = params.get("episodes", 10)
+        self.steps = params.get("steps", 100)
 
-    def step(self):
+    def do_step(self):
         pass
 
     def train(self):
diff --git a/rlkit/environments/gym_environment.py b/rlkit/environments/gym_environment.py
index e69de29..1b44c21 100644
--- a/rlkit/environments/gym_environment.py
+++ b/rlkit/environments/gym_environment.py
@@ -0,0 +1,45 @@
+import gym
+from rlkit.core import BaseEnvironment
+
+
+class GymEnvironment(BaseEnvironment):
+    def __init__(self, params):
+        self.params = params
+        self.env_name = params["env_name"]
+        self.env = gym.make(self.env_name)
+        super(GymEnvironment, self).__init__()
+
+    def execute_action(self, action):
+        self.env.step(action)
+
+    def get_action_space(self):
+        return self.env.action_space
+
+    def reset(self, reset_values=True):
+        if reset_values:
+            self.reset_values()
+        self.reset_env()
+
+    def reset_values(self):
+        self.state = None
+        self.reward = None
+        self.done = False
+        self.info = None
+
+    def reset_env(self):
+        self.env.reset()
+
+    def close(self):
+        print("closing env")
+        return self.env.close()
+
+    def render(self):
+        self.env.render()
+
+    def step(self, action):
+        self.state, self.reward, self.done, self.info = self.env.step(action)
+        return (self.state, self.reward, self.done, self.info)
+
+
+if __name__ == "__main__":
+    test_env = GymEnvironment({"env_name": "MountainCarContinuous-v0"})
diff --git a/rlkit/environments/vizdoom_environment.py b/rlkit/environments/vizdoom_environment.py
new file mode 100644
index 0000000..b60a174
--- /dev/null
+++ b/rlkit/environments/vizdoom_environment.py
@@ -0,0 +1,13 @@
+from rlkit.core import BaseEnvironment
+from vizdoom import DoomGame
+
+
+class VizDoomEnvironment(BaseEnvironment):
+    def __init__(self, params):
+        super(VizDoomEnvironment, self).__init__()
+        self.env_name = params["env_name"]
+
+    def initialize_env(self):
+        self.env = DoomGame()
+        self.env.load_config("../config/basic.cfg")
+        self.env.init()
\ No newline at end of file
diff --git a/rlkit/trainers/__init__.py b/rlkit/trainers/__init__.py
index e69de29..6df86b3 100644
--- a/rlkit/trainers/__init__.py
+++ b/rlkit/trainers/__init__.py
@@ -0,0 +1 @@
+from .basic_trainer import BasicTrainer
\ No newline at end of file
diff --git a/rlkit/trainers/basic_trainer.py b/rlkit/trainers/basic_trainer.py
index b301e63..9123e15 100644
--- a/rlkit/trainers/basic_trainer.py
+++ b/rlkit/trainers/basic_trainer.py
@@ -1,3 +1,37 @@
-class BasicTrainer:
-    def __init__(self):
-        pass
\ No newline at end of file
+from rlkit.core import BaseTrainer
+
+
+class BasicTrainer(BaseTrainer):
+    def __init__(self, params, agent, environment):
+        self.agent = agent
+        self.environment = environment
+        super(BasicTrainer, self).__init__(params)
+
+        self.train_interval = params["train_interval"]
+        self.run_name = params["run_name"]
+        self.episodes = params["episodes"]
+        self.steps = params["steps"]
+
+    def do_step(self):
+        action = self.agent.get_action(self.environment.state)
+        self.environment.step(action)
+        self.environment.render()  # TODO: find better solution
+
+    def train(self):
+        try:
+            for episode in range(1, self.episodes + 1):
+                step = 0
+                self.environment.reset()
+                while step < self.steps and not self.environment.done:
+                    print("episode: {}, step: {}".format(episode, step))
+                    self.do_step()
+
+                    # Train agent
+                    if self.global_step > 0 and self.global_step % self.train_interval == 0:
+                        self.agent.train()
+
+                    # Increment step counts
+                    step += 1
+                    self.global_step += 1
+        finally:
+            self.environment.close()
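
Beyond the random-agent smoke test wired up in rlkit/__main__.py, here is a minimal sketch of how a learning agent would plug into the BaseAgent/BasicTrainer interfaces this patch introduces. Only params, action_space, get_action, and train come from the patch; the EpsilonRandomAgent class, its epsilon parameter, and the CartPole-v0 demo config are hypothetical illustrations, not part of the change.

# Hypothetical usage sketch (not part of this patch). Assumes gym==0.17.2
# as pinned in requirements.txt and the rlkit package on the import path.
from rlkit.core import BaseAgent
from rlkit.environments.gym_environment import GymEnvironment
from rlkit.trainers import BasicTrainer


class EpsilonRandomAgent(BaseAgent):
    # Illustrative agent: acts randomly; `epsilon` is a placeholder knob
    # to show how `params` flows through BaseAgent.__init__.
    def __init__(self, params, action_space):
        super(EpsilonRandomAgent, self).__init__(params, action_space)
        self.epsilon = params.get("epsilon", 0.1)  # hypothetical parameter

    def get_action(self, state):
        # A real agent would condition on `state`; sampling keeps the sketch runnable.
        return self.action_space.sample()

    def train(self):
        # Invoked by BasicTrainer every `train_interval` global steps.
        pass


env = GymEnvironment({"env_name": "CartPole-v0"})
agent = EpsilonRandomAgent({"epsilon": 0.05}, env.get_action_space())
trainer = BasicTrainer(
    {"run_name": "demo", "train_interval": 10, "episodes": 2, "steps": 100},
    agent,
    env,
)
trainer.train()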