diff --git a/examples/_vizdoom.ini b/examples/_vizdoom.ini new file mode 100644 index 0000000..9dd8946 --- /dev/null +++ b/examples/_vizdoom.ini @@ -0,0 +1,568 @@ +# This file was generated by ViZDoom 1.1.8 (ZDOOM 2.8.1) on Thu Jun 11 22:00:16 2020 + +# These are the directories to automatically search for IWADs. +# Each directory should be on a separate line, preceded by Path= +[IWADSearch.Directories] +Path=. +Path=$DOOMWADDIR +Path=/Users/sjha/Documents/_vizdoom +Path=/Users/sjha/Library/Application Support/_vizdoom +Path=$PROGDIR +Path=/Library/Application Support/_vizdoom + +# These are the directories to search for wads added with the -file +# command line parameter, if they cannot be found with the path +# as-is. Layout is the same as for IWADSearch.Directories +[FileSearch.Directories] +Path=$PROGDIR +Path=/Library/Application Support/_vizdoom +Path=$DOOMWADDIR + +# Files to automatically execute when running the corresponding game. +# Each file should be on its own line, preceded by Path= + +[Doom.AutoExec] +Path=/Users/sjha/Documents/_vizdoom/autoexec.cfg + +[Heretic.AutoExec] +Path=/Users/sjha/Documents/_vizdoom/autoexec.cfg + +[Hexen.AutoExec] +Path=/Users/sjha/Documents/_vizdoom/autoexec.cfg + +[Strife.AutoExec] +Path=/Users/sjha/Documents/_vizdoom/autoexec.cfg + +[Chex.AutoExec] +Path=/Users/sjha/Documents/_vizdoom/autoexec.cfg + +# WAD files to always load. These are loaded after the IWAD but before +# any files added with -file. Place each file on its own line, preceded +# by Path= +[Global.Autoload] + +# Wad files to automatically load depending on the game and IWAD you are +# playing. You may have have files that are loaded for all similar IWADs +# (the game) and files that are only loaded for particular IWADs. 
For example, +# any files listed under 'doom.Autoload' will be loaded for any version of Doom, +# but files listed under 'doom.doom2.Autoload' will only load when you are +# playing a Doom 2 based game (doom2.wad, tnt.wad or plutonia.wad), and files listed under +# 'doom.doom2.commercial.Autoload' only when playing doom2.wad. + +[doom.Autoload] + +[doom.doom2.Autoload] + +[doom.doom2.commercial.Autoload] + +[doom.doom2.bfg.Autoload] + +[doom.doom2.plutonia.Autoload] + +[doom.doom2.tnt.Autoload] + +[doom.doom1.Autoload] + +[doom.doom1.registered.Autoload] + +[doom.doom1.ultimate.Autoload] + +[doom.doom1.bfg.Autoload] + +[doom.freedoom.Autoload] + +[doom.freedoom.demo.Autoload] + +[doom.freedoom.phase1.Autoload] + +[doom.freedoom.phase2.Autoload] + +[doom.freedoom.freedm.Autoload] + +[heretic.Autoload] + +[heretic.heretic.Autoload] + +[heretic.shadow.Autoload] + +[blasphemer.Autoload] + +[hexen.Autoload] + +[hexen.deathkings.Autoload] + +[hexen.hexen.Autoload] + +[strife.Autoload] + +[chex.Autoload] + +[chex.chex1.Autoload] + +[chex.chex3.Autoload] + +[urbanbrawl.Autoload] + +[hacx.Autoload] + +[hacx.hacx1.Autoload] + +[hacx.hacx2.Autoload] + +[harmony.Autoload] + +[square.Autoload] + +[square.squareware.Autoload] + +[square.square.Autoload] + +[LastRun] +Version=211 + +[GlobalSettings] +gus_memsize=0 +midi_dmxgus=true +gus_patchdir= +midi_voices=32 +midi_config=timidity.cfg +snd_efx=true +snd_aldevice=Default +wildmidi_enhanced_resampling=true +wildmidi_reverb=false +wildmidi_frequency=0 +wildmidi_config= +fluid_chorus_type=0 +fluid_chorus_depth=8 +fluid_chorus_speed=0.3 +fluid_chorus_level=1 +fluid_chorus_voices=3 +fluid_reverb_level=0.57 +fluid_reverb_width=0.76 +fluid_reverb_damping=0.23 +fluid_reverb_roomsize=0.61 +fluid_threads=1 +fluid_samplerate=0 +fluid_interp=1 +fluid_voices=128 +fluid_chorus=true +fluid_reverb=true +fluid_gain=0.5 +fluid_patchset= +opl_core=0 +opl_numchips=2 +timidity_frequency=44100 +timidity_pipe=90 +timidity_mastervolume=1 
+timidity_byteswap=false +timidity_8bit=false +timidity_stereo=true +timidity_reverb=0 +timidity_chorus=0 +timidity_extargs= +timidity_exe=timidity +snd_mididevice=-1 +spc_amp=1.875 +mod_dumb_mastervolume=1 +mod_autochip_scan_threshold=12 +mod_autochip_size_scan=500 +mod_autochip_size_force=100 +mod_autochip=false +mod_interp=2 +mod_volramp=2 +mod_samplerate=0 +mod_dumb=true +snd_sfxvolume=1 +snd_backend=openal +snd_output=default +snd_buffersize=0 +snd_samplerate=0 +snd_musicvolume=0.5 +snd_waterlp=250 +snd_midipatchset= +snd_output_format=PCM-16 +snd_speakermode=Auto +snd_resampler=Linear +snd_waterreverb=true +snd_hrtf=false +snd_buffercount=0 +snd_driver=0 +opl_fullpan=true +vid_tft=true +m_showinputgrid=false +m_show_backbutton=0 +m_use_mouse=1 +show_messages=true +mouse_sensitivity=1 +map_point_coordinates=true +vid_aspect=3 +vid_nowidescreen=false +vid_refreshrate=0 +vid_vsync=false +vid_defbits=8 +vid_defheight=480 +vid_defwidth=640 +Gamma=1 +statfile=zdoomstat.txt +savestatistics=0 +snd_flipstereo=false +snd_channels=32 +r_columnmethod=1 +r_quakeintensity=1 +cl_predict_lerpthreshold=2 +cl_predict_lerpscale=0.05 +cl_predict_specials=true +cl_noprediction=false +telezoom=true +r_fakecontrast=1 +chase_dist=90 +chase_height=-8 +gl_cachetime=0.6 +gl_cachenodes=true +nomonsterinterpolation=false +png_gamma=0 +png_level=5 +screenshot_dir= +screenshot_type=png +screenshot_quiet=false +use_joystick=false +autosavecount=4 +disableautosave=0 +autosavenum=0 +smooth_mouse=false +m_side=2 +m_forward=1 +m_yaw=1 +m_pitch=1 +lookstrafe=false +freelook=false +invertmouse=false +cl_run=false +demo_compress=true +cl_waitforsave=true +save_dir= +longsavemessages=true +storesavepic=true +nofilecompression=false +cl_capfps=true +defaultiwad= +queryiwad=true +con_ctrl_d= +con_buffersize=-1 +osx_additional_parameters= +showendoom=0 +bgamma=1 +ggamma=1 +rgamma=1 +vid_forcesurface=false +vid_displaybits=32 +vid_adapter=0 +mouse_capturemode=1 +m_filter=false +m_noprescale=false 
+use_mouse=false +vid_winscale=1 +fullscreen=false +vid_maxfps=200 + +[GlobalSettings.Unknown] + +[Doom.Player] +wi_noautostartmap=false +playerclass=Fighter +stillbob=0 +movebob=0.25 +neverswitchonpickup=false +gender=male +team=255 +skin=base +colorset=0 +color=40 cf 00 +name=Player +autoaim=35 + +[Doom.ConsoleVariables] +r_drawfuzz=1 +vid_nopalsubstitutions=false +snd_pitched=false +menu_screenratios=-1 +snd_menuvolume=0.6 +show_obituaries=true +am_showmaplabel=2 +crosshairgrow=false +crosshairscale=false +crosshairhealth=true +crosshaircolor=ff 00 00 +crosshairforce=false +crosshair=0 +st_scale=true +paletteflash=0 +hudcolor_stats=3 +hudcolor_statnames=6 +hudcolor_xyco=3 +hudcolor_ttim=5 +hudcolor_ltim=8 +hudcolor_time=6 +hudcolor_titl=10 +hud_berserk_health=true +hud_armor_green=100 +hud_armor_yellow=50 +hud_armor_red=25 +hud_health_green=100 +hud_health_yellow=50 +hud_health_red=25 +hud_ammo_yellow=50 +hud_ammo_red=25 +hud_showlag=0 +hud_timecolor=5 +hud_showtime=0 +hud_showammo=2 +hud_showweapons=true +hud_showscore=false +hud_showstats=false +hud_showitems=false +hud_showmonsters=true +hud_showsecrets=true +hud_althud=false +hud_althudscale=2 +st_oldouch=false +cl_maxdecals=1024 +cl_spreaddecals=true +transsouls=0.75 +wi_showtotaltime=true +wi_percents=true +dimcolor=ff d7 00 +dimamount=-1 +hud_scale=true +allcheats=false +r_stretchsky=true +r_shadercolormaps=true +screenblocks=12 +r_deathcamera=false +cl_showsecretmessage=true +cl_bloodtype=1 +cl_pufftype=0 +addrocketexplosion=false +cl_missiledecals=true +cl_doautoaim=false +cl_bloodsplats=true +cl_showmultikills=false +cl_showsprees=false +r_maxparticles=4092 +r_rail_trailsparsity=1 +r_rail_spiralsparsity=1 +r_rail_smartspiral=false +cl_rockettrails=3 +dlg_musicvolume=1 +sb_teamdeathmatch_headingcolor=6 +sb_teamdeathmatch_enable=true +sb_deathmatch_otherplayercolor=2 +sb_deathmatch_yourplayercolor=3 +sb_deathmatch_headingcolor=6 +sb_deathmatch_enable=true +sb_cooperative_otherplayercolor=2 
+sb_cooperative_yourplayercolor=3 +sb_cooperative_headingcolor=6 +sb_cooperative_enable=true +nametagcolor=5 +displaynametags=0 +language=auto +compatmode=0 +vid_cursor=None +wipetype=0 +dehload=0 +chat_substitution=false +chatmacro0=No +chatmacro9=Yes +chatmacro8=I'll take care of it. +chatmacro7=Come here! +chatmacro6=Next time, scumbag... +chatmacro5=You suck! +chatmacro4=Help! +chatmacro3=I'm not looking too good! +chatmacro2=I'm OK. +chatmacro1=I'm ready to kick butt! +lookspring=true +con_midtime=0 +msgmidcolor2=4 +msgmidcolor=5 +msg4color=3 +msg3color=3 +msg2color=2 +msg1color=5 +msg0color=6 +msg=0 +con_alpha=0.75 +con_scaletext=0 +con_centernotify=false +con_notifytime=0 +con_notablist=false +cl_bbannounce=false +am_followplayer=true +am_textured=true +am_ovthingcolor_citem=e8 88 00 +am_ovthingcolor_item=e8 88 00 +am_ovthingcolor_ncmonster=e8 88 00 +am_ovthingcolor_monster=e8 88 00 +am_ovthingcolor_friend=e8 88 00 +am_ovthingcolor=e8 88 00 +am_ovsecretsectorcolor=00 ff ff +am_ovinterlevelcolor=ff ff 00 +am_ovtelecolor=ff ff 00 +am_ovunseencolor=00 22 6e +am_ovcdwallcolor=00 88 44 +am_ovfdwallcolor=00 88 44 +am_ovefwallcolor=00 88 44 +am_ovlockedcolor=00 88 44 +am_ovotherwallscolor=00 88 44 +am_ovspecialwallcolor=ff ff ff +am_ovsecretwallcolor=00 88 44 +am_ovwallcolor=00 ff 00 +am_ovyourcolor=fc e8 d8 +am_thingcolor_citem=fc fc fc +am_thingcolor_item=fc fc fc +am_thingcolor_ncmonster=fc fc fc +am_thingcolor_monster=fc fc fc +am_thingcolor_friend=fc fc fc +am_secretsectorcolor=ff 00 ff +am_interlevelcolor=ff 00 00 +am_intralevelcolor=00 00 ff +am_lockedcolor=00 78 00 +am_notseencolor=6c 6c 6c +am_xhaircolor=80 80 80 +am_gridcolor=8b 5a 2b +am_thingcolor=fc fc fc +am_efwallcolor=66 55 55 +am_cdwallcolor=4c 38 20 +am_fdwallcolor=88 70 58 +am_tswallcolor=88 88 88 +am_specialwallcolor=ff ff ff +am_secretwallcolor=00 00 00 +am_wallcolor=2c 18 08 +am_yourcolor=fc e8 d8 +am_backcolor=6c 54 40 +am_showthingsprites=0 +am_showtriggerlines=true +am_showkeys=true 
+am_drawmapback=0 +am_map_secrets=1 +am_customcolors=true +am_colorset=0 +am_showtotaltime=false +am_showtime=false +am_showitems=false +am_showmonsters=false +am_showsecrets=false +am_overlay=0 +am_rotate=0 + +[Doom.LocalServerInfo] +sv_corpsequeuesize=64 +forcewater=false +sv_smartaim=0 +sv_disableautohealth=false +sv_dropstyle=0 +compatflags2=0 +compatflags=0 + +[Doom.UnknownConsoleVariables] + +[Doom.ConsoleAliases] + +[Doom.Bindings] +1=slot 1 +2=slot 2 +3=slot 3 +4=slot 4 +5=slot 5 +6=slot 6 +7=slot 7 +8=slot 8 +9=slot 9 +0=slot 0 +-=sizedown +Equals=sizeup +tab=togglemap +t=messagemode +LeftBracket=invprev +RightBracket=invnext +enter=invuse +ctrl=+attack +`=toggleconsole +shift=+speed +\=+showscores +,=+moveleft +.=+moveright +alt=+strafe +space=+use +capslock=toggle cl_run +f1=menu_help +f2=menu_save +f3=menu_load +f4=menu_options +f5=menu_display +f6=quicksave +f7=menu_endgame +f8=togglemessages +f9=quickload +f10=menu_quit +f11=bumpgamma +f12=spynext +sysrq=screenshot +pause=pause +home=land +uparrow=+forward +pgup=+moveup +leftarrow=+left +rightarrow=+right +end=centerview +downarrow=+back +pgdn=+lookup +ins=+movedown +del=+lookdown +mouse1=+attack +mouse2=+strafe +mouse3=+forward +mouse4=+speed +joy1=+attack +joy2=+strafe +joy3=+speed +joy4=+use +mwheelup=weapprev +mwheeldown=weapnext +mwheelright=invnext +mwheelleft=invprev +dpadup=togglemap +dpaddown=invuse +dpadleft=invprev +dpadright=invnext +pad_start=pause +pad_back=menu_main +lthumb=crouch +lshoulder=weapprev +rshoulder=weapnext +ltrigger=+altattack +rtrigger=+attack +pad_a=+use +pad_y=+jump + +[Doom.DoubleBindings] + +[Doom.AutomapBindings] +0=am_gobig +-=+am_zoomout +Equals=+am_zoomin +p=am_toggletexture +f=am_togglefollow +g=am_togglegrid +c=am_clearmarks +m=am_setmark +kp-=+am_zoomout +kp+=+am_zoomin +uparrow=+am_panup +leftarrow=+am_panleft +rightarrow=+am_panright +downarrow=+am_pandown +mwheelup=am_zoom 1.2 +mwheeldown=am_zoom -1.2 + diff --git a/examples/basic.cfg b/examples/basic.cfg 
new file mode 100644
index 0000000..a21ea9e
--- /dev/null
+++ b/examples/basic.cfg
@@ -0,0 +1,38 @@
+# Lines starting with # are treated as comments (or with whitespaces+#).
+# It doesn't matter if you use capital letters or not.
+# It doesn't matter if you use underscore or camel notation for keys, e.g. episode_timeout is the same as episodeTimeout.
+
+doom_map = map01
+
+# Rewards
+living_reward = -1
+
+# Rendering options
+screen_resolution = RES_320X240
+screen_format = CRCGCB
+render_hud = True
+render_crosshair = false
+render_weapon = true
+render_decals = false
+render_particles = false
+window_visible = true
+
+# make episodes start after 14 tics (after unholstering the gun)
+episode_start_time = 14
+
+# make episodes finish after 300 actions (tics)
+episode_timeout = 300
+
+# Available buttons
+available_buttons =
+    {
+        MOVE_LEFT
+        MOVE_RIGHT
+        ATTACK
+    }
+
+# Game variables that will be in the state
+available_game_variables = { AMMO2}
+
+mode = PLAYER
+doom_skill = 5
\ No newline at end of file
diff --git a/examples/random_agent_example.py b/examples/random_agent_example.py
new file mode 100644
index 0000000..2e039b0
--- /dev/null
+++ b/examples/random_agent_example.py
@@ -0,0 +1,25 @@
+from rlkit.agents import RandomAgent
+from rlkit.environments.gym_environment import GymEnvironment
+from rlkit.environments.vizdoom_environment import VizDoomEnvironment
+from rlkit.trainers import BasicTrainer
+
+params = {
+    "environment_params": {
+        # "env_name": "SpaceInvaders-v0",
+    },
+    "agent_params": {
+
+    },
+    "training_params": {
+        "run_name": "test_run",
+        "train_interval": 10,
+        "episodes": 5,
+        "steps": 500,
+    },
+}
+
+# env = GymEnvironment(params["environment_params"])
+env = VizDoomEnvironment(params["environment_params"])
+agent = RandomAgent(params["agent_params"], env.get_action_space())
+trainer = BasicTrainer(params["training_params"], agent, env)
+trainer.train()
diff --git a/rlkit/core/__init__.py b/rlkit/core/__init__.py
index 8c7f768..dbe1e9b 100644
--- a/rlkit/core/__init__.py
+++ b/rlkit/core/__init__.py
@@ -1,3 +1,4 @@
+from .base_action_space import BaseActionSpace
 from .base_agent import BaseAgent
 from .base_environment import BaseEnvironment
 from .base_trainer import BaseTrainer
\ No newline at end of file
diff --git a/rlkit/core/base_action.py b/rlkit/core/base_action.py
deleted file mode 100644
index 11109cb..0000000
--- a/rlkit/core/base_action.py
+++ /dev/null
@@ -1 +0,0 @@
-class BaseAction
\ No newline at end of file
diff --git a/rlkit/core/base_action_space.py b/rlkit/core/base_action_space.py
new file mode 100644
index 0000000..b138e2c
--- /dev/null
+++ b/rlkit/core/base_action_space.py
@@ -0,0 +1,6 @@
+class BaseActionSpace:
+    def __init__(self):
+        pass
+
+    def sample(self):
+        pass
\ No newline at end of file
diff --git a/rlkit/core/base_environment.py b/rlkit/core/base_environment.py
index 318519e..313e10e 100644
--- a/rlkit/core/base_environment.py
+++ b/rlkit/core/base_environment.py
@@ -1,8 +1,12 @@
 class BaseEnvironment:
     def __init__(self):
         self.to_render = False
+        self.done = False
         self.reset()
 
+    def close(self):
+        pass
+
     def execute_action(self, action):
         pass
 
@@ -14,3 +18,6 @@ def render(self):
 
     def setRender(self, to_render):
         self.to_render = to_render
+
+    def get_action_space(self):
+        pass
diff --git a/rlkit/environments/vizdoom_environment.py b/rlkit/environments/vizdoom_environment.py
index b60a174..1c6e559 100644
--- a/rlkit/environments/vizdoom_environment.py
+++ b/rlkit/environments/vizdoom_environment.py
@@ -1,14 +1,77 @@
-from rlkit.core import BaseEnvironment
+import random
+import time
+
+from rlkit.core import BaseEnvironment, BaseActionSpace
 from vizdoom import *
 
 class VizDoomEnvironment(BaseEnvironment):
+    """Environment backed by a ViZDoom ``DoomGame`` instance."""
+
+    class VizDoomActionSpace(BaseActionSpace):
+        """Button vectors for basic.cfg: MOVE_LEFT, MOVE_RIGHT, ATTACK."""
+
+        def __init__(self):
+            self.actions = [
+                # http://www.cs.put.poznan.pl/visualdoomai/tutorial.html
+                [0, 0, 1],  # shoot
+                [1, 0, 0],  # left
+                [0, 1, 0],  # right
+            ]
+            super(VizDoomEnvironment.VizDoomActionSpace, self).__init__()
+
+        def sample(self):
+            """Return one action picked uniformly at random."""
+            return random.choice(self.actions)
+
     def __init__(self, params):
+        """Create the game; ``params`` may hold an optional ``config_path``."""
+        self.action_space = self.VizDoomActionSpace()
+        self.config_path = params.get("config_path", "./basic.cfg")
+        # The game must exist before BaseEnvironment.__init__ calls reset().
+        self.initialize_env()
         super(VizDoomEnvironment, self).__init__()
-        self.env_name = params["env_name"]
-
-        pass
 
     def initialize_env(self):
+        """Build and start the DoomGame from ``self.config_path``."""
         self.env = DoomGame()
-        self.env.load_config("../config/basic.cfg")
-        self.env.init()
\ No newline at end of file
+        self.env.load_config(self.config_path)
+        self.env.init()
+
+    def close(self):
+        """Shut down the underlying DoomGame instance."""
+        self.env.close()
+
+    def get_action_space(self):
+        return self.action_space
+
+    def reset(self, reset_values=True):
+        if reset_values:
+            self.reset_values()
+        self.reset_env()
+
+    def reset_values(self):
+        self.state = None
+        self.reward = None
+        self.done = False
+        self.info = None
+
+    def reset_env(self):
+        self.env.new_episode()
+
+    def step(self, action):
+        """Apply ``action`` and return ``(state, reward, done, info)``."""
+        self.reward = self.env.make_action(action)
+
+        # TODO: see if need to get image buffer
+        # TODO: see if this happens before/after reward
+        self.state = self.env.get_state()
+
+        self.done = self.env.is_episode_finished()
+        if not self.done:
+            self.info = self.state.game_variables
+        else:
+            self.info = None
+
+        print(action, self.done, self.env.get_total_reward(), self.info)
+        time.sleep(0.02)  # TODO: remove
+        return (self.state, self.reward, self.done, self.info, )
diff --git a/rlkit/trainers/basic_trainer.py b/rlkit/trainers/basic_trainer.py
index 9123e15..02bdac0 100644
--- a/rlkit/trainers/basic_trainer.py
+++ b/rlkit/trainers/basic_trainer.py
@@ -15,7 +15,7 @@ def __init__(self, params, agent, environment):
     def do_step(self):
         action = self.agent.get_action(self.environment.state)
         self.environment.step(action)
-        self.environment.render()  # TODO: find better solution
+        # self.environment.render()  # TODO: find better solution
 
     def train(self):
         try: