From f8c640cf48446fb062e3e4de80db6a6205d93c89 Mon Sep 17 00:00:00 2001 From: Luke Marshall Date: Fri, 6 Mar 2020 17:19:39 +1100 Subject: [PATCH] Some updates to run with more price bands etc. --- dqn_adversarial.py | 12 +++++++++--- market_config.py | 6 +++--- marketsim/logbook/logbook.py | 2 +- pyproject.toml | 1 + 4 files changed, 14 insertions(+), 7 deletions(-) diff --git a/dqn_adversarial.py b/dqn_adversarial.py index 16e0e62..ff31560 100644 --- a/dqn_adversarial.py +++ b/dqn_adversarial.py @@ -24,10 +24,10 @@ from market_config import params as market_config -label = "S2 Random lr=1e-3" +label = "S3 Random lr=1e-3" notes = """ - Max demand 8, num bands 4, max price 5. Reduces action space to 70. + Max demand 10, num bands 5, max price 10. 1e-3 LR. 50 mil steps. """ @@ -162,6 +162,7 @@ steps_per_testing_training_iteration = 250000 logbook().record_hyperparameter('steps_per_testing_training_iteration', steps_per_testing_training_iteration) + num_steps_completed = 0 # Train for a number of steps, then test and report. while num_steps_completed < nb_steps: @@ -173,12 +174,17 @@ nb_episodes = 20 dqn.test(env, nb_episodes=5, visualize=True) # Record to logbook. + logbook().set_label(label+" i="+str(num_steps_completed)) logbook().record_metadata('nb_episodes (testing)', nb_episodes) logbook().record_notes(notes + " \n Iteration"+str(num_steps_completed)+" "+pendulum.now().format('D/M HH:mm')) logbook().save_json() logbook().trim() - logbook().submit() + try: + logbook().submit() + except: + print(pendulum.now().format('D/M HH:mm')) + print("Logbook Submission Failed :( ") # After training is done, we save the weights. dqn.save_weights('dqn_{}_{}_weights.h5f'.format(ENV_NAME,participant_name), overwrite=True) diff --git a/market_config.py b/market_config.py index 0ea4d14..6043a90 100644 --- a/market_config.py +++ b/market_config.py @@ -4,7 +4,7 @@ 'PARTICIPANTS' : sorted(['GEN_1', 'GEN_2']), # This needs to be more than or equal to no_participants * no_bands. - 'MAX_DEMAND' : 8, + 'MAX_DEMAND' : 10, # Include the last set of bids in the observation space. 'REVEAL_PREVIOUS_BIDS':True, # Give the agent an example of how bidders behaved last time next demand was seen. @@ -17,9 +17,9 @@ # 'DEMAND_TYPE':'fixed', #Permanently set at half of max. # 'DEMAND_TYPE':'evolving', #Stays put, with low probability (1 in ten) change of moving up or down. - 'NUM_BANDS' : 4, + 'NUM_BANDS' : 5, 'MIN_PRICE' : 0, - 'MAX_PRICE' : 5, + 'MAX_PRICE' : 10, # 'MARKET_SERVER':'tcp://localhost:5570', #local # 'MARKET_SERVER':'tcp://138.68.254.184:5570', #digitalocean market-server-149-150 diff --git a/marketsim/logbook/logbook.py b/marketsim/logbook/logbook.py index 3fb5682..8b255d1 100644 --- a/marketsim/logbook/logbook.py +++ b/marketsim/logbook/logbook.py @@ -81,7 +81,7 @@ def record_epoch_reward(self, reward): self.data['timeseries']['epoch_reward']['data'].append(reward) def record_notes(self, notes): - self.data['notes'] += notes+"\n" + self.data['notes'] = notes+"\n" def record_bid(self, participant_label, price, volume, step_no): step_no = int(step_no) diff --git a/pyproject.toml b/pyproject.toml index 64c48d4..887809a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -19,6 +19,7 @@ requests = "^2.22.0" space-wrappers = {git = "https://github.com/ngc92/space-wrappers"} zmq = "^0.0.0" tensorflow = "1.13.1" +pydigree = "^1.0.1" [tool.poetry.dev-dependencies]